summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Andrew Hsieh <andrewhsieh@google.com> 2014-06-17 17:52:38 +0000
committer Android Git Automerger <android-git-automerger@android.com> 2014-06-17 17:52:38 +0000
commit 69cb72cbf200634691d59c916724c2dd5dc13b76 (patch)
tree 824215375d3cffda4ec2bfd478dd8e045dc36f91
parent 49a6724bb275c20c7fa36d6ae43faeed648853fc (diff)
parent e99278016e6285363bc20d1b35d4b9b5c4e8b0a0 (diff)
download x86_64-linux-android-4.9-lollipop-mr1-dev.tar.gz
am e9927801: Add x86_64 toolchain x86_64-linux-android-4.9 [refs: android-cts-5.1_r9 android-cts-5.1_r8 android-cts-5.1_r7 android-cts-5.1_r6 android-cts-5.1_r5 android-cts-5.1_r4 android-cts-5.1_r3 android-cts-5.1_r28 android-cts-5.1_r27 android-cts-5.1_r26 android-cts-5.1_r25 android-cts-5.1_r24 android-cts-5.1_r23 android-cts-5.1_r22 android-cts-5.1_r21 android-cts-5.1_r20 android-cts-5.1_r2 android-cts-5.1_r19 android-cts-5.1_r18 android-cts-5.1_r17 android-cts-5.1_r16 android-cts-5.1_r15 android-cts-5.1_r14 android-cts-5.1_r13 android-cts-5.1_r10 android-cts-5.1_r1 android-cts-5.0_r9 android-cts-5.0_r8 android-cts-5.0_r7 android-cts-5.0_r6 android-cts-5.0_r5 android-cts-5.0_r4 android-cts-5.0_r3 android-5.1.1_r9 android-5.1.1_r8 android-5.1.1_r7 android-5.1.1_r6 android-5.1.1_r5 android-5.1.1_r4 android-5.1.1_r38 android-5.1.1_r37 android-5.1.1_r36 android-5.1.1_r35 android-5.1.1_r34 android-5.1.1_r33 android-5.1.1_r30 android-5.1.1_r3 android-5.1.1_r29 android-5.1.1_r28 android-5.1.1_r26 android-5.1.1_r25 android-5.1.1_r24 android-5.1.1_r23 android-5.1.1_r22 android-5.1.1_r20 android-5.1.1_r2 android-5.1.1_r19 android-5.1.1_r18 android-5.1.1_r17 android-5.1.1_r16 android-5.1.1_r15 android-5.1.1_r14 android-5.1.1_r13 android-5.1.1_r12 android-5.1.1_r10 android-5.1.1_r1 android-5.1.0_r5 android-5.1.0_r4 android-5.1.0_r3 android-5.1.0_r1 android-5.0.2_r3 android-5.0.2_r1 android-5.0.1_r1 android-5.0.0_r7 android-5.0.0_r6 android-5.0.0_r5.1 android-5.0.0_r5 android-5.0.0_r4 android-5.0.0_r3 android-5.0.0_r2 android-5.0.0_r1 lollipop-release lollipop-mr1-wfc-release lollipop-mr1-release lollipop-mr1-fi-release lollipop-mr1-dev lollipop-mr1-cts-release lollipop-dev lollipop-cts-release]
* commit 'e99278016e6285363bc20d1b35d4b9b5c4e8b0a0': Add x86_64 toolchain x86_64-linux-android-4.9 Initial empty repository
-rw-r--r--COPYING340
-rw-r--r--COPYING.LIB510
-rw-r--r--SOURCES21
-rwxr-xr-xbin/x86_64-linux-android-addr2linebin0 -> 871952 bytes
-rwxr-xr-xbin/x86_64-linux-android-arbin0 -> 900536 bytes
-rwxr-xr-xbin/x86_64-linux-android-asbin0 -> 1528696 bytes
l---------bin/x86_64-linux-android-c++1
-rwxr-xr-xbin/x86_64-linux-android-c++filtbin0 -> 867472 bytes
-rwxr-xr-xbin/x86_64-linux-android-cppbin0 -> 780312 bytes
-rwxr-xr-xbin/x86_64-linux-android-dwpbin0 -> 2720504 bytes
-rwxr-xr-xbin/x86_64-linux-android-elfeditbin0 -> 27928 bytes
-rwxr-xr-xbin/x86_64-linux-android-g++bin0 -> 781144 bytes
-rwxr-xr-xbin/x86_64-linux-android-gccbin0 -> 780312 bytes
l---------bin/x86_64-linux-android-gcc-4.91
-rwxr-xr-xbin/x86_64-linux-android-gcc-arbin0 -> 25496 bytes
-rwxr-xr-xbin/x86_64-linux-android-gcc-nmbin0 -> 25464 bytes
-rwxr-xr-xbin/x86_64-linux-android-gcc-ranlibbin0 -> 25464 bytes
-rwxr-xr-xbin/x86_64-linux-android-gcovbin0 -> 417424 bytes
-rwxr-xr-xbin/x86_64-linux-android-gdbbin0 -> 7101912 bytes
-rwxr-xr-xbin/x86_64-linux-android-gprofbin0 -> 934608 bytes
l---------bin/x86_64-linux-android-ld1
-rwxr-xr-xbin/x86_64-linux-android-ld.bfdbin0 -> 1673944 bytes
-rwxr-xr-xbin/x86_64-linux-android-ld.goldbin0 -> 4028072 bytes
l---------bin/x86_64-linux-android-ld.mcld1
-rwxr-xr-xbin/x86_64-linux-android-nmbin0 -> 880784 bytes
-rwxr-xr-xbin/x86_64-linux-android-objcopybin0 -> 1055760 bytes
-rwxr-xr-xbin/x86_64-linux-android-objdumpbin0 -> 1927792 bytes
-rwxr-xr-xbin/x86_64-linux-android-ranlibbin0 -> 900568 bytes
-rwxr-xr-xbin/x86_64-linux-android-readelfbin0 -> 408032 bytes
-rwxr-xr-xbin/x86_64-linux-android-sizebin0 -> 871856 bytes
-rwxr-xr-xbin/x86_64-linux-android-stringsbin0 -> 871856 bytes
-rwxr-xr-xbin/x86_64-linux-android-stripbin0 -> 1055760 bytes
-rw-r--r--include/gdb/jit-reader.h346
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/32/crtbegin.obin0 -> 2584 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/32/crtbeginS.obin0 -> 2788 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/32/crtbeginT.obin0 -> 2584 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/32/crtend.obin0 -> 938 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/32/crtendS.obin0 -> 938 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/32/crtfastmath.obin0 -> 4732 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/32/crtprec32.obin0 -> 2472 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/32/crtprec64.obin0 -> 2484 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/32/crtprec80.obin0 -> 2472 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/32/libgcc.abin0 -> 5179854 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/32/libgcov.abin0 -> 141042 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/crtbegin.obin0 -> 3288 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/crtbeginS.obin0 -> 3624 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/crtbeginT.obin0 -> 3288 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/crtend.obin0 -> 1354 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/crtendS.obin0 -> 1354 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/crtfastmath.obin0 -> 3920 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/crtprec32.obin0 -> 3520 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/crtprec64.obin0 -> 3528 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/crtprec80.obin0 -> 3520 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include-fixed/README14
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include-fixed/limits.h171
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include-fixed/linux/a.out.h235
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include-fixed/stdio.h465
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include-fixed/syslimits.h8
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/adxintrin.h49
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/ammintrin.h93
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/arm_neon.h8643
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/avx2intrin.h1889
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/avx512cdintrin.h184
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/avx512erintrin.h394
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/avx512fintrin.h12915
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/avx512pfintrin.h212
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/avxintrin.h1463
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/bmi2intrin.h109
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/bmiintrin.h184
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/bmmintrin.h29
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/cpuid.h277
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/cross-stdarg.h72
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/emmintrin.h1541
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/f16cintrin.h98
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/float.h277
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/fma4intrin.h241
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/fmaintrin.h302
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/fxsrintrin.h73
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/ia32intrin.h293
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/immintrin.h177
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/iso646.h45
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/lwpintrin.h105
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/lzcntintrin.h75
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/mm3dnow.h218
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/mm_malloc.h63
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/mmintrin.h942
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/nmmintrin.h33
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/omp.h127
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/pmmintrin.h132
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/popcntintrin.h53
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/prfchwintrin.h37
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/rdseedintrin.h66
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/rtmintrin.h84
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/shaintrin.h98
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/smmintrin.h862
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/stdalign.h39
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/stdarg.h126
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/stdatomic.h252
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/stdbool.h50
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/stddef.h439
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/stdfix.h204
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/stdint-gcc.h263
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/stdint.h14
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/stdnoreturn.h35
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/tbmintrin.h180
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/tmmintrin.h249
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/unwind.h293
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/varargs.h7
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/wmmintrin.h132
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/x86intrin.h78
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/xmmintrin.h1265
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/xopintrin.h844
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/xsaveintrin.h72
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/xsaveoptintrin.h58
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/include/xtestintrin.h51
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/libgcc.abin0 -> 7240024 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/libgcov.abin0 -> 198306 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/x32/crtbegin.obin0 -> 2352 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/x32/crtbeginS.obin0 -> 2572 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/x32/crtbeginT.obin0 -> 2352 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/x32/crtend.obin0 -> 938 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/x32/crtendS.obin0 -> 938 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/x32/crtfastmath.obin0 -> 2776 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/x32/crtprec32.obin0 -> 2520 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/x32/crtprec64.obin0 -> 2536 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/x32/crtprec80.obin0 -> 2520 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/x32/libgcc.abin0 -> 6003508 bytes
-rw-r--r--lib/gcc/x86_64-linux-android/4.9/x32/libgcov.abin0 -> 146934 bytes
-rw-r--r--lib64/libiberty.abin0 -> 437914 bytes
-rwxr-xr-xlibexec/gcc/x86_64-linux-android/4.9/cc1bin0 -> 17356112 bytes
-rwxr-xr-xlibexec/gcc/x86_64-linux-android/4.9/cc1plusbin0 -> 18478480 bytes
-rwxr-xr-xlibexec/gcc/x86_64-linux-android/4.9/collect2bin0 -> 438168 bytes
l---------libexec/gcc/x86_64-linux-android/4.9/liblto_plugin.so1
l---------libexec/gcc/x86_64-linux-android/4.9/liblto_plugin.so.01
-rwxr-xr-xlibexec/gcc/x86_64-linux-android/4.9/liblto_plugin.so.0.0.0bin0 -> 86771 bytes
-rwxr-xr-xlibexec/gcc/x86_64-linux-android/4.9/lto-wrapperbin0 -> 634600 bytes
-rwxr-xr-xlibexec/gcc/x86_64-linux-android/4.9/lto1bin0 -> 16626216 bytes
-rwxr-xr-xlibexec/gcc/x86_64-linux-android/4.9/plugin/gengtypebin0 -> 531376 bytes
-rw-r--r--share/gdb/python/gdb/__init__.py124
-rw-r--r--share/gdb/python/gdb/command/__init__.py16
-rw-r--r--share/gdb/python/gdb/command/explore.py760
-rw-r--r--share/gdb/python/gdb/command/pretty_printers.py368
-rw-r--r--share/gdb/python/gdb/command/prompt.py66
-rw-r--r--share/gdb/python/gdb/command/type_printers.py125
-rw-r--r--share/gdb/python/gdb/function/__init__.py14
-rw-r--r--share/gdb/python/gdb/function/strfns.py108
-rw-r--r--share/gdb/python/gdb/printing.py263
-rw-r--r--share/gdb/python/gdb/prompt.py148
-rw-r--r--share/gdb/python/gdb/types.py176
-rw-r--r--share/gdb/syscalls/amd64-linux.xml314
-rw-r--r--share/gdb/syscalls/gdb-syscalls.dtd14
-rw-r--r--share/gdb/syscalls/i386-linux.xml340
-rw-r--r--share/gdb/syscalls/mips-n32-linux.xml319
-rw-r--r--share/gdb/syscalls/mips-n64-linux.xml312
-rw-r--r--share/gdb/syscalls/mips-o32-linux.xml347
-rw-r--r--share/gdb/syscalls/ppc-linux.xml310
-rw-r--r--share/gdb/syscalls/ppc64-linux.xml295
-rw-r--r--share/gdb/syscalls/sparc-linux.xml344
-rw-r--r--share/gdb/syscalls/sparc64-linux.xml326
l---------x86_64-linux-android/bin/ar1
l---------x86_64-linux-android/bin/as1
l---------x86_64-linux-android/bin/ld1
l---------x86_64-linux-android/bin/ld.bfd1
l---------x86_64-linux-android/bin/ld.gold1
l---------x86_64-linux-android/bin/ld.mcld1
l---------x86_64-linux-android/bin/nm1
l---------x86_64-linux-android/bin/objcopy1
l---------x86_64-linux-android/bin/objdump1
l---------x86_64-linux-android/bin/ranlib1
l---------x86_64-linux-android/bin/strip1
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf32_x86_64.x227
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf32_x86_64.xbn224
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf32_x86_64.xc228
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf32_x86_64.xd226
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf32_x86_64.xdc228
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf32_x86_64.xdw227
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf32_x86_64.xn226
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf32_x86_64.xr154
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf32_x86_64.xs217
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf32_x86_64.xsc220
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf32_x86_64.xsw218
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf32_x86_64.xu155
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf32_x86_64.xw227
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_i386.x209
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_i386.xbn206
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_i386.xc211
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_i386.xd208
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_i386.xdc211
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_i386.xdw210
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_i386.xn208
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_i386.xr137
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_i386.xs199
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_i386.xsc203
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_i386.xsw201
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_i386.xu138
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_i386.xw210
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_k1om.x230
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_k1om.xbn227
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_k1om.xc230
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_k1om.xd229
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_k1om.xdc230
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_k1om.xdw229
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_k1om.xn229
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_k1om.xr158
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_k1om.xs220
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_k1om.xsc220
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_k1om.xsw218
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_k1om.xu159
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_k1om.xw229
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_l1om.x230
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_l1om.xbn227
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_l1om.xc230
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_l1om.xd229
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_l1om.xdc230
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_l1om.xdw229
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_l1om.xn229
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_l1om.xr158
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_l1om.xs220
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_l1om.xsc220
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_l1om.xsw218
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_l1om.xu159
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_l1om.xw229
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_x86_64.x227
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_x86_64.xbn224
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_x86_64.xc228
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_x86_64.xd226
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_x86_64.xdc228
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_x86_64.xdw227
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_x86_64.xn226
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_x86_64.xr154
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_x86_64.xs217
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_x86_64.xsc220
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_x86_64.xsw218
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_x86_64.xu155
-rw-r--r--x86_64-linux-android/lib/ldscripts/elf_x86_64.xw227
-rw-r--r--x86_64-linux-android/lib/ldscripts/i386linux.x47
-rw-r--r--x86_64-linux-android/lib/ldscripts/i386linux.xbn46
-rw-r--r--x86_64-linux-android/lib/ldscripts/i386linux.xn46
-rw-r--r--x86_64-linux-android/lib/ldscripts/i386linux.xr37
-rw-r--r--x86_64-linux-android/lib/ldscripts/i386linux.xu38
-rw-r--r--x86_64-linux-android/lib/libatomic.abin0 -> 176148 bytes
-rw-r--r--x86_64-linux-android/lib/libgomp.abin0 -> 439562 bytes
-rw-r--r--x86_64-linux-android/lib/libgomp.spec3
-rw-r--r--x86_64-linux-android/lib64/libatomic.abin0 -> 299644 bytes
-rw-r--r--x86_64-linux-android/lib64/libgomp.abin0 -> 671510 bytes
-rw-r--r--x86_64-linux-android/lib64/libgomp.spec3
-rw-r--r--x86_64-linux-android/libx32/libatomic.abin0 -> 222388 bytes
-rw-r--r--x86_64-linux-android/libx32/libgomp.abin0 -> 477664 bytes
-rw-r--r--x86_64-linux-android/libx32/libgomp.spec3
249 files changed, 58266 insertions, 0 deletions
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..623b625
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,340 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/COPYING.LIB b/COPYING.LIB
new file mode 100644
index 0000000..2d2d780
--- /dev/null
+++ b/COPYING.LIB
@@ -0,0 +1,510 @@
+
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL. It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+ This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it. You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations
+below.
+
+ When we speak of free software, we are referring to freedom of use,
+not price. Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+ To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights. These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+ For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you. You must make sure that they, too, receive or can get the source
+code. If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it. And you must show them these terms so they know their rights.
+
+ We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+ To protect each distributor, we want to make it very clear that
+there is no warranty for the free library. Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+
+ Finally, software patents pose a constant threat to the existence of
+any free program. We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder. Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+ Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License. This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License. We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+ When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library. The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom. The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+ We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License. It also provides other free software developers Less
+of an advantage over competing non-free programs. These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries. However, the Lesser license provides advantages in certain
+special circumstances.
+
+ For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it
+becomes a de-facto standard. To achieve this, non-free programs must
+be allowed to use the library. A more frequent case is that a free
+library does the same job as widely used non-free libraries. In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+ In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software. For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+ Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+ The precise terms and conditions for copying, distribution and
+modification follow. Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library". The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+
+ GNU LESSER GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+ A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+ The "Library", below, refers to any such software library or work
+which has been distributed under these terms. A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language. (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+ "Source code" for a work means the preferred form of the work for
+making modifications to it. For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control
+compilation and installation of the library.
+
+ Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it). Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+ 1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+ You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+ 2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no
+ charge to all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a
+ table of data to be supplied by an application program that uses
+ the facility, other than as an argument passed when the facility
+ is invoked, then you must make a good faith effort to ensure that,
+ in the event an application does not supply such function or
+ table, the facility still operates, and performs whatever part of
+ its purpose remains meaningful.
+
+ (For example, a function in a library to compute square roots has
+ a purpose that is entirely well-defined independent of the
+ application. Therefore, Subsection 2d requires that any
+ application-supplied function or table used by this function must
+ be optional: if the application does not supply it, the square
+ root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library. To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License. (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.) Do not make any other change in
+these notices.
+
+ Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+ This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+ 4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+ If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library". Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+ However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library". The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+ When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library. The
+threshold for this to be true is not precisely defined by law.
+
+ If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work. (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+ Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+
+ 6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+ You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License. You must supply a copy of this License. If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License. Also, you must do one
+of these things:
+
+ a) Accompany the work with the complete corresponding
+ machine-readable source code for the Library including whatever
+ changes were used in the work (which must be distributed under
+ Sections 1 and 2 above); and, if the work is an executable linked
+ with the Library, with the complete machine-readable "work that
+ uses the Library", as object code and/or source code, so that the
+ user can modify the Library and then relink to produce a modified
+ executable containing the modified Library. (It is understood
+ that the user who changes the contents of definitions files in the
+ Library will not necessarily be able to recompile the application
+ to use the modified definitions.)
+
+ b) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (1) uses at run time a
+ copy of the library already present on the user's computer system,
+ rather than copying library functions into the executable, and (2)
+ will operate properly with a modified version of the library, if
+ the user installs one, as long as the modified version is
+ interface-compatible with the version that the work was made with.
+
+ c) Accompany the work with a written offer, valid for at least
+ three years, to give the same user the materials specified in
+ Subsection 6a, above, for a charge no more than the cost of
+ performing this distribution.
+
+ d) If distribution of the work is made by offering access to copy
+ from a designated place, offer equivalent access to copy the above
+ specified materials from the same place.
+
+ e) Verify that the user has already received a copy of these
+ materials or that you have already sent this user a copy.
+
+ For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it. However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+ It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system. Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+
+ 7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work
+ based on the Library, uncombined with any other library
+ facilities. This must be distributed under the terms of the
+ Sections above.
+
+ b) Give prominent notice with the combined library of the fact
+ that part of it is a work based on the Library, and explaining
+ where to find the accompanying uncombined form of the same work.
+
+ 8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License. Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License. However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+ 9. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Library or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+ 10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+
+ 11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all. For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply, and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License
+may add an explicit geographical distribution limitation excluding those
+countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation. If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+ 14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission. For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this. Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+ NO WARRANTY
+
+ 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Libraries
+
+ If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change. You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms
+of the ordinary General Public License).
+
+ To apply these terms, attach the following notices to the library.
+It is safest to attach them to the start of each source file to most
+effectively convey the exclusion of warranty; and each file should
+have at least the "copyright" line and a pointer to where the full
+notice is found.
+
+
+ <one line to give the library's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or
+your school, if any, to sign a "copyright disclaimer" for the library,
+if necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the
+ library `Frob' (a library for tweaking knobs) written by James
+ Random Hacker.
+
+ <signature of Ty Coon>, 1 April 1990
+ Ty Coon, President of Vice
+
+That's all there is to it!
+
+
diff --git a/SOURCES b/SOURCES
new file mode 100644
index 0000000..3c437f9
--- /dev/null
+++ b/SOURCES
@@ -0,0 +1,21 @@
+toolchain/build.git 75542e77b565c2af968e48c1b12b32f343d913ae Fix binutils-2.24 detection
+toolchain/gmp.git e6b9669dafc6a5f83c80b4b4176359b78bccdc90 Add gmp-5.0.5.tar.bz2
+toolchain/mpfr.git bfcf1bfa38469208aaad8873cd4c68781061d90f add mpfr-3.1.1.tar.bz2
+toolchain/mpc.git 835d16e92eed875638a8b5d552034c3b1aae045b add mpc-1.0.1.tar.gz
+toolchain/cloog.git 98972d5434ffcb4d11d2c81a46600e9a1cda9110 MinGW-w64 build fix (lacks ffs declaration)
+toolchain/isl.git b05d4572958c5d497da793f3317084bab90c3033 add isl-0.11.1.tar.bz2 needed by GCC 4.8 with graphite
+toolchain/ppl.git 8ba1875b4c5341d902321761022a6d2a0b5b19a4 add ppl-1.0.tar.bz2
+toolchain/expat.git 40172a0ae9d40a068f1e1a48ffcf6a1ccf765ed5 expat package for building gdb-7.3
+toolchain/binutils.git 2a6558a8ecfb81d75215b4ec7dc61113e12cfd5f Merge "[2.24] Fix assert failure with --emit-relocs and .eh_frame sections by backporting the following patch from mainline:"
+toolchain/gcc.git 8f2898c09773292b6ba523cee7e528a320ff1e02 Merge "[4.8, 4.9] Fix aarch64/arm_neon.h vqdmulh_n_s16"
+toolchain/gdb.git df2206c67d812619976f49319911ed12c8220b11 Fix broken x86_64 gdbserver build
+toolchain/python.git 0d4194853e08d3244931523470331c00dfb94863 Fix python build inc_dirs[] and lib_dirs[] for linux/darwin
+toolchain/perl.git 1121daca35c6c692602621eab28d4de19f0b347d Add -Dcc_as_ld to configure
+toolchain/mclinker.git 5fca8b9c9c671d6c01f428c00ca131e65042a9fd Merge upstream mclinker 2.7
+toolchain/yasm.git 87c09baff80ca5bbe938392d8f320e621707f317 test commit
+toolchain/clang.git (release_34) 32d5296021ee140d3afa467ef54de7f9daf9c9c1 Backport clang svn@r207520
+toolchain/llvm.git (release_34) 4e3e3137a7d3c271f5190ac9ef8af7848e5f67e2 Fix missing change for expandVAArg.
+toolchain/compiler-rt.git (release_34) b065fccd8ab47b4de4610093fb98514fc5df2362 Alias __aeabi_fcmpun to __unordsf2.
+toolchain/clang.git (release_33) 260f965f1273cd0aa8a1270885519df8e6317062 [ndk] Fix diagnostics for C-style cast to function type.
+toolchain/llvm.git (release_33) 8609a3469a8126eb6fb99ff65906bcc20c272d95 [ndk] Fix createBranchWeights() assertions.
+toolchain/compiler-rt.git (release_33) c880feaaa8829681a025d29a33704c18e21e87e1 Misc fixes for compiler_rt
diff --git a/bin/x86_64-linux-android-addr2line b/bin/x86_64-linux-android-addr2line
new file mode 100755
index 0000000..a68613f
--- /dev/null
+++ b/bin/x86_64-linux-android-addr2line
Binary files differ
diff --git a/bin/x86_64-linux-android-ar b/bin/x86_64-linux-android-ar
new file mode 100755
index 0000000..a7224ad
--- /dev/null
+++ b/bin/x86_64-linux-android-ar
Binary files differ
diff --git a/bin/x86_64-linux-android-as b/bin/x86_64-linux-android-as
new file mode 100755
index 0000000..b71340b
--- /dev/null
+++ b/bin/x86_64-linux-android-as
Binary files differ
diff --git a/bin/x86_64-linux-android-c++ b/bin/x86_64-linux-android-c++
new file mode 120000
index 0000000..425d82a
--- /dev/null
+++ b/bin/x86_64-linux-android-c++
@@ -0,0 +1 @@
+x86_64-linux-android-g++ \ No newline at end of file
diff --git a/bin/x86_64-linux-android-c++filt b/bin/x86_64-linux-android-c++filt
new file mode 100755
index 0000000..d7f5957
--- /dev/null
+++ b/bin/x86_64-linux-android-c++filt
Binary files differ
diff --git a/bin/x86_64-linux-android-cpp b/bin/x86_64-linux-android-cpp
new file mode 100755
index 0000000..2b5e18b
--- /dev/null
+++ b/bin/x86_64-linux-android-cpp
Binary files differ
diff --git a/bin/x86_64-linux-android-dwp b/bin/x86_64-linux-android-dwp
new file mode 100755
index 0000000..ae89a89
--- /dev/null
+++ b/bin/x86_64-linux-android-dwp
Binary files differ
diff --git a/bin/x86_64-linux-android-elfedit b/bin/x86_64-linux-android-elfedit
new file mode 100755
index 0000000..0ed1183
--- /dev/null
+++ b/bin/x86_64-linux-android-elfedit
Binary files differ
diff --git a/bin/x86_64-linux-android-g++ b/bin/x86_64-linux-android-g++
new file mode 100755
index 0000000..41b1a5a
--- /dev/null
+++ b/bin/x86_64-linux-android-g++
Binary files differ
diff --git a/bin/x86_64-linux-android-gcc b/bin/x86_64-linux-android-gcc
new file mode 100755
index 0000000..40e37b8
--- /dev/null
+++ b/bin/x86_64-linux-android-gcc
Binary files differ
diff --git a/bin/x86_64-linux-android-gcc-4.9 b/bin/x86_64-linux-android-gcc-4.9
new file mode 120000
index 0000000..c953618
--- /dev/null
+++ b/bin/x86_64-linux-android-gcc-4.9
@@ -0,0 +1 @@
+x86_64-linux-android-gcc \ No newline at end of file
diff --git a/bin/x86_64-linux-android-gcc-ar b/bin/x86_64-linux-android-gcc-ar
new file mode 100755
index 0000000..721d47b
--- /dev/null
+++ b/bin/x86_64-linux-android-gcc-ar
Binary files differ
diff --git a/bin/x86_64-linux-android-gcc-nm b/bin/x86_64-linux-android-gcc-nm
new file mode 100755
index 0000000..22a8e20
--- /dev/null
+++ b/bin/x86_64-linux-android-gcc-nm
Binary files differ
diff --git a/bin/x86_64-linux-android-gcc-ranlib b/bin/x86_64-linux-android-gcc-ranlib
new file mode 100755
index 0000000..f702a73
--- /dev/null
+++ b/bin/x86_64-linux-android-gcc-ranlib
Binary files differ
diff --git a/bin/x86_64-linux-android-gcov b/bin/x86_64-linux-android-gcov
new file mode 100755
index 0000000..659aeab
--- /dev/null
+++ b/bin/x86_64-linux-android-gcov
Binary files differ
diff --git a/bin/x86_64-linux-android-gdb b/bin/x86_64-linux-android-gdb
new file mode 100755
index 0000000..267a5bd
--- /dev/null
+++ b/bin/x86_64-linux-android-gdb
Binary files differ
diff --git a/bin/x86_64-linux-android-gprof b/bin/x86_64-linux-android-gprof
new file mode 100755
index 0000000..e5a1fc9
--- /dev/null
+++ b/bin/x86_64-linux-android-gprof
Binary files differ
diff --git a/bin/x86_64-linux-android-ld b/bin/x86_64-linux-android-ld
new file mode 120000
index 0000000..3d3aa39
--- /dev/null
+++ b/bin/x86_64-linux-android-ld
@@ -0,0 +1 @@
+x86_64-linux-android-ld.gold \ No newline at end of file
diff --git a/bin/x86_64-linux-android-ld.bfd b/bin/x86_64-linux-android-ld.bfd
new file mode 100755
index 0000000..e62c0da
--- /dev/null
+++ b/bin/x86_64-linux-android-ld.bfd
Binary files differ
diff --git a/bin/x86_64-linux-android-ld.gold b/bin/x86_64-linux-android-ld.gold
new file mode 100755
index 0000000..4778b05
--- /dev/null
+++ b/bin/x86_64-linux-android-ld.gold
Binary files differ
diff --git a/bin/x86_64-linux-android-ld.mcld b/bin/x86_64-linux-android-ld.mcld
new file mode 120000
index 0000000..ef0dfe4
--- /dev/null
+++ b/bin/x86_64-linux-android-ld.mcld
@@ -0,0 +1 @@
+../../../../../toolchains/llvm-3.4/prebuilt/linux-x86_64/bin/ld.mcld \ No newline at end of file
diff --git a/bin/x86_64-linux-android-nm b/bin/x86_64-linux-android-nm
new file mode 100755
index 0000000..e087950
--- /dev/null
+++ b/bin/x86_64-linux-android-nm
Binary files differ
diff --git a/bin/x86_64-linux-android-objcopy b/bin/x86_64-linux-android-objcopy
new file mode 100755
index 0000000..d3564fa
--- /dev/null
+++ b/bin/x86_64-linux-android-objcopy
Binary files differ
diff --git a/bin/x86_64-linux-android-objdump b/bin/x86_64-linux-android-objdump
new file mode 100755
index 0000000..03e3d8e
--- /dev/null
+++ b/bin/x86_64-linux-android-objdump
Binary files differ
diff --git a/bin/x86_64-linux-android-ranlib b/bin/x86_64-linux-android-ranlib
new file mode 100755
index 0000000..2776b20
--- /dev/null
+++ b/bin/x86_64-linux-android-ranlib
Binary files differ
diff --git a/bin/x86_64-linux-android-readelf b/bin/x86_64-linux-android-readelf
new file mode 100755
index 0000000..6cde6c5
--- /dev/null
+++ b/bin/x86_64-linux-android-readelf
Binary files differ
diff --git a/bin/x86_64-linux-android-size b/bin/x86_64-linux-android-size
new file mode 100755
index 0000000..fe1383d
--- /dev/null
+++ b/bin/x86_64-linux-android-size
Binary files differ
diff --git a/bin/x86_64-linux-android-strings b/bin/x86_64-linux-android-strings
new file mode 100755
index 0000000..fe096d7
--- /dev/null
+++ b/bin/x86_64-linux-android-strings
Binary files differ
diff --git a/bin/x86_64-linux-android-strip b/bin/x86_64-linux-android-strip
new file mode 100755
index 0000000..9640255
--- /dev/null
+++ b/bin/x86_64-linux-android-strip
Binary files differ
diff --git a/include/gdb/jit-reader.h b/include/gdb/jit-reader.h
new file mode 100644
index 0000000..7cff81a
--- /dev/null
+++ b/include/gdb/jit-reader.h
@@ -0,0 +1,346 @@
+/* JIT declarations for GDB, the GNU Debugger.
+
+ Copyright (C) 2011-2013 Free Software Foundation, Inc.
+
+ This file is part of GDB.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef GDB_JIT_READER_H
+#define GDB_JIT_READER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Versioning information. See gdb_reader_funcs. */
+
+#define GDB_READER_INTERFACE_VERSION 1
+
+/* Readers must be released under a GPL compatible license. To
+ declare that the reader is indeed released under a GPL compatible
+ license, invoke the macro GDB_DECLARE_GPL_COMPATIBLE_READER in a
+ source file. */
+
+#ifdef __cplusplus
+#define GDB_DECLARE_GPL_COMPATIBLE_READER \
+ extern "C" { \
+ extern int plugin_is_GPL_compatible (void); \
+ extern int plugin_is_GPL_compatible (void) \
+ { \
+ return 0; \
+ } \
+ }
+
+#else
+
+#define GDB_DECLARE_GPL_COMPATIBLE_READER \
+ extern int plugin_is_GPL_compatible (void); \
+ extern int plugin_is_GPL_compatible (void) \
+ { \
+ return 0; \
+ }
+
+#endif
+
+/* Represents an address on the target system. */
+
+typedef unsigned long GDB_CORE_ADDR;
+
+/* Return status codes. */
+
+enum gdb_status {
+ GDB_FAIL = 0,
+ GDB_SUCCESS = 1
+};
+
+struct gdb_object;
+struct gdb_symtab;
+struct gdb_block;
+struct gdb_symbol_callbacks;
+
+/* An array of these are used to represent a map from code addresses to line
+ numbers in the source file. */
+
+struct gdb_line_mapping
+{
+ int line;
+ GDB_CORE_ADDR pc;
+};
+
+/* Create a new GDB code object. Each code object can have one or
+ more symbol tables, each representing a compiled source file. */
+
+typedef struct gdb_object *(gdb_object_open) (struct gdb_symbol_callbacks *cb);
+
+/* The callback used to create a new symbol table. CB is the
+ gdb_symbol_callbacks which the structure is part of. FILE_NAME is
+ an (optionally NULL) file name to associate with this new symbol
+ table.
+
+ Returns a new instance to gdb_symtab that can later be passed to
+ gdb_block_open, gdb_symtab_add_line_mapping and gdb_symtab_close. */
+
+typedef struct gdb_symtab *(gdb_symtab_open) (struct gdb_symbol_callbacks *cb,
+ struct gdb_object *obj,
+ const char *file_name);
+
+/* Creates a new block in a given symbol table. A symbol table is a
+ forest of blocks, each block representing a code address range and
+ a corresponding (optionally NULL) NAME. In case the block
+ corresponds to a function, the NAME passed should be the name of
+ the function.
+
+ If the new block to be created is a child of (i.e. is nested in)
+ another block, the parent block can be passed in PARENT. SYMTAB is
+ the symbol table the new block is to belong in. BEGIN, END is the
+ code address range the block corresponds to.
+
+ Returns a new instance of gdb_block, which, as of now, has no use.
+ Note that the gdb_block returned must not be freed by the
+ caller. */
+
+typedef struct gdb_block *(gdb_block_open) (struct gdb_symbol_callbacks *cb,
+ struct gdb_symtab *symtab,
+ struct gdb_block *parent,
+ GDB_CORE_ADDR begin,
+ GDB_CORE_ADDR end,
+ const char *name);
+
+/* Adds a PC to line number mapping for the symbol table SYMTAB.
+ NLINES is the number of elements in LINES, each element
+ corresponding to one (PC, line) pair. */
+
+typedef void (gdb_symtab_add_line_mapping) (struct gdb_symbol_callbacks *cb,
+ struct gdb_symtab *symtab,
+ int nlines,
+ struct gdb_line_mapping *lines);
+
+/* Close the symtab SYMTAB. This signals to GDB that no more blocks
+ will be opened on this symtab. */
+
+typedef void (gdb_symtab_close) (struct gdb_symbol_callbacks *cb,
+ struct gdb_symtab *symtab);
+
+
+/* Closes the gdb_object OBJ and adds the emitted information into
+ GDB's internal structures. Once this is done, the debug
+ information will be picked up and used; this will usually be the
+ last operation in gdb_read_debug_info. */
+
+typedef void (gdb_object_close) (struct gdb_symbol_callbacks *cb,
+ struct gdb_object *obj);
+
+/* Reads LEN bytes from TARGET_MEM in the target's virtual address
+ space into GDB_BUF.
+
+ Returns GDB_FAIL on failure, and GDB_SUCCESS on success. */
+
+typedef enum gdb_status (gdb_target_read) (GDB_CORE_ADDR target_mem,
+ void *gdb_buf, int len);
+
+/* The list of callbacks that are passed to read. These callbacks are
+ to be used to construct the symbol table. The functions have been
+ described above. */
+
+struct gdb_symbol_callbacks
+{
+ gdb_object_open *object_open;
+ gdb_symtab_open *symtab_open;
+ gdb_block_open *block_open;
+ gdb_symtab_close *symtab_close;
+ gdb_object_close *object_close;
+
+ gdb_symtab_add_line_mapping *line_mapping_add;
+ gdb_target_read *target_read;
+
+ /* For internal use by GDB. */
+ void *priv_data;
+};
+
+/* Forward declaration. */
+
+struct gdb_reg_value;
+
+/* A function of this type is used to free a gdb_reg_value. See the
+ comment on `free' in struct gdb_reg_value. */
+
+typedef void (gdb_reg_value_free) (struct gdb_reg_value *);
+
+/* Denotes the value of a register. */
+
+struct gdb_reg_value
+{
+ /* The size of the register in bytes. The reader need not set this
+ field. This will be set for (defined) register values being read
+ from GDB using reg_get. */
+ int size;
+
+ /* Set to non-zero if the value for the register is known. The
+ registers for which the reader does not call reg_set are also
+ assumed to be undefined. */
+ int defined;
+
+ /* Since gdb_reg_value is a variable sized structure, it will
+ usually be allocated on the heap. This function is expected to
+ contain the corresponding "free" function.
+
+ When a pointer to gdb_reg_value is being sent from GDB to the
+ reader (via gdb_unwind_reg_get), the reader is expected to call
+ this function (with the same gdb_reg_value as argument) once it
+ is done with the value.
+
+ When the reader sends a gdb_reg_value to GDB (via
+ gdb_unwind_reg_set), it is expected to set this field to point to
+ an appropriate cleanup routine (or to NULL if no cleanup is
+ required). */
+ gdb_reg_value_free *free;
+
+ /* The value of the register. */
+ unsigned char value[1];
+};
+
+/* get_frame_id in gdb_reader_funcs is to return a gdb_frame_id
+ corresponding to the current frame. The registers corresponding to
+ the current frame can be read using reg_get. Calling get_frame_id
+ on a particular frame should return the same gdb_frame_id
+ throughout its lifetime (i.e. till before it gets unwound). One
+ way to do this is by having the CODE_ADDRESS point to the
+ function's first instruction and STACK_ADDRESS point to the value
+ of the stack pointer when entering the function. */
+
+struct gdb_frame_id
+{
+ GDB_CORE_ADDR code_address;
+ GDB_CORE_ADDR stack_address;
+};
+
+/* Forward declaration. */
+
+struct gdb_unwind_callbacks;
+
+/* Returns the value of a particular register in the current frame.
+ The current frame is the frame that needs to be unwound into the
+ outer (earlier) frame.
+
+ CB is the struct gdb_unwind_callbacks * the callback belongs to.
+ REGNUM is the DWARF register number of the register that needs to
+ be unwound.
+
+ Returns the gdb_reg_value corresponding to the register requested.
+ In case the value of the register has been optimized away or
+ is otherwise unavailable, the defined flag in the returned
+ gdb_reg_value will be zero. */
+
+typedef struct gdb_reg_value *(gdb_unwind_reg_get)
+ (struct gdb_unwind_callbacks *cb, int regnum);
+
+/* Sets the previous value of a particular register. REGNUM is the
+ (DWARF) register number whose value is to be set. VAL is the value
+ the register is to be set to.
+
+ VAL is *not* copied, so the memory allocated to it cannot be
+ reused. Once GDB no longer needs the value, it is deallocated
+ using the FREE function (see gdb_reg_value).
+
+ A register can also be "set" to an undefined value by setting the
+ defined field in VAL to zero. */
+
+typedef void (gdb_unwind_reg_set) (struct gdb_unwind_callbacks *cb, int regnum,
+ struct gdb_reg_value *val);
+
+/* This struct is passed to unwind in gdb_reader_funcs, and is to be
+ used to unwind the current frame (current being the frame whose
+ registers can be read using reg_get) into the earlier frame. The
+ functions have been described above. */
+
+struct gdb_unwind_callbacks
+{
+ gdb_unwind_reg_get *reg_get;
+ gdb_unwind_reg_set *reg_set;
+ gdb_target_read *target_read;
+
+ /* For internal use by GDB. */
+ void *priv_data;
+};
+
+/* Forward declaration. */
+
+struct gdb_reader_funcs;
+
+/* Parse the debug info off a block of memory, pointed to by MEMORY
+ (already copied to GDB's address space) and MEMORY_SZ bytes long.
+ The implementation has to use the functions in CB to actually emit
+ the parsed data into GDB. SELF is the same structure returned by
+ gdb_init_reader.
+
+ Return GDB_FAIL on failure and GDB_SUCCESS on success. */
+
+typedef enum gdb_status (gdb_read_debug_info) (struct gdb_reader_funcs *self,
+ struct gdb_symbol_callbacks *cb,
+ void *memory, long memory_sz);
+
+/* Unwind the current frame, CB is the set of unwind callbacks that
+ are to be used to do this.
+
+ Return GDB_FAIL on failure and GDB_SUCCESS on success. */
+
+typedef enum gdb_status (gdb_unwind_frame) (struct gdb_reader_funcs *self,
+ struct gdb_unwind_callbacks *cb);
+
+/* Return the frame ID corresponding to the current frame, using C to
+ read the current register values. See the comment on struct
+ gdb_frame_id. */
+
+typedef struct gdb_frame_id (gdb_get_frame_id) (struct gdb_reader_funcs *self,
+ struct gdb_unwind_callbacks *c);
+
+/* Called when a reader is being unloaded. This function should also
+ free SELF, if required. */
+
+typedef void (gdb_destroy_reader) (struct gdb_reader_funcs *self);
+
+/* Called when the reader is loaded. Must either return a properly
+ populated gdb_reader_funcs or NULL. The memory allocated for the
+ gdb_reader_funcs is to be managed by the reader itself (i.e. if it
+ is allocated from the heap, it must also be freed in
+ gdb_destroy_reader). */
+
+extern struct gdb_reader_funcs *gdb_init_reader (void);
+
+/* Pointer to the functions which implement the reader's
+ functionality. The individual functions have been documented
+ above.
+
+ None of the fields are optional. */
+
+struct gdb_reader_funcs
+{
+ /* Must be set to GDB_READER_INTERFACE_VERSION. */
+ int reader_version;
+
+ /* For use by the reader. */
+ void *priv_data;
+
+ gdb_read_debug_info *read;
+ gdb_unwind_frame *unwind;
+ gdb_get_frame_id *get_frame_id;
+ gdb_destroy_reader *destroy;
+};
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif
diff --git a/lib/gcc/x86_64-linux-android/4.9/32/crtbegin.o b/lib/gcc/x86_64-linux-android/4.9/32/crtbegin.o
new file mode 100644
index 0000000..a333bb8
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/32/crtbegin.o
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/32/crtbeginS.o b/lib/gcc/x86_64-linux-android/4.9/32/crtbeginS.o
new file mode 100644
index 0000000..a9ff81d
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/32/crtbeginS.o
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/32/crtbeginT.o b/lib/gcc/x86_64-linux-android/4.9/32/crtbeginT.o
new file mode 100644
index 0000000..a333bb8
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/32/crtbeginT.o
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/32/crtend.o b/lib/gcc/x86_64-linux-android/4.9/32/crtend.o
new file mode 100644
index 0000000..687ae6f
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/32/crtend.o
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/32/crtendS.o b/lib/gcc/x86_64-linux-android/4.9/32/crtendS.o
new file mode 100644
index 0000000..687ae6f
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/32/crtendS.o
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/32/crtfastmath.o b/lib/gcc/x86_64-linux-android/4.9/32/crtfastmath.o
new file mode 100644
index 0000000..d59dc6e
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/32/crtfastmath.o
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/32/crtprec32.o b/lib/gcc/x86_64-linux-android/4.9/32/crtprec32.o
new file mode 100644
index 0000000..0614852
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/32/crtprec32.o
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/32/crtprec64.o b/lib/gcc/x86_64-linux-android/4.9/32/crtprec64.o
new file mode 100644
index 0000000..42138bf
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/32/crtprec64.o
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/32/crtprec80.o b/lib/gcc/x86_64-linux-android/4.9/32/crtprec80.o
new file mode 100644
index 0000000..a541e6f
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/32/crtprec80.o
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/32/libgcc.a b/lib/gcc/x86_64-linux-android/4.9/32/libgcc.a
new file mode 100644
index 0000000..a6011ca
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/32/libgcc.a
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/32/libgcov.a b/lib/gcc/x86_64-linux-android/4.9/32/libgcov.a
new file mode 100644
index 0000000..b72d700
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/32/libgcov.a
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/crtbegin.o b/lib/gcc/x86_64-linux-android/4.9/crtbegin.o
new file mode 100644
index 0000000..2597da2
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/crtbegin.o
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/crtbeginS.o b/lib/gcc/x86_64-linux-android/4.9/crtbeginS.o
new file mode 100644
index 0000000..7ea0491
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/crtbeginS.o
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/crtbeginT.o b/lib/gcc/x86_64-linux-android/4.9/crtbeginT.o
new file mode 100644
index 0000000..2597da2
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/crtbeginT.o
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/crtend.o b/lib/gcc/x86_64-linux-android/4.9/crtend.o
new file mode 100644
index 0000000..88e5daf
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/crtend.o
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/crtendS.o b/lib/gcc/x86_64-linux-android/4.9/crtendS.o
new file mode 100644
index 0000000..88e5daf
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/crtendS.o
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/crtfastmath.o b/lib/gcc/x86_64-linux-android/4.9/crtfastmath.o
new file mode 100644
index 0000000..e5c1d09
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/crtfastmath.o
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/crtprec32.o b/lib/gcc/x86_64-linux-android/4.9/crtprec32.o
new file mode 100644
index 0000000..16ebe52
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/crtprec32.o
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/crtprec64.o b/lib/gcc/x86_64-linux-android/4.9/crtprec64.o
new file mode 100644
index 0000000..1724eb7
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/crtprec64.o
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/crtprec80.o b/lib/gcc/x86_64-linux-android/4.9/crtprec80.o
new file mode 100644
index 0000000..89b83dc
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/crtprec80.o
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/include-fixed/README b/lib/gcc/x86_64-linux-android/4.9/include-fixed/README
new file mode 100644
index 0000000..7086a77
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include-fixed/README
@@ -0,0 +1,14 @@
+This README file is copied into the directory for GCC-only header files
+when fixincludes is run by the makefile for GCC.
+
+Many of the files in this directory were automatically edited from the
+standard system header files by the fixincludes process. They are
+system-specific, and will not work on any other kind of system. They
+are also not part of GCC. The reason we have to do this is because
+GCC requires ANSI C headers and many vendors supply ANSI-incompatible
+headers.
+
+Because this is an automated process, sometimes headers get "fixed"
+that do not, strictly speaking, need a fix. As long as nothing is broken
+by the process, it is just an unfortunate collateral inconvenience.
+We would like to rectify it, if it is not "too inconvenient".
diff --git a/lib/gcc/x86_64-linux-android/4.9/include-fixed/limits.h b/lib/gcc/x86_64-linux-android/4.9/include-fixed/limits.h
new file mode 100644
index 0000000..8c6a4d3
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include-fixed/limits.h
@@ -0,0 +1,171 @@
+/* Copyright (C) 1992-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* This administrivia gets added to the beginning of limits.h
+ if the system has its own version of limits.h. */
+
+/* We use _GCC_LIMITS_H_ because we want this not to match
+ any macros that the system's limits.h uses for its own purposes. */
+#ifndef _GCC_LIMITS_H_ /* Terminated in limity.h. */
+#define _GCC_LIMITS_H_
+
+#ifndef _LIBC_LIMITS_H_
+/* Use "..." so that we find syslimits.h only in this same directory. */
+#include "syslimits.h"
+#endif
+/* Copyright (C) 1991-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef _LIMITS_H___
+#define _LIMITS_H___
+
+/* Number of bits in a `char'. */
+#undef CHAR_BIT
+#define CHAR_BIT __CHAR_BIT__
+
+/* Maximum length of a multibyte character. */
+#ifndef MB_LEN_MAX
+#define MB_LEN_MAX 1
+#endif
+
+/* Minimum and maximum values a `signed char' can hold. */
+#undef SCHAR_MIN
+#define SCHAR_MIN (-SCHAR_MAX - 1)
+#undef SCHAR_MAX
+#define SCHAR_MAX __SCHAR_MAX__
+
+/* Maximum value an `unsigned char' can hold. (Minimum is 0). */
+#undef UCHAR_MAX
+#if __SCHAR_MAX__ == __INT_MAX__
+# define UCHAR_MAX (SCHAR_MAX * 2U + 1U)
+#else
+# define UCHAR_MAX (SCHAR_MAX * 2 + 1)
+#endif
+
+/* Minimum and maximum values a `char' can hold. */
+#ifdef __CHAR_UNSIGNED__
+# undef CHAR_MIN
+# if __SCHAR_MAX__ == __INT_MAX__
+# define CHAR_MIN 0U
+# else
+# define CHAR_MIN 0
+# endif
+# undef CHAR_MAX
+# define CHAR_MAX UCHAR_MAX
+#else
+# undef CHAR_MIN
+# define CHAR_MIN SCHAR_MIN
+# undef CHAR_MAX
+# define CHAR_MAX SCHAR_MAX
+#endif
+
+/* Minimum and maximum values a `signed short int' can hold. */
+#undef SHRT_MIN
+#define SHRT_MIN (-SHRT_MAX - 1)
+#undef SHRT_MAX
+#define SHRT_MAX __SHRT_MAX__
+
+/* Maximum value an `unsigned short int' can hold. (Minimum is 0). */
+#undef USHRT_MAX
+#if __SHRT_MAX__ == __INT_MAX__
+# define USHRT_MAX (SHRT_MAX * 2U + 1U)
+#else
+# define USHRT_MAX (SHRT_MAX * 2 + 1)
+#endif
+
+/* Minimum and maximum values a `signed int' can hold. */
+#undef INT_MIN
+#define INT_MIN (-INT_MAX - 1)
+#undef INT_MAX
+#define INT_MAX __INT_MAX__
+
+/* Maximum value an `unsigned int' can hold. (Minimum is 0). */
+#undef UINT_MAX
+#define UINT_MAX (INT_MAX * 2U + 1U)
+
+/* Minimum and maximum values a `signed long int' can hold.
+ (Same as `int' only where `long' and `int' have equal width). */
+#undef LONG_MIN
+#define LONG_MIN (-LONG_MAX - 1L)
+#undef LONG_MAX
+#define LONG_MAX __LONG_MAX__
+
+/* Maximum value an `unsigned long int' can hold. (Minimum is 0). */
+#undef ULONG_MAX
+#define ULONG_MAX (LONG_MAX * 2UL + 1UL)
+
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+/* Minimum and maximum values a `signed long long int' can hold. */
+# undef LLONG_MIN
+# define LLONG_MIN (-LLONG_MAX - 1LL)
+# undef LLONG_MAX
+# define LLONG_MAX __LONG_LONG_MAX__
+
+/* Maximum value an `unsigned long long int' can hold. (Minimum is 0). */
+# undef ULLONG_MAX
+# define ULLONG_MAX (LLONG_MAX * 2ULL + 1ULL)
+#endif
+
+#if defined (__GNU_LIBRARY__) ? defined (__USE_GNU) : !defined (__STRICT_ANSI__)
+/* Minimum and maximum values a `signed long long int' can hold. */
+# undef LONG_LONG_MIN
+# define LONG_LONG_MIN (-LONG_LONG_MAX - 1LL)
+# undef LONG_LONG_MAX
+# define LONG_LONG_MAX __LONG_LONG_MAX__
+
+/* Maximum value an `unsigned long long int' can hold. (Minimum is 0). */
+# undef ULONG_LONG_MAX
+# define ULONG_LONG_MAX (LONG_LONG_MAX * 2ULL + 1ULL)
+#endif
+
+#endif /* _LIMITS_H___ */
+/* This administrivia gets added to the end of limits.h
+ if the system has its own version of limits.h. */
+
+#else /* not _GCC_LIMITS_H_ */
+
+#ifdef _GCC_NEXT_LIMITS_H
+#include_next <limits.h> /* recurse down to the real one */
+#endif
+
+#endif /* not _GCC_LIMITS_H_ */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include-fixed/linux/a.out.h b/lib/gcc/x86_64-linux-android/4.9/include-fixed/linux/a.out.h
new file mode 100644
index 0000000..17fc4b4
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include-fixed/linux/a.out.h
@@ -0,0 +1,235 @@
+/* DO NOT EDIT THIS FILE.
+
+ It has been auto-edited by fixincludes from:
+
+ "/tmp/ndk-andrewhsieh/build/toolchain/prefix/sysroot/usr/include/linux/a.out.h"
+
+ This had to be done to correct non-standard usages in the
+ original, manufacturer supplied header file. */
+
+/****************************************************************************
+ ****************************************************************************
+ ***
+ *** This header was automatically generated from a Linux kernel header
+ *** of the same name, to make information necessary for userspace to
+ *** call into the kernel available to libc. It contains only constants,
+ *** structures, and macros generated from the original header, and thus,
+ *** contains no copyrightable information.
+ ***
+ *** To edit the content of this header, modify the corresponding
+ *** source file (e.g. under external/kernel-headers/original/) then
+ *** run bionic/libc/kernel/tools/update_all.py
+ ***
+ *** Any manual change here will be lost the next time this script will
+ *** be run. You've been warned!
+ ***
+ ****************************************************************************
+ ****************************************************************************/
+#ifndef _UAPI__A_OUT_GNU_H__
+#define _UAPI__A_OUT_GNU_H__
+#define __GNU_EXEC_MACROS__
+#ifndef __STRUCT_EXEC_OVERRIDE__
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#include <asm/a.out.h>
+#endif
+#ifndef __ASSEMBLY__
+enum machine_type { /* machine IDs; N_MACHTYPE() casts bits 16-23 of a_info to this type */
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#ifdef M_OLDSUN2 /* name already taken by a macro: expose its value as M__OLDSUN2 instead */
+ M__OLDSUN2 = M_OLDSUN2,
+#else
+ M_OLDSUN2 = 0,
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#endif
+#ifdef M_68010 /* same aliasing scheme as M_OLDSUN2 */
+ M__68010 = M_68010,
+#else
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+ M_68010 = 1,
+#endif
+#ifdef M_68020 /* same aliasing scheme as M_OLDSUN2 */
+ M__68020 = M_68020,
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#else
+ M_68020 = 2,
+#endif
+#ifdef M_SPARC /* same aliasing scheme as M_OLDSUN2 */
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+ M__SPARC = M_SPARC,
+#else
+ M_SPARC = 3,
+#endif
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+ M_386 = 100, /* the remaining enumerators are defined unconditionally */
+ M_MIPS1 = 151,
+ M_MIPS2 = 152
+};
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#ifndef N_MAGIC
+#define N_MAGIC(exec) ((exec).a_info & 0xffff)
+#endif
+#define N_MACHTYPE(exec) ((enum machine_type)(((exec).a_info >> 16) & 0xff))
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define N_FLAGS(exec) (((exec).a_info >> 24) & 0xff)
+#define N_SET_INFO(exec, magic, type, flags) ((exec).a_info = ((magic) & 0xffff) | (((int)(type) & 0xff) << 16) | (((flags) & 0xff) << 24))
+#define N_SET_MAGIC(exec, magic) ((exec).a_info = (((exec).a_info & 0xffff0000) | ((magic) & 0xffff)))
+#define N_SET_MACHTYPE(exec, machtype) ((exec).a_info = ((exec).a_info&0xff00ffff) | ((((int)(machtype))&0xff) << 16))
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define N_SET_FLAGS(exec, flags) ((exec).a_info = ((exec).a_info&0x00ffffff) | (((flags) & 0xff) << 24))
+#define OMAGIC 0407
+#define NMAGIC 0410
+#define ZMAGIC 0413
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define QMAGIC 0314
+#define CMAGIC 0421
+#ifndef N_BADMAG
+#define N_BADMAG(x) (N_MAGIC(x) != OMAGIC && N_MAGIC(x) != NMAGIC && N_MAGIC(x) != ZMAGIC && N_MAGIC(x) != QMAGIC)
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#endif
+#define _N_HDROFF(x) (1024 - sizeof (struct exec))
+#ifndef N_TXTOFF
+#define N_TXTOFF(x) (N_MAGIC(x) == ZMAGIC ? _N_HDROFF((x)) + sizeof (struct exec) : (N_MAGIC(x) == QMAGIC ? 0 : sizeof (struct exec)))
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#endif
+#ifndef N_DATOFF
+#define N_DATOFF(x) (N_TXTOFF(x) + (x).a_text)
+#endif
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#ifndef N_TRELOFF
+#define N_TRELOFF(x) (N_DATOFF(x) + (x).a_data)
+#endif
+#ifndef N_DRELOFF
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define N_DRELOFF(x) (N_TRELOFF(x) + N_TRSIZE(x))
+#endif
+#ifndef N_SYMOFF
+#define N_SYMOFF(x) (N_DRELOFF(x) + N_DRSIZE(x))
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#endif
+#ifndef N_STROFF
+#define N_STROFF(x) (N_SYMOFF(x) + N_SYMSIZE(x))
+#endif
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#ifndef N_TXTADDR
+#define N_TXTADDR(x) (N_MAGIC(x) == QMAGIC ? PAGE_SIZE : 0)
+#endif
+#if defined(vax) || defined(hp300) || defined(pyr)
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define SEGMENT_SIZE page_size
+#endif
+#ifdef sony
+#define SEGMENT_SIZE 0x2000
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#endif
+#ifdef is68k
+#define SEGMENT_SIZE 0x20000
+#endif
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#if defined(m68k) && defined(PORTAR)
+#define PAGE_SIZE 0x400
+#define SEGMENT_SIZE PAGE_SIZE
+#endif
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#ifdef __linux__
+#include <unistd.h>
+#if defined(__i386__) || defined(__mc68000__)
+#define SEGMENT_SIZE 1024
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#else
+#ifndef SEGMENT_SIZE
+#define SEGMENT_SIZE getpagesize()
+#endif
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#endif
+#endif
+#define _N_SEGMENT_ROUND(x) ALIGN(x, SEGMENT_SIZE) /* NOTE(review): ALIGN is not defined in this header; it must come from an includer -- confirm */
+#define _N_TXTENDADDR(x) (N_TXTADDR(x)+(x).a_text)
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#ifndef N_DATADDR
+#define N_DATADDR(x) (N_MAGIC(x)==OMAGIC? (_N_TXTENDADDR(x)) : (_N_SEGMENT_ROUND (_N_TXTENDADDR(x))))
+#endif
+#ifndef N_BSSADDR
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define N_BSSADDR(x) (N_DATADDR(x) + (x).a_data)
+#endif
+#ifndef N_NLIST_DECLARED
+struct nlist { /* symbol record; the N_NLIST_DECLARED guard lets another header supply its own layout */
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+ union {
+ char *n_name;
+ struct nlist *n_next;
+ long n_strx;
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+ } n_un; /* one slot, three overlapping views: char pointer, nlist pointer, or long */
+ unsigned char n_type; /* presumably built from the N_* codes and the N_EXT/N_TYPE/N_STAB masks below -- confirm */
+ char n_other;
+ short n_desc;
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+ unsigned long n_value;
+};
+#endif
+#ifndef N_UNDF
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define N_UNDF 0
+#endif
+#ifndef N_ABS
+#define N_ABS 2
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#endif
+#ifndef N_TEXT
+#define N_TEXT 4
+#endif
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#ifndef N_DATA
+#define N_DATA 6
+#endif
+#ifndef N_BSS
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define N_BSS 8
+#endif
+#ifndef N_FN
+#define N_FN 15
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#endif
+#ifndef N_EXT
+#define N_EXT 1
+#endif
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#ifndef N_TYPE
+#define N_TYPE 036
+#endif
+#ifndef N_STAB
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define N_STAB 0340
+#endif
+#define N_INDR 0xa
+#define N_SETA 0x14
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define N_SETT 0x16
+#define N_SETD 0x18
+#define N_SETB 0x1A
+#define N_SETV 0x1C
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#ifndef N_RELOCATION_INFO_DECLARED
+struct relocation_info /* one relocation record; N_RELOCATION_INFO_DECLARED lets another header override it */
+{
+ int r_address;
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+ unsigned int r_symbolnum:24; /* the four fields from here to r_extern use 24+1+2+1 = 28 bits */
+ unsigned int r_pcrel:1;
+ unsigned int r_length:2;
+ unsigned int r_extern:1;
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#ifdef NS32K /* either branch adds 4 more bits, so the bit-fields always total 32 */
+ unsigned r_bsr:1;
+ unsigned r_disp:1;
+ unsigned r_pad:2;
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#else
+ unsigned int r_pad:4;
+#endif
+};
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#endif
+#endif
+#endif
diff --git a/lib/gcc/x86_64-linux-android/4.9/include-fixed/stdio.h b/lib/gcc/x86_64-linux-android/4.9/include-fixed/stdio.h
new file mode 100644
index 0000000..1089c96
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include-fixed/stdio.h
@@ -0,0 +1,465 @@
+/* DO NOT EDIT THIS FILE.
+
+ It has been auto-edited by fixincludes from:
+
+ "/tmp/ndk-andrewhsieh/build/toolchain/prefix/sysroot/usr/include/stdio.h"
+
+ This had to be done to correct non-standard usages in the
+ original, manufacturer supplied header file. */
+
+/* $OpenBSD: stdio.h,v 1.35 2006/01/13 18:10:09 miod Exp $ */
+/* $NetBSD: stdio.h,v 1.18 1996/04/25 18:29:21 jtc Exp $ */
+
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Chris Torek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)stdio.h 5.17 (Berkeley) 6/3/91
+ */
+
+#ifndef _STDIO_H_
+#define _STDIO_H_
+
+#include <sys/cdefs.h>
+#include <sys/types.h>
+
+#include <stdarg.h>
+#include <stddef.h>
+
+#define __need_NULL
+#include <stddef.h>
+
+#define _FSTDIO /* Define for new stdio with functions. */
+
+typedef off_t fpos_t; /* stdio file position type */
+
+/*
+ * NB: to fit things in six character monocase externals, the stdio
+ * code uses the prefix `__s' for stdio objects, typically followed
+ * by a three-character attempt at a mnemonic.
+ */
+
+/* stdio buffers */
+struct __sbuf { /* pointer-plus-length pair embedded in FILE as _bf, _ext and _lb */
+ unsigned char *_base; /* presumably the first byte of the buffer; FILE's notes treat a non-NULL _base as "buffer present" */
+ int _size; /* presumably the buffer length in bytes -- confirm */
+};
+
+/*
+ * stdio state variables.
+ *
+ * The following always hold:
+ *
+ * if (_flags&(__SLBF|__SWR)) == (__SLBF|__SWR),
+ * _lbfsize is -_bf._size, else _lbfsize is 0
+ * if _flags&__SRD, _w is 0
+ * if _flags&__SWR, _r is 0
+ *
+ * This ensures that the getc and putc macros (or inline functions) never
+ * try to write or read from a file that is in `read' or `write' mode.
+ * (Moreover, they can, and do, automatically switch from read mode to
+ * write mode, and back, on "r+" and "w+" files.)
+ *
+ * _lbfsize is used only to make the inline line-buffered output stream
+ * code as compact as possible.
+ *
+ * _ub, _up, and _ur are used when ungetc() pushes back more characters
+ * than fit in the current _bf, or when ungetc() pushes back a character
+ * that does not match the previous one in _bf. When this happens,
+ * _ub._base becomes non-nil (i.e., a stream has ungetc() data iff
+ * _ub._base!=NULL) and _up and _ur save the current values of _p and _r.
+ *
+ * NOTE: if you change this structure, you also need to update the
+ * std() initializer in findfp.c.
+ */
+typedef struct __sFILE { /* per-stream state; stdin/stdout/stderr are the first three entries of __sF[] */
+ unsigned char *_p; /* current position in (some) buffer */
+ int _r; /* read space left for getc() */
+ int _w; /* write space left for putc() */
+ short _flags; /* flags, below; this FILE is free if 0 */
+ short _file; /* fileno, if Unix descriptor, else -1 */
+ struct __sbuf _bf; /* the buffer (at least 1 byte, if !NULL) */
+ int _lbfsize; /* 0 or -_bf._size, for inline putc */
+
+ /* operations */
+ void *_cookie; /* cookie passed to io functions */
+ int (*_close)(void *); /* same shape as funopen()'s final (close) callback */
+ int (*_read)(void *, char *, int); /* same shape as funopen()'s read callback */
+ fpos_t (*_seek)(void *, fpos_t, int); /* same shape as funopen()'s seek callback */
+ int (*_write)(void *, const char *, int); /* same shape as funopen()'s write callback */
+
+ /* extension data, to avoid further ABI breakage */
+ struct __sbuf _ext;
+ /* data for long sequences of ungetc() */
+ unsigned char *_up; /* saved _p when _p is doing ungetc data */
+ int _ur; /* saved _r when _r is counting ungetc data */
+
+ /* tricks to meet minimum requirements even when malloc() fails */
+ unsigned char _ubuf[3]; /* guarantee an ungetc() buffer */
+ unsigned char _nbuf[1]; /* guarantee a getc() buffer */
+
+ /* separate buffer for fgetln() when line crosses buffer boundary */
+ struct __sbuf _lb; /* buffer for fgetln() */
+
+ /* Unix stdio files get aligned to block boundaries on fseek() */
+ int _blksize; /* stat.st_blksize (may be != _bf._size) */
+ fpos_t _offset; /* current lseek offset */
+} FILE;
+
+__BEGIN_DECLS
+extern FILE __sF[];
+__END_DECLS
+
+#define __SLBF 0x0001 /* line buffered */
+#define __SNBF 0x0002 /* unbuffered */
+#define __SRD 0x0004 /* OK to read */
+#define __SWR 0x0008 /* OK to write */
+ /* RD and WR are never simultaneously asserted */
+#define __SRW 0x0010 /* open for reading & writing */
+#define __SEOF 0x0020 /* found EOF */
+#define __SERR 0x0040 /* found error */
+#define __SMBF 0x0080 /* _buf is from malloc */
+#define __SAPP 0x0100 /* fdopen()ed in append mode */
+#define __SSTR 0x0200 /* this is an sprintf/snprintf string */
+#define __SOPT 0x0400 /* do fseek() optimization */
+#define __SNPT 0x0800 /* do not do fseek() optimization */
+#define __SOFF 0x1000 /* set iff _offset is in fact correct */
+#define __SMOD 0x2000 /* true => fgetln modified _p text */
+#define __SALC 0x4000 /* allocate string space dynamically */
+#define __SIGN 0x8000 /* ignore this file in _fwalk */
+
+/*
+ * The following three definitions are for ANSI C, which took them
+ * from System V, which brilliantly took internal interface macros and
+ * made them official arguments to setvbuf(), without renaming them.
+ * Hence, these ugly _IOxxx names are *supposed* to appear in user code.
+ *
+ * Although numbered as their counterparts above, the implementation
+ * does not rely on this.
+ */
+#define _IOFBF 0 /* setvbuf should set fully buffered */
+#define _IOLBF 1 /* setvbuf should set line buffered */
+#define _IONBF 2 /* setvbuf should set unbuffered */
+
+#define BUFSIZ 1024 /* size of buffer used by setbuf */
+#define EOF (-1)
+
+/*
+ * FOPEN_MAX is a minimum maximum, and is the number of streams that
+ * stdio can provide without attempting to allocate further resources
+ * (which could fail). Do not use this for anything.
+ */
+
+#define FOPEN_MAX 20 /* must be <= OPEN_MAX <sys/syslimits.h> */
+#define FILENAME_MAX 1024 /* must be <= PATH_MAX <sys/syslimits.h> */
+
+/* System V/ANSI C; this is the wrong way to do this, do *not* use these. */
+#if __BSD_VISIBLE || __XPG_VISIBLE
+#define P_tmpdir "/tmp/"
+#endif
+#define L_tmpnam 1024 /* XXX must be == PATH_MAX */
+#define TMP_MAX 308915776
+
+/* Always ensure that these are consistent with <fcntl.h> and <unistd.h>! */
+#ifndef SEEK_SET
+#define SEEK_SET 0 /* set file offset to offset */
+#endif
+#ifndef SEEK_CUR
+#define SEEK_CUR 1 /* set file offset to current plus offset */
+#endif
+#ifndef SEEK_END
+#define SEEK_END 2 /* set file offset to EOF plus offset */
+#endif
+
+#define stdin (&__sF[0])
+#define stdout (&__sF[1])
+#define stderr (&__sF[2])
+
+/*
+ * Functions defined in ANSI C standard.
+ */
+__BEGIN_DECLS
+void clearerr(FILE *);
+int fclose(FILE *);
+int feof(FILE *);
+int ferror(FILE *);
+int fflush(FILE *);
+int fgetc(FILE *);
+char *fgets(char * __restrict, int, FILE * __restrict);
+FILE *fopen(const char * __restrict , const char * __restrict);
+int fprintf(FILE * __restrict , const char * __restrict, ...)
+ __printflike(2, 3);
+int fputc(int, FILE *);
+int fputs(const char * __restrict, FILE * __restrict);
+size_t fread(void * __restrict, size_t, size_t, FILE * __restrict);
+FILE *freopen(const char * __restrict, const char * __restrict,
+ FILE * __restrict);
+int fscanf(FILE * __restrict, const char * __restrict, ...)
+ __scanflike(2, 3);
+int fseek(FILE *, long, int);
+long ftell(FILE *);
+size_t fwrite(const void * __restrict, size_t, size_t, FILE * __restrict);
+int getc(FILE *);
+int getchar(void);
+ssize_t getdelim(char ** __restrict, size_t * __restrict, int,
+ FILE * __restrict);
+ssize_t getline(char ** __restrict, size_t * __restrict, FILE * __restrict);
+
+#if __BSD_VISIBLE && !defined(__SYS_ERRLIST)
+#define __SYS_ERRLIST
+extern int sys_nerr; /* perror(3) external variables */
+extern char *sys_errlist[];
+#endif
+
+void perror(const char *);
+int printf(const char * __restrict, ...)
+ __printflike(1, 2);
+int putc(int, FILE *);
+int putchar(int);
+int puts(const char *);
+int remove(const char *);
+void rewind(FILE *);
+int scanf(const char * __restrict, ...)
+ __scanflike(1, 2);
+void setbuf(FILE * __restrict, char * __restrict);
+int setvbuf(FILE * __restrict, char * __restrict, int, size_t);
+int sscanf(const char * __restrict, const char * __restrict, ...)
+ __scanflike(2, 3);
+FILE *tmpfile(void);
+int ungetc(int, FILE *);
+int vfprintf(FILE * __restrict, const char * __restrict, __gnuc_va_list)
+ __printflike(2, 0);
+int vprintf(const char * __restrict, __gnuc_va_list)
+ __printflike(1, 0);
+
+int dprintf(int, const char * __restrict, ...) __printflike(2, 3);
+int vdprintf(int, const char * __restrict, __gnuc_va_list) __printflike(2, 0);
+
+#ifndef __AUDIT__
+char* gets(char*) __warnattr("gets is very unsafe; consider using fgets");
+int sprintf(char* __restrict, const char* __restrict, ...)
+ __printflike(2, 3); //__warnattr("sprintf is often misused; please use snprintf");
+char* tmpnam(char*) __warnattr("tmpnam possibly used unsafely; consider using mkstemp");
+int vsprintf(char* __restrict, const char* __restrict, __gnuc_va_list)
+ __printflike(2, 0); //__warnattr("vsprintf is often misused; please use vsnprintf");
+#if __XPG_VISIBLE
+char* tempnam(const char*, const char*)
+ __warnattr("tempnam possibly used unsafely; consider using mkstemp");
+#endif
+#endif
+
+extern int rename(const char*, const char*);
+extern int renameat(int, const char*, int, const char*);
+
+int fgetpos(FILE * __restrict, fpos_t * __restrict);
+int fsetpos(FILE *, const fpos_t *);
+
+int fseeko(FILE *, off_t, int);
+off_t ftello(FILE *);
+
+#if __ISO_C_VISIBLE >= 1999 || __BSD_VISIBLE
+int snprintf(char * __restrict, size_t, const char * __restrict, ...)
+ __printflike(3, 4);
+int vfscanf(FILE * __restrict, const char * __restrict, __gnuc_va_list)
+ __scanflike(2, 0);
+int vscanf(const char *, __gnuc_va_list)
+ __scanflike(1, 0);
+int vsnprintf(char * __restrict, size_t, const char * __restrict, __gnuc_va_list)
+ __printflike(3, 0);
+int vsscanf(const char * __restrict, const char * __restrict, __gnuc_va_list)
+ __scanflike(2, 0);
+#endif /* __ISO_C_VISIBLE >= 1999 || __BSD_VISIBLE */
+
+__END_DECLS
+
+
+/*
+ * Functions defined in POSIX 1003.1.
+ */
+#if __BSD_VISIBLE || __POSIX_VISIBLE || __XPG_VISIBLE
+#define L_ctermid 1024 /* size for ctermid(); PATH_MAX */
+#define L_cuserid 9 /* size for cuserid(); UT_NAMESIZE + 1 */
+
+__BEGIN_DECLS
+#if 0 /* MISSING FROM BIONIC */
+char *ctermid(char *);
+char *cuserid(char *);
+#endif /* MISSING */
+FILE *fdopen(int, const char *);
+int fileno(FILE *);
+
+#if (__POSIX_VISIBLE >= 199209)
+int pclose(FILE *);
+FILE *popen(const char *, const char *);
+#endif
+
+#if __POSIX_VISIBLE >= 199506
+void flockfile(FILE *);
+int ftrylockfile(FILE *);
+void funlockfile(FILE *);
+
+/*
+ * These are normally used through macros as defined below, but POSIX
+ * requires functions as well.
+ */
+int getc_unlocked(FILE *);
+int getchar_unlocked(void);
+int putc_unlocked(int, FILE *);
+int putchar_unlocked(int);
+#endif /* __POSIX_VISIBLE >= 199506 */
+
+__END_DECLS
+
+#endif /* __BSD_VISIBLE || __POSIX_VISIBLE || __XPG_VISIBLE */
+
+/*
+ * Routines that are purely local.
+ */
+#if __BSD_VISIBLE
+__BEGIN_DECLS
+int asprintf(char ** __restrict, const char * __restrict, ...)
+ __printflike(2, 3);
+char *fgetln(FILE * __restrict, size_t * __restrict);
+int fpurge(FILE *);
+int getw(FILE *);
+int putw(int, FILE *);
+void setbuffer(FILE *, char *, int);
+int setlinebuf(FILE *);
+int vasprintf(char ** __restrict, const char * __restrict,
+ __gnuc_va_list)
+ __printflike(2, 0);
+__END_DECLS
+
+/*
+ * Stdio function-access interface.
+ */
+__BEGIN_DECLS
+FILE *funopen(const void *,
+ int (*)(void *, char *, int),
+ int (*)(void *, const char *, int),
+ fpos_t (*)(void *, fpos_t, int),
+ int (*)(void *));
+__END_DECLS
+#define fropen(cookie, fn) funopen(cookie, fn, 0, 0, 0)
+#define fwopen(cookie, fn) funopen(cookie, 0, fn, 0, 0)
+#endif /* __BSD_VISIBLE */
+
+#if defined(__BIONIC_FORTIFY)
+
+__BEGIN_DECLS
+
+__BIONIC_FORTIFY_INLINE
+__printflike(3, 0)
+int vsnprintf(char *dest, size_t size, const char *format, __va_list ap)
+{ /* fortified wrapper: passes __bos(dest) to the checked builtin together with the caller's size */
+ return __builtin___vsnprintf_chk(dest, size, 0, __bos(dest), format, ap);
+}
+
+__BIONIC_FORTIFY_INLINE
+__printflike(2, 0)
+int vsprintf(char *dest, const char *format, __va_list ap)
+{ /* fortified wrapper: the caller gives no size, so __bos(dest) is the only bound handed to the checked builtin */
+ return __builtin___vsprintf_chk(dest, 0, __bos(dest), format, ap);
+}
+
+#if defined(__clang__) /* clang gets a macro wrapper; other compilers get an inline built on __builtin_va_arg_pack() */
+ #if !defined(snprintf) /* leave any existing snprintf macro alone */
+ #define __wrap_snprintf(dest, size, ...) __builtin___snprintf_chk(dest, size, 0, __bos(dest), __VA_ARGS__)
+ #define snprintf(...) __wrap_snprintf(__VA_ARGS__)
+ #endif
+#else
+__BIONIC_FORTIFY_INLINE
+__printflike(3, 4)
+int snprintf(char *dest, size_t size, const char *format, ...)
+{ /* forwards the variadic arguments unchanged to the checked builtin, adding __bos(dest) */
+ return __builtin___snprintf_chk(dest, size, 0,
+ __bos(dest), format, __builtin_va_arg_pack());
+}
+#endif
+
+#if defined(__clang__) /* clang gets a macro wrapper; other compilers get an inline built on __builtin_va_arg_pack() */
+ #if !defined(sprintf) /* leave any existing sprintf macro alone */
+ #define __wrap_sprintf(dest, ...) __builtin___sprintf_chk(dest, 0, __bos(dest), __VA_ARGS__)
+ #define sprintf(...) __wrap_sprintf(__VA_ARGS__)
+ #endif
+#else
+__BIONIC_FORTIFY_INLINE
+__printflike(2, 3)
+int sprintf(char *dest, const char *format, ...)
+{ /* forwards the variadic arguments unchanged to the checked builtin, adding __bos(dest) */
+ return __builtin___sprintf_chk(dest, 0,
+ __bos(dest), format, __builtin_va_arg_pack());
+}
+#endif
+
+extern char* __fgets_chk(char*, int, FILE*, size_t);
+extern char* __fgets_real(char*, int, FILE*) __asm__(__USER_LABEL_PREFIX__ "fgets");
+__errordecl(__fgets_too_big_error, "fgets called with size bigger than buffer");
+__errordecl(__fgets_too_small_error, "fgets called with size less than zero");
+
+#if !defined(__clang__)
+
+__BIONIC_FORTIFY_INLINE
+char *fgets(char* dest, int size, FILE* stream) { // non-clang fortified fgets: picks __fgets_real or __fgets_chk
+ size_t bos = __bos(dest); // destination object size as reported by __bos()
+
+ // Compiler can prove, at compile time, that the passed in size
+ // is always negative. Force a compiler error.
+ if (__builtin_constant_p(size) && (size < 0)) {
+ __fgets_too_small_error();
+ }
+
+ // Compiler doesn't know destination size. Don't call __fgets_chk
+ if (bos == __BIONIC_FORTIFY_UNKNOWN_SIZE) {
+ return __fgets_real(dest, size, stream);
+ }
+
+ // Compiler can prove, at compile time, that the passed in size
+ // is always <= the actual object size. Don't call __fgets_chk
+ if (__builtin_constant_p(size) && (size <= (int) bos)) { // NOTE(review): (int) narrows size_t bos -- confirm bos > INT_MAX cannot occur
+ return __fgets_real(dest, size, stream);
+ }
+
+ // Compiler can prove, at compile time, that the passed in size
+ // is always > the actual object size. Force a compiler error.
+ if (__builtin_constant_p(size) && (size > (int) bos)) {
+ __fgets_too_big_error();
+ }
+
+ return __fgets_chk(dest, size, stream, bos); // size is not a compile-time constant: defer to __fgets_chk
+}
+
+#endif /* !defined(__clang__) */
+
+__END_DECLS
+
+#endif /* defined(__BIONIC_FORTIFY) */
+
+#endif /* _STDIO_H_ */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include-fixed/syslimits.h b/lib/gcc/x86_64-linux-android/4.9/include-fixed/syslimits.h
new file mode 100644
index 0000000..a362802
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include-fixed/syslimits.h
@@ -0,0 +1,8 @@
+/* syslimits.h stands for the system's own limits.h file.
+ If we can use it ok unmodified, then we install this text.
+ If fixincludes fixes it, then the fixed version is installed
+ instead of this text. */
+
+#define _GCC_NEXT_LIMITS_H /* tell gcc's limits.h to recurse */
+#include_next <limits.h>
+#undef _GCC_NEXT_LIMITS_H
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/adxintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/adxintrin.h
new file mode 100644
index 0000000..6118900
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/adxintrin.h
@@ -0,0 +1,49 @@
+/* Copyright (C) 2012-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED
+# error "Never use <adxintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef _ADXINTRIN_H_INCLUDED
+#define _ADXINTRIN_H_INCLUDED
+
+extern __inline unsigned char
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_addcarryx_u32 (unsigned char __CF, unsigned int __X,
+ unsigned int __Y, unsigned int *__P)
+{ /* thin wrapper; presumably __X + __Y + carry-in __CF, sum stored via __P, carry-out returned -- confirm in the Intel guide */
+ return __builtin_ia32_addcarryx_u32 (__CF, __X, __Y, __P);
+}
+
+#ifdef __x86_64__
+extern __inline unsigned char
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_addcarryx_u64 (unsigned char __CF, unsigned long long __X, /* __CF: carry-in; __X, __Y: 64-bit addends */
+ unsigned long long __Y, unsigned long long *__P) /* __P: receives the 64-bit sum */
+{ /* operands now use the same 64-bit type as *__P, so no dependence on `long' being 64 bits wide; returns the builtin's result (carry-out per the Intel guide) */
+ return __builtin_ia32_addcarryx_u64 (__CF, __X, __Y, __P);
+}
+#endif
+
+#endif /* _ADXINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/ammintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/ammintrin.h
new file mode 100644
index 0000000..a89b204
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/ammintrin.h
@@ -0,0 +1,93 @@
+/* Copyright (C) 2007-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the specification included in the AMD Programmers
+ Manual Update, version 2.x */
+
+#ifndef _AMMINTRIN_H_INCLUDED
+#define _AMMINTRIN_H_INCLUDED
+
+/* We need definitions from the SSE3, SSE2 and SSE header files. */
+#include <pmmintrin.h>
+
+#ifndef __SSE4A__
+#pragma GCC push_options
+#pragma GCC target("sse4a")
+#define __DISABLE_SSE4A__
+#endif /* __SSE4A__ */
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_sd (double * __P, __m128d __Y) /* forwards __P and __Y (viewed as __v2df) to the movntsd builtin */
+{
+ __builtin_ia32_movntsd (__P, (__v2df) __Y);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_ss (float * __P, __m128 __Y) /* forwards __P and __Y (viewed as __v4sf) to the movntss builtin */
+{
+ __builtin_ia32_movntss (__P, (__v4sf) __Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extract_si64 (__m128i __X, __m128i __Y) /* extrq builtin; note the asymmetric views: __X as __v2di, __Y as __v16qi */
+{
+ return (__m128i) __builtin_ia32_extrq ((__v2di) __X, (__v16qi) __Y);
+}
+
+#ifdef __OPTIMIZE__ /* inline function when optimizing, macro otherwise -- presumably so __I/__L reach the builtin as constants; confirm */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extracti_si64 (__m128i __X, unsigned const int __I, unsigned const int __L)
+{
+ return (__m128i) __builtin_ia32_extrqi ((__v2di) __X, __I, __L);
+}
+#else
+#define _mm_extracti_si64(X, I, L) \
+ ((__m128i) __builtin_ia32_extrqi ((__v2di)(__m128i)(X), \
+ (unsigned int)(I), (unsigned int)(L)))
+#endif
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_si64 (__m128i __X,__m128i __Y) /* insertq builtin; both operands are passed as __v2di */
+{
+ return (__m128i) __builtin_ia32_insertq ((__v2di)__X, (__v2di)__Y);
+}
+
+#ifdef __OPTIMIZE__ /* inline function when optimizing, macro otherwise -- presumably so __I/__L reach the builtin as constants; confirm */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_inserti_si64(__m128i __X, __m128i __Y, unsigned const int __I, unsigned const int __L)
+{
+ return (__m128i) __builtin_ia32_insertqi ((__v2di)__X, (__v2di)__Y, __I, __L);
+}
+#else
+#define _mm_inserti_si64(X, Y, I, L) \
+ ((__m128i) __builtin_ia32_insertqi ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), \
+ (unsigned int)(I), (unsigned int)(L)))
+#endif
+
+#ifdef __DISABLE_SSE4A__
+#undef __DISABLE_SSE4A__
+#pragma GCC pop_options
+#endif /* __DISABLE_SSE4A__ */
+
+#endif /* _AMMINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/arm_neon.h b/lib/gcc/x86_64-linux-android/4.9/include/arm_neon.h
new file mode 100644
index 0000000..873b7d6
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/arm_neon.h
@@ -0,0 +1,8643 @@
+//created by Victoria Zhislina, the Senior Application Engineer, Intel Corporation, victoria.zhislina@intel.com
+
+//*** Copyright (C) 2012-2014 Intel Corporation. All rights reserved.
+
+//IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+
+//By downloading, copying, installing or using the software you agree to this license.
+//If you do not agree to this license, do not download, install, copy or use the software.
+
+// License Agreement
+
+//Permission to use, copy, modify, and/or distribute this software for any
+//purpose with or without fee is hereby granted, provided that the above
+//copyright notice and this permission notice appear in all copies.
+
+//THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+//REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+//AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+//INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+//LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+//OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+//PERFORMANCE OF THIS SOFTWARE.
+
+//*****************************************************************************************
+// This file is intended to simplify ARM->IA32 porting
+// It makes the correspondence between ARM NEON intrinsics (as defined in "arm_neon.h")
+// and x86 SSE (up to SSE4.2) intrinsic functions as defined in the header files below.
+// The MMX instruction set is not used due to performance overhead and the necessity to use the
+// EMMS instruction (_mm_empty()) for mmx-x87 floating point switching
+//*****************************************************************************************
+
+//!!!!!!! To use this file in a project that uses ARM NEON intrinsics, just keep arm_neon.h included and compile it as usual.
+//!!!!!!! Please pay attention to #define USE_SSSE3 and USE_SSE4 below - you need to define them for the newest Intel platforms for
+//!!!!!!! greater performance. This can be done with the -mssse3 or -msse4.2 (which also implies -mssse3) compiler switch.
+
+#ifndef NEON2SSE_H
+#define NEON2SSE_H
+
+//Instruction-set selection. USE_SSE4 / USE_SSSE3 may be predefined by the user (e.g. -DUSE_SSE4);
+//otherwise they are derived from the compiler's __SSE4_2__ / __SSSE3__ target macros.
+#if !defined(USE_SSE4) && defined(__SSE4_2__)
+ #define USE_SSE4
+#endif
+
+//SSE4 implies SSSE3, whether USE_SSE4 came from the compiler or from the user.
+//Guarded so that a user-supplied definition (e.g. -DUSE_SSSE3=1) is never redefined.
+#if !defined(USE_SSSE3) && (defined(USE_SSE4) || defined(__SSSE3__))
+ #define USE_SSSE3
+#endif
+
+#include <xmmintrin.h> //SSE
+#include <emmintrin.h> //SSE2
+#include <pmmintrin.h> //SSE3
+
+#ifdef USE_SSSE3
+ #include <tmmintrin.h> //SSSE3
+#else
+# warning "Some functions require SSSE3 or higher."
+#endif
+
+#ifdef USE_SSE4
+ #include <smmintrin.h> //SSE4.1
+ #include <nmmintrin.h> //SSE4.2
+#endif
+
+/*********************************************************************************************************************/
+// data types conversion
+/*********************************************************************************************************************/
+
+typedef __m128 float32x4_t; //the NEON float vector name is an alias of the SSE __m128 type
+
+typedef __m128 float16x8_t; //not supported by IA, for compatibility
+
+typedef __m128i int8x16_t; //every integer/polynomial vector name below aliases the same __m128i type, so the compiler cannot tell them apart
+typedef __m128i int16x8_t;
+typedef __m128i int32x4_t;
+typedef __m128i int64x2_t;
+typedef __m128i uint8x16_t;
+typedef __m128i uint16x8_t;
+typedef __m128i uint32x4_t;
+typedef __m128i uint64x2_t;
+typedef __m128i poly8x16_t;
+typedef __m128i poly16x8_t;
+
+#if defined(_MSC_VER) && (_MSC_VER < 1300) //MSVC with _MSC_VER below 1300: fixed-width names spelled with the plain C integer types
+ typedef signed char int8_t;
+ typedef unsigned char uint8_t;
+ typedef signed short int16_t;
+ typedef unsigned short uint16_t;
+ typedef signed int int32_t;
+ typedef unsigned int uint32_t;
+ typedef signed long long int64_t;
+ typedef unsigned long long uint64_t;
+#elif defined(_MSC_VER) //any other MSVC: 8/16/32-bit names use the __intN types, 64-bit names use long long
+ typedef signed __int8 int8_t;
+ typedef unsigned __int8 uint8_t;
+ typedef signed __int16 int16_t;
+ typedef unsigned __int16 uint16_t;
+ typedef signed __int32 int32_t;
+ typedef unsigned __int32 uint32_t;
+
+typedef signed long long int64_t;
+typedef unsigned long long uint64_t;
+#else //non-MSVC compilers: the standard headers supply the types
+ #include <stdint.h>
+ #include <limits.h>
+#endif
+#if defined(_MSC_VER) //limits.h is not included on the MSVC paths above, so the limits are written out literally
+#define SINT_MIN (-2147483647 - 1) /* min signed int value */
+#define SINT_MAX 2147483647 /* max signed int value */
+#else //INT_MIN / INT_MAX come from the limits.h included above
+#define SINT_MIN INT_MIN /* min signed int value */
+#define SINT_MAX INT_MAX /* max signed int value */
+#endif
+
+typedef float float32_t;
+#if !defined(__clang__)
+typedef float __fp16; //name-compatibility alias only: here __fp16 is an ordinary float. NOTE(review): presumably clang is excluded because it already provides __fp16 - confirm
+#endif
+
+typedef uint8_t poly8_t; //polynomial scalars are plain unsigned integers of the same width
+typedef uint16_t poly16_t;
+
+//MSVC compilers (tested up to the VS 2012 version) don't allow structures or arrays of __m128x type to be used as function arguments, resulting in
+//error C2719: 'src': formal parameter with __declspec(align('16')) won't be aligned. To avoid it we need a special trick for functions that use these types
+
+//Unfortunately we are unable to merge two 64-bit values into one 128-bit register because the user should be able to access the val[n] members explicitly!!!
+struct int8x16x2_t { //pair of 128-bit vectors, accessed as val[0] and val[1]
+ int8x16_t val[2];
+};
+struct int16x8x2_t {
+ int16x8_t val[2];
+};
+struct int32x4x2_t {
+ int32x4_t val[2];
+};
+struct int64x2x2_t {
+ int64x2_t val[2];
+};
+
+typedef struct int8x16x2_t int8x16x2_t; //lets C code use the name without the struct keyword
+typedef struct int16x8x2_t int16x8x2_t; //lets C code use the name without the struct keyword
+typedef struct int32x4x2_t int32x4x2_t; //lets C code use the name without the struct keyword
+typedef struct int64x2x2_t int64x2x2_t; //lets C code use the name without the struct keyword
+//the 64-bit pair names are aliases of the 128-bit pair structures, to avoid pointer conversions
+typedef int8x16x2_t int8x8x2_t;
+typedef int16x8x2_t int16x4x2_t;
+typedef int32x4x2_t int32x2x2_t;
+typedef int64x2x2_t int64x1x2_t;
+
+/* to avoid pointer conversions, the following unsigned and polynomial structures are aliases of the corresponding signed integer structures above */
+typedef struct int8x16x2_t uint8x16x2_t;
+typedef struct int16x8x2_t uint16x8x2_t;
+typedef struct int32x4x2_t uint32x4x2_t;
+typedef struct int64x2x2_t uint64x2x2_t;
+typedef struct int8x16x2_t poly8x16x2_t;
+typedef struct int16x8x2_t poly16x8x2_t;
+
+typedef int8x8x2_t uint8x8x2_t; //64-bit unsigned/polynomial pair names: again aliases of the signed ones
+typedef int16x4x2_t uint16x4x2_t;
+typedef int32x2x2_t uint32x2x2_t;
+typedef int64x1x2_t uint64x1x2_t;
+typedef int8x8x2_t poly8x8x2_t;
+typedef int16x4x2_t poly16x4x2_t;
+
+//float: pairs of float vectors
+struct float32x4x2_t { //pair of float32x4_t vectors, accessed as val[0] and val[1]
+ float32x4_t val[2];
+};
+struct float16x8x2_t {
+ float16x8_t val[2];
+};
+typedef struct float32x4x2_t float32x4x2_t; //lets C code use the name without the struct keyword
+typedef struct float16x8x2_t float16x8x2_t; //lets C code use the name without the struct keyword
+typedef float32x4x2_t float32x2x2_t; //the 64-bit pair names alias the 128-bit pair structures
+typedef float16x8x2_t float16x4x2_t;
+
+//4: groups of four vectors
+struct int8x16x4_t { //four 128-bit vectors, accessed as val[0]..val[3]
+ int8x16_t val[4];
+};
+struct int16x8x4_t {
+ int16x8_t val[4];
+};
+struct int32x4x4_t {
+ int32x4_t val[4];
+};
+struct int64x2x4_t {
+ int64x2_t val[4];
+};
+
+typedef struct int8x16x4_t int8x16x4_t; //lets C code use the name without the struct keyword
+typedef struct int16x8x4_t int16x8x4_t; //lets C code use the name without the struct keyword
+typedef struct int32x4x4_t int32x4x4_t; //lets C code use the name without the struct keyword
+typedef struct int64x2x4_t int64x2x4_t; //lets C code use the name without the struct keyword
+typedef int8x16x4_t int8x8x4_t; //the 64-bit group names alias the 128-bit group structures
+typedef int16x8x4_t int16x4x4_t;
+typedef int32x4x4_t int32x2x4_t;
+typedef int64x2x4_t int64x1x4_t;
+
+/* to avoid pointer conversions, the following unsigned and polynomial structures are aliases of the corresponding signed integer structures above: */
+typedef int8x8x4_t uint8x8x4_t;
+typedef int16x4x4_t uint16x4x4_t;
+typedef int32x2x4_t uint32x2x4_t;
+typedef int64x1x4_t uint64x1x4_t;
+typedef uint8x8x4_t poly8x8x4_t;
+typedef uint16x4x4_t poly16x4x4_t;
+
+typedef struct int8x16x4_t uint8x16x4_t;
+typedef struct int16x8x4_t uint16x8x4_t;
+typedef struct int32x4x4_t uint32x4x4_t;
+typedef struct int64x2x4_t uint64x2x4_t;
+typedef struct int8x16x4_t poly8x16x4_t;
+typedef struct int16x8x4_t poly16x8x4_t;
+
+struct float32x4x4_t { //four float32x4_t vectors, accessed as val[0]..val[3]
+ float32x4_t val[4];
+};
+struct float16x8x4_t {
+ float16x8_t val[4];
+};
+
+typedef struct float32x4x4_t float32x4x4_t; //lets C code use the name without the struct keyword
+typedef struct float16x8x4_t float16x8x4_t; //lets C code use the name without the struct keyword
+typedef float32x4x4_t float32x2x4_t; //the 64-bit group names alias the 128-bit group structures
+typedef float16x8x4_t float16x4x4_t;
+
+//3: groups of three vectors
+struct int16x8x3_t { //three 128-bit vectors, accessed as val[0]..val[2]
+ int16x8_t val[3];
+};
+struct int32x4x3_t {
+ int32x4_t val[3];
+};
+struct int64x2x3_t {
+ int64x2_t val[3];
+};
+struct int8x16x3_t {
+ int8x16_t val[3];
+};
+
+typedef struct int16x8x3_t int16x8x3_t; //lets C code use the name without the struct keyword
+typedef struct int32x4x3_t int32x4x3_t; //lets C code use the name without the struct keyword
+typedef struct int64x2x3_t int64x2x3_t; //lets C code use the name without the struct keyword
+typedef struct int8x16x3_t int8x16x3_t; //lets C code use the name without the struct keyword
+typedef int16x8x3_t int16x4x3_t; //the 64-bit group names alias the 128-bit group structures
+typedef int32x4x3_t int32x2x3_t;
+typedef int64x2x3_t int64x1x3_t;
+typedef int8x16x3_t int8x8x3_t;
+
+/* to avoid pointer conversions, the following unsigned and polynomial structures are aliases of the corresponding signed integer structures above: */
+typedef struct int8x16x3_t uint8x16x3_t;
+typedef struct int16x8x3_t uint16x8x3_t;
+typedef struct int32x4x3_t uint32x4x3_t;
+typedef struct int64x2x3_t uint64x2x3_t;
+typedef struct int8x16x3_t poly8x16x3_t;
+typedef struct int16x8x3_t poly16x8x3_t;
+typedef int8x8x3_t uint8x8x3_t;
+typedef int16x4x3_t uint16x4x3_t;
+typedef int32x2x3_t uint32x2x3_t;
+typedef int64x1x3_t uint64x1x3_t;
+typedef int8x8x3_t poly8x8x3_t;
+typedef int16x4x3_t poly16x4x3_t;
+
+//float: groups of three float vectors
+struct float32x4x3_t { //three float32x4_t vectors, accessed as val[0]..val[2]
+ float32x4_t val[3];
+};
+struct float16x8x3_t {
+ float16x8_t val[3];
+};
+
+typedef struct float32x4x3_t float32x4x3_t; //lets C code use the name without the struct keyword
+typedef struct float16x8x3_t float16x8x3_t; //lets C code use the name without the struct keyword
+typedef float32x4x3_t float32x2x3_t; //the 64-bit group names alias the 128-bit group structures
+typedef float16x8x3_t float16x4x3_t;
+
+//****************************************************************************
+//****** Porting auxiliary macros ********************************************
+#define _M128i(a) (*(__m128i*)&(a)) //reinterprets the storage of a as __m128i; a must be an addressable lvalue because its address is taken
+#define _M128d(a) (*(__m128d*)&(a)) //same reinterpretation, viewed as __m128d
+#define _M128(a) (*(__m128*)&(a)) //same reinterpretation, viewed as __m128
+#define _Ui64(a) (*(uint64_t*)&(a)) //same reinterpretation, viewed as uint64_t (i.e. the first 8 bytes of a)
+#define _UNSIGNED_T(a) u##a //token-pastes a "u" prefix onto a, e.g. int8x16_t -> uint8x16_t
+
+#define _SIGNBIT64 ((uint64_t)1 << 63) //only bit 63 set
+#define _SWAP_HI_LOW32 (2 | (3 << 2) | (0 << 4) | (1 << 6)) //four 2-bit fields holding 2,3,0,1; presumably a shuffle selector that exchanges the two 64-bit halves - confirm at the use sites
+#define _INSERTPS_NDX(srcField, dstField) (((srcField) << 6) | ((dstField) << 4) ) //srcField goes to bits 6 and up, dstField to bits 4 and up
+
+#define _NEON2SSE_REASON_SLOW_SERIAL "The function may be very slow due to the serial implementation, please try to avoid it"
+#define _NEON2SSE_REASON_SLOW_UNEFFECTIVE "The function may be slow due to inefficient x86 SIMD implementation, please try to avoid it"
+
+//*************** functions attributes ********************************************
+//***********************************************************************************
+#ifdef __GNUC__ //GCC and any compiler that defines __GNUC__
+ #define _GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) //single comparable number, e.g. 4.5.0 -> 40500
+ #define _NEON2SSE_ALIGN_16 __attribute__((aligned(16)))
+ #define _NEON2SSE_INLINE extern inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+ #if _GCC_VERSION < 40500 //below 4.5.0: the explanation argument is dropped and a bare deprecated attribute is used
+ #define _NEON2SSE_PERFORMANCE_WARNING(function, explanation) __attribute__((deprecated)) function
+ #else //4.5.0 and later: the explanation string is passed to the deprecated attribute
+ #define _NEON2SSE_PERFORMANCE_WARNING(function, explanation) __attribute__((deprecated(explanation))) function
+ #endif
+#elif defined(_MSC_VER)|| defined (__INTEL_COMPILER) //__declspec spelling of the same three macros
+ #define _NEON2SSE_ALIGN_16 __declspec(align(16))
+ #define _NEON2SSE_INLINE __inline
+ #define _NEON2SSE_PERFORMANCE_WARNING(function, EXPLANATION) __declspec(deprecated(EXPLANATION)) function
+#else //any other compiler: plain inline and no warning. NOTE(review): the alignment macro still uses __declspec here - confirm that such compilers accept it
+ #define _NEON2SSE_ALIGN_16 __declspec(align(16))
+ #define _NEON2SSE_INLINE inline
+ #define _NEON2SSE_PERFORMANCE_WARNING(function, explanation) function
+#endif
+
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#define __constrange(min,max) const //range annotation used in the prototypes; expands to plain const, so min and max are not enforced
+#define __transfersize(size) //annotation only; expands to nothing
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+//*************************************************************************
+//*************************************************************************
+//********* Functions declarations as declared in original arm_neon.h *****
+//*************************************************************************
+//Vector add: vadd -> Vr[i]:=Va[i]+Vb[i], Vr, Va, Vb have equal lane sizes.
+
+int8x16_t vaddq_s8(int8x16_t a, int8x16_t b); // VADD.I8 q0,q0,q0
+int16x8_t vaddq_s16(int16x8_t a, int16x8_t b); // VADD.I16 q0,q0,q0
+int32x4_t vaddq_s32(int32x4_t a, int32x4_t b); // VADD.I32 q0,q0,q0
+int64x2_t vaddq_s64(int64x2_t a, int64x2_t b); // VADD.I64 q0,q0,q0
+float32x4_t vaddq_f32(float32x4_t a, float32x4_t b); // VADD.F32 q0,q0,q0
+uint8x16_t vaddq_u8(uint8x16_t a, uint8x16_t b); // VADD.I8 q0,q0,q0
+uint16x8_t vaddq_u16(uint16x8_t a, uint16x8_t b); // VADD.I16 q0,q0,q0
+uint32x4_t vaddq_u32(uint32x4_t a, uint32x4_t b); // VADD.I32 q0,q0,q0
+uint64x2_t vaddq_u64(uint64x2_t a, uint64x2_t b); // VADD.I64 q0,q0,q0
+//Vector long add: vaddl -> Vr[i]:=Va[i]+Vb[i], Va, Vb have equal lane sizes, result is a 128 bit vector of lanes that are twice the width.
+
+//Vector wide add: vaddw -> Vr[i]:=Va[i]+Vb[i]
+
+//Vector halving add: vhadd -> Vr[i]:=(Va[i]+Vb[i])>>1
+
+int8x16_t vhaddq_s8(int8x16_t a, int8x16_t b); // VHADD.S8 q0,q0,q0
+int16x8_t vhaddq_s16(int16x8_t a, int16x8_t b); // VHADD.S16 q0,q0,q0
+int32x4_t vhaddq_s32(int32x4_t a, int32x4_t b); // VHADD.S32 q0,q0,q0
+uint8x16_t vhaddq_u8(uint8x16_t a, uint8x16_t b); // VHADD.U8 q0,q0,q0
+uint16x8_t vhaddq_u16(uint16x8_t a, uint16x8_t b); // VHADD.U16 q0,q0,q0
+uint32x4_t vhaddq_u32(uint32x4_t a, uint32x4_t b); // VHADD.U32 q0,q0,q0
+//Vector rounding halving add: vrhadd -> Vr[i]:=(Va[i]+Vb[i]+1)>>1
+
+int8x16_t vrhaddq_s8(int8x16_t a, int8x16_t b); // VRHADD.S8 q0,q0,q0
+int16x8_t vrhaddq_s16(int16x8_t a, int16x8_t b); // VRHADD.S16 q0,q0,q0
+int32x4_t vrhaddq_s32(int32x4_t a, int32x4_t b); // VRHADD.S32 q0,q0,q0
+uint8x16_t vrhaddq_u8(uint8x16_t a, uint8x16_t b); // VRHADD.U8 q0,q0,q0
+uint16x8_t vrhaddq_u16(uint16x8_t a, uint16x8_t b); // VRHADD.U16 q0,q0,q0
+uint32x4_t vrhaddq_u32(uint32x4_t a, uint32x4_t b); // VRHADD.U32 q0,q0,q0
+//Vector saturating add: vqadd -> Vr[i]:=sat<size>(Va[i]+Vb[i])
+
+int8x16_t vqaddq_s8(int8x16_t a, int8x16_t b); // VQADD.S8 q0,q0,q0
+int16x8_t vqaddq_s16(int16x8_t a, int16x8_t b); // VQADD.S16 q0,q0,q0
+int32x4_t vqaddq_s32(int32x4_t a, int32x4_t b); // VQADD.S32 q0,q0,q0
+int64x2_t vqaddq_s64(int64x2_t a, int64x2_t b); // VQADD.S64 q0,q0,q0
+uint8x16_t vqaddq_u8(uint8x16_t a, uint8x16_t b); // VQADD.U8 q0,q0,q0
+uint16x8_t vqaddq_u16(uint16x8_t a, uint16x8_t b); // VQADD.U16 q0,q0,q0
+uint32x4_t vqaddq_u32(uint32x4_t a, uint32x4_t b); // VQADD.U32 q0,q0,q0
+uint64x2_t vqaddq_u64(uint64x2_t a, uint64x2_t b); // VQADD.U64 q0,q0,q0
+//Vector add high half: vaddhn-> Vr[i]:=Va[i]+Vb[i]
+
+//Vector rounding add high half: vraddhn
+
+//Multiplication
+//Vector multiply: vmul -> Vr[i] := Va[i] * Vb[i]
+
+int8x16_t vmulq_s8(int8x16_t a, int8x16_t b); // VMUL.I8 q0,q0,q0
+int16x8_t vmulq_s16(int16x8_t a, int16x8_t b); // VMUL.I16 q0,q0,q0
+int32x4_t vmulq_s32(int32x4_t a, int32x4_t b); // VMUL.I32 q0,q0,q0
+float32x4_t vmulq_f32(float32x4_t a, float32x4_t b); // VMUL.F32 q0,q0,q0
+uint8x16_t vmulq_u8(uint8x16_t a, uint8x16_t b); // VMUL.I8 q0,q0,q0
+uint16x8_t vmulq_u16(uint16x8_t a, uint16x8_t b); // VMUL.I16 q0,q0,q0
+uint32x4_t vmulq_u32(uint32x4_t a, uint32x4_t b); // VMUL.I32 q0,q0,q0
+poly8x16_t vmulq_p8(poly8x16_t a, poly8x16_t b); // VMUL.P8 q0,q0,q0
+//Vector multiply accumulate: vmla -> Vr[i] := Va[i] + Vb[i] * Vc[i]
+
+int8x16_t vmlaq_s8(int8x16_t a, int8x16_t b, int8x16_t c); // VMLA.I8 q0,q0,q0
+int16x8_t vmlaq_s16(int16x8_t a, int16x8_t b, int16x8_t c); // VMLA.I16 q0,q0,q0
+int32x4_t vmlaq_s32(int32x4_t a, int32x4_t b, int32x4_t c); // VMLA.I32 q0,q0,q0
+float32x4_t vmlaq_f32(float32x4_t a, float32x4_t b, float32x4_t c); // VMLA.F32 q0,q0,q0
+uint8x16_t vmlaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c); // VMLA.I8 q0,q0,q0
+uint16x8_t vmlaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c); // VMLA.I16 q0,q0,q0
+uint32x4_t vmlaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c); // VMLA.I32 q0,q0,q0
+//Vector multiply accumulate long: vmlal -> Vr[i] := Va[i] + Vb[i] * Vc[i]
+
+//Vector multiply subtract: vmls -> Vr[i] := Va[i] - Vb[i] * Vc[i]
+
+int8x16_t vmlsq_s8(int8x16_t a, int8x16_t b, int8x16_t c); // VMLS.I8 q0,q0,q0
+int16x8_t vmlsq_s16(int16x8_t a, int16x8_t b, int16x8_t c); // VMLS.I16 q0,q0,q0
+int32x4_t vmlsq_s32(int32x4_t a, int32x4_t b, int32x4_t c); // VMLS.I32 q0,q0,q0
+float32x4_t vmlsq_f32(float32x4_t a, float32x4_t b, float32x4_t c); // VMLS.F32 q0,q0,q0
+uint8x16_t vmlsq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c); // VMLS.I8 q0,q0,q0
+uint16x8_t vmlsq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c); // VMLS.I16 q0,q0,q0
+uint32x4_t vmlsq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c); // VMLS.I32 q0,q0,q0
+//Vector multiply subtract long
+
+//Vector saturating doubling multiply high
+
+int16x8_t vqdmulhq_s16(int16x8_t a, int16x8_t b); // VQDMULH.S16 q0,q0,q0
+int32x4_t vqdmulhq_s32(int32x4_t a, int32x4_t b); // VQDMULH.S32 q0,q0,q0
+//Vector saturating rounding doubling multiply high
+
+int16x8_t vqrdmulhq_s16(int16x8_t a, int16x8_t b); // VQRDMULH.S16 q0,q0,q0
+int32x4_t vqrdmulhq_s32(int32x4_t a, int32x4_t b); // VQRDMULH.S32 q0,q0,q0
+//Vector saturating doubling multiply accumulate long
+
+//Vector saturating doubling multiply subtract long
+
+//Vector long multiply
+
+//Vector saturating doubling long multiply
+
+//Subtraction
+//Vector subtract
+
+int8x16_t vsubq_s8(int8x16_t a, int8x16_t b); // VSUB.I8 q0,q0,q0
+int16x8_t vsubq_s16(int16x8_t a, int16x8_t b); // VSUB.I16 q0,q0,q0
+int32x4_t vsubq_s32(int32x4_t a, int32x4_t b); // VSUB.I32 q0,q0,q0
+int64x2_t vsubq_s64(int64x2_t a, int64x2_t b); // VSUB.I64 q0,q0,q0
+float32x4_t vsubq_f32(float32x4_t a, float32x4_t b); // VSUB.F32 q0,q0,q0
+uint8x16_t vsubq_u8(uint8x16_t a, uint8x16_t b); // VSUB.I8 q0,q0,q0
+uint16x8_t vsubq_u16(uint16x8_t a, uint16x8_t b); // VSUB.I16 q0,q0,q0
+uint32x4_t vsubq_u32(uint32x4_t a, uint32x4_t b); // VSUB.I32 q0,q0,q0
+uint64x2_t vsubq_u64(uint64x2_t a, uint64x2_t b); // VSUB.I64 q0,q0,q0
+//Vector long subtract: vsubl -> Vr[i]:=Va[i]-Vb[i]
+
+//Vector wide subtract: vsubw -> Vr[i]:=Va[i]-Vb[i]
+
+//Vector saturating subtract
+
+int8x16_t vqsubq_s8(int8x16_t a, int8x16_t b); // VQSUB.S8 q0,q0,q0
+int16x8_t vqsubq_s16(int16x8_t a, int16x8_t b); // VQSUB.S16 q0,q0,q0
+int32x4_t vqsubq_s32(int32x4_t a, int32x4_t b); // VQSUB.S32 q0,q0,q0
+int64x2_t vqsubq_s64(int64x2_t a, int64x2_t b); // VQSUB.S64 q0,q0,q0
+uint8x16_t vqsubq_u8(uint8x16_t a, uint8x16_t b); // VQSUB.U8 q0,q0,q0
+uint16x8_t vqsubq_u16(uint16x8_t a, uint16x8_t b); // VQSUB.U16 q0,q0,q0
+uint32x4_t vqsubq_u32(uint32x4_t a, uint32x4_t b); // VQSUB.U32 q0,q0,q0
+uint64x2_t vqsubq_u64(uint64x2_t a, uint64x2_t b); // VQSUB.U64 q0,q0,q0
+//Vector halving subtract
+
+int8x16_t vhsubq_s8(int8x16_t a, int8x16_t b); // VHSUB.S8 q0,q0,q0
+int16x8_t vhsubq_s16(int16x8_t a, int16x8_t b); // VHSUB.S16 q0,q0,q0
+int32x4_t vhsubq_s32(int32x4_t a, int32x4_t b); // VHSUB.S32 q0,q0,q0
+uint8x16_t vhsubq_u8(uint8x16_t a, uint8x16_t b); // VHSUB.U8 q0,q0,q0
+uint16x8_t vhsubq_u16(uint16x8_t a, uint16x8_t b); // VHSUB.U16 q0,q0,q0
+uint32x4_t vhsubq_u32(uint32x4_t a, uint32x4_t b); // VHSUB.U32 q0,q0,q0
+//Vector subtract high half
+
+//Vector rounding subtract high half
+
+//Comparison
+//Vector compare equal
+
+uint8x16_t vceqq_s8(int8x16_t a, int8x16_t b); // VCEQ.I8 q0, q0, q0
+uint16x8_t vceqq_s16(int16x8_t a, int16x8_t b); // VCEQ.I16 q0, q0, q0
+uint32x4_t vceqq_s32(int32x4_t a, int32x4_t b); // VCEQ.I32 q0, q0, q0
+uint32x4_t vceqq_f32(float32x4_t a, float32x4_t b); // VCEQ.F32 q0, q0, q0
+uint8x16_t vceqq_u8(uint8x16_t a, uint8x16_t b); // VCEQ.I8 q0, q0, q0
+uint16x8_t vceqq_u16(uint16x8_t a, uint16x8_t b); // VCEQ.I16 q0, q0, q0
+uint32x4_t vceqq_u32(uint32x4_t a, uint32x4_t b); // VCEQ.I32 q0, q0, q0
+uint8x16_t vceqq_p8(poly8x16_t a, poly8x16_t b); // VCEQ.I8 q0, q0, q0
+//Vector compare greater-than or equal
+
+uint8x16_t vcgeq_s8(int8x16_t a, int8x16_t b); // VCGE.S8 q0, q0, q0
+uint16x8_t vcgeq_s16(int16x8_t a, int16x8_t b); // VCGE.S16 q0, q0, q0
+uint32x4_t vcgeq_s32(int32x4_t a, int32x4_t b); // VCGE.S32 q0, q0, q0
+uint32x4_t vcgeq_f32(float32x4_t a, float32x4_t b); // VCGE.F32 q0, q0, q0
+uint8x16_t vcgeq_u8(uint8x16_t a, uint8x16_t b); // VCGE.U8 q0, q0, q0
+uint16x8_t vcgeq_u16(uint16x8_t a, uint16x8_t b); // VCGE.U16 q0, q0, q0
+uint32x4_t vcgeq_u32(uint32x4_t a, uint32x4_t b); // VCGE.U32 q0, q0, q0
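+//Vector compare less-than or equal (the VCGE mnemonics below are intentional: on ARM, a <= b is presumably issued as VCGE with the operands swapped)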
+
+uint8x16_t vcleq_s8(int8x16_t a, int8x16_t b); // VCGE.S8 q0, q0, q0
+uint16x8_t vcleq_s16(int16x8_t a, int16x8_t b); // VCGE.S16 q0, q0, q0
+uint32x4_t vcleq_s32(int32x4_t a, int32x4_t b); // VCGE.S32 q0, q0, q0
+uint32x4_t vcleq_f32(float32x4_t a, float32x4_t b); // VCGE.F32 q0, q0, q0
+uint8x16_t vcleq_u8(uint8x16_t a, uint8x16_t b); // VCGE.U8 q0, q0, q0
+uint16x8_t vcleq_u16(uint16x8_t a, uint16x8_t b); // VCGE.U16 q0, q0, q0
+uint32x4_t vcleq_u32(uint32x4_t a, uint32x4_t b); // VCGE.U32 q0, q0, q0
+//Vector compare greater-than
+
+uint8x16_t vcgtq_s8(int8x16_t a, int8x16_t b); // VCGT.S8 q0, q0, q0
+uint16x8_t vcgtq_s16(int16x8_t a, int16x8_t b); // VCGT.S16 q0, q0, q0
+uint32x4_t vcgtq_s32(int32x4_t a, int32x4_t b); // VCGT.S32 q0, q0, q0
+uint32x4_t vcgtq_f32(float32x4_t a, float32x4_t b); // VCGT.F32 q0, q0, q0
+uint8x16_t vcgtq_u8(uint8x16_t a, uint8x16_t b); // VCGT.U8 q0, q0, q0
+uint16x8_t vcgtq_u16(uint16x8_t a, uint16x8_t b); // VCGT.U16 q0, q0, q0
+uint32x4_t vcgtq_u32(uint32x4_t a, uint32x4_t b); // VCGT.U32 q0, q0, q0
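+//Vector compare less-than (the VCGT mnemonics below are intentional: on ARM, a < b is presumably issued as VCGT with the operands swapped)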
+
+uint8x16_t vcltq_s8(int8x16_t a, int8x16_t b); // VCGT.S8 q0, q0, q0
+uint16x8_t vcltq_s16(int16x8_t a, int16x8_t b); // VCGT.S16 q0, q0, q0
+uint32x4_t vcltq_s32(int32x4_t a, int32x4_t b); // VCGT.S32 q0, q0, q0
+uint32x4_t vcltq_f32(float32x4_t a, float32x4_t b); // VCGT.F32 q0, q0, q0
+uint8x16_t vcltq_u8(uint8x16_t a, uint8x16_t b); // VCGT.U8 q0, q0, q0
+uint16x8_t vcltq_u16(uint16x8_t a, uint16x8_t b); // VCGT.U16 q0, q0, q0
+uint32x4_t vcltq_u32(uint32x4_t a, uint32x4_t b); // VCGT.U32 q0, q0, q0
+//Vector compare absolute greater-than or equal
+
+uint32x4_t vcageq_f32(float32x4_t a, float32x4_t b); // VACGE.F32 q0, q0, q0
+//Vector compare absolute less-than or equal (VACGE below is intentional: presumably issued as VACGE with the operands swapped)
+
+uint32x4_t vcaleq_f32(float32x4_t a, float32x4_t b); // VACGE.F32 q0, q0, q0
+//Vector compare absolute greater-than
+
+uint32x4_t vcagtq_f32(float32x4_t a, float32x4_t b); // VACGT.F32 q0, q0, q0
+//Vector compare absolute less-than (VACGT below is intentional: presumably issued as VACGT with the operands swapped)
+
+uint32x4_t vcaltq_f32(float32x4_t a, float32x4_t b); // VACGT.F32 q0, q0, q0
+//Vector test bits
+
+uint8x16_t vtstq_s8(int8x16_t a, int8x16_t b); // VTST.8 q0, q0, q0
+uint16x8_t vtstq_s16(int16x8_t a, int16x8_t b); // VTST.16 q0, q0, q0
+uint32x4_t vtstq_s32(int32x4_t a, int32x4_t b); // VTST.32 q0, q0, q0
+uint8x16_t vtstq_u8(uint8x16_t a, uint8x16_t b); // VTST.8 q0, q0, q0
+uint16x8_t vtstq_u16(uint16x8_t a, uint16x8_t b); // VTST.16 q0, q0, q0
+uint32x4_t vtstq_u32(uint32x4_t a, uint32x4_t b); // VTST.32 q0, q0, q0
+uint8x16_t vtstq_p8(poly8x16_t a, poly8x16_t b); // VTST.8 q0, q0, q0
+//Absolute difference
+//Absolute difference between the arguments: Vr[i] = | Va[i] - Vb[i] |
+
+int8x16_t vabdq_s8(int8x16_t a, int8x16_t b); // VABD.S8 q0,q0,q0
+int16x8_t vabdq_s16(int16x8_t a, int16x8_t b); // VABD.S16 q0,q0,q0
+int32x4_t vabdq_s32(int32x4_t a, int32x4_t b); // VABD.S32 q0,q0,q0
+uint8x16_t vabdq_u8(uint8x16_t a, uint8x16_t b); // VABD.U8 q0,q0,q0
+uint16x8_t vabdq_u16(uint16x8_t a, uint16x8_t b); // VABD.U16 q0,q0,q0
+uint32x4_t vabdq_u32(uint32x4_t a, uint32x4_t b); // VABD.U32 q0,q0,q0
+float32x4_t vabdq_f32(float32x4_t a, float32x4_t b); // VABD.F32 q0,q0,q0
+//Absolute difference - long
+
+//Absolute difference and accumulate: Vr[i] = Va[i] + | Vb[i] - Vc[i] |
+
+int8x16_t vabaq_s8(int8x16_t a, int8x16_t b, int8x16_t c); // VABA.S8 q0,q0,q0
+int16x8_t vabaq_s16(int16x8_t a, int16x8_t b, int16x8_t c); // VABA.S16 q0,q0,q0
+int32x4_t vabaq_s32(int32x4_t a, int32x4_t b, int32x4_t c); // VABA.S32 q0,q0,q0
+uint8x16_t vabaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c); // VABA.U8 q0,q0,q0
+uint16x8_t vabaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c); // VABA.U16 q0,q0,q0
+uint32x4_t vabaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c); // VABA.U32 q0,q0,q0
+//Absolute difference and accumulate - long
+
+//Max/Min
+//vmax -> Vr[i] := (Va[i] >= Vb[i]) ? Va[i] : Vb[i]
+
+int8x16_t vmaxq_s8(int8x16_t a, int8x16_t b); // VMAX.S8 q0,q0,q0
+int16x8_t vmaxq_s16(int16x8_t a, int16x8_t b); // VMAX.S16 q0,q0,q0
+int32x4_t vmaxq_s32(int32x4_t a, int32x4_t b); // VMAX.S32 q0,q0,q0
+uint8x16_t vmaxq_u8(uint8x16_t a, uint8x16_t b); // VMAX.U8 q0,q0,q0
+uint16x8_t vmaxq_u16(uint16x8_t a, uint16x8_t b); // VMAX.U16 q0,q0,q0
+uint32x4_t vmaxq_u32(uint32x4_t a, uint32x4_t b); // VMAX.U32 q0,q0,q0
+float32x4_t vmaxq_f32(float32x4_t a, float32x4_t b); // VMAX.F32 q0,q0,q0
+//vmin -> Vr[i] := (Va[i] >= Vb[i]) ? Vb[i] : Va[i]
+
+int8x16_t vminq_s8(int8x16_t a, int8x16_t b); // VMIN.S8 q0,q0,q0
+int16x8_t vminq_s16(int16x8_t a, int16x8_t b); // VMIN.S16 q0,q0,q0
+int32x4_t vminq_s32(int32x4_t a, int32x4_t b); // VMIN.S32 q0,q0,q0
+uint8x16_t vminq_u8(uint8x16_t a, uint8x16_t b); // VMIN.U8 q0,q0,q0
+uint16x8_t vminq_u16(uint16x8_t a, uint16x8_t b); // VMIN.U16 q0,q0,q0
+uint32x4_t vminq_u32(uint32x4_t a, uint32x4_t b); // VMIN.U32 q0,q0,q0
+float32x4_t vminq_f32(float32x4_t a, float32x4_t b); // VMIN.F32 q0,q0,q0
+//Pairwise addition
+//Pairwise add
+
+//Long pairwise add
+
+int16x8_t vpaddlq_s8(int8x16_t a); // VPADDL.S8 q0,q0
+int32x4_t vpaddlq_s16(int16x8_t a); // VPADDL.S16 q0,q0
+int64x2_t vpaddlq_s32(int32x4_t a); // VPADDL.S32 q0,q0
+uint16x8_t vpaddlq_u8(uint8x16_t a); // VPADDL.U8 q0,q0
+uint32x4_t vpaddlq_u16(uint16x8_t a); // VPADDL.U16 q0,q0
+uint64x2_t vpaddlq_u32(uint32x4_t a); // VPADDL.U32 q0,q0
+//Long pairwise add and accumulate
+
+int16x8_t vpadalq_s8(int16x8_t a, int8x16_t b); // VPADAL.S8 q0,q0
+int32x4_t vpadalq_s16(int32x4_t a, int16x8_t b); // VPADAL.S16 q0,q0
+int64x2_t vpadalq_s32(int64x2_t a, int32x4_t b); // VPADAL.S32 q0,q0
+uint16x8_t vpadalq_u8(uint16x8_t a, uint8x16_t b); // VPADAL.U8 q0,q0
+uint32x4_t vpadalq_u16(uint32x4_t a, uint16x8_t b); // VPADAL.U16 q0,q0
+uint64x2_t vpadalq_u32(uint64x2_t a, uint32x4_t b); // VPADAL.U32 q0,q0
+//Folding maximum vpmax -> takes maximum of adjacent pairs
+
+//Folding minimum vpmin -> takes minimum of adjacent pairs
+
+//Reciprocal/Sqrt
+
+float32x4_t vrecpsq_f32(float32x4_t a, float32x4_t b); // VRECPS.F32 q0, q0, q0
+
+float32x4_t vrsqrtsq_f32(float32x4_t a, float32x4_t b); // VRSQRTS.F32 q0, q0, q0
+//Shifts by signed variable
+//Vector shift left: Vr[i] := Va[i] << Vb[i] (negative values shift right)
+
+int8x16_t vshlq_s8(int8x16_t a, int8x16_t b); // VSHL.S8 q0,q0,q0
+int16x8_t vshlq_s16(int16x8_t a, int16x8_t b); // VSHL.S16 q0,q0,q0
+int32x4_t vshlq_s32(int32x4_t a, int32x4_t b); // VSHL.S32 q0,q0,q0
+int64x2_t vshlq_s64(int64x2_t a, int64x2_t b); // VSHL.S64 q0,q0,q0
+uint8x16_t vshlq_u8(uint8x16_t a, int8x16_t b); // VSHL.U8 q0,q0,q0
+uint16x8_t vshlq_u16(uint16x8_t a, int16x8_t b); // VSHL.U16 q0,q0,q0
+uint32x4_t vshlq_u32(uint32x4_t a, int32x4_t b); // VSHL.U32 q0,q0,q0
+uint64x2_t vshlq_u64(uint64x2_t a, int64x2_t b); // VSHL.U64 q0,q0,q0
+//Vector saturating shift left: (negative values shift right)
+
+int8x16_t vqshlq_s8(int8x16_t a, int8x16_t b); // VQSHL.S8 q0,q0,q0
+int16x8_t vqshlq_s16(int16x8_t a, int16x8_t b); // VQSHL.S16 q0,q0,q0
+int32x4_t vqshlq_s32(int32x4_t a, int32x4_t b); // VQSHL.S32 q0,q0,q0
+int64x2_t vqshlq_s64(int64x2_t a, int64x2_t b); // VQSHL.S64 q0,q0,q0
+uint8x16_t vqshlq_u8(uint8x16_t a, int8x16_t b); // VQSHL.U8 q0,q0,q0
+uint16x8_t vqshlq_u16(uint16x8_t a, int16x8_t b); // VQSHL.U16 q0,q0,q0
+uint32x4_t vqshlq_u32(uint32x4_t a, int32x4_t b); // VQSHL.U32 q0,q0,q0
+uint64x2_t vqshlq_u64(uint64x2_t a, int64x2_t b); // VQSHL.U64 q0,q0,q0
+//Vector rounding shift left: (negative values shift right)
+
+int8x16_t vrshlq_s8(int8x16_t a, int8x16_t b); // VRSHL.S8 q0,q0,q0
+int16x8_t vrshlq_s16(int16x8_t a, int16x8_t b); // VRSHL.S16 q0,q0,q0
+int32x4_t vrshlq_s32(int32x4_t a, int32x4_t b); // VRSHL.S32 q0,q0,q0
+int64x2_t vrshlq_s64(int64x2_t a, int64x2_t b); // VRSHL.S64 q0,q0,q0
+uint8x16_t vrshlq_u8(uint8x16_t a, int8x16_t b); // VRSHL.U8 q0,q0,q0
+uint16x8_t vrshlq_u16(uint16x8_t a, int16x8_t b); // VRSHL.U16 q0,q0,q0
+uint32x4_t vrshlq_u32(uint32x4_t a, int32x4_t b); // VRSHL.U32 q0,q0,q0
+uint64x2_t vrshlq_u64(uint64x2_t a, int64x2_t b); // VRSHL.U64 q0,q0,q0
+//Vector saturating rounding shift left: (negative values shift right)
+
+int8x16_t vqrshlq_s8(int8x16_t a, int8x16_t b); // VQRSHL.S8 q0,q0,q0
+int16x8_t vqrshlq_s16(int16x8_t a, int16x8_t b); // VQRSHL.S16 q0,q0,q0
+int32x4_t vqrshlq_s32(int32x4_t a, int32x4_t b); // VQRSHL.S32 q0,q0,q0
+int64x2_t vqrshlq_s64(int64x2_t a, int64x2_t b); // VQRSHL.S64 q0,q0,q0
+uint8x16_t vqrshlq_u8(uint8x16_t a, int8x16_t b); // VQRSHL.U8 q0,q0,q0
+uint16x8_t vqrshlq_u16(uint16x8_t a, int16x8_t b); // VQRSHL.U16 q0,q0,q0
+uint32x4_t vqrshlq_u32(uint32x4_t a, int32x4_t b); // VQRSHL.U32 q0,q0,q0
+uint64x2_t vqrshlq_u64(uint64x2_t a, int64x2_t b); // VQRSHL.U64 q0,q0,q0
+//Shifts by a constant
+//Vector shift right by constant
+
+int8x16_t vshrq_n_s8(int8x16_t a, __constrange(1,8) int b); // VSHR.S8 q0,q0,#8
+int16x8_t vshrq_n_s16(int16x8_t a, __constrange(1,16) int b); // VSHR.S16 q0,q0,#16
+int32x4_t vshrq_n_s32(int32x4_t a, __constrange(1,32) int b); // VSHR.S32 q0,q0,#32
+int64x2_t vshrq_n_s64(int64x2_t a, __constrange(1,64) int b); // VSHR.S64 q0,q0,#64
+uint8x16_t vshrq_n_u8(uint8x16_t a, __constrange(1,8) int b); // VSHR.U8 q0,q0,#8
+uint16x8_t vshrq_n_u16(uint16x8_t a, __constrange(1,16) int b); // VSHR.U16 q0,q0,#16
+uint32x4_t vshrq_n_u32(uint32x4_t a, __constrange(1,32) int b); // VSHR.U32 q0,q0,#32
+uint64x2_t vshrq_n_u64(uint64x2_t a, __constrange(1,64) int b); // VSHR.U64 q0,q0,#64
+//Vector shift left by constant
+
+int8x16_t vshlq_n_s8(int8x16_t a, __constrange(0,7) int b); // VSHL.I8 q0,q0,#0
+int16x8_t vshlq_n_s16(int16x8_t a, __constrange(0,15) int b); // VSHL.I16 q0,q0,#0
+int32x4_t vshlq_n_s32(int32x4_t a, __constrange(0,31) int b); // VSHL.I32 q0,q0,#0
+int64x2_t vshlq_n_s64(int64x2_t a, __constrange(0,63) int b); // VSHL.I64 q0,q0,#0
+uint8x16_t vshlq_n_u8(uint8x16_t a, __constrange(0,7) int b); // VSHL.I8 q0,q0,#0
+uint16x8_t vshlq_n_u16(uint16x8_t a, __constrange(0,15) int b); // VSHL.I16 q0,q0,#0
+uint32x4_t vshlq_n_u32(uint32x4_t a, __constrange(0,31) int b); // VSHL.I32 q0,q0,#0
+uint64x2_t vshlq_n_u64(uint64x2_t a, __constrange(0,63) int b); // VSHL.I64 q0,q0,#0
+//Vector rounding shift right by constant
+
+int8x16_t vrshrq_n_s8(int8x16_t a, __constrange(1,8) int b); // VRSHR.S8 q0,q0,#8
+int16x8_t vrshrq_n_s16(int16x8_t a, __constrange(1,16) int b); // VRSHR.S16 q0,q0,#16
+int32x4_t vrshrq_n_s32(int32x4_t a, __constrange(1,32) int b); // VRSHR.S32 q0,q0,#32
+int64x2_t vrshrq_n_s64(int64x2_t a, __constrange(1,64) int b); // VRSHR.S64 q0,q0,#64
+uint8x16_t vrshrq_n_u8(uint8x16_t a, __constrange(1,8) int b); // VRSHR.U8 q0,q0,#8
+uint16x8_t vrshrq_n_u16(uint16x8_t a, __constrange(1,16) int b); // VRSHR.U16 q0,q0,#16
+uint32x4_t vrshrq_n_u32(uint32x4_t a, __constrange(1,32) int b); // VRSHR.U32 q0,q0,#32
+uint64x2_t vrshrq_n_u64(uint64x2_t a, __constrange(1,64) int b); // VRSHR.U64 q0,q0,#64
+//Vector shift right by constant and accumulate: each declaration takes two vectors a and b of the same type plus a shift amount c annotated with __constrange(1, element bits).
+// NOTE(review): presumably b is shifted right by c and the result is added to a - confirm against the NEON intrinsics reference.
+int8x16_t vsraq_n_s8(int8x16_t a, int8x16_t b, __constrange(1,8) int c); // VSRA.S8 q0,q0,#8
+int16x8_t vsraq_n_s16(int16x8_t a, int16x8_t b, __constrange(1,16) int c); // VSRA.S16 q0,q0,#16
+int32x4_t vsraq_n_s32(int32x4_t a, int32x4_t b, __constrange(1,32) int c); // VSRA.S32 q0,q0,#32
+int64x2_t vsraq_n_s64(int64x2_t a, int64x2_t b, __constrange(1,64) int c); // VSRA.S64 q0,q0,#64
+uint8x16_t vsraq_n_u8(uint8x16_t a, uint8x16_t b, __constrange(1,8) int c); // VSRA.U8 q0,q0,#8
+uint16x8_t vsraq_n_u16(uint16x8_t a, uint16x8_t b, __constrange(1,16) int c); // VSRA.U16 q0,q0,#16
+uint32x4_t vsraq_n_u32(uint32x4_t a, uint32x4_t b, __constrange(1,32) int c); // VSRA.U32 q0,q0,#32
+uint64x2_t vsraq_n_u64(uint64x2_t a, uint64x2_t b, __constrange(1,64) int c); // VSRA.U64 q0,q0,#64
+//Vector rounding shift right by constant and accumulate: each declaration takes two vectors a and b of the same type plus a shift amount c annotated with __constrange(1, element bits).
+// NOTE(review): presumably b is shifted right by c with rounding and the result is added to a - confirm against the NEON intrinsics reference.
+int8x16_t vrsraq_n_s8(int8x16_t a, int8x16_t b, __constrange(1,8) int c); // VRSRA.S8 q0,q0,#8
+int16x8_t vrsraq_n_s16(int16x8_t a, int16x8_t b, __constrange(1,16) int c); // VRSRA.S16 q0,q0,#16
+int32x4_t vrsraq_n_s32(int32x4_t a, int32x4_t b, __constrange(1,32) int c); // VRSRA.S32 q0,q0,#32
+int64x2_t vrsraq_n_s64(int64x2_t a, int64x2_t b, __constrange(1,64) int c); // VRSRA.S64 q0,q0,#64
+uint8x16_t vrsraq_n_u8(uint8x16_t a, uint8x16_t b, __constrange(1,8) int c); // VRSRA.U8 q0,q0,#8
+uint16x8_t vrsraq_n_u16(uint16x8_t a, uint16x8_t b, __constrange(1,16) int c); // VRSRA.U16 q0,q0,#16
+uint32x4_t vrsraq_n_u32(uint32x4_t a, uint32x4_t b, __constrange(1,32) int c); // VRSRA.U32 q0,q0,#32
+uint64x2_t vrsraq_n_u64(uint64x2_t a, uint64x2_t b, __constrange(1,64) int c); // VRSRA.U64 q0,q0,#64
+//Vector saturating shift left by constant: each declaration takes the vector a and a shift amount b and returns a vector of the same type as a.
+// b is annotated with __constrange(0, element bits - 1); the trailing comment on each line shows the corresponding VQSHL instruction form.
+int8x16_t vqshlq_n_s8(int8x16_t a, __constrange(0,7) int b); // VQSHL.S8 q0,q0,#0
+int16x8_t vqshlq_n_s16(int16x8_t a, __constrange(0,15) int b); // VQSHL.S16 q0,q0,#0
+int32x4_t vqshlq_n_s32(int32x4_t a, __constrange(0,31) int b); // VQSHL.S32 q0,q0,#0
+int64x2_t vqshlq_n_s64(int64x2_t a, __constrange(0,63) int b); // VQSHL.S64 q0,q0,#0
+uint8x16_t vqshlq_n_u8(uint8x16_t a, __constrange(0,7) int b); // VQSHL.U8 q0,q0,#0
+uint16x8_t vqshlq_n_u16(uint16x8_t a, __constrange(0,15) int b); // VQSHL.U16 q0,q0,#0
+uint32x4_t vqshlq_n_u32(uint32x4_t a, __constrange(0,31) int b); // VQSHL.U32 q0,q0,#0
+uint64x2_t vqshlq_n_u64(uint64x2_t a, __constrange(0,63) int b); // VQSHL.U64 q0,q0,#0
+//Vector signed->unsigned saturating shift left by constant: each declaration takes a signed vector a and returns the unsigned vector type of the same lane width and count.
+// b is annotated with __constrange(0, element bits - 1); the trailing comment on each line shows the corresponding VQSHLU instruction form.
+uint8x16_t vqshluq_n_s8(int8x16_t a, __constrange(0,7) int b); // VQSHLU.S8 q0,q0,#0
+uint16x8_t vqshluq_n_s16(int16x8_t a, __constrange(0,15) int b); // VQSHLU.S16 q0,q0,#0
+uint32x4_t vqshluq_n_s32(int32x4_t a, __constrange(0,31) int b); // VQSHLU.S32 q0,q0,#0
+uint64x2_t vqshluq_n_s64(int64x2_t a, __constrange(0,63) int b); // VQSHLU.S64 q0,q0,#0
+//Vector narrowing shift right by constant
+
+//Vector signed->unsigned narrowing saturating shift right by constant
+
+//Vector signed->unsigned rounding narrowing saturating shift right by constant
+
+//Vector narrowing saturating shift right by constant
+
+//Vector rounding narrowing shift right by constant
+
+//Vector rounding narrowing saturating shift right by constant
+
+//Vector widening shift left by constant
+
+//Shifts with insert
+//Vector shift right and insert: each declaration takes two vectors a and b of the same type plus a shift amount c, and returns that same vector type.
+// c is annotated with __constrange(1, element bits); signed, unsigned and polynomial (p8, p16) element types are covered, all mapped to the size-only VSRI.<bits> form.
+int8x16_t vsriq_n_s8(int8x16_t a, int8x16_t b, __constrange(1,8) int c); // VSRI.8 q0,q0,#8
+int16x8_t vsriq_n_s16(int16x8_t a, int16x8_t b, __constrange(1,16) int c); // VSRI.16 q0,q0,#16
+int32x4_t vsriq_n_s32(int32x4_t a, int32x4_t b, __constrange(1,32) int c); // VSRI.32 q0,q0,#32
+int64x2_t vsriq_n_s64(int64x2_t a, int64x2_t b, __constrange(1,64) int c); // VSRI.64 q0,q0,#64
+uint8x16_t vsriq_n_u8(uint8x16_t a, uint8x16_t b, __constrange(1,8) int c); // VSRI.8 q0,q0,#8
+uint16x8_t vsriq_n_u16(uint16x8_t a, uint16x8_t b, __constrange(1,16) int c); // VSRI.16 q0,q0,#16
+uint32x4_t vsriq_n_u32(uint32x4_t a, uint32x4_t b, __constrange(1,32) int c); // VSRI.32 q0,q0,#32
+uint64x2_t vsriq_n_u64(uint64x2_t a, uint64x2_t b, __constrange(1,64) int c); // VSRI.64 q0,q0,#64
+poly8x16_t vsriq_n_p8(poly8x16_t a, poly8x16_t b, __constrange(1,8) int c); // VSRI.8 q0,q0,#8
+poly16x8_t vsriq_n_p16(poly16x8_t a, poly16x8_t b, __constrange(1,16) int c); // VSRI.16 q0,q0,#16
+//Vector shift left and insert: each declaration takes two vectors a and b of the same type plus a shift amount c, and returns that same vector type.
+// c is annotated with __constrange(0, element bits - 1); signed, unsigned and polynomial (p8, p16) element types are covered, all mapped to the size-only VSLI.<bits> form.
+int8x16_t vsliq_n_s8(int8x16_t a, int8x16_t b, __constrange(0,7) int c); // VSLI.8 q0,q0,#0
+int16x8_t vsliq_n_s16(int16x8_t a, int16x8_t b, __constrange(0,15) int c); // VSLI.16 q0,q0,#0
+int32x4_t vsliq_n_s32(int32x4_t a, int32x4_t b, __constrange(0,31) int c); // VSLI.32 q0,q0,#0
+int64x2_t vsliq_n_s64(int64x2_t a, int64x2_t b, __constrange(0,63) int c); // VSLI.64 q0,q0,#0
+uint8x16_t vsliq_n_u8(uint8x16_t a, uint8x16_t b, __constrange(0,7) int c); // VSLI.8 q0,q0,#0
+uint16x8_t vsliq_n_u16(uint16x8_t a, uint16x8_t b, __constrange(0,15) int c); // VSLI.16 q0,q0,#0
+uint32x4_t vsliq_n_u32(uint32x4_t a, uint32x4_t b, __constrange(0,31) int c); // VSLI.32 q0,q0,#0
+uint64x2_t vsliq_n_u64(uint64x2_t a, uint64x2_t b, __constrange(0,63) int c); // VSLI.64 q0,q0,#0
+poly8x16_t vsliq_n_p8(poly8x16_t a, poly8x16_t b, __constrange(0,7) int c); // VSLI.8 q0,q0,#0
+poly16x8_t vsliq_n_p16(poly16x8_t a, poly16x8_t b, __constrange(0,15) int c); // VSLI.16 q0,q0,#0
+//Loads of a single vector or lane. Perform loads and stores of a single vector of some type.
+//Load a single vector from memory: ptr is a const pointer annotated with __transfersize(N), where N is the element count of the returned vector (16, 8, 4 or 2).
+uint8x16_t vld1q_u8(__transfersize(16) uint8_t const * ptr); // VLD1.8 {d0, d1}, [r0]
+uint16x8_t vld1q_u16(__transfersize(8) uint16_t const * ptr); // VLD1.16 {d0, d1}, [r0]
+uint32x4_t vld1q_u32(__transfersize(4) uint32_t const * ptr); // VLD1.32 {d0, d1}, [r0]
+uint64x2_t vld1q_u64(__transfersize(2) uint64_t const * ptr); // VLD1.64 {d0, d1}, [r0]
+int8x16_t vld1q_s8(__transfersize(16) int8_t const * ptr); // VLD1.8 {d0, d1}, [r0]
+int16x8_t vld1q_s16(__transfersize(8) int16_t const * ptr); // VLD1.16 {d0, d1}, [r0]
+int32x4_t vld1q_s32(__transfersize(4) int32_t const * ptr); // VLD1.32 {d0, d1}, [r0]
+int64x2_t vld1q_s64(__transfersize(2) int64_t const * ptr); // VLD1.64 {d0, d1}, [r0]
+float16x8_t vld1q_f16(__transfersize(8) __fp16 const * ptr); // VLD1.16 {d0, d1}, [r0]
+float32x4_t vld1q_f32(__transfersize(4) float32_t const * ptr); // VLD1.32 {d0, d1}, [r0]
+poly8x16_t vld1q_p8(__transfersize(16) poly8_t const * ptr); // VLD1.8 {d0, d1}, [r0]
+poly16x8_t vld1q_p16(__transfersize(8) poly16_t const * ptr); // VLD1.16 {d0, d1}, [r0]
+
+//Load a single lane from memory: ptr refers to one element (__transfersize(1)), vec is an existing vector, and lane is limited by __constrange to the lane count of vec. NOTE(review): presumably the result is vec with the selected lane replaced - confirm.
+uint8x16_t vld1q_lane_u8(__transfersize(1) uint8_t const * ptr, uint8x16_t vec, __constrange(0,15) int lane); //VLD1.8 {d0[0]}, [r0]
+uint16x8_t vld1q_lane_u16(__transfersize(1) uint16_t const * ptr, uint16x8_t vec, __constrange(0,7) int lane); // VLD1.16 {d0[0]}, [r0]
+uint32x4_t vld1q_lane_u32(__transfersize(1) uint32_t const * ptr, uint32x4_t vec, __constrange(0,3) int lane); // VLD1.32 {d0[0]}, [r0]
+uint64x2_t vld1q_lane_u64(__transfersize(1) uint64_t const * ptr, uint64x2_t vec, __constrange(0,1) int lane); // VLD1.64 {d0}, [r0]
+int8x16_t vld1q_lane_s8(__transfersize(1) int8_t const * ptr, int8x16_t vec, __constrange(0,15) int lane); //VLD1.8 {d0[0]}, [r0]
+int16x8_t vld1q_lane_s16(__transfersize(1) int16_t const * ptr, int16x8_t vec, __constrange(0,7) int lane); //VLD1.16 {d0[0]}, [r0]
+int32x4_t vld1q_lane_s32(__transfersize(1) int32_t const * ptr, int32x4_t vec, __constrange(0,3) int lane); //VLD1.32 {d0[0]}, [r0]
+float32x4_t vld1q_lane_f32(__transfersize(1) float32_t const * ptr, float32x4_t vec, __constrange(0,3) int lane); // VLD1.32 {d0[0]}, [r0]
+int64x2_t vld1q_lane_s64(__transfersize(1) int64_t const * ptr, int64x2_t vec, __constrange(0,1) int lane); //VLD1.64 {d0}, [r0]
+poly8x16_t vld1q_lane_p8(__transfersize(1) poly8_t const * ptr, poly8x16_t vec, __constrange(0,15) int lane); //VLD1.8 {d0[0]}, [r0]
+poly16x8_t vld1q_lane_p16(__transfersize(1) poly16_t const * ptr, poly16x8_t vec, __constrange(0,7) int lane); // VLD1.16 {d0[0]}, [r0]
+
+//Load all lanes of vector with same value from memory: ptr refers to a single element (__transfersize(1)) and the declaration returns a full vector of that element type.
+uint8x16_t vld1q_dup_u8(__transfersize(1) uint8_t const * ptr); // VLD1.8 {d0[]}, [r0]
+uint16x8_t vld1q_dup_u16(__transfersize(1) uint16_t const * ptr); // VLD1.16 {d0[]}, [r0]
+uint32x4_t vld1q_dup_u32(__transfersize(1) uint32_t const * ptr); // VLD1.32 {d0[]}, [r0]
+uint64x2_t vld1q_dup_u64(__transfersize(1) uint64_t const * ptr); // VLD1.64 {d0}, [r0]
+int8x16_t vld1q_dup_s8(__transfersize(1) int8_t const * ptr); // VLD1.8 {d0[]}, [r0]
+int16x8_t vld1q_dup_s16(__transfersize(1) int16_t const * ptr); // VLD1.16 {d0[]}, [r0]
+int32x4_t vld1q_dup_s32(__transfersize(1) int32_t const * ptr); // VLD1.32 {d0[]}, [r0]
+int64x2_t vld1q_dup_s64(__transfersize(1) int64_t const * ptr); // VLD1.64 {d0}, [r0]
+float16x8_t vld1q_dup_f16(__transfersize(1) __fp16 const * ptr); // VLD1.16 {d0[]}, [r0]
+float32x4_t vld1q_dup_f32(__transfersize(1) float32_t const * ptr); // VLD1.32 {d0[]}, [r0]
+poly8x16_t vld1q_dup_p8(__transfersize(1) poly8_t const * ptr); // VLD1.8 {d0[]}, [r0]
+poly16x8_t vld1q_dup_p16(__transfersize(1) poly16_t const * ptr); // VLD1.16 {d0[]}, [r0]
+
+//Store a single vector or lane. Stores all lanes or a single lane of a vector.
+//Store a single vector into memory: ptr is a non-const destination annotated with __transfersize(N), where N is the element count of val (16, 8, 4 or 2); nothing is returned.
+void vst1q_u8(__transfersize(16) uint8_t * ptr, uint8x16_t val); // VST1.8 {d0, d1}, [r0]
+void vst1q_u16(__transfersize(8) uint16_t * ptr, uint16x8_t val); // VST1.16 {d0, d1}, [r0]
+void vst1q_u32(__transfersize(4) uint32_t * ptr, uint32x4_t val); // VST1.32 {d0, d1}, [r0]
+void vst1q_u64(__transfersize(2) uint64_t * ptr, uint64x2_t val); // VST1.64 {d0, d1}, [r0]
+void vst1q_s8(__transfersize(16) int8_t * ptr, int8x16_t val); // VST1.8 {d0, d1}, [r0]
+void vst1q_s16(__transfersize(8) int16_t * ptr, int16x8_t val); // VST1.16 {d0, d1}, [r0]
+void vst1q_s32(__transfersize(4) int32_t * ptr, int32x4_t val); // VST1.32 {d0, d1}, [r0]
+void vst1q_s64(__transfersize(2) int64_t * ptr, int64x2_t val); // VST1.64 {d0, d1}, [r0]
+void vst1q_f16(__transfersize(8) __fp16 * ptr, float16x8_t val); // VST1.16 {d0, d1}, [r0]
+void vst1q_f32(__transfersize(4) float32_t * ptr, float32x4_t val); // VST1.32 {d0, d1}, [r0]
+void vst1q_p8(__transfersize(16) poly8_t * ptr, poly8x16_t val); // VST1.8 {d0, d1}, [r0]
+void vst1q_p16(__transfersize(8) poly16_t * ptr, poly16x8_t val); // VST1.16 {d0, d1}, [r0]
+
+//Store a lane of a vector into memory
+//Loads of an N-element structure
+//Load N-element structure from memory (N = 2, 3, 4). The __transfersize on ptr equals N times the element count of one returned vector; the 64-bit element variants show VLD1.64 in their trailing comment, and vld2_f16 is left commented out.
+uint8x16x2_t vld2q_u8(__transfersize(32) uint8_t const * ptr); // VLD2.8 {d0, d2}, [r0]
+uint16x8x2_t vld2q_u16(__transfersize(16) uint16_t const * ptr); // VLD2.16 {d0, d2}, [r0]
+uint32x4x2_t vld2q_u32(__transfersize(8) uint32_t const * ptr); // VLD2.32 {d0, d2}, [r0]
+int8x16x2_t vld2q_s8(__transfersize(32) int8_t const * ptr); // VLD2.8 {d0, d2}, [r0]
+int16x8x2_t vld2q_s16(__transfersize(16) int16_t const * ptr); // VLD2.16 {d0, d2}, [r0]
+int32x4x2_t vld2q_s32(__transfersize(8) int32_t const * ptr); // VLD2.32 {d0, d2}, [r0]
+float16x8x2_t vld2q_f16(__transfersize(16) __fp16 const * ptr); // VLD2.16 {d0, d2}, [r0]
+float32x4x2_t vld2q_f32(__transfersize(8) float32_t const * ptr); // VLD2.32 {d0, d2}, [r0]
+poly8x16x2_t vld2q_p8(__transfersize(32) poly8_t const * ptr); // VLD2.8 {d0, d2}, [r0]
+poly16x8x2_t vld2q_p16(__transfersize(16) poly16_t const * ptr); // VLD2.16 {d0, d2}, [r0]
+uint8x8x2_t vld2_u8(__transfersize(16) uint8_t const * ptr); // VLD2.8 {d0, d1}, [r0]
+uint16x4x2_t vld2_u16(__transfersize(8) uint16_t const * ptr); // VLD2.16 {d0, d1}, [r0]
+uint32x2x2_t vld2_u32(__transfersize(4) uint32_t const * ptr); // VLD2.32 {d0, d1}, [r0]
+uint64x1x2_t vld2_u64(__transfersize(2) uint64_t const * ptr); // VLD1.64 {d0, d1}, [r0]
+int8x8x2_t vld2_s8(__transfersize(16) int8_t const * ptr); // VLD2.8 {d0, d1}, [r0]
+int16x4x2_t vld2_s16(__transfersize(8) int16_t const * ptr); // VLD2.16 {d0, d1}, [r0]
+int32x2x2_t vld2_s32(__transfersize(4) int32_t const * ptr); // VLD2.32 {d0, d1}, [r0]
+int64x1x2_t vld2_s64(__transfersize(2) int64_t const * ptr); // VLD1.64 {d0, d1}, [r0]
+//float16x4x2_t vld2_f16(__transfersize(8) __fp16 const * ptr); // VLD2.16 {d0, d1}, [r0]
+float32x2x2_t vld2_f32(__transfersize(4) float32_t const * ptr); // VLD2.32 {d0, d1}, [r0]
+poly8x8x2_t vld2_p8(__transfersize(16) poly8_t const * ptr); // VLD2.8 {d0, d1}, [r0]
+poly16x4x2_t vld2_p16(__transfersize(8) poly16_t const * ptr); // VLD2.16 {d0, d1}, [r0]
+uint8x16x3_t vld3q_u8(__transfersize(48) uint8_t const * ptr); // VLD3.8 {d0, d2, d4}, [r0]
+uint16x8x3_t vld3q_u16(__transfersize(24) uint16_t const * ptr); // VLD3.16 {d0, d2, d4}, [r0]
+uint32x4x3_t vld3q_u32(__transfersize(12) uint32_t const * ptr); // VLD3.32 {d0, d2, d4}, [r0]
+int8x16x3_t vld3q_s8(__transfersize(48) int8_t const * ptr); // VLD3.8 {d0, d2, d4}, [r0]
+int16x8x3_t vld3q_s16(__transfersize(24) int16_t const * ptr); // VLD3.16 {d0, d2, d4}, [r0]
+int32x4x3_t vld3q_s32(__transfersize(12) int32_t const * ptr); // VLD3.32 {d0, d2, d4}, [r0]
+float16x8x3_t vld3q_f16(__transfersize(24) __fp16 const * ptr); // VLD3.16 {d0, d2, d4}, [r0]
+float32x4x3_t vld3q_f32(__transfersize(12) float32_t const * ptr); // VLD3.32 {d0, d2, d4}, [r0]
+poly8x16x3_t vld3q_p8(__transfersize(48) poly8_t const * ptr); // VLD3.8 {d0, d2, d4}, [r0]
+poly16x8x3_t vld3q_p16(__transfersize(24) poly16_t const * ptr); // VLD3.16 {d0, d2, d4}, [r0]
+uint8x8x3_t vld3_u8(__transfersize(24) uint8_t const * ptr); // VLD3.8 {d0, d1, d2}, [r0]
+uint16x4x3_t vld3_u16(__transfersize(12) uint16_t const * ptr); // VLD3.16 {d0, d1, d2}, [r0]
+uint32x2x3_t vld3_u32(__transfersize(6) uint32_t const * ptr); // VLD3.32 {d0, d1, d2}, [r0]
+uint64x1x3_t vld3_u64(__transfersize(3) uint64_t const * ptr); // VLD1.64 {d0, d1, d2}, [r0]
+int8x8x3_t vld3_s8(__transfersize(24) int8_t const * ptr); // VLD3.8 {d0, d1, d2}, [r0]
+int16x4x3_t vld3_s16(__transfersize(12) int16_t const * ptr); // VLD3.16 {d0, d1, d2}, [r0]
+int32x2x3_t vld3_s32(__transfersize(6) int32_t const * ptr); // VLD3.32 {d0, d1, d2}, [r0]
+int64x1x3_t vld3_s64(__transfersize(3) int64_t const * ptr); // VLD1.64 {d0, d1, d2}, [r0]
+float16x4x3_t vld3_f16(__transfersize(12) __fp16 const * ptr); // VLD3.16 {d0, d1, d2}, [r0]
+float32x2x3_t vld3_f32(__transfersize(6) float32_t const * ptr); // VLD3.32 {d0, d1, d2}, [r0]
+poly8x8x3_t vld3_p8(__transfersize(24) poly8_t const * ptr); // VLD3.8 {d0, d1, d2}, [r0]
+poly16x4x3_t vld3_p16(__transfersize(12) poly16_t const * ptr); // VLD3.16 {d0, d1, d2}, [r0]
+uint8x16x4_t vld4q_u8(__transfersize(64) uint8_t const * ptr); // VLD4.8 {d0, d2, d4, d6}, [r0]
+uint16x8x4_t vld4q_u16(__transfersize(32) uint16_t const * ptr); // VLD4.16 {d0, d2, d4, d6}, [r0]
+uint32x4x4_t vld4q_u32(__transfersize(16) uint32_t const * ptr); // VLD4.32 {d0, d2, d4, d6}, [r0]
+int8x16x4_t vld4q_s8(__transfersize(64) int8_t const * ptr); // VLD4.8 {d0, d2, d4, d6}, [r0]
+int16x8x4_t vld4q_s16(__transfersize(32) int16_t const * ptr); // VLD4.16 {d0, d2, d4, d6}, [r0]
+int32x4x4_t vld4q_s32(__transfersize(16) int32_t const * ptr); // VLD4.32 {d0, d2, d4, d6}, [r0]
+float16x8x4_t vld4q_f16(__transfersize(32) __fp16 const * ptr); // VLD4.16 {d0, d2, d4, d6}, [r0]
+float32x4x4_t vld4q_f32(__transfersize(16) float32_t const * ptr); // VLD4.32 {d0, d2, d4, d6}, [r0]
+poly8x16x4_t vld4q_p8(__transfersize(64) poly8_t const * ptr); // VLD4.8 {d0, d2, d4, d6}, [r0]
+poly16x8x4_t vld4q_p16(__transfersize(32) poly16_t const * ptr); // VLD4.16 {d0, d2, d4, d6}, [r0]
+uint8x8x4_t vld4_u8(__transfersize(32) uint8_t const * ptr); // VLD4.8 {d0, d1, d2, d3}, [r0]
+uint16x4x4_t vld4_u16(__transfersize(16) uint16_t const * ptr); // VLD4.16 {d0, d1, d2, d3}, [r0]
+uint32x2x4_t vld4_u32(__transfersize(8) uint32_t const * ptr); // VLD4.32 {d0, d1, d2, d3}, [r0]
+uint64x1x4_t vld4_u64(__transfersize(4) uint64_t const * ptr); // VLD1.64 {d0, d1, d2, d3}, [r0]
+int8x8x4_t vld4_s8(__transfersize(32) int8_t const * ptr); // VLD4.8 {d0, d1, d2, d3}, [r0]
+int16x4x4_t vld4_s16(__transfersize(16) int16_t const * ptr); // VLD4.16 {d0, d1, d2, d3}, [r0]
+int32x2x4_t vld4_s32(__transfersize(8) int32_t const * ptr); // VLD4.32 {d0, d1, d2, d3}, [r0]
+int64x1x4_t vld4_s64(__transfersize(4) int64_t const * ptr); // VLD1.64 {d0, d1, d2, d3}, [r0]
+float16x4x4_t vld4_f16(__transfersize(16) __fp16 const * ptr); // VLD4.16 {d0, d1, d2, d3}, [r0]
+float32x2x4_t vld4_f32(__transfersize(8) float32_t const * ptr); // VLD4.32 {d0, d1, d2, d3}, [r0]
+poly8x8x4_t vld4_p8(__transfersize(32) poly8_t const * ptr); // VLD4.8 {d0, d1, d2, d3}, [r0]
+poly16x4x4_t vld4_p16(__transfersize(16) poly16_t const * ptr); // VLD4.16 {d0, d1, d2, d3}, [r0]
+//Load all lanes of N-element structure with same value from memory (N = 2, 3, 4). The __transfersize on ptr equals N; the 64-bit element variants show VLD1.64 in their trailing comment, and vld2_dup_f16 is left commented out.
+uint8x8x2_t vld2_dup_u8(__transfersize(2) uint8_t const * ptr); // VLD2.8 {d0[], d1[]}, [r0]
+uint16x4x2_t vld2_dup_u16(__transfersize(2) uint16_t const * ptr); // VLD2.16 {d0[], d1[]}, [r0]
+uint32x2x2_t vld2_dup_u32(__transfersize(2) uint32_t const * ptr); // VLD2.32 {d0[], d1[]}, [r0]
+uint64x1x2_t vld2_dup_u64(__transfersize(2) uint64_t const * ptr); // VLD1.64 {d0, d1}, [r0]
+int8x8x2_t vld2_dup_s8(__transfersize(2) int8_t const * ptr); // VLD2.8 {d0[], d1[]}, [r0]
+int16x4x2_t vld2_dup_s16(__transfersize(2) int16_t const * ptr); // VLD2.16 {d0[], d1[]}, [r0]
+int32x2x2_t vld2_dup_s32(__transfersize(2) int32_t const * ptr); // VLD2.32 {d0[], d1[]}, [r0]
+int64x1x2_t vld2_dup_s64(__transfersize(2) int64_t const * ptr); // VLD1.64 {d0, d1}, [r0]
+//float16x4x2_t vld2_dup_f16(__transfersize(2) __fp16 const * ptr); // VLD2.16 {d0[], d1[]}, [r0]
+float32x2x2_t vld2_dup_f32(__transfersize(2) float32_t const * ptr); // VLD2.32 {d0[], d1[]}, [r0]
+poly8x8x2_t vld2_dup_p8(__transfersize(2) poly8_t const * ptr); // VLD2.8 {d0[], d1[]}, [r0]
+poly16x4x2_t vld2_dup_p16(__transfersize(2) poly16_t const * ptr); // VLD2.16 {d0[], d1[]}, [r0]
+uint8x8x3_t vld3_dup_u8(__transfersize(3) uint8_t const * ptr); // VLD3.8 {d0[], d1[], d2[]}, [r0]
+uint16x4x3_t vld3_dup_u16(__transfersize(3) uint16_t const * ptr); // VLD3.16 {d0[], d1[], d2[]}, [r0]
+uint32x2x3_t vld3_dup_u32(__transfersize(3) uint32_t const * ptr); // VLD3.32 {d0[], d1[], d2[]}, [r0]
+uint64x1x3_t vld3_dup_u64(__transfersize(3) uint64_t const * ptr); // VLD1.64 {d0, d1, d2}, [r0]
+int8x8x3_t vld3_dup_s8(__transfersize(3) int8_t const * ptr); // VLD3.8 {d0[], d1[], d2[]}, [r0]
+int16x4x3_t vld3_dup_s16(__transfersize(3) int16_t const * ptr); // VLD3.16 {d0[], d1[], d2[]}, [r0]
+int32x2x3_t vld3_dup_s32(__transfersize(3) int32_t const * ptr); // VLD3.32 {d0[], d1[], d2[]}, [r0]
+int64x1x3_t vld3_dup_s64(__transfersize(3) int64_t const * ptr); // VLD1.64 {d0, d1, d2}, [r0]
+float16x4x3_t vld3_dup_f16(__transfersize(3) __fp16 const * ptr); // VLD3.16 {d0[], d1[], d2[]}, [r0]
+float32x2x3_t vld3_dup_f32(__transfersize(3) float32_t const * ptr); // VLD3.32 {d0[], d1[], d2[]}, [r0]
+poly8x8x3_t vld3_dup_p8(__transfersize(3) poly8_t const * ptr); // VLD3.8 {d0[], d1[], d2[]}, [r0]
+poly16x4x3_t vld3_dup_p16(__transfersize(3) poly16_t const * ptr); // VLD3.16 {d0[], d1[], d2[]}, [r0]
+uint8x8x4_t vld4_dup_u8(__transfersize(4) uint8_t const * ptr); // VLD4.8 {d0[], d1[], d2[], d3[]}, [r0]
+uint16x4x4_t vld4_dup_u16(__transfersize(4) uint16_t const * ptr); // VLD4.16 {d0[], d1[], d2[], d3[]}, [r0]
+uint32x2x4_t vld4_dup_u32(__transfersize(4) uint32_t const * ptr); // VLD4.32 {d0[], d1[], d2[], d3[]}, [r0]
+uint64x1x4_t vld4_dup_u64(__transfersize(4) uint64_t const * ptr); // VLD1.64 {d0, d1, d2, d3}, [r0]
+int8x8x4_t vld4_dup_s8(__transfersize(4) int8_t const * ptr); // VLD4.8 {d0[], d1[], d2[], d3[]}, [r0]
+int16x4x4_t vld4_dup_s16(__transfersize(4) int16_t const * ptr); // VLD4.16 {d0[], d1[], d2[], d3[]}, [r0]
+int32x2x4_t vld4_dup_s32(__transfersize(4) int32_t const * ptr); // VLD4.32 {d0[], d1[], d2[], d3[]}, [r0]
+int64x1x4_t vld4_dup_s64(__transfersize(4) int64_t const * ptr); // VLD1.64 {d0, d1, d2, d3}, [r0]
+float16x4x4_t vld4_dup_f16(__transfersize(4) __fp16 const * ptr); // VLD4.16 {d0[], d1[], d2[], d3[]}, [r0]
+float32x2x4_t vld4_dup_f32(__transfersize(4) float32_t const * ptr); // VLD4.32 {d0[], d1[], d2[], d3[]}, [r0]
+poly8x8x4_t vld4_dup_p8(__transfersize(4) poly8_t const * ptr); // VLD4.8 {d0[], d1[], d2[], d3[]}, [r0]
+poly16x4x4_t vld4_dup_p16(__transfersize(4) poly16_t const * ptr); // VLD4.16 {d0[], d1[], d2[], d3[]}, [r0]
+//Load a single lane of N-element structure from memory (N = 2, 3, 4). The __transfersize on ptr equals N and the lane index is limited by __constrange to the lane count of one vector; vld2_lane_f16_ptr is left commented out.
+//The functions below carry a _ptr suffix and take 'src' by pointer; they were modified this way to deal with the error C2719: 'src': formal parameter with __declspec(align('16')) won't be aligned
+uint16x8x2_t vld2q_lane_u16_ptr(__transfersize(2) uint16_t const * ptr, uint16x8x2_t * src, __constrange(0,7) int lane); // VLD2.16 {d0[0], d2[0]}, [r0]
+uint32x4x2_t vld2q_lane_u32_ptr(__transfersize(2) uint32_t const * ptr, uint32x4x2_t * src, __constrange(0,3) int lane); // VLD2.32 {d0[0], d2[0]}, [r0]
+int16x8x2_t vld2q_lane_s16_ptr(__transfersize(2) int16_t const * ptr, int16x8x2_t * src, __constrange(0,7) int lane); // VLD2.16 {d0[0], d2[0]}, [r0]
+int32x4x2_t vld2q_lane_s32_ptr(__transfersize(2) int32_t const * ptr, int32x4x2_t * src, __constrange(0,3) int lane); // VLD2.32 {d0[0], d2[0]}, [r0]
+float16x8x2_t vld2q_lane_f16_ptr(__transfersize(2) __fp16 const * ptr, float16x8x2_t * src, __constrange(0,7) int lane); // VLD2.16 {d0[0], d2[0]}, [r0]
+float32x4x2_t vld2q_lane_f32_ptr(__transfersize(2) float32_t const * ptr, float32x4x2_t * src, __constrange(0,3) int lane); // VLD2.32 {d0[0], d2[0]}, [r0]
+poly16x8x2_t vld2q_lane_p16_ptr(__transfersize(2) poly16_t const * ptr, poly16x8x2_t * src, __constrange(0,7) int lane); // VLD2.16 {d0[0], d2[0]}, [r0]
+uint8x8x2_t vld2_lane_u8_ptr(__transfersize(2) uint8_t const * ptr, uint8x8x2_t * src, __constrange(0,7) int lane); //VLD2.8 {d0[0], d1[0]}, [r0]
+uint16x4x2_t vld2_lane_u16_ptr(__transfersize(2) uint16_t const * ptr, uint16x4x2_t * src, __constrange(0,3) int lane); // VLD2.16 {d0[0], d1[0]}, [r0]
+uint32x2x2_t vld2_lane_u32_ptr(__transfersize(2) uint32_t const * ptr, uint32x2x2_t * src, __constrange(0,1) int lane); // VLD2.32 {d0[0], d1[0]}, [r0]
+int8x8x2_t vld2_lane_s8_ptr(__transfersize(2) int8_t const * ptr, int8x8x2_t * src, __constrange(0,7) int lane); //VLD2.8 {d0[0], d1[0]}, [r0]
+int16x4x2_t vld2_lane_s16_ptr(__transfersize(2) int16_t const * ptr, int16x4x2_t * src, __constrange(0,3) int lane); //VLD2.16 {d0[0], d1[0]}, [r0]
+int32x2x2_t vld2_lane_s32_ptr(__transfersize(2) int32_t const * ptr, int32x2x2_t * src, __constrange(0,1) int lane); //VLD2.32 {d0[0], d1[0]}, [r0]
+//float16x4x2_t vld2_lane_f16_ptr(__transfersize(2) __fp16 const * ptr, float16x4x2_t * src, __constrange(0,3) int lane); // VLD2.16 {d0[0], d1[0]}, [r0]
+float32x2x2_t vld2_lane_f32_ptr(__transfersize(2) float32_t const * ptr, float32x2x2_t * src, __constrange(0,1) int lane); // VLD2.32 {d0[0], d1[0]}, [r0]
+poly8x8x2_t vld2_lane_p8_ptr(__transfersize(2) poly8_t const * ptr, poly8x8x2_t * src, __constrange(0,7) int lane); //VLD2.8 {d0[0], d1[0]}, [r0]
+poly16x4x2_t vld2_lane_p16_ptr(__transfersize(2) poly16_t const * ptr, poly16x4x2_t * src, __constrange(0,3) int lane); // VLD2.16 {d0[0], d1[0]}, [r0]
+uint16x8x3_t vld3q_lane_u16_ptr(__transfersize(3) uint16_t const * ptr, uint16x8x3_t * src, __constrange(0,7) int lane); // VLD3.16 {d0[0], d2[0], d4[0]}, [r0]
+uint32x4x3_t vld3q_lane_u32_ptr(__transfersize(3) uint32_t const * ptr, uint32x4x3_t * src, __constrange(0,3) int lane); // VLD3.32 {d0[0], d2[0], d4[0]}, [r0]
+int16x8x3_t vld3q_lane_s16_ptr(__transfersize(3) int16_t const * ptr, int16x8x3_t * src, __constrange(0,7) int lane); // VLD3.16 {d0[0], d2[0], d4[0]}, [r0]
+int32x4x3_t vld3q_lane_s32_ptr(__transfersize(3) int32_t const * ptr, int32x4x3_t * src, __constrange(0,3) int lane); // VLD3.32 {d0[0], d2[0], d4[0]}, [r0]
+float16x8x3_t vld3q_lane_f16_ptr(__transfersize(3) __fp16 const * ptr, float16x8x3_t * src, __constrange(0,7) int lane); // VLD3.16 {d0[0], d2[0], d4[0]}, [r0]
+float32x4x3_t vld3q_lane_f32_ptr(__transfersize(3) float32_t const * ptr, float32x4x3_t * src, __constrange(0,3) int lane); // VLD3.32 {d0[0], d2[0], d4[0]}, [r0]
+poly16x8x3_t vld3q_lane_p16_ptr(__transfersize(3) poly16_t const * ptr, poly16x8x3_t * src, __constrange(0,7) int lane); // VLD3.16 {d0[0], d2[0], d4[0]}, [r0]
+uint8x8x3_t vld3_lane_u8_ptr(__transfersize(3) uint8_t const * ptr, uint8x8x3_t * src, __constrange(0,7) int lane); //VLD3.8 {d0[0], d1[0], d2[0]}, [r0]
+uint16x4x3_t vld3_lane_u16_ptr(__transfersize(3) uint16_t const * ptr, uint16x4x3_t * src, __constrange(0,3) int lane); // VLD3.16 {d0[0], d1[0], d2[0]}, [r0]
+uint32x2x3_t vld3_lane_u32_ptr(__transfersize(3) uint32_t const * ptr, uint32x2x3_t * src, __constrange(0,1) int lane); // VLD3.32 {d0[0], d1[0], d2[0]}, [r0]
+int8x8x3_t vld3_lane_s8_ptr(__transfersize(3) int8_t const * ptr, int8x8x3_t * src, __constrange(0,7) int lane); //VLD3.8 {d0[0], d1[0], d2[0]}, [r0]
+int16x4x3_t vld3_lane_s16_ptr(__transfersize(3) int16_t const * ptr, int16x4x3_t * src, __constrange(0,3) int lane); //VLD3.16 {d0[0], d1[0], d2[0]}, [r0]
+int32x2x3_t vld3_lane_s32_ptr(__transfersize(3) int32_t const * ptr, int32x2x3_t * src, __constrange(0,1) int lane); //VLD3.32 {d0[0], d1[0], d2[0]}, [r0]
+float16x4x3_t vld3_lane_f16_ptr(__transfersize(3) __fp16 const * ptr, float16x4x3_t * src, __constrange(0,3) int lane); // VLD3.16 {d0[0], d1[0], d2[0]}, [r0]
+float32x2x3_t vld3_lane_f32_ptr(__transfersize(3) float32_t const * ptr, float32x2x3_t * src, __constrange(0,1) int lane); // VLD3.32 {d0[0], d1[0], d2[0]}, [r0]
+poly8x8x3_t vld3_lane_p8_ptr(__transfersize(3) poly8_t const * ptr, poly8x8x3_t * src, __constrange(0,7) int lane); //VLD3.8 {d0[0], d1[0], d2[0]}, [r0]
+poly16x4x3_t vld3_lane_p16_ptr(__transfersize(3) poly16_t const * ptr, poly16x4x3_t * src, __constrange(0,3) int lane); // VLD3.16 {d0[0], d1[0], d2[0]}, [r0]
+uint16x8x4_t vld4q_lane_u16_ptr(__transfersize(4) uint16_t const * ptr, uint16x8x4_t * src, __constrange(0,7) int lane); // VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0]
+uint32x4x4_t vld4q_lane_u32_ptr(__transfersize(4) uint32_t const * ptr, uint32x4x4_t * src, __constrange(0,3) int lane); // VLD4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0]
+int16x8x4_t vld4q_lane_s16_ptr(__transfersize(4) int16_t const * ptr, int16x8x4_t * src, __constrange(0,7) int lane); // VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0]
+int32x4x4_t vld4q_lane_s32_ptr(__transfersize(4) int32_t const * ptr, int32x4x4_t * src, __constrange(0,3) int lane); // VLD4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0]
+float16x8x4_t vld4q_lane_f16_ptr(__transfersize(4) __fp16 const * ptr, float16x8x4_t * src, __constrange(0,7) int lane); // VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0]
+float32x4x4_t vld4q_lane_f32_ptr(__transfersize(4) float32_t const * ptr, float32x4x4_t * src, __constrange(0,3) int lane); // VLD4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0]
+poly16x8x4_t vld4q_lane_p16_ptr(__transfersize(4) poly16_t const * ptr, poly16x8x4_t * src, __constrange(0,7) int lane); // VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0]
+uint8x8x4_t vld4_lane_u8_ptr(__transfersize(4) uint8_t const * ptr, uint8x8x4_t * src, __constrange(0,7) int lane); //VLD4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+uint16x4x4_t vld4_lane_u16_ptr(__transfersize(4) uint16_t const * ptr, uint16x4x4_t * src, __constrange(0,3) int lane); // VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+uint32x2x4_t vld4_lane_u32_ptr(__transfersize(4) uint32_t const * ptr, uint32x2x4_t * src, __constrange(0,1) int lane); // VLD4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+int8x8x4_t vld4_lane_s8_ptr(__transfersize(4) int8_t const * ptr, int8x8x4_t * src, __constrange(0,7) int lane); //VLD4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+int16x4x4_t vld4_lane_s16_ptr(__transfersize(4) int16_t const * ptr, int16x4x4_t * src, __constrange(0,3) int lane); //VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+int32x2x4_t vld4_lane_s32_ptr(__transfersize(4) int32_t const * ptr, int32x2x4_t * src, __constrange(0,1) int lane); //VLD4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+float16x4x4_t vld4_lane_f16_ptr(__transfersize(4) __fp16 const * ptr, float16x4x4_t * src, __constrange(0,3) int lane); // VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+float32x2x4_t vld4_lane_f32_ptr(__transfersize(4) float32_t const * ptr, float32x2x4_t * src, __constrange(0,1) int lane); // VLD4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+poly8x8x4_t vld4_lane_p8_ptr(__transfersize(4) poly8_t const * ptr, poly8x8x4_t * src, __constrange(0,7) int lane); //VLD4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+poly16x4x4_t vld4_lane_p16_ptr(__transfersize(4) poly16_t const * ptr, poly16x4x4_t * src, __constrange(0,3) int lane); // VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+//Store N-element structure to memory (N = 2, 3, 4). These declarations carry a _ptr suffix and take val by pointer; the __transfersize on ptr equals N times the element count of one vector of val, and vst2_f16_ptr is left commented out.
+void vst2q_u8_ptr(__transfersize(32) uint8_t * ptr, uint8x16x2_t * val); // VST2.8 {d0, d2}, [r0]
+void vst2q_u16_ptr(__transfersize(16) uint16_t * ptr, uint16x8x2_t * val); // VST2.16 {d0, d2}, [r0]
+void vst2q_u32_ptr(__transfersize(8) uint32_t * ptr, uint32x4x2_t * val); // VST2.32 {d0, d2}, [r0]
+void vst2q_s8_ptr(__transfersize(32) int8_t * ptr, int8x16x2_t * val); // VST2.8 {d0, d2}, [r0]
+void vst2q_s16_ptr(__transfersize(16) int16_t * ptr, int16x8x2_t * val); // VST2.16 {d0, d2}, [r0]
+void vst2q_s32_ptr(__transfersize(8) int32_t * ptr, int32x4x2_t * val); // VST2.32 {d0, d2}, [r0]
+void vst2q_f16_ptr(__transfersize(16) __fp16 * ptr, float16x8x2_t * val); // VST2.16 {d0, d2}, [r0]
+void vst2q_f32_ptr(__transfersize(8) float32_t * ptr, float32x4x2_t * val); // VST2.32 {d0, d2}, [r0]
+void vst2q_p8_ptr(__transfersize(32) poly8_t * ptr, poly8x16x2_t * val); // VST2.8 {d0, d2}, [r0]
+void vst2q_p16_ptr(__transfersize(16) poly16_t * ptr, poly16x8x2_t * val); // VST2.16 {d0, d2}, [r0]
+void vst2_u8_ptr(__transfersize(16) uint8_t * ptr, uint8x8x2_t * val); // VST2.8 {d0, d1}, [r0]
+void vst2_u16_ptr(__transfersize(8) uint16_t * ptr, uint16x4x2_t * val); // VST2.16 {d0, d1}, [r0]
+void vst2_u32_ptr(__transfersize(4) uint32_t * ptr, uint32x2x2_t * val); // VST2.32 {d0, d1}, [r0]
+void vst2_u64_ptr(__transfersize(2) uint64_t * ptr, uint64x1x2_t * val); // VST1.64 {d0, d1}, [r0]
+void vst2_s8_ptr(__transfersize(16) int8_t * ptr, int8x8x2_t * val); // VST2.8 {d0, d1}, [r0]
+void vst2_s16_ptr(__transfersize(8) int16_t * ptr, int16x4x2_t * val); // VST2.16 {d0, d1}, [r0]
+void vst2_s32_ptr(__transfersize(4) int32_t * ptr, int32x2x2_t * val); // VST2.32 {d0, d1}, [r0]
+void vst2_s64_ptr(__transfersize(2) int64_t * ptr, int64x1x2_t * val); // VST1.64 {d0, d1}, [r0]
+//void vst2_f16_ptr(__transfersize(8) __fp16 * ptr, float16x4x2_t * val); // VST2.16 {d0, d1}, [r0]
+void vst2_f32_ptr(__transfersize(4) float32_t * ptr, float32x2x2_t * val); // VST2.32 {d0, d1}, [r0]
+void vst2_p8_ptr(__transfersize(16) poly8_t * ptr, poly8x8x2_t * val); // VST2.8 {d0, d1}, [r0]
+void vst2_p16_ptr(__transfersize(8) poly16_t * ptr, poly16x4x2_t * val); // VST2.16 {d0, d1}, [r0]
+void vst3q_u8_ptr(__transfersize(48) uint8_t * ptr, uint8x16x3_t * val); // VST3.8 {d0, d2, d4}, [r0]
+void vst3q_u16_ptr(__transfersize(24) uint16_t * ptr, uint16x8x3_t * val); // VST3.16 {d0, d2, d4}, [r0]
+void vst3q_u32_ptr(__transfersize(12) uint32_t * ptr, uint32x4x3_t * val); // VST3.32 {d0, d2, d4}, [r0]
+void vst3q_s8_ptr(__transfersize(48) int8_t * ptr, int8x16x3_t * val); // VST3.8 {d0, d2, d4}, [r0]
+void vst3q_s16_ptr(__transfersize(24) int16_t * ptr, int16x8x3_t * val); // VST3.16 {d0, d2, d4}, [r0]
+void vst3q_s32_ptr(__transfersize(12) int32_t * ptr, int32x4x3_t * val); // VST3.32 {d0, d2, d4}, [r0]
+void vst3q_f16_ptr(__transfersize(24) __fp16 * ptr, float16x8x3_t * val); // VST3.16 {d0, d2, d4}, [r0]
+void vst3q_f32_ptr(__transfersize(12) float32_t * ptr, float32x4x3_t * val); // VST3.32 {d0, d2, d4}, [r0]
+void vst3q_p8_ptr(__transfersize(48) poly8_t * ptr, poly8x16x3_t * val); // VST3.8 {d0, d2, d4}, [r0]
+void vst3q_p16_ptr(__transfersize(24) poly16_t * ptr, poly16x8x3_t * val); // VST3.16 {d0, d2, d4}, [r0]
+void vst3_u8_ptr(__transfersize(24) uint8_t * ptr, uint8x8x3_t * val); // VST3.8 {d0, d1, d2}, [r0]
+void vst3_u16_ptr(__transfersize(12) uint16_t * ptr, uint16x4x3_t * val); // VST3.16 {d0, d1, d2}, [r0]
+void vst3_u32_ptr(__transfersize(6) uint32_t * ptr, uint32x2x3_t * val); // VST3.32 {d0, d1, d2}, [r0]
+void vst3_u64_ptr(__transfersize(3) uint64_t * ptr, uint64x1x3_t * val); // VST1.64 {d0, d1, d2}, [r0]
+void vst3_s8_ptr(__transfersize(24) int8_t * ptr, int8x8x3_t * val); // VST3.8 {d0, d1, d2}, [r0]
+void vst3_s16_ptr(__transfersize(12) int16_t * ptr, int16x4x3_t * val); // VST3.16 {d0, d1, d2}, [r0]
+void vst3_s32_ptr(__transfersize(6) int32_t * ptr, int32x2x3_t * val); // VST3.32 {d0, d1, d2}, [r0]
+void vst3_s64_ptr(__transfersize(3) int64_t * ptr, int64x1x3_t * val); // VST1.64 {d0, d1, d2}, [r0]
+void vst3_f16_ptr(__transfersize(12) __fp16 * ptr, float16x4x3_t * val); // VST3.16 {d0, d1, d2}, [r0]
+void vst3_f32_ptr(__transfersize(6) float32_t * ptr, float32x2x3_t * val); // VST3.32 {d0, d1, d2}, [r0]
+void vst3_p8_ptr(__transfersize(24) poly8_t * ptr, poly8x8x3_t * val); // VST3.8 {d0, d1, d2}, [r0]
+void vst3_p16_ptr(__transfersize(12) poly16_t * ptr, poly16x4x3_t * val); // VST3.16 {d0, d1, d2}, [r0]
+void vst4q_u8_ptr(__transfersize(64) uint8_t * ptr, uint8x16x4_t * val); // VST4.8 {d0, d2, d4, d6}, [r0]
+void vst4q_u16_ptr(__transfersize(32) uint16_t * ptr, uint16x8x4_t * val); // VST4.16 {d0, d2, d4, d6}, [r0]
+void vst4q_u32_ptr(__transfersize(16) uint32_t * ptr, uint32x4x4_t * val); // VST4.32 {d0, d2, d4, d6}, [r0]
+void vst4q_s8_ptr(__transfersize(64) int8_t * ptr, int8x16x4_t * val); // VST4.8 {d0, d2, d4, d6}, [r0]
+void vst4q_s16_ptr(__transfersize(32) int16_t * ptr, int16x8x4_t * val); // VST4.16 {d0, d2, d4, d6}, [r0]
+void vst4q_s32_ptr(__transfersize(16) int32_t * ptr, int32x4x4_t * val); // VST4.32 {d0, d2, d4, d6}, [r0]
+void vst4q_f16_ptr(__transfersize(32) __fp16 * ptr, float16x8x4_t * val); // VST4.16 {d0, d2, d4, d6}, [r0]
+void vst4q_f32_ptr(__transfersize(16) float32_t * ptr, float32x4x4_t * val); // VST4.32 {d0, d2, d4, d6}, [r0]
+void vst4q_p8_ptr(__transfersize(64) poly8_t * ptr, poly8x16x4_t * val); // VST4.8 {d0, d2, d4, d6}, [r0]
+void vst4q_p16_ptr(__transfersize(32) poly16_t * ptr, poly16x8x4_t * val); // VST4.16 {d0, d2, d4, d6}, [r0]
+void vst4_u8_ptr(__transfersize(32) uint8_t * ptr, uint8x8x4_t * val); // VST4.8 {d0, d1, d2, d3}, [r0]
+void vst4_u16_ptr(__transfersize(16) uint16_t * ptr, uint16x4x4_t * val); // VST4.16 {d0, d1, d2, d3}, [r0]
+void vst4_u32_ptr(__transfersize(8) uint32_t * ptr, uint32x2x4_t * val); // VST4.32 {d0, d1, d2, d3}, [r0]
+void vst4_u64_ptr(__transfersize(4) uint64_t * ptr, uint64x1x4_t * val); // VST1.64 {d0, d1, d2, d3}, [r0]
+void vst4_s8_ptr(__transfersize(32) int8_t * ptr, int8x8x4_t * val); // VST4.8 {d0, d1, d2, d3}, [r0]
+void vst4_s16_ptr(__transfersize(16) int16_t * ptr, int16x4x4_t * val); // VST4.16 {d0, d1, d2, d3}, [r0]
+void vst4_s32_ptr(__transfersize(8) int32_t * ptr, int32x2x4_t * val); // VST4.32 {d0, d1, d2, d3}, [r0]
+void vst4_s64_ptr(__transfersize(4) int64_t * ptr, int64x1x4_t * val); // VST1.64 {d0, d1, d2, d3}, [r0]
+void vst4_f16_ptr(__transfersize(16) __fp16 * ptr, float16x4x4_t * val); // VST4.16 {d0, d1, d2, d3}, [r0]
+void vst4_f32_ptr(__transfersize(8) float32_t * ptr, float32x2x4_t * val); // VST4.32 {d0, d1, d2, d3}, [r0]
+void vst4_p8_ptr(__transfersize(32) poly8_t * ptr, poly8x8x4_t * val); // VST4.8 {d0, d1, d2, d3}, [r0]
+void vst4_p16_ptr(__transfersize(16) poly16_t * ptr, poly16x4x4_t * val); // VST4.16 {d0, d1, d2, d3}, [r0]
+//Store a single lane of N-element structure to memory
+void vst2q_lane_u16_ptr(__transfersize(2) uint16_t * ptr, uint16x8x2_t * val, __constrange(0,7) int lane); // VST2.16{d0[0], d2[0]}, [r0]
+void vst2q_lane_u32_ptr(__transfersize(2) uint32_t * ptr, uint32x4x2_t * val, __constrange(0,3) int lane); // VST2.32{d0[0], d2[0]}, [r0]
+void vst2q_lane_s16_ptr(__transfersize(2) int16_t * ptr, int16x8x2_t * val, __constrange(0,7) int lane); // VST2.16{d0[0], d2[0]}, [r0]
+void vst2q_lane_s32_ptr(__transfersize(2) int32_t * ptr, int32x4x2_t * val, __constrange(0,3) int lane); // VST2.32{d0[0], d2[0]}, [r0]
+void vst2q_lane_f16_ptr(__transfersize(2) __fp16 * ptr, float16x8x2_t * val, __constrange(0,7) int lane); // VST2.16{d0[0], d2[0]}, [r0]
+void vst2q_lane_f32_ptr(__transfersize(2) float32_t * ptr, float32x4x2_t * val, __constrange(0,3) int lane); //VST2.32 {d0[0], d2[0]}, [r0]
+void vst2q_lane_p16_ptr(__transfersize(2) poly16_t * ptr, poly16x8x2_t * val, __constrange(0,7) int lane); // VST2.16{d0[0], d2[0]}, [r0]
+void vst2_lane_u8_ptr(__transfersize(2) uint8_t * ptr, uint8x8x2_t * val, __constrange(0,7) int lane); // VST2.8{d0[0], d1[0]}, [r0]
+void vst2_lane_u16_ptr(__transfersize(2) uint16_t * ptr, uint16x4x2_t * val, __constrange(0,3) int lane); // VST2.16{d0[0], d1[0]}, [r0]
+void vst2_lane_u32_ptr(__transfersize(2) uint32_t * ptr, uint32x2x2_t * val, __constrange(0,1) int lane); // VST2.32{d0[0], d1[0]}, [r0]
+void vst2_lane_s8_ptr(__transfersize(2) int8_t * ptr, int8x8x2_t * val, __constrange(0,7) int lane); // VST2.8 {d0[0],d1[0]}, [r0]
+void vst2_lane_s16_ptr(__transfersize(2) int16_t * ptr, int16x4x2_t * val, __constrange(0,3) int lane); // VST2.16{d0[0], d1[0]}, [r0]
+void vst2_lane_s32_ptr(__transfersize(2) int32_t * ptr, int32x2x2_t * val, __constrange(0,1) int lane); // VST2.32{d0[0], d1[0]}, [r0]
+void vst2_lane_f16_ptr(__transfersize(2) __fp16 * ptr, float16x4x2_t * val, __constrange(0,3) int lane); // VST2.16{d0[0], d1[0]}, [r0]
+void vst2_lane_f32_ptr(__transfersize(2) float32_t * ptr, float32x2x2_t * val, __constrange(0,1) int lane); // VST2.32{d0[0], d1[0]}, [r0]
+void vst2_lane_p8_ptr(__transfersize(2) poly8_t * ptr, poly8x8x2_t * val, __constrange(0,7) int lane); // VST2.8{d0[0], d1[0]}, [r0]
+void vst2_lane_p16_ptr(__transfersize(2) poly16_t * ptr, poly16x4x2_t * val, __constrange(0,3) int lane); // VST2.16{d0[0], d1[0]}, [r0]
+void vst3q_lane_u16_ptr(__transfersize(3) uint16_t * ptr, uint16x8x3_t * val, __constrange(0,7) int lane); // VST3.16{d0[0], d2[0], d4[0]}, [r0]
+void vst3q_lane_u32_ptr(__transfersize(3) uint32_t * ptr, uint32x4x3_t * val, __constrange(0,3) int lane); // VST3.32{d0[0], d2[0], d4[0]}, [r0]
+void vst3q_lane_s16_ptr(__transfersize(3) int16_t * ptr, int16x8x3_t * val, __constrange(0,7) int lane); // VST3.16{d0[0], d2[0], d4[0]}, [r0]
+void vst3q_lane_s32_ptr(__transfersize(3) int32_t * ptr, int32x4x3_t * val, __constrange(0,3) int lane); // VST3.32{d0[0], d2[0], d4[0]}, [r0]
+void vst3q_lane_f16_ptr(__transfersize(3) __fp16 * ptr, float16x8x3_t * val, __constrange(0,7) int lane); // VST3.16{d0[0], d2[0], d4[0]}, [r0]
+void vst3q_lane_f32_ptr(__transfersize(3) float32_t * ptr, float32x4x3_t * val, __constrange(0,3) int lane); //VST3.32 {d0[0], d2[0], d4[0]}, [r0]
+void vst3q_lane_p16_ptr(__transfersize(3) poly16_t * ptr, poly16x8x3_t * val, __constrange(0,7) int lane); // VST3.16{d0[0], d2[0], d4[0]}, [r0]
+void vst3_lane_u8_ptr(__transfersize(3) uint8_t * ptr, uint8x8x3_t * val, __constrange(0,7) int lane); // VST3.8{d0[0], d1[0], d2[0]}, [r0]
+void vst3_lane_u16_ptr(__transfersize(3) uint16_t * ptr, uint16x4x3_t * val, __constrange(0,3) int lane); // VST3.16{d0[0], d1[0], d2[0]}, [r0]
+void vst3_lane_u32_ptr(__transfersize(3) uint32_t * ptr, uint32x2x3_t * val, __constrange(0,1) int lane); // VST3.32{d0[0], d1[0], d2[0]}, [r0]
+void vst3_lane_s8_ptr(__transfersize(3) int8_t * ptr, int8x8x3_t * val, __constrange(0,7) int lane); // VST3.8 {d0[0],d1[0], d2[0]}, [r0]
+void vst3_lane_s16_ptr(__transfersize(3) int16_t * ptr, int16x4x3_t * val, __constrange(0,3) int lane); // VST3.16{d0[0], d1[0], d2[0]}, [r0]
+void vst3_lane_s32_ptr(__transfersize(3) int32_t * ptr, int32x2x3_t * val, __constrange(0,1) int lane); // VST3.32{d0[0], d1[0], d2[0]}, [r0]
+void vst3_lane_f16_ptr(__transfersize(3) __fp16 * ptr, float16x4x3_t * val, __constrange(0,3) int lane); // VST3.16{d0[0], d1[0], d2[0]}, [r0]
+void vst3_lane_f32_ptr(__transfersize(3) float32_t * ptr, float32x2x3_t * val, __constrange(0,1) int lane); // VST3.32{d0[0], d1[0], d2[0]}, [r0]
+void vst3_lane_p8_ptr(__transfersize(3) poly8_t * ptr, poly8x8x3_t * val, __constrange(0,7) int lane); // VST3.8{d0[0], d1[0], d2[0]}, [r0]
+void vst3_lane_p16_ptr(__transfersize(3) poly16_t * ptr, poly16x4x3_t * val, __constrange(0,3) int lane); // VST3.16{d0[0], d1[0], d2[0]}, [r0]
+void vst4q_lane_u16_ptr(__transfersize(4) uint16_t * ptr, uint16x8x4_t * val, __constrange(0,7) int lane); // VST4.16{d0[0], d2[0], d4[0], d6[0]}, [r0]
+void vst4q_lane_u32_ptr(__transfersize(4) uint32_t * ptr, uint32x4x4_t * val, __constrange(0,3) int lane); // VST4.32{d0[0], d2[0], d4[0], d6[0]}, [r0]
+void vst4q_lane_s16_ptr(__transfersize(4) int16_t * ptr, int16x8x4_t * val, __constrange(0,7) int lane); // VST4.16{d0[0], d2[0], d4[0], d6[0]}, [r0]
+void vst4q_lane_s32_ptr(__transfersize(4) int32_t * ptr, int32x4x4_t * val, __constrange(0,3) int lane); // VST4.32{d0[0], d2[0], d4[0], d6[0]}, [r0]
+void vst4q_lane_f16_ptr(__transfersize(4) __fp16 * ptr, float16x8x4_t * val, __constrange(0,7) int lane); // VST4.16{d0[0], d2[0], d4[0], d6[0]}, [r0]
+void vst4q_lane_f32_ptr(__transfersize(4) float32_t * ptr, float32x4x4_t * val, __constrange(0,3) int lane); //VST4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0]
+void vst4q_lane_p16_ptr(__transfersize(4) poly16_t * ptr, poly16x8x4_t * val, __constrange(0,7) int lane); // VST4.16{d0[0], d2[0], d4[0], d6[0]}, [r0]
+void vst4_lane_u8_ptr(__transfersize(4) uint8_t * ptr, uint8x8x4_t * val, __constrange(0,7) int lane); // VST4.8{d0[0], d1[0], d2[0], d3[0]}, [r0]
+void vst4_lane_u16_ptr(__transfersize(4) uint16_t * ptr, uint16x4x4_t * val, __constrange(0,3) int lane); // VST4.16{d0[0], d1[0], d2[0], d3[0]}, [r0]
+void vst4_lane_u32_ptr(__transfersize(4) uint32_t * ptr, uint32x2x4_t * val, __constrange(0,1) int lane); // VST4.32{d0[0], d1[0], d2[0], d3[0]}, [r0]
+void vst4_lane_s8_ptr(__transfersize(4) int8_t * ptr, int8x8x4_t * val, __constrange(0,7) int lane); // VST4.8 {d0[0],d1[0], d2[0], d3[0]}, [r0]
+void vst4_lane_s16_ptr(__transfersize(4) int16_t * ptr, int16x4x4_t * val, __constrange(0,3) int lane); // VST4.16{d0[0], d1[0], d2[0], d3[0]}, [r0]
+void vst4_lane_s32_ptr(__transfersize(4) int32_t * ptr, int32x2x4_t * val, __constrange(0,1) int lane); // VST4.32{d0[0], d1[0], d2[0], d3[0]}, [r0]
+void vst4_lane_f16_ptr(__transfersize(4) __fp16 * ptr, float16x4x4_t * val, __constrange(0,3) int lane); // VST4.16{d0[0], d1[0], d2[0], d3[0]}, [r0]
+void vst4_lane_f32_ptr(__transfersize(4) float32_t * ptr, float32x2x4_t * val, __constrange(0,1) int lane); // VST4.32{d0[0], d1[0], d2[0], d3[0]}, [r0]
+void vst4_lane_p8_ptr(__transfersize(4) poly8_t * ptr, poly8x8x4_t * val, __constrange(0,7) int lane); // VST4.8{d0[0], d1[0], d2[0], d3[0]}, [r0]
+void vst4_lane_p16_ptr(__transfersize(4) poly16_t * ptr, poly16x4x4_t * val, __constrange(0,3) int lane); // VST4.16{d0[0], d1[0], d2[0], d3[0]}, [r0]
+//Extract lanes from a vector and put into a register. These intrinsics extract a single lane (element) from a vector.
+
+uint8_t vgetq_lane_u8(uint8x16_t vec, __constrange(0,15) int lane); // VMOV.U8 r0, d0[0]
+uint16_t vgetq_lane_u16(uint16x8_t vec, __constrange(0,7) int lane); // VMOV.U16 r0, d0[0]
+uint32_t vgetq_lane_u32(uint32x4_t vec, __constrange(0,3) int lane); // VMOV.32 r0, d0[0]
+int8_t vgetq_lane_s8(int8x16_t vec, __constrange(0,15) int lane); // VMOV.S8 r0, d0[0]
+int16_t vgetq_lane_s16(int16x8_t vec, __constrange(0,7) int lane); // VMOV.S16 r0, d0[0]
+int32_t vgetq_lane_s32(int32x4_t vec, __constrange(0,3) int lane); // VMOV.32 r0, d0[0]
+poly8_t vgetq_lane_p8(poly8x16_t vec, __constrange(0,15) int lane); // VMOV.U8 r0, d0[0]
+poly16_t vgetq_lane_p16(poly16x8_t vec, __constrange(0,7) int lane); // VMOV.U16 r0, d0[0]
+float32_t vgetq_lane_f32(float32x4_t vec, __constrange(0,3) int lane); // VMOV.32 r0, d0[0]
+
+int64_t vgetq_lane_s64(int64x2_t vec, __constrange(0,1) int lane); // VMOV r0,r0,d0
+uint64_t vgetq_lane_u64(uint64x2_t vec, __constrange(0,1) int lane); // VMOV r0,r0,d0
+//Load a single lane of a vector from a literal. These intrinsics set a single lane (element) within a vector.
+
+uint8x16_t vsetq_lane_u8(uint8_t value, uint8x16_t vec, __constrange(0,15) int lane); // VMOV.8 d0[0],r0
+uint16x8_t vsetq_lane_u16(uint16_t value, uint16x8_t vec, __constrange(0,7) int lane); // VMOV.16 d0[0],r0
+uint32x4_t vsetq_lane_u32(uint32_t value, uint32x4_t vec, __constrange(0,3) int lane); // VMOV.32 d0[0],r0
+int8x16_t vsetq_lane_s8(int8_t value, int8x16_t vec, __constrange(0,15) int lane); // VMOV.8 d0[0],r0
+int16x8_t vsetq_lane_s16(int16_t value, int16x8_t vec, __constrange(0,7) int lane); // VMOV.16 d0[0],r0
+int32x4_t vsetq_lane_s32(int32_t value, int32x4_t vec, __constrange(0,3) int lane); // VMOV.32 d0[0],r0
+poly8x16_t vsetq_lane_p8(poly8_t value, poly8x16_t vec, __constrange(0,15) int lane); // VMOV.8 d0[0],r0
+poly16x8_t vsetq_lane_p16(poly16_t value, poly16x8_t vec, __constrange(0,7) int lane); // VMOV.16 d0[0],r0
+float32x4_t vsetq_lane_f32(float32_t value, float32x4_t vec, __constrange(0,3) int lane); // VMOV.32 d0[0],r0
+
+int64x2_t vsetq_lane_s64(int64_t value, int64x2_t vec, __constrange(0,1) int lane); // VMOV d0,r0,r0
+uint64x2_t vsetq_lane_u64(uint64_t value, uint64x2_t vec, __constrange(0,1) int lane); // VMOV d0,r0,r0
+//Initialize a vector from a literal bit pattern.
+
+//Set all lanes to same value
+//Load all lanes of vector to the same literal value
+
+uint8x16_t vdupq_n_u8(uint8_t value); // VDUP.8 q0,r0
+uint16x8_t vdupq_n_u16(uint16_t value); // VDUP.16 q0,r0
+uint32x4_t vdupq_n_u32(uint32_t value); // VDUP.32 q0,r0
+int8x16_t vdupq_n_s8(int8_t value); // VDUP.8 q0,r0
+int16x8_t vdupq_n_s16(int16_t value); // VDUP.16 q0,r0
+int32x4_t vdupq_n_s32(int32_t value); // VDUP.32 q0,r0
+poly8x16_t vdupq_n_p8(poly8_t value); // VDUP.8 q0,r0
+poly16x8_t vdupq_n_p16(poly16_t value); // VDUP.16 q0,r0
+float32x4_t vdupq_n_f32(float32_t value); // VDUP.32 q0,r0
+
+int64x2_t vdupq_n_s64(int64_t value); // VMOV d0,r0,r0
+uint64x2_t vdupq_n_u64(uint64_t value); // VMOV d0,r0,r0
+
+uint8x16_t vmovq_n_u8(uint8_t value); // VDUP.8 q0,r0
+uint16x8_t vmovq_n_u16(uint16_t value); // VDUP.16 q0,r0
+uint32x4_t vmovq_n_u32(uint32_t value); // VDUP.32 q0,r0
+int8x16_t vmovq_n_s8(int8_t value); // VDUP.8 q0,r0
+int16x8_t vmovq_n_s16(int16_t value); // VDUP.16 q0,r0
+int32x4_t vmovq_n_s32(int32_t value); // VDUP.32 q0,r0
+poly8x16_t vmovq_n_p8(poly8_t value); // VDUP.8 q0,r0
+poly16x8_t vmovq_n_p16(poly16_t value); // VDUP.16 q0,r0
+float32x4_t vmovq_n_f32(float32_t value); // VDUP.32 q0,r0
+
+int64x2_t vmovq_n_s64(int64_t value); // VMOV d0,r0,r0
+uint64x2_t vmovq_n_u64(uint64_t value); // VMOV d0,r0,r0
+//Load all lanes of the vector to the value of a lane of a vector
+
+//Combining vectors. These intrinsics join two 64 bit vectors into a single 128bit vector.
+
+//Splitting vectors. These intrinsics split a 128 bit vector into 2 component 64 bit vectors
+
+//Converting vectors. These intrinsics are used to convert vectors.
+//Convert from float
+
+int32x4_t vcvtq_s32_f32(float32x4_t a); // VCVT.S32.F32 q0, q0
+uint32x4_t vcvtq_u32_f32(float32x4_t a); // VCVT.U32.F32 q0, q0
+
+int32x4_t vcvtq_n_s32_f32(float32x4_t a, __constrange(1,32) int b); // VCVT.S32.F32 q0, q0, #32
+uint32x4_t vcvtq_n_u32_f32(float32x4_t a, __constrange(1,32) int b); // VCVT.U32.F32 q0, q0, #32
+//Convert to float
+
+float32x4_t vcvtq_f32_s32(int32x4_t a); // VCVT.F32.S32 q0, q0
+float32x4_t vcvtq_f32_u32(uint32x4_t a); // VCVT.F32.U32 q0, q0
+
+float32x4_t vcvtq_n_f32_s32(int32x4_t a, __constrange(1,32) int b); // VCVT.F32.S32 q0, q0, #32
+float32x4_t vcvtq_n_f32_u32(uint32x4_t a, __constrange(1,32) int b); // VCVT.F32.U32 q0, q0, #32
+//Convert between floats
+
+//Vector narrow integer
+
+//Vector long move
+
+//Vector saturating narrow integer
+
+//Vector saturating narrow integer signed->unsigned
+
+//Table look up
+
+//Extended table look up intrinsics
+
+//Operations with a scalar value
+//Vector multiply accumulate with scalar
+
+//Vector widening multiply accumulate with scalar
+
+//Vector widening saturating doubling multiply accumulate with scalar
+
+//Vector multiply subtract with scalar
+
+//Vector widening multiply subtract with scalar
+
+//Vector widening saturating doubling multiply subtract with scalar
+
+//Vector multiply by scalar
+
+int16x8_t vmulq_n_s16(int16x8_t a, int16_t b); // VMUL.I16 q0,q0,d0[0]
+int32x4_t vmulq_n_s32(int32x4_t a, int32_t b); // VMUL.I32 q0,q0,d0[0]
+float32x4_t vmulq_n_f32(float32x4_t a, float32_t b); // VMUL.F32 q0,q0,d0[0]
+uint16x8_t vmulq_n_u16(uint16x8_t a, uint16_t b); // VMUL.I16 q0,q0,d0[0]
+uint32x4_t vmulq_n_u32(uint32x4_t a, uint32_t b); // VMUL.I32 q0,q0,d0[0]
+//Vector long multiply with scalar
+
+//Vector long multiply by scalar
+
+//Vector saturating doubling long multiply with scalar
+
+//Vector saturating doubling long multiply by scalar
+
+//Vector saturating doubling multiply high with scalar
+
+int16x8_t vqdmulhq_n_s16(int16x8_t vec1, int16_t val2); // VQDMULH.S16 q0,q0,d0[0]
+int32x4_t vqdmulhq_n_s32(int32x4_t vec1, int32_t val2); // VQDMULH.S32 q0,q0,d0[0]
+//Vector saturating doubling multiply high by scalar
+
+//Vector saturating rounding doubling multiply high with scalar
+
+int16x8_t vqrdmulhq_n_s16(int16x8_t vec1, int16_t val2); // VQRDMULH.S16 q0,q0,d0[0]
+int32x4_t vqrdmulhq_n_s32(int32x4_t vec1, int32_t val2); // VQRDMULH.S32 q0,q0,d0[0]
+//Vector rounding saturating doubling multiply high by scalar
+
+//Vector multiply accumulate with scalar
+
+int16x8_t vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c); // VMLA.I16 q0, q0, d0[0]
+int32x4_t vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c); // VMLA.I32 q0, q0, d0[0]
+uint16x8_t vmlaq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c); // VMLA.I16 q0, q0, d0[0]
+uint32x4_t vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c); // VMLA.I32 q0, q0, d0[0]
+float32x4_t vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c); // VMLA.F32 q0, q0, d0[0]
+//Vector widening multiply accumulate with scalar
+
+//Vector widening saturating doubling multiply accumulate with scalar
+
+//Vector multiply subtract with scalar
+
+int16x8_t vmlsq_n_s16(int16x8_t a, int16x8_t b, int16_t c); // VMLS.I16 q0, q0, d0[0]
+int32x4_t vmlsq_n_s32(int32x4_t a, int32x4_t b, int32_t c); // VMLS.I32 q0, q0, d0[0]
+uint16x8_t vmlsq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c); // VMLS.I16 q0, q0, d0[0]
+uint32x4_t vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c); // VMLS.I32 q0, q0, d0[0]
+float32x4_t vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c); // VMLS.F32 q0, q0, d0[0]
+//Vector widening multiply subtract with scalar
+
+//Vector widening saturating doubling multiply subtract with scalar
+
+//Vector extract
+
+int8x16_t vextq_s8(int8x16_t a, int8x16_t b, __constrange(0,15) int c); // VEXT.8 q0,q0,q0,#0
+uint8x16_t vextq_u8(uint8x16_t a, uint8x16_t b, __constrange(0,15) int c); // VEXT.8 q0,q0,q0,#0
+poly8x16_t vextq_p8(poly8x16_t a, poly8x16_t b, __constrange(0,15) int c); // VEXT.8 q0,q0,q0,#0
+int16x8_t vextq_s16(int16x8_t a, int16x8_t b, __constrange(0,7) int c); // VEXT.16 q0,q0,q0,#0
+uint16x8_t vextq_u16(uint16x8_t a, uint16x8_t b, __constrange(0,7) int c); // VEXT.16 q0,q0,q0,#0
+poly16x8_t vextq_p16(poly16x8_t a, poly16x8_t b, __constrange(0,7) int c); // VEXT.16 q0,q0,q0,#0
+int32x4_t vextq_s32(int32x4_t a, int32x4_t b, __constrange(0,3) int c); // VEXT.32 q0,q0,q0,#0
+uint32x4_t vextq_u32(uint32x4_t a, uint32x4_t b, __constrange(0,3) int c); // VEXT.32 q0,q0,q0,#0
+int64x2_t vextq_s64(int64x2_t a, int64x2_t b, __constrange(0,1) int c); // VEXT.64 q0,q0,q0,#0
+uint64x2_t vextq_u64(uint64x2_t a, uint64x2_t b, __constrange(0,1) int c); // VEXT.64 q0,q0,q0,#0
+//Reverse vector elements (swap endianness). VREVn.m reverses the order of the m-bit lanes within a set that is n bits wide.
+
+int8x16_t vrev64q_s8(int8x16_t vec); // VREV64.8 q0,q0
+int16x8_t vrev64q_s16(int16x8_t vec); // VREV64.16 q0,q0
+int32x4_t vrev64q_s32(int32x4_t vec); // VREV64.32 q0,q0
+uint8x16_t vrev64q_u8(uint8x16_t vec); // VREV64.8 q0,q0
+uint16x8_t vrev64q_u16(uint16x8_t vec); // VREV64.16 q0,q0
+uint32x4_t vrev64q_u32(uint32x4_t vec); // VREV64.32 q0,q0
+poly8x16_t vrev64q_p8(poly8x16_t vec); // VREV64.8 q0,q0
+poly16x8_t vrev64q_p16(poly16x8_t vec); // VREV64.16 q0,q0
+float32x4_t vrev64q_f32(float32x4_t vec); // VREV64.32 q0,q0
+
+int8x16_t vrev32q_s8(int8x16_t vec); // VREV32.8 q0,q0
+int16x8_t vrev32q_s16(int16x8_t vec); // VREV32.16 q0,q0
+uint8x16_t vrev32q_u8(uint8x16_t vec); // VREV32.8 q0,q0
+uint16x8_t vrev32q_u16(uint16x8_t vec); // VREV32.16 q0,q0
+poly8x16_t vrev32q_p8(poly8x16_t vec); // VREV32.8 q0,q0
+
+int8x16_t vrev16q_s8(int8x16_t vec); // VREV16.8 q0,q0
+uint8x16_t vrev16q_u8(uint8x16_t vec); // VREV16.8 q0,q0
+poly8x16_t vrev16q_p8(poly8x16_t vec); // VREV16.8 q0,q0
+//Other single operand arithmetic
+//Absolute: Vd[i] = |Va[i]|
+
+int8x16_t vabsq_s8(int8x16_t a); // VABS.S8 q0,q0
+int16x8_t vabsq_s16(int16x8_t a); // VABS.S16 q0,q0
+int32x4_t vabsq_s32(int32x4_t a); // VABS.S32 q0,q0
+float32x4_t vabsq_f32(float32x4_t a); // VABS.F32 q0,q0
+//Saturating absolute: Vd[i] = sat(|Va[i]|)
+
+int8x16_t vqabsq_s8(int8x16_t a); // VQABS.S8 q0,q0
+int16x8_t vqabsq_s16(int16x8_t a); // VQABS.S16 q0,q0
+int32x4_t vqabsq_s32(int32x4_t a); // VQABS.S32 q0,q0
+//Negate: Vd[i] = - Va[i]
+
+int8x16_t vnegq_s8(int8x16_t a); // VNEG.S8 q0,q0
+int16x8_t vnegq_s16(int16x8_t a); // VNEG.S16 q0,q0
+int32x4_t vnegq_s32(int32x4_t a); // VNEG.S32 q0,q0
+float32x4_t vnegq_f32(float32x4_t a); // VNEG.F32 q0,q0
+//Saturating Negate: sat(Vd[i] = - Va[i])
+
+int8x16_t vqnegq_s8(int8x16_t a); // VQNEG.S8 q0,q0
+int16x8_t vqnegq_s16(int16x8_t a); // VQNEG.S16 q0,q0
+int32x4_t vqnegq_s32(int32x4_t a); // VQNEG.S32 q0,q0
+//Count leading sign bits
+
+int8x16_t vclsq_s8(int8x16_t a); // VCLS.S8 q0,q0
+int16x8_t vclsq_s16(int16x8_t a); // VCLS.S16 q0,q0
+int32x4_t vclsq_s32(int32x4_t a); // VCLS.S32 q0,q0
+//Count leading zeros
+
+int8x16_t vclzq_s8(int8x16_t a); // VCLZ.I8 q0,q0
+int16x8_t vclzq_s16(int16x8_t a); // VCLZ.I16 q0,q0
+int32x4_t vclzq_s32(int32x4_t a); // VCLZ.I32 q0,q0
+uint8x16_t vclzq_u8(uint8x16_t a); // VCLZ.I8 q0,q0
+uint16x8_t vclzq_u16(uint16x8_t a); // VCLZ.I16 q0,q0
+uint32x4_t vclzq_u32(uint32x4_t a); // VCLZ.I32 q0,q0
+//Count number of set bits
+
+uint8x16_t vcntq_u8(uint8x16_t a); // VCNT.8 q0,q0
+int8x16_t vcntq_s8(int8x16_t a); // VCNT.8 q0,q0
+poly8x16_t vcntq_p8(poly8x16_t a); // VCNT.8 q0,q0
+//Reciprocal estimate
+
+float32x4_t vrecpeq_f32(float32x4_t a); // VRECPE.F32 q0,q0
+uint32x4_t vrecpeq_u32(uint32x4_t a); // VRECPE.U32 q0,q0
+//Reciprocal square root estimate
+
+float32x4_t vrsqrteq_f32(float32x4_t a); // VRSQRTE.F32 q0,q0
+uint32x4_t vrsqrteq_u32(uint32x4_t a); // VRSQRTE.U32 q0,q0
+//Logical operations
+//Bitwise not
+
+int8x16_t vmvnq_s8(int8x16_t a); // VMVN q0,q0
+int16x8_t vmvnq_s16(int16x8_t a); // VMVN q0,q0
+int32x4_t vmvnq_s32(int32x4_t a); // VMVN q0,q0
+uint8x16_t vmvnq_u8(uint8x16_t a); // VMVN q0,q0
+uint16x8_t vmvnq_u16(uint16x8_t a); // VMVN q0,q0
+uint32x4_t vmvnq_u32(uint32x4_t a); // VMVN q0,q0
+poly8x16_t vmvnq_p8(poly8x16_t a); // VMVN q0,q0
+//Bitwise and
+
+int8x16_t vandq_s8(int8x16_t a, int8x16_t b); // VAND q0,q0,q0
+int16x8_t vandq_s16(int16x8_t a, int16x8_t b); // VAND q0,q0,q0
+int32x4_t vandq_s32(int32x4_t a, int32x4_t b); // VAND q0,q0,q0
+int64x2_t vandq_s64(int64x2_t a, int64x2_t b); // VAND q0,q0,q0
+uint8x16_t vandq_u8(uint8x16_t a, uint8x16_t b); // VAND q0,q0,q0
+uint16x8_t vandq_u16(uint16x8_t a, uint16x8_t b); // VAND q0,q0,q0
+uint32x4_t vandq_u32(uint32x4_t a, uint32x4_t b); // VAND q0,q0,q0
+uint64x2_t vandq_u64(uint64x2_t a, uint64x2_t b); // VAND q0,q0,q0
+//Bitwise or
+
+int8x16_t vorrq_s8(int8x16_t a, int8x16_t b); // VORR q0,q0,q0
+int16x8_t vorrq_s16(int16x8_t a, int16x8_t b); // VORR q0,q0,q0
+int32x4_t vorrq_s32(int32x4_t a, int32x4_t b); // VORR q0,q0,q0
+int64x2_t vorrq_s64(int64x2_t a, int64x2_t b); // VORR q0,q0,q0
+uint8x16_t vorrq_u8(uint8x16_t a, uint8x16_t b); // VORR q0,q0,q0
+uint16x8_t vorrq_u16(uint16x8_t a, uint16x8_t b); // VORR q0,q0,q0
+uint32x4_t vorrq_u32(uint32x4_t a, uint32x4_t b); // VORR q0,q0,q0
+uint64x2_t vorrq_u64(uint64x2_t a, uint64x2_t b); // VORR q0,q0,q0
+//Bitwise exclusive or (EOR or XOR)
+
+int8x16_t veorq_s8(int8x16_t a, int8x16_t b); // VEOR q0,q0,q0
+int16x8_t veorq_s16(int16x8_t a, int16x8_t b); // VEOR q0,q0,q0
+int32x4_t veorq_s32(int32x4_t a, int32x4_t b); // VEOR q0,q0,q0
+int64x2_t veorq_s64(int64x2_t a, int64x2_t b); // VEOR q0,q0,q0
+uint8x16_t veorq_u8(uint8x16_t a, uint8x16_t b); // VEOR q0,q0,q0
+uint16x8_t veorq_u16(uint16x8_t a, uint16x8_t b); // VEOR q0,q0,q0
+uint32x4_t veorq_u32(uint32x4_t a, uint32x4_t b); // VEOR q0,q0,q0
+uint64x2_t veorq_u64(uint64x2_t a, uint64x2_t b); // VEOR q0,q0,q0
+//Bit Clear
+
+int8x16_t vbicq_s8(int8x16_t a, int8x16_t b); // VBIC q0,q0,q0
+int16x8_t vbicq_s16(int16x8_t a, int16x8_t b); // VBIC q0,q0,q0
+int32x4_t vbicq_s32(int32x4_t a, int32x4_t b); // VBIC q0,q0,q0
+int64x2_t vbicq_s64(int64x2_t a, int64x2_t b); // VBIC q0,q0,q0
+uint8x16_t vbicq_u8(uint8x16_t a, uint8x16_t b); // VBIC q0,q0,q0
+uint16x8_t vbicq_u16(uint16x8_t a, uint16x8_t b); // VBIC q0,q0,q0
+uint32x4_t vbicq_u32(uint32x4_t a, uint32x4_t b); // VBIC q0,q0,q0
+uint64x2_t vbicq_u64(uint64x2_t a, uint64x2_t b); // VBIC q0,q0,q0
+//Bitwise OR complement
+
+int8x16_t vornq_s8(int8x16_t a, int8x16_t b); // VORN q0,q0,q0
+int16x8_t vornq_s16(int16x8_t a, int16x8_t b); // VORN q0,q0,q0
+int32x4_t vornq_s32(int32x4_t a, int32x4_t b); // VORN q0,q0,q0
+int64x2_t vornq_s64(int64x2_t a, int64x2_t b); // VORN q0,q0,q0
+uint8x16_t vornq_u8(uint8x16_t a, uint8x16_t b); // VORN q0,q0,q0
+uint16x8_t vornq_u16(uint16x8_t a, uint16x8_t b); // VORN q0,q0,q0
+uint32x4_t vornq_u32(uint32x4_t a, uint32x4_t b); // VORN q0,q0,q0
+uint64x2_t vornq_u64(uint64x2_t a, uint64x2_t b); // VORN q0,q0,q0
+//Bitwise Select
+
+int8x16_t vbslq_s8(uint8x16_t a, int8x16_t b, int8x16_t c); // VBSL q0,q0,q0
+int16x8_t vbslq_s16(uint16x8_t a, int16x8_t b, int16x8_t c); // VBSL q0,q0,q0
+int32x4_t vbslq_s32(uint32x4_t a, int32x4_t b, int32x4_t c); // VBSL q0,q0,q0
+int64x2_t vbslq_s64(uint64x2_t a, int64x2_t b, int64x2_t c); // VBSL q0,q0,q0
+uint8x16_t vbslq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c); // VBSL q0,q0,q0
+uint16x8_t vbslq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c); // VBSL q0,q0,q0
+uint32x4_t vbslq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c); // VBSL q0,q0,q0
+uint64x2_t vbslq_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c); // VBSL q0,q0,q0
+float32x4_t vbslq_f32(uint32x4_t a, float32x4_t b, float32x4_t c); // VBSL q0,q0,q0
+poly8x16_t vbslq_p8(uint8x16_t a, poly8x16_t b, poly8x16_t c); // VBSL q0,q0,q0
+poly16x8_t vbslq_p16(uint16x8_t a, poly16x8_t b, poly16x8_t c); // VBSL q0,q0,q0
+//Transposition operations
+//Transpose elements
+
+int8x16x2_t vtrnq_s8(int8x16_t a, int8x16_t b); // VTRN.8 q0,q0
+int16x8x2_t vtrnq_s16(int16x8_t a, int16x8_t b); // VTRN.16 q0,q0
+int32x4x2_t vtrnq_s32(int32x4_t a, int32x4_t b); // VTRN.32 q0,q0
+uint8x16x2_t vtrnq_u8(uint8x16_t a, uint8x16_t b); // VTRN.8 q0,q0
+uint16x8x2_t vtrnq_u16(uint16x8_t a, uint16x8_t b); // VTRN.16 q0,q0
+uint32x4x2_t vtrnq_u32(uint32x4_t a, uint32x4_t b); // VTRN.32 q0,q0
+float32x4x2_t vtrnq_f32(float32x4_t a, float32x4_t b); // VTRN.32 q0,q0
+poly8x16x2_t vtrnq_p8(poly8x16_t a, poly8x16_t b); // VTRN.8 q0,q0
+poly16x8x2_t vtrnq_p16(poly16x8_t a, poly16x8_t b); // VTRN.16 q0,q0
+//Interleave elements
+
+int8x16x2_t vzipq_s8(int8x16_t a, int8x16_t b); // VZIP.8 q0,q0
+int16x8x2_t vzipq_s16(int16x8_t a, int16x8_t b); // VZIP.16 q0,q0
+int32x4x2_t vzipq_s32(int32x4_t a, int32x4_t b); // VZIP.32 q0,q0
+uint8x16x2_t vzipq_u8(uint8x16_t a, uint8x16_t b); // VZIP.8 q0,q0
+uint16x8x2_t vzipq_u16(uint16x8_t a, uint16x8_t b); // VZIP.16 q0,q0
+uint32x4x2_t vzipq_u32(uint32x4_t a, uint32x4_t b); // VZIP.32 q0,q0
+float32x4x2_t vzipq_f32(float32x4_t a, float32x4_t b); // VZIP.32 q0,q0
+poly8x16x2_t vzipq_p8(poly8x16_t a, poly8x16_t b); // VZIP.8 q0,q0
+poly16x8x2_t vzipq_p16(poly16x8_t a, poly16x8_t b); // VZIP.16 q0,q0
+//De-Interleave elements
+
+int8x16x2_t vuzpq_s8(int8x16_t a, int8x16_t b); // VUZP.8 q0,q0
+int16x8x2_t vuzpq_s16(int16x8_t a, int16x8_t b); // VUZP.16 q0,q0
+int32x4x2_t vuzpq_s32(int32x4_t a, int32x4_t b); // VUZP.32 q0,q0
+uint8x16x2_t vuzpq_u8(uint8x16_t a, uint8x16_t b); // VUZP.8 q0,q0
+uint16x8x2_t vuzpq_u16(uint16x8_t a, uint16x8_t b); // VUZP.16 q0,q0
+uint32x4x2_t vuzpq_u32(uint32x4_t a, uint32x4_t b); // VUZP.32 q0,q0
+float32x4x2_t vuzpq_f32(float32x4_t a, float32x4_t b); // VUZP.32 q0,q0
+poly8x16x2_t vuzpq_p8(poly8x16_t a, poly8x16_t b); // VUZP.8 q0,q0
+poly16x8x2_t vuzpq_p16(poly16x8_t a, poly16x8_t b); // VUZP.16 q0,q0
+
+//^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+// The following macros and wrapper functions work around the "immediate parameter" requirement of some x86 intrinsics. A release build does not strictly need them,
+// but a debug build does: without them the code fails to compile with the "Intrinsic parameter must be an immediate value" error.
+//
+//Direct aliases: taken only for NDEBUG (release) builds with MSVC where _MSC_VER > 1600, or with the Intel compiler.
+//Every other configuration (debug builds, VS2010 and earlier, other compilers) takes the #else branch, which wraps
+//each intrinsic in a function that dispatches on the lane through a switch of literal constants.
+#if ( ((defined _MSC_VER) && (_MSC_VER > 1600)) || defined __INTEL_COMPILER )&& defined NDEBUG
+
+ //byte-wise align/concatenate, only defined when USE_SSSE3 is set
+ #if defined(USE_SSSE3)
+ #define _MM_ALIGNR_EPI8 _mm_alignr_epi8
+ #endif
+
+ //16-bit lane access, defined regardless of USE_SSSE3 / USE_SSE4
+ #define _MM_EXTRACT_EPI16 _mm_extract_epi16
+ #define _MM_INSERT_EPI16 _mm_insert_epi16
+ //the remaining lane accessors are only defined when USE_SSE4 is set
+ #ifdef USE_SSE4
+ #define _MM_EXTRACT_EPI8 _mm_extract_epi8
+ #define _MM_EXTRACT_EPI32 _mm_extract_epi32
+ #define _MM_EXTRACT_PS _mm_extract_ps
+
+ #define _MM_INSERT_EPI8 _mm_insert_epi8
+ #define _MM_INSERT_EPI32 _mm_insert_epi32
+ #define _MM_INSERT_PS _mm_insert_ps
+ //64-bit lane access is additionally guarded by _M_X64
+ #ifdef _M_X64
+ #define _MM_INSERT_EPI64 _mm_insert_epi64
+ #define _MM_EXTRACT_EPI64 _mm_extract_epi64
+ #endif
+ #endif //SSE4
+#else
+ //_NEON2SSE_COMMA lets a macro argument carry its own leading comma, e.g. "_NEON2SSE_COMMA b" becomes ", b".
+ #define _NEON2SSE_COMMA ,
+ //_NEON2SSE_SWITCH16(NAME, a, b, LANE): function body fragment that returns NAME(a b, k), where k is the literal
+ //constant (0..15) equal to the run-time value of LANE. Any LANE outside 0..15 is handled as 0.
+ //"a" and "b" are pasted next to each other with no separator: pass "b" either empty (one-operand intrinsic)
+ //or as "_NEON2SSE_COMMA x" (two-operand intrinsic).
+ #define _NEON2SSE_SWITCH16(NAME, a, b, LANE) \
+ switch(LANE) \
+ { \
+ case 0: return NAME(a b, 0); \
+ case 1: return NAME(a b, 1); \
+ case 2: return NAME(a b, 2); \
+ case 3: return NAME(a b, 3); \
+ case 4: return NAME(a b, 4); \
+ case 5: return NAME(a b, 5); \
+ case 6: return NAME(a b, 6); \
+ case 7: return NAME(a b, 7); \
+ case 8: return NAME(a b, 8); \
+ case 9: return NAME(a b, 9); \
+ case 10: return NAME(a b, 10); \
+ case 11: return NAME(a b, 11); \
+ case 12: return NAME(a b, 12); \
+ case 13: return NAME(a b, 13); \
+ case 14: return NAME(a b, 14); \
+ case 15: return NAME(a b, 15); \
+ default: return NAME(a b, 0); \
+ }
+
+ //_NEON2SSE_SWITCH8(NAME, vec, LANE, p): function body fragment that returns NAME(vec p,k), where k is the literal
+ //constant (0..7) equal to the run-time value of LANE. Any LANE outside 0..7 is handled as 0.
+ //"p" is pasted directly after "vec": pass it empty, or with its own leading comma when NAME takes an extra operand.
+ #define _NEON2SSE_SWITCH8(NAME, vec, LANE, p) \
+ switch(LANE) \
+ { \
+ case 0: return NAME(vec p,0); \
+ case 1: return NAME(vec p,1); \
+ case 2: return NAME(vec p,2); \
+ case 3: return NAME(vec p,3); \
+ case 4: return NAME(vec p,4); \
+ case 5: return NAME(vec p,5); \
+ case 6: return NAME(vec p,6); \
+ case 7: return NAME(vec p,7); \
+ default: return NAME(vec p,0); \
+ }
+
+ //_NEON2SSE_SWITCH4(NAME, case0..case3, vec, LANE, p): function body fragment that returns NAME(vec p,caseN) for the
+ //caseN that equals the run-time value of LANE. The four case values are supplied by the caller, so they do not have
+ //to be 0..3. Any LANE that matches none of them is handled as case0.
+ //"p" is pasted directly after "vec": pass it empty, or with its own leading comma when NAME takes an extra operand.
+ #define _NEON2SSE_SWITCH4(NAME, case0, case1, case2, case3, vec, LANE, p) \
+ switch(LANE) \
+ { \
+ case case0: return NAME(vec p,case0); \
+ case case1: return NAME(vec p,case1); \
+ case case2: return NAME(vec p,case2); \
+ case case3: return NAME(vec p,case3); \
+ default: return NAME(vec p,case0); \
+ }
+
+ #if defined(USE_SSSE3)
+ //_mm_alignr_epi8 with a run-time byte count: LANE is turned into a literal 0..15 by a 16-way switch.
+ //A LANE outside 0..15 is handled as 0.
+ _NEON2SSE_INLINE __m128i _MM_ALIGNR_EPI8(__m128i a, __m128i b, int LANE)
+ {
+ _NEON2SSE_SWITCH16(_mm_alignr_epi8, a, _NEON2SSE_COMMA b, LANE)
+ }
+ #endif
+
+ //_mm_insert_epi16 with a run-time lane: returns vec with p inserted at 16-bit lane LANE (0..7).
+ //A LANE outside 0..7 is handled as 0.
+ _NEON2SSE_INLINE __m128i _MM_INSERT_EPI16(__m128i vec, int p, const int LANE)
+ {
+ _NEON2SSE_SWITCH8(_mm_insert_epi16, vec, LANE, _NEON2SSE_COMMA p)
+ }
+
+ //_mm_extract_epi16 with a run-time lane: returns 16-bit lane LANE (0..7) of vec.
+ //A LANE outside 0..7 is handled as 0. The last macro argument is intentionally empty (no extra operand).
+ _NEON2SSE_INLINE int _MM_EXTRACT_EPI16(__m128i vec, const int LANE)
+ {
+ _NEON2SSE_SWITCH8(_mm_extract_epi16, vec, LANE,)
+ }
+
+ #ifdef USE_SSE4
+ //_mm_extract_epi32 with a run-time lane: returns 32-bit lane LANE (0..3) of vec.
+ //Each branch hands the intrinsic a literal lane number; a LANE outside 0..3 is handled as 0.
+ _NEON2SSE_INLINE int _MM_EXTRACT_EPI32(__m128i vec, const int LANE)
+ {
+     switch(LANE)
+     {
+     case 1:
+         return _mm_extract_epi32(vec, 1);
+     case 2:
+         return _mm_extract_epi32(vec, 2);
+     case 3:
+         return _mm_extract_epi32(vec, 3);
+     case 0:
+     default:
+         return _mm_extract_epi32(vec, 0);
+     }
+ }
+
+ //_mm_extract_ps with a run-time lane: returns the int result of _mm_extract_ps for lane LANE (0..3) of vec.
+ //Each branch hands the intrinsic a literal lane number; a LANE outside 0..3 is handled as 0.
+ _NEON2SSE_INLINE int _MM_EXTRACT_PS(__m128 vec, const int LANE)
+ {
+     switch(LANE)
+     {
+     case 1:
+         return _mm_extract_ps(vec, 1);
+     case 2:
+         return _mm_extract_ps(vec, 2);
+     case 3:
+         return _mm_extract_ps(vec, 3);
+     case 0:
+     default:
+         return _mm_extract_ps(vec, 0);
+     }
+ }
+
+ //_mm_extract_epi8 with a run-time lane: returns 8-bit lane LANE (0..15) of vec.
+ //A LANE outside 0..15 is handled as 0. The third macro argument is intentionally empty (no extra operand).
+ _NEON2SSE_INLINE int _MM_EXTRACT_EPI8(__m128i vec, const int LANE)
+ {
+ _NEON2SSE_SWITCH16(_mm_extract_epi8, vec, , LANE)
+ }
+
+ //_mm_insert_epi32 with a run-time lane: returns vec with p inserted at 32-bit lane LANE (0..3).
+ //Each branch hands the intrinsic a literal lane number; a LANE outside 0..3 is handled as 0.
+ _NEON2SSE_INLINE __m128i _MM_INSERT_EPI32(__m128i vec, int p, const int LANE)
+ {
+     switch(LANE)
+     {
+     case 1:
+         return _mm_insert_epi32(vec, p, 1);
+     case 2:
+         return _mm_insert_epi32(vec, p, 2);
+     case 3:
+         return _mm_insert_epi32(vec, p, 3);
+     case 0:
+     default:
+         return _mm_insert_epi32(vec, p, 0);
+     }
+ }
+
+ //_mm_insert_epi8 with a run-time lane: returns vec with p inserted at 8-bit lane LANE (0..15).
+ //A LANE outside 0..15 is handled as 0.
+ _NEON2SSE_INLINE __m128i _MM_INSERT_EPI8(__m128i vec, int p, const int LANE)
+ {
+ _NEON2SSE_SWITCH16(_mm_insert_epi8, vec, _NEON2SSE_COMMA p, LANE)
+ }
+ #ifdef _M_X64
+ _NEON2SSE_INLINE __m128i _MM_INSERT_EPI64(__m128i vec, int p, const int LANE)
+ {
+     // _mm_insert_epi64 needs a constant lane, so pick the call by hand.
+     // Only lane 1 gets its own immediate; lane 0 and every other index share immediate 0.
+     // Note that p is an int, so the inserted 64-bit value is always an int widened by the call.
+     if (LANE == 1) {
+         return _mm_insert_epi64(vec, p, 1);
+     }
+     return _mm_insert_epi64(vec, p, 0);
+ }
+
+ _NEON2SSE_INLINE int64_t _MM_EXTRACT_EPI64(__m128i val, const int LANE)
+ {
+     // LANE 0 reads the low quadword; every other value reads the high quadword.
+     switch (LANE)
+     {
+     case 0:
+         return _mm_extract_epi64(val, 0);
+     default:
+         return _mm_extract_epi64(val, 1);
+     }
+ }
+ #endif
+ // Run-time front end for _mm_insert_ps. LANE is not a plain lane index here: it is the
+ // instruction immediate itself, and only the values 0, 16, 32 and 48 are dispatched;
+ // anything else is served with immediate 0 (see _NEON2SSE_SWITCH4).
+ _NEON2SSE_INLINE __m128 _MM_INSERT_PS(__m128 vec, __m128 p, const int LANE)
+ {
+ _NEON2SSE_SWITCH4(_mm_insert_ps, 0, 16, 32, 48, vec, LANE, _NEON2SSE_COMMA p)
+ }
+
+ #endif //USE_SSE4
+
+#endif //#ifdef NDEBUG
+
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+// Below are some helper functions used either for SSE4 intrinsics "emulation" for SSSE3 limited devices
+// or for some specific commonly used operations implementation missing in SSE
+#ifdef USE_SSE4
+ // With USE_SSE4 defined every helper below is a plain alias of the native intrinsic;
+ // the #else branch provides functions with the same names for builds without SSE4.
+ // Widening conversions, unsigned (zero extension):
+ #define _MM_CVTEPU8_EPI16 _mm_cvtepu8_epi16
+ #define _MM_CVTEPU16_EPI32 _mm_cvtepu16_epi32
+ #define _MM_CVTEPU32_EPI64 _mm_cvtepu32_epi64
+
+ // Widening conversions, signed (sign extension):
+ #define _MM_CVTEPI8_EPI16 _mm_cvtepi8_epi16
+ #define _MM_CVTEPI16_EPI32 _mm_cvtepi16_epi32
+ #define _MM_CVTEPI32_EPI64 _mm_cvtepi32_epi64
+
+ // Lane-wise maximum:
+ #define _MM_MAX_EPI8 _mm_max_epi8
+ #define _MM_MAX_EPI32 _mm_max_epi32
+ #define _MM_MAX_EPU16 _mm_max_epu16
+ #define _MM_MAX_EPU32 _mm_max_epu32
+
+ // Lane-wise minimum:
+ #define _MM_MIN_EPI8 _mm_min_epi8
+ #define _MM_MIN_EPI32 _mm_min_epi32
+ #define _MM_MIN_EPU16 _mm_min_epu16
+ #define _MM_MIN_EPU32 _mm_min_epu32
+
+ // Blend and unsigned-saturating pack; _MM_PACKUS1_EPI32 packs one vector against itself:
+ #define _MM_BLENDV_EPI8 _mm_blendv_epi8
+ #define _MM_PACKUS_EPI32 _mm_packus_epi32
+ #define _MM_PACKUS1_EPI32(a) _mm_packus_epi32(a, a)
+
+ // 32-bit multiplies (low 32-bit result / widening 64-bit result):
+ #define _MM_MULLO_EPI32 _mm_mullo_epi32
+ #define _MM_MUL_EPI32 _mm_mul_epi32
+#else //no SSE4 !!!!!!
+ _NEON2SSE_INLINE __m128i _MM_CVTEPU8_EPI16(__m128i a)
+ {
+     // Interleaving the low 8 bytes of a with zero bytes zero-extends each of them to 16 bits.
+     return _mm_unpacklo_epi8(a, _mm_setzero_si128());
+ }
+
+ _NEON2SSE_INLINE __m128i _MM_CVTEPU16_EPI32(__m128i a)
+ {
+     // Interleaving the low four 16-bit lanes of a with zeros zero-extends them to 32 bits.
+     return _mm_unpacklo_epi16(a, _mm_setzero_si128());
+ }
+
+ _NEON2SSE_INLINE __m128i _MM_CVTEPU32_EPI64(__m128i a)
+ {
+     // Interleaving the low two 32-bit lanes of a with zeros zero-extends them to 64 bits.
+     return _mm_unpacklo_epi32(a, _mm_setzero_si128());
+ }
+
+ _NEON2SSE_INLINE __m128i _MM_CVTEPI8_EPI16(__m128i a)
+ {
+     // (0 > a) is 0xff for negative bytes and 0x00 otherwise - exactly the byte sign extension
+     // has to put above each value. Interleave it with the low 8 bytes of a.
+     __m128i upper_bytes = _mm_cmpgt_epi8(_mm_setzero_si128(), a);
+     return _mm_unpacklo_epi8(a, upper_bytes);
+ }
+
+ _NEON2SSE_INLINE __m128i _MM_CVTEPI16_EPI32(__m128i a)
+ {
+     // (0 > a) is 0xffff for negative lanes and 0 otherwise; interleaving it with the low four
+     // 16-bit lanes of a produces their sign-extended 32-bit values.
+     __m128i upper_halves = _mm_cmpgt_epi16(_mm_setzero_si128(), a);
+     return _mm_unpacklo_epi16(a, upper_halves);
+ }
+
+ _NEON2SSE_INLINE __m128i _MM_CVTEPI32_EPI64(__m128i a)
+ {
+     // (0 > a) is all ones for negative lanes and 0 otherwise; interleaving it with the low two
+     // 32-bit lanes of a produces their sign-extended 64-bit values.
+     __m128i upper_words = _mm_cmpgt_epi32(_mm_setzero_si128(), a);
+     return _mm_unpacklo_epi32(a, upper_words);
+ }
+
+ _NEON2SSE_INLINE int _MM_EXTRACT_EPI32(__m128i vec, const int LANE)
+ {
+     // Spill the vector into an aligned scratch array and index it with the run-time lane.
+     // LANE is not range checked; it must be 0..3.
+     _NEON2SSE_ALIGN_16 int32_t lanes[4];
+     _mm_store_si128((__m128i*)lanes, vec);
+     return lanes[LANE];
+ }
+
+ _NEON2SSE_INLINE int _MM_EXTRACT_EPI8(__m128i vec, const int LANE)
+ {
+     // Spill the vector into an aligned scratch array and index it with the run-time lane.
+     // The byte is read as int8_t, so the returned int is the sign-extended lane value.
+     // LANE is not range checked; it must be 0..15.
+     _NEON2SSE_ALIGN_16 int8_t lanes[16];
+     _mm_store_si128((__m128i*)lanes, vec);
+     return (int)lanes[LANE];
+ }
+
+ _NEON2SSE_INLINE int _MM_EXTRACT_PS(__m128 vec, const int LANE)
+ {
+     // Reinterpret the float vector as integers (_M128i), spill it, and return the selected
+     // 32-bit lane as an int. LANE is not range checked; it must be 0..3.
+     _NEON2SSE_ALIGN_16 int32_t lanes[4];
+     _mm_store_si128((__m128i*)lanes, _M128i(vec));
+     return lanes[LANE];
+ }
+
+ _NEON2SSE_INLINE __m128i _MM_INSERT_EPI32(__m128i vec, int p, const int LANE)
+ {
+     // Build two lane images in memory: keep_bits is all ones except in the target lane,
+     // new_lanes is zero except for p in the target lane. Then merge: (vec & keep) | (new & ~keep).
+     // LANE is not range checked; it must be 0..3.
+     _NEON2SSE_ALIGN_16 uint32_t keep_bits[4] = {0xffffffff,0xffffffff,0xffffffff,0xffffffff};
+     _NEON2SSE_ALIGN_16 int32_t new_lanes[4] = {0,0,0,0};
+     __m128i keep, kept_part, new_part;
+     keep_bits[LANE] = 0x0;
+     new_lanes[LANE] = p;
+     keep = _mm_load_si128((__m128i*)keep_bits);
+     kept_part = _mm_and_si128(keep, vec);
+     new_part = _mm_andnot_si128(keep, _mm_load_si128((__m128i*)new_lanes));
+     return _mm_or_si128(kept_part, new_part);
+ }
+
+ _NEON2SSE_INLINE __m128i _MM_INSERT_EPI8(__m128i vec, int p, const int LANE)
+ {
+     // Build two byte images in memory: keep_bits is 0xff everywhere except the target byte,
+     // new_bytes is zero except for the low 8 bits of p in the target byte.
+     // Then merge: (vec & keep) | (new & ~keep). LANE is not range checked; it must be 0..15.
+     _NEON2SSE_ALIGN_16 uint8_t keep_bits[16] = {0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff};
+     _NEON2SSE_ALIGN_16 int8_t new_bytes[16] = {0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0};
+     __m128i keep, kept_part, new_part;
+     keep_bits[LANE] = 0x0;
+     new_bytes[LANE] = (int8_t)p;
+     keep = _mm_load_si128((__m128i*)keep_bits);
+     kept_part = _mm_and_si128(keep, vec);
+     new_part = _mm_andnot_si128(keep, _mm_load_si128((__m128i*)new_bytes));
+     return _mm_or_si128(kept_part, new_part);
+ }
+
+ _NEON2SSE_INLINE __m128 _MM_INSERT_PS(__m128 vec, __m128 p, const int LANE)
+ {
+     // Fallback for _mm_insert_ps when SSE4 is not available.
+     // LANE is the instruction-style immediate (0, 16, 32, 48), not a lane index: the
+     // destination lane is LANE >> 4. The result is vec with that one lane taken from p.
+     // NOTE(review): the value is taken from the SAME lane of p, so p presumably has to carry
+     // the value in that lane (e.g. broadcast) - confirm against the callers.
+     // The mask is unsigned so that 0xffffffff is representable without a narrowing conversion
+     // (a brace-initialised int32_t would be rejected by C++11 compilers).
+     _NEON2SSE_ALIGN_16 uint32_t mask[4] = {0xffffffff,0xffffffff,0xffffffff,0xffffffff};
+     __m128 vec_masked, p_masked;
+     mask[LANE >> 4] = 0x0; //clear the destination lane in the keep-mask
+     vec_masked = _mm_and_ps (*(__m128*)mask,vec); //vec with the destination lane zeroed
+     p_masked = _mm_andnot_ps (*(__m128*)mask, p); //p with only the destination lane kept
+     return _mm_or_ps(vec_masked, p_masked);
+ }
+
+ _NEON2SSE_INLINE __m128i _MM_MAX_EPI8(__m128i a, __m128i b)
+ {
+     // pick_a is all ones in the byte lanes where a > b (signed); take a there and b elsewhere.
+     __m128i pick_a = _mm_cmpgt_epi8 (a, b);
+     __m128i from_a = _mm_and_si128 (pick_a, a);
+     __m128i from_b = _mm_andnot_si128 (pick_a, b);
+     return _mm_or_si128(from_a, from_b);
+ }
+
+ _NEON2SSE_INLINE __m128i _MM_MAX_EPI32(__m128i a, __m128i b)
+ {
+     // pick_a is all ones in the 32-bit lanes where a > b (signed); take a there and b elsewhere.
+     __m128i pick_a = _mm_cmpgt_epi32(a, b);
+     __m128i from_a = _mm_and_si128 (pick_a, a);
+     __m128i from_b = _mm_andnot_si128 (pick_a, b);
+     return _mm_or_si128(from_a, from_b);
+ }
+
+ _NEON2SSE_INLINE __m128i _MM_MAX_EPU16(__m128i a, __m128i b)
+ {
+     // There is no unsigned 16-bit compare here, so both inputs are shifted by 0x8000 into the
+     // signed range, compared as signed, and the outcome selects from the ORIGINAL a and b.
+     __m128i bias, a_biased, b_biased, pick_a;
+     bias = _mm_slli_epi16 (_mm_cmpeq_epi16 (a,a), 15); //0xffff shifted left by 15 -> 0x8000
+     a_biased = _mm_sub_epi16 (a, bias);
+     b_biased = _mm_sub_epi16 (b, bias);
+     pick_a = _mm_cmpgt_epi16 (a_biased, b_biased);
+     return _mm_or_si128(_mm_and_si128 (pick_a,a), _mm_andnot_si128 (pick_a,b));
+ }
+
+ _NEON2SSE_INLINE __m128i _MM_MAX_EPU32(__m128i a, __m128i b)
+ {
+     // There is no unsigned 32-bit compare here, so both inputs are shifted by 0x80000000 into the
+     // signed range, compared as signed, and the outcome selects from the ORIGINAL a and b.
+     __m128i bias, a_biased, b_biased, pick_a;
+     bias = _mm_slli_epi32 (_mm_cmpeq_epi32 (a,a), 31); //0xffffffff shifted left by 31 -> 0x80000000
+     a_biased = _mm_sub_epi32 (a, bias);
+     b_biased = _mm_sub_epi32 (b, bias);
+     pick_a = _mm_cmpgt_epi32 (a_biased, b_biased);
+     return _mm_or_si128(_mm_and_si128 (pick_a,a), _mm_andnot_si128 (pick_a,b));
+ }
+
+ _NEON2SSE_INLINE __m128i _MM_MIN_EPI8(__m128i a, __m128i b)
+ {
+     // pick_a is all ones in the byte lanes where b > a (signed); take a there and b elsewhere.
+     __m128i pick_a = _mm_cmpgt_epi8 (b, a);
+     __m128i from_a = _mm_and_si128 (pick_a, a);
+     __m128i from_b = _mm_andnot_si128 (pick_a, b);
+     return _mm_or_si128(from_a, from_b);
+ }
+
+ _NEON2SSE_INLINE __m128i _MM_MIN_EPI32(__m128i a, __m128i b)
+ {
+     // pick_a is all ones in the 32-bit lanes where b > a (signed); take a there and b elsewhere.
+     __m128i pick_a = _mm_cmpgt_epi32(b, a);
+     __m128i from_a = _mm_and_si128 (pick_a, a);
+     __m128i from_b = _mm_andnot_si128 (pick_a, b);
+     return _mm_or_si128(from_a, from_b);
+ }
+
+ _NEON2SSE_INLINE __m128i _MM_MIN_EPU16(__m128i a, __m128i b)
+ {
+     // There is no unsigned 16-bit compare here, so both inputs are shifted by 0x8000 into the
+     // signed range, compared as signed, and the outcome selects from the ORIGINAL a and b.
+     __m128i bias, a_biased, b_biased, pick_a;
+     bias = _mm_slli_epi16 (_mm_cmpeq_epi16 (a,a), 15); //0xffff shifted left by 15 -> 0x8000
+     a_biased = _mm_sub_epi16 (a, bias);
+     b_biased = _mm_sub_epi16 (b, bias);
+     pick_a = _mm_cmpgt_epi16 (b_biased, a_biased);
+     return _mm_or_si128(_mm_and_si128 (pick_a,a), _mm_andnot_si128 (pick_a,b));
+ }
+
+ _NEON2SSE_INLINE __m128i _MM_MIN_EPU32(__m128i a, __m128i b)
+ {
+     // There is no unsigned 32-bit compare here, so both inputs are shifted by 0x80000000 into the
+     // signed range, compared as signed, and the outcome selects from the ORIGINAL a and b.
+     __m128i bias, a_biased, b_biased, pick_a;
+     bias = _mm_slli_epi32 (_mm_cmpeq_epi32 (a,a), 31); //0xffffffff shifted left by 31 -> 0x80000000
+     a_biased = _mm_sub_epi32 (a, bias);
+     b_biased = _mm_sub_epi32 (b, bias);
+     pick_a = _mm_cmpgt_epi32 (b_biased, a_biased);
+     return _mm_or_si128(_mm_and_si128 (pick_a,a), _mm_andnot_si128 (pick_a,b));
+ }
+
+ _NEON2SSE_INLINE __m128i _MM_BLENDV_EPI8(__m128i a, __m128i b, __m128i mask)
+ {
+     // This is NOT an exact implementation of _mm_blendv_epi8. It is a plain bitwise select,
+     // (b & mask) | (a & ~mask), which matches the intrinsic only when every mask byte is
+     // either 0xff or 0. The real _mm_blendv_epi8 looks at the most significant bit of each
+     // mask byte only.
+     __m128i from_b = _mm_and_si128 (mask,b); //b where the mask is set
+     __m128i from_a = _mm_andnot_si128 (mask,a); //a where the mask is clear
+     return _mm_or_si128(from_a, from_b);
+ }
+
+ #if defined(USE_SSSE3)
+ _NEON2SSE_INLINE __m128i _MM_PACKUS_EPI32(__m128i a, __m128i b)
+ {
+     // Packs the eight signed 32-bit lanes of a and b into 16-bit lanes with unsigned saturation.
+     // The byte shuffle gathers the low 16-bit half of every 32-bit lane into the low quadword
+     // and the high 16-bit halves into the high quadword.
+     _NEON2SSE_ALIGN_16 int8_t split_halves[16] = { 0,1, 4,5, 8,9, 12,13, 2,3, 6,7,10,11,14,15};
+     __m128i a_split, b_split, low_halves, high_halves, negative, too_big;
+     a_split = _mm_shuffle_epi8 (a, *(__m128i*) split_halves);
+     b_split = _mm_shuffle_epi8 (b, *(__m128i*) split_halves);
+     low_halves = _mm_unpacklo_epi64(a_split, b_split); //result before saturation
+     high_halves = _mm_unpackhi_epi64(a_split, b_split); //upper halves decide the saturation
+     negative = _mm_cmpgt_epi16(_mm_setzero_si128(), high_halves); //upper half < 0: result must be 0
+     too_big = _mm_cmpgt_epi16(high_halves, _mm_setzero_si128()); //upper half > 0: result must be 0xffff
+     return _mm_or_si128(_mm_andnot_si128(negative, low_halves), too_big);
+ }
+ #endif
+
+ #if defined(USE_SSSE3)
+ _NEON2SSE_INLINE __m128i _MM_PACKUS1_EPI32(__m128i a)
+ {
+     // Single-input variant of the unsigned-saturating 32 -> 16 bit pack.
+     // After the byte shuffle the low quadword holds the low 16-bit halves of the four lanes
+     // and the high quadword holds their high halves; the saturation masks are derived from
+     // the high halves and applied to the shuffled vector as a whole.
+     _NEON2SSE_ALIGN_16 int8_t split_halves[16] = { 0,1, 4,5, 8,9, 12,13, 2,3, 6,7,10,11,14,15};
+     __m128i a_split, high_halves, negative, too_big;
+     a_split = _mm_shuffle_epi8 (a, *(__m128i*)split_halves);
+     high_halves = _mm_unpackhi_epi64(a_split, a_split); //upper halves decide the saturation
+     negative = _mm_cmpgt_epi16(_mm_setzero_si128(), high_halves); //upper half < 0: result must be 0
+     too_big = _mm_cmpgt_epi16(high_halves, _mm_setzero_si128()); //upper half > 0: result must be 0xffff
+     return _mm_or_si128(_mm_andnot_si128(negative, a_split), too_big);
+ }
+ #endif
+
+ _NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(__m128i _MM_MULLO_EPI32(__m128i a, __m128i b), _NEON2SSE_REASON_SLOW_SERIAL)
+ {
+     // Serial fallback for _mm_mullo_epi32: multiplies the four 32-bit lanes one by one and
+     // keeps the low 32 bits of every product.
+     _NEON2SSE_ALIGN_16 int32_t atmp[4], btmp[4], res[4];
+     int64_t res64;
+     int i;
+     _mm_store_si128((__m128i*)atmp, a);
+     _mm_store_si128((__m128i*)btmp, b);
+     for (i = 0; i<4; i++) {
+         // Widen BEFORE multiplying: an int * int product that does not fit in 32 bits is
+         // signed overflow (undefined behaviour); the 64-bit product is always representable.
+         res64 = (int64_t)atmp[i] * btmp[i];
+         res[i] = (int)(res64 & 0xffffffff);
+     }
+     return _mm_load_si128((__m128i*)res);
+ }
+
+ #if defined(USE_SSSE3)
+ _NEON2SSE_INLINE __m128i _MM_MUL_EPI32(__m128i a, __m128i b)
+ {
+     // Fallback for _mm_mul_epi32: signed multiply of 32-bit lanes 0 and 2 of a and b giving two
+     // 64-bit products. Implemented as |a| * |b| (unsigned) followed by a conditional negation.
+     __m128i sign, zero, mul_us, a_neg, b_neg, mul_us_neg;
+     sign = _mm_xor_si128 (a, b);
+     sign = _mm_srai_epi32 (sign, 31); //per 32-bit lane: all ones if the product is negative, 0 otherwise
+     // The products are 64 bits wide, so the masks of lanes 0 and 2 must cover BOTH halves of
+     // their quadword. Without this broadcast the upper 32 bits of each product would be
+     // selected by the unrelated signs of lanes 1 and 3.
+     sign = _mm_shuffle_epi32 (sign, _MM_SHUFFLE(2, 2, 0, 0));
+     zero = _mm_setzero_si128();
+     a_neg = _mm_abs_epi32 (a); //absolute value of a
+     b_neg = _mm_abs_epi32 (b); //absolute value of b
+     mul_us = _mm_mul_epu32 (a_neg, b_neg); //uses 0 and 2nd data lanes, (abs), the multiplication gives 64 bit result
+     mul_us_neg = _mm_sub_epi64(zero, mul_us); //negated product
+     mul_us_neg = _mm_and_si128(sign, mul_us_neg); //keep the negated product where the sign is set
+     mul_us = _mm_andnot_si128(sign, mul_us); //keep the positive product elsewhere
+     return _mm_or_si128 (mul_us, mul_us_neg);
+ }
+ #endif
+#endif //SSE4
+
+#ifndef _MM_INSERT_EPI64 //special case of SSE4 and _M_X64
+ _NEON2SSE_INLINE __m128i _MM_INSERT_EPI64(__m128i vec, int p, const int LANE)
+ {
+     // Build two quadword images in memory: keep_bits is all ones except in the target lane,
+     // new_lanes is zero except for p (converted from int) in the target lane.
+     // Then merge: (vec & keep) | (new & ~keep). LANE is not range checked; it must be 0 or 1.
+     _NEON2SSE_ALIGN_16 uint64_t keep_bits[2] = {0xffffffffffffffff,0xffffffffffffffff};
+     _NEON2SSE_ALIGN_16 uint64_t new_lanes[2] = {0,0};
+     __m128i keep, kept_part, new_part;
+     keep_bits[LANE] = 0x0;
+     new_lanes[LANE] = p;
+     keep = _mm_load_si128((__m128i*)keep_bits);
+     kept_part = _mm_and_si128 (keep, vec);
+     new_part = _mm_andnot_si128 (keep, _mm_load_si128((__m128i*)new_lanes));
+     return _mm_or_si128(kept_part, new_part);
+ }
+#endif
+#ifndef _MM_EXTRACT_EPI64 //special case of SSE4 and _M_X64
+ _NEON2SSE_INLINE int64_t _MM_EXTRACT_EPI64(__m128i val, const int LANE)
+ {
+     // Spill the vector into an aligned scratch array and index it with the run-time lane.
+     // LANE is not range checked; it must be 0 or 1.
+     _NEON2SSE_ALIGN_16 int64_t lanes[2];
+     _mm_store_si128((__m128i*)lanes, val);
+     return lanes[LANE];
+ }
+#endif
+
+int32x4_t vqd_s32(int32x4_t a); //Doubling saturation for signed ints
+_NEON2SSE_INLINE int32x4_t vqd_s32(int32x4_t a)
+{
+    //Doubling overflows exactly when a and 2*a have opposite signs.
+    __m128i doubled, sat_value, overflowed;
+    doubled = _mm_slli_epi32 (a, 1); // a*2, possibly wrapped
+    //saturation value per lane: 0x7fffffff for a >= 0, 0x80000000 for a < 0
+    sat_value = _mm_add_epi32(_mm_srli_epi32(a, 31), _mm_set1_epi32(0x7fffffff));
+    //propagate the sign bit of (2a ^ a): all ones where the lane overflowed, all zeros otherwise
+    overflowed = _mm_srai_epi32(_mm_xor_si128(doubled, a), 31);
+    sat_value = _mm_and_si128(overflowed, sat_value);
+    doubled = _mm_andnot_si128(overflowed, doubled);
+    return _mm_or_si128(doubled, sat_value);
+}
+
+//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+//*************************************************************************
+//*************************************************************************
+//***************** Functions redefinition\implementation starts here *****
+//*************************************************************************
+//*************************************************************************
+//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+/*If the unified intrinsics solutions is necessary please define your SSE intrinsics wrap here like in the following sample:
+#ifdef ARM
+#define vector_addq_s32 _mm_add_epi32
+#else //if we have IA
+#endif
+
+********************************************************************************************
+Functions below are organised in the following way:
+
+Each NEON intrinsic function has one of the following options:
+1. its x86 full equivalent SSE intrinsic - in this case x86 version just follows the NEON one under the corresponding #define statement
+2. x86 implementation using more than one x86 intrinsics. In this case it is shaped as inlined C function with return statement
+3. the reference to the NEON function returning the same result and implemented in x86 as above. In this case it is shaped as matching NEON function definition
+4. for about 5% of functions due to the corresponding x86 SIMD unavailability or inefficiency in terms of performance
+the serial implementation is provided along with the corresponding compiler warning. If these functions are on your app critical path
+- please consider such functions removal from your code.
+*/
+
+//***********************************************************************
+//************************ Vector add *****************************
+//***********************************************************************
+
+// Each 128-bit vector add maps one-to-one onto an SSE add of the same lane width.
+// The prototype line documents the NEON signature; the #define that follows it makes the
+// NEON name an alias of the SSE intrinsic. Signed and unsigned variants share an intrinsic.
+int8x16_t vaddq_s8(int8x16_t a, int8x16_t b); // VADD.I8 q0,q0,q0
+#define vaddq_s8 _mm_add_epi8
+
+int16x8_t vaddq_s16(int16x8_t a, int16x8_t b); // VADD.I16 q0,q0,q0
+#define vaddq_s16 _mm_add_epi16
+
+int32x4_t vaddq_s32(int32x4_t a, int32x4_t b); // VADD.I32 q0,q0,q0
+#define vaddq_s32 _mm_add_epi32
+
+int64x2_t vaddq_s64(int64x2_t a, int64x2_t b); // VADD.I64 q0,q0,q0
+#define vaddq_s64 _mm_add_epi64
+
+float32x4_t vaddq_f32(float32x4_t a, float32x4_t b); // VADD.F32 q0,q0,q0
+#define vaddq_f32 _mm_add_ps
+
+uint8x16_t vaddq_u8(uint8x16_t a, uint8x16_t b); // VADD.I8 q0,q0,q0
+#define vaddq_u8 _mm_add_epi8
+
+uint16x8_t vaddq_u16(uint16x8_t a, uint16x8_t b); // VADD.I16 q0,q0,q0
+#define vaddq_u16 _mm_add_epi16
+
+uint32x4_t vaddq_u32(uint32x4_t a, uint32x4_t b); // VADD.I32 q0,q0,q0
+#define vaddq_u32 _mm_add_epi32
+
+uint64x2_t vaddq_u64(uint64x2_t a, uint64x2_t b); // VADD.I64 q0,q0,q0
+#define vaddq_u64 _mm_add_epi64
+
+//**************************** Vector long add *****************************:
+//***********************************************************************
+//Va, Vb have equal lane sizes, result is a 128 bit vector of lanes that are twice the width.
+
+//*************** Vector wide add: vaddw_<type>. Vr[i]:=Va[i]+Vb[i] ******************
+//*************** *********************************************************************
+
+//******************************Vector halving add: vhadd -> Vr[i]:=(Va[i]+Vb[i])>>1 , result truncated *******************************
+//*************************************************************************************************************************
+
+int8x16_t vhaddq_s8(int8x16_t a, int8x16_t b); // VHADD.S8 q0,q0,q0
+_NEON2SSE_INLINE int8x16_t vhaddq_s8(int8x16_t a, int8x16_t b)
+{
+    //(a + b) >> 1 without overflowing the lane: (a & b) + ((a ^ b) >> 1), arithmetic shift
+    __m128i shared_bits = _mm_and_si128(a,b);
+    __m128i half_diff = vshrq_n_s8(_mm_xor_si128(a,b),1);
+    return _mm_add_epi8(shared_bits,half_diff);
+}
+
+int16x8_t vhaddq_s16(int16x8_t a, int16x8_t b); // VHADD.S16 q0,q0,q0
+_NEON2SSE_INLINE int16x8_t vhaddq_s16(int16x8_t a, int16x8_t b)
+{
+    //(a + b) >> 1 without overflowing the lane: (a & b) + ((a ^ b) >> 1), arithmetic shift
+    __m128i shared_bits = _mm_and_si128(a,b);
+    __m128i half_diff = _mm_srai_epi16(_mm_xor_si128(a,b),1);
+    return _mm_add_epi16(shared_bits,half_diff);
+}
+
+int32x4_t vhaddq_s32(int32x4_t a, int32x4_t b); // VHADD.S32 q0,q0,q0
+_NEON2SSE_INLINE int32x4_t vhaddq_s32(int32x4_t a, int32x4_t b) // VHADD.S32 q0,q0,q0
+{
+    //(a + b) >> 1 without overflowing the lane: (a & b) + ((a ^ b) >> 1), arithmetic shift
+    __m128i shared_bits = _mm_and_si128(a,b);
+    __m128i half_diff = _mm_srai_epi32(_mm_xor_si128(a,b),1);
+    return _mm_add_epi32(shared_bits,half_diff);
+}
+
+uint8x16_t vhaddq_u8(uint8x16_t a, uint8x16_t b); // VHADD.U8 q0,q0,q0
+_NEON2SSE_INLINE uint8x16_t vhaddq_u8(uint8x16_t a, uint8x16_t b) // VHADD.U8 q0,q0,q0
+{
+    //_mm_avg_epu8 rounds up, the halving add truncates: subtract 1 wherever a and b differ
+    //in their lowest bit, i.e. wherever the rounding actually added something
+    __m128i rounded_avg = _mm_avg_epu8(a, b);
+    __m128i round_bit = _mm_and_si128(_mm_xor_si128(a, b), _mm_set1_epi8(1));
+    return _mm_sub_epi8 (rounded_avg, round_bit);
+}
+
+uint16x8_t vhaddq_u16(uint16x8_t a, uint16x8_t b); // VHADD.U16 q0,q0,q0
+_NEON2SSE_INLINE uint16x8_t vhaddq_u16(uint16x8_t a, uint16x8_t b) // VHADD.U16 q0,q0,q0
+{
+    //_mm_avg_epu16 rounds up, the halving add truncates: subtract 1 wherever a and b differ
+    //in their lowest bit, i.e. wherever the rounding actually added something
+    __m128i rounded_avg, round_bit;
+    rounded_avg = _mm_avg_epu16(a, b);
+    round_bit = _mm_slli_epi16 (_mm_xor_si128(a, b),15); //move bit 0 of (a ^ b) to the top ...
+    round_bit = _mm_srli_epi16 (round_bit,15); //... and back, leaving 1 or 0 per lane
+    return _mm_sub_epi16 (rounded_avg, round_bit);
+}
+
+uint32x4_t vhaddq_u32(uint32x4_t a, uint32x4_t b); // VHADD.U32 q0,q0,q0
+_NEON2SSE_INLINE uint32x4_t vhaddq_u32(uint32x4_t a, uint32x4_t b) // VHADD.U32 q0,q0,q0
+{
+    //(a + b) >> 1 without overflowing the lane: (a & b) + ((a ^ b) >> 1), logical shift
+    __m128i shared_bits = _mm_and_si128(a,b);
+    __m128i half_diff = _mm_srli_epi32(_mm_xor_si128(a,b),1);
+    return _mm_add_epi32(shared_bits,half_diff);
+}
+
+//************************Vector rounding halving add: vrhadd{q}_<type>. Vr[i]:=(Va[i]+Vb[i]+1)>>1 ***************************
+//*****************************************************************************************************************************
+
+//SSE, result rounding!!!
+
+//SSE, result rounding!!!
+
+int8x16_t vrhaddq_s8(int8x16_t a, int8x16_t b); // VRHADD.S8 q0,q0,q0
+_NEON2SSE_INLINE int8x16_t vrhaddq_s8(int8x16_t a, int8x16_t b) // VRHADD.S8 q0,q0,q0
+{
+    //x86 SIMD only has an unsigned rounding average: bias both inputs by 128 into the
+    //unsigned range, average there, then remove the bias again
+    __m128i bias = _mm_set1_epi8(128);
+    __m128i biased_avg = _mm_avg_epu8(_mm_add_epi8(a, bias), _mm_add_epi8(b, bias));
+    return _mm_sub_epi8 (biased_avg, bias);
+}
+
+int16x8_t vrhaddq_s16(int16x8_t a, int16x8_t b); // VRHADD.S16 q0,q0,q0
+_NEON2SSE_INLINE int16x8_t vrhaddq_s16(int16x8_t a, int16x8_t b) // VRHADD.S16 q0,q0,q0
+{
+    //x86 SIMD only has an unsigned rounding average: bias both inputs by 0x8000 into the
+    //unsigned range, average there, then remove the bias again
+    __m128i bias = _mm_set1_epi16(0x8000);
+    __m128i biased_avg = _mm_avg_epu16(_mm_add_epi16(a, bias), _mm_add_epi16(b, bias));
+    return _mm_sub_epi16 (biased_avg, bias);
+}
+
+int32x4_t vrhaddq_s32(int32x4_t a, int32x4_t b); // VRHADD.S32 q0,q0,q0
+_NEON2SSE_INLINE int32x4_t vrhaddq_s32(int32x4_t a, int32x4_t b)
+{
+    //(a + b + 1) >> 1 without overflow: (a >> 1) + (b >> 1) + ((a | b) & 1)
+    __m128i half_a, half_b, round_bit;
+    half_a = _mm_srai_epi32(a,1);
+    half_b = _mm_srai_epi32(b,1);
+    round_bit = _mm_slli_epi32 (_mm_or_si128(a,b),31); //move bit 0 of (a | b) to the top ...
+    round_bit = _mm_srli_epi32 (round_bit,31); //... and back, leaving 1 or 0 per lane
+    return _mm_add_epi32(_mm_add_epi32(half_a,half_b),round_bit);
+}
+
+// The unsigned 8- and 16-bit rounding halving adds are direct aliases of the SSE2 averages,
+// which already round their result.
+uint8x16_t vrhaddq_u8(uint8x16_t a, uint8x16_t b); // VRHADD.U8 q0,q0,q0
+#define vrhaddq_u8 _mm_avg_epu8 //SSE2, results rounded
+
+uint16x8_t vrhaddq_u16(uint16x8_t a, uint16x8_t b); // VRHADD.U16 q0,q0,q0
+#define vrhaddq_u16 _mm_avg_epu16 //SSE2, results rounded
+
+uint32x4_t vrhaddq_u32(uint32x4_t a, uint32x4_t b); // VRHADD.U32 q0,q0,q0
+_NEON2SSE_INLINE uint32x4_t vrhaddq_u32(uint32x4_t a, uint32x4_t b) // VRHADD.U32 q0,q0,q0
+{
+    //(a + b + 1) >> 1 without overflow: (a >> 1) + (b >> 1) + ((a | b) & 1), logical shifts
+    __m128i half_a, half_b, round_bit;
+    half_a = _mm_srli_epi32(a,1);
+    half_b = _mm_srli_epi32(b,1);
+    round_bit = _mm_slli_epi32 (_mm_or_si128(a,b),31); //move bit 0 of (a | b) to the top ...
+    round_bit = _mm_srli_epi32 (round_bit,31); //... and back, leaving 1 or 0 per lane
+    return _mm_add_epi32(_mm_add_epi32(half_a,half_b),round_bit);
+}
+
+//****************** VQADD: Vector saturating add ************************
+//************************************************************************
+
+// Signed 8- and 16-bit saturating adds are direct aliases of the SSE saturating adds.
+int8x16_t vqaddq_s8(int8x16_t a, int8x16_t b); // VQADD.S8 q0,q0,q0
+#define vqaddq_s8 _mm_adds_epi8
+
+int16x8_t vqaddq_s16(int16x8_t a, int16x8_t b); // VQADD.S16 q0,q0,q0
+#define vqaddq_s16 _mm_adds_epi16
+
+int32x4_t vqaddq_s32(int32x4_t a, int32x4_t b); // VQADD.S32 q0,q0,q0
+_NEON2SSE_INLINE int32x4_t vqaddq_s32(int32x4_t a, int32x4_t b)
+{
+    //There is no 32-bit saturating add in x86 SIMD. A lane overflows only when a and b have
+    //the same sign and the wrapped sum has the opposite one.
+    __m128i sum, sat_value, sum_flips_sign, signs_differ, overflowed;
+    sum = _mm_add_epi32(a, b);
+    //saturation value per lane: 0x7fffffff for a >= 0, 0x80000000 for a < 0
+    sat_value = _mm_add_epi32(_mm_srli_epi32(a, 31), _mm_set1_epi32(0x7fffffff));
+    sum_flips_sign = _mm_xor_si128(sum, a);
+    signs_differ = _mm_xor_si128(b, a);
+    overflowed = _mm_andnot_si128(signs_differ, sum_flips_sign);
+    overflowed = _mm_srai_epi32(overflowed,31); //propagate the sign bit: all ones where overflowed, all zeros otherwise
+    sat_value = _mm_and_si128(overflowed, sat_value);
+    sum = _mm_andnot_si128(overflowed, sum);
+    return _mm_or_si128(sum, sat_value);
+}
+
+int64x2_t vqaddq_s64(int64x2_t a, int64x2_t b); // VQADD.S64 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x2_t vqaddq_s64(int64x2_t a, int64x2_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    //Serial implementation: both lanes are processed with scalar unsigned arithmetic.
+    _NEON2SSE_ALIGN_16 uint64_t lhs[2], rhs[2], sum[2];
+    uint64_t sat_value;
+    int i;
+    _mm_store_si128((__m128i*)lhs, a);
+    _mm_store_si128((__m128i*)rhs, b);
+    for (i = 0; i < 2; i++) {
+        sum[i] = lhs[i] + rhs[i]; //wrapping sum
+        //saturation value chosen from the sign bit of a: (a >> 63) + ~_SIGNBIT64
+        sat_value = (lhs[i] >> 63) + (~_SIGNBIT64);
+        //the expression is non-negative exactly when the saturation value and b agree in sign
+        //while the sum and b disagree - in that case the saturation value replaces the sum
+        if ((int64_t)((rhs[i] ^ sat_value) | ~(sum[i] ^ rhs[i]))>=0) {
+            sum[i] = sat_value;
+        }
+    }
+    return _mm_load_si128((__m128i*)sum);
+}
+
+// Unsigned 8- and 16-bit saturating adds are direct aliases of the SSE saturating adds.
+uint8x16_t vqaddq_u8(uint8x16_t a, uint8x16_t b); // VQADD.U8 q0,q0,q0
+#define vqaddq_u8 _mm_adds_epu8
+
+uint16x8_t vqaddq_u16(uint16x8_t a, uint16x8_t b); // VQADD.U16 q0,q0,q0
+#define vqaddq_u16 _mm_adds_epu16
+
+uint32x4_t vqaddq_u32(uint32x4_t a, uint32x4_t b); // VQADD.U32 q0,q0,q0
+_NEON2SSE_INLINE uint32x4_t vqaddq_u32(uint32x4_t a, uint32x4_t b)
+{
+    //The sum wrapped around exactly when it is smaller than a (unsigned). There is no unsigned
+    //compare, so both sides are shifted by 0x80000000 and compared as signed values.
+    __m128i sign_bias, sum, wrapped;
+    sign_bias = _mm_set1_epi32 (0x80000000);
+    sum = _mm_add_epi32 (a, b);
+    wrapped = _mm_cmpgt_epi32 (_mm_sub_epi32 (a, sign_bias), _mm_sub_epi32 (sum, sign_bias));
+    return _mm_or_si128 (sum, wrapped); //wrapped lanes become 0xffffffff
+}
+
+uint64x2_t vqaddq_u64(uint64x2_t a, uint64x2_t b); // VQADD.U64 q0,q0,q0
+#ifdef USE_SSE4
+ _NEON2SSE_INLINE uint64x2_t vqaddq_u64(uint64x2_t a, uint64x2_t b)
+ {
+     //The sum wrapped around exactly when it is smaller than a (unsigned). There is no unsigned
+     //compare, so both sides are shifted by the 64-bit sign bit and compared as signed values.
+     __m128i sign_bias, sum, wrapped;
+     sign_bias = _mm_set_epi32 (0x80000000, 0x0, 0x80000000, 0x0); //0x8000000000000000 per quadword
+     sum = _mm_add_epi64 (a, b);
+     wrapped = _mm_cmpgt_epi64 (_mm_sub_epi64 (a, sign_bias), _mm_sub_epi64 (sum, sign_bias)); //SSE4.2!!!
+     return _mm_or_si128 (sum, wrapped); //wrapped lanes become all ones
+ }
+#else
+ _NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint64x2_t vqaddq_u64(uint64x2_t a, uint64x2_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+ {
+     //Serial implementation: a wrapped unsigned sum is smaller than its first operand.
+     _NEON2SSE_ALIGN_16 uint64_t lhs[2], rhs[2], sum[2];
+     int i;
+     _mm_store_si128((__m128i*)lhs, a);
+     _mm_store_si128((__m128i*)rhs, b);
+     for (i = 0; i < 2; i++) {
+         sum[i] = lhs[i] + rhs[i];
+         if (sum[i] < lhs[i]) sum[i] = ~(uint64_t)0;
+     }
+     return _mm_load_si128((__m128i*)(sum));
+ }
+#endif
+
+//******************* Vector add high half (truncated) ******************
+//************************************************************************
+
+//*********** Vector rounding add high half: vraddhn_<type> ******************.
+//***************************************************************************
+
+//**********************************************************************************
+//********* Multiplication *************************************
+//**************************************************************************************
+
+//Vector multiply: vmul -> Vr[i] := Va[i] * Vb[i]
+//As we don't go to wider result functions are equal to "multiply low" in x86
+
+#if defined(USE_SSSE3)
+int8x16_t vmulq_s8(int8x16_t a, int8x16_t b); // VMUL.I8 q0,q0,q0
+_NEON2SSE_INLINE int8x16_t vmulq_s8(int8x16_t a, int8x16_t b) // VMUL.I8 q0,q0,q0
+{
+    //There is no 8-bit SIMD multiply: widen each half to 16 bits, multiply, and gather the low
+    //byte of every product again. The solution may be not optimal.
+    //The byte shuffle moves the low byte of every 16-bit lane into the low quadword.
+    _NEON2SSE_ALIGN_16 int8_t low_bytes_first[16] = { 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 };
+    __m128i a_wide, b_wide, prod_first, prod_second;
+    //first 8 lanes
+    a_wide = _MM_CVTEPI8_EPI16 (a); // SSE 4.1
+    b_wide = _MM_CVTEPI8_EPI16 (b); // SSE 4.1
+    prod_first = _mm_mullo_epi16 (a_wide, b_wide);
+    prod_first = _mm_shuffle_epi8 (prod_first, *(__m128i*)low_bytes_first); //return to 8 bit
+    //swap hi and low part of a and b to process the remaining 8 lanes
+    a_wide = _MM_CVTEPI8_EPI16 (_mm_shuffle_epi32 (a, _SWAP_HI_LOW32)); // SSE 4.1
+    b_wide = _MM_CVTEPI8_EPI16 (_mm_shuffle_epi32 (b, _SWAP_HI_LOW32)); // SSE 4.1
+    prod_second = _mm_mullo_epi16 (a_wide, b_wide);
+    prod_second = _mm_shuffle_epi8 (prod_second, *(__m128i*)low_bytes_first); //return to 8 bit
+
+    return _mm_unpacklo_epi64(prod_first, prod_second);
+}
+#endif
+
+// These multiplies keep the lane width, so they alias the "multiply low" / packed float
+// multiply directly. _MM_MULLO_EPI32 is this header's wrapper around the SSE4.1 intrinsic.
+int16x8_t vmulq_s16(int16x8_t a, int16x8_t b); // VMUL.I16 q0,q0,q0
+#define vmulq_s16 _mm_mullo_epi16
+
+int32x4_t vmulq_s32(int32x4_t a, int32x4_t b); // VMUL.I32 q0,q0,q0
+#define vmulq_s32 _MM_MULLO_EPI32 //SSE4.1
+
+float32x4_t vmulq_f32(float32x4_t a, float32x4_t b); // VMUL.F32 q0,q0,q0
+#define vmulq_f32 _mm_mul_ps
+
+uint8x16_t vmulq_u8(uint8x16_t a, uint8x16_t b); // VMUL.I8 q0,q0,q0
+_NEON2SSE_INLINE uint8x16_t vmulq_u8(uint8x16_t a, uint8x16_t b) // VMUL.I8 q0,q0,q0
+{
+    //There is no 8-bit SIMD multiply: widen each half to 16 bits, multiply, keep the low byte of
+    //every product and pack the two halves back together. The solution may be not optimal.
+    //Both halves are widened with the unsigned conversion, as befits unsigned lanes.
+    __m128i maskff, a16, b16, r16_1, r16_2;
+    maskff = _mm_set1_epi16(0xff);
+    //first 8 lanes
+    a16 = _MM_CVTEPU8_EPI16 (a); // SSE 4.1
+    b16 = _MM_CVTEPU8_EPI16 (b); // SSE 4.1
+    r16_1 = _mm_mullo_epi16 (a16, b16);
+    r16_1 = _mm_and_si128(r16_1, maskff); //keep the low byte so the pack below never saturates
+    //swap hi and low part of a and b to process the remaining data
+    a16 = _mm_shuffle_epi32 (a, _SWAP_HI_LOW32);
+    b16 = _mm_shuffle_epi32 (b, _SWAP_HI_LOW32);
+    a16 = _MM_CVTEPU8_EPI16 (a16); // SSE 4.1
+    b16 = _MM_CVTEPU8_EPI16 (b16); // SSE 4.1
+
+    r16_2 = _mm_mullo_epi16 (a16, b16);
+    r16_2 = _mm_and_si128(r16_2, maskff); //keep the low byte so the pack below never saturates
+    return _mm_packus_epi16 (r16_1, r16_2);
+}
+
+// The unsigned variants alias the same "multiply low" operations as the signed ones.
+uint16x8_t vmulq_u16(uint16x8_t a, uint16x8_t b); // VMUL.I16 q0,q0,q0
+#define vmulq_u16 _mm_mullo_epi16
+
+uint32x4_t vmulq_u32(uint32x4_t a, uint32x4_t b); // VMUL.I32 q0,q0,q0
+#define vmulq_u32 _MM_MULLO_EPI32 //SSE4.1
+
+poly8x16_t vmulq_p8(poly8x16_t a, poly8x16_t b); // VMUL.P8 q0,q0,q0
+_NEON2SSE_INLINE poly8x16_t vmulq_p8(poly8x16_t a, poly8x16_t b)
+{
+    //Carry-less multiply, one bit of b at a time: for each of the 8 bit positions multiply a
+    //by b masked down to that single bit and XOR the partial products together. May be optimized.
+    __m128i bit_mask, partial, acc;
+    int bit;
+    bit_mask = vshrq_n_u8(_mm_cmpeq_epi8 (a,a),7); //0xff >> 7 -> 0x01 in every byte
+    acc = vmulq_u8(a, _mm_and_si128(b, bit_mask)); //partial product for bit 0
+    for(bit = 1; bit<8; bit++) {
+        bit_mask = _mm_slli_epi16(bit_mask,1); //next bit position, 16 bit shift is OK here
+        partial = vmulq_u8(a, _mm_and_si128(b, bit_mask));
+        acc = _mm_xor_si128(acc, partial);
+    }
+    return acc;
+}
+
+//************************* Vector long multiply ***********************************
+//****************************************************************************
+
+//****************Vector saturating doubling long multiply **************************
+//*****************************************************************
+
+//********************* Vector multiply accumulate: vmla -> Vr[i] := Va[i] + Vb[i] * Vc[i] ************************
+//******************************************************************************************
+
+#if defined(USE_SSSE3)
+int8x16_t vmlaq_s8(int8x16_t a, int8x16_t b, int8x16_t c); // VMLA.I8 q0,q0,q0
+_NEON2SSE_INLINE int8x16_t vmlaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) // VMLA.I8 q0,q0,q0
+{
+    //a + b * c per byte. There is no 8-bit SIMD multiply: each half of b and c is widened to
+    //16 bits, multiplied, narrowed back by a byte shuffle and added to the matching half of a.
+    //The solution may be not optimal.
+    _NEON2SSE_ALIGN_16 int8_t low_bytes_first[16] = { 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 };
+    __m128i b_wide, c_wide, a_swapped, acc_first, acc_second;
+    //first 8 lanes
+    b_wide = _MM_CVTEPI8_EPI16 (b); // SSE 4.1
+    c_wide = _MM_CVTEPI8_EPI16 (c); // SSE 4.1
+    acc_first = _mm_mullo_epi16 (b_wide, c_wide);
+    acc_first = _mm_shuffle_epi8 (acc_first, *(__m128i*) low_bytes_first); //return to 8 bits
+    acc_first = _mm_add_epi8 (acc_first, a);
+    //swap hi and low part of a, b and c to process the remaining 8 lanes
+    a_swapped = _mm_shuffle_epi32 (a, _SWAP_HI_LOW32);
+    b_wide = _MM_CVTEPI8_EPI16 (_mm_shuffle_epi32 (b, _SWAP_HI_LOW32)); // SSE 4.1
+    c_wide = _MM_CVTEPI8_EPI16 (_mm_shuffle_epi32 (c, _SWAP_HI_LOW32)); // SSE 4.1
+    acc_second = _mm_mullo_epi16 (b_wide, c_wide);
+    acc_second = _mm_shuffle_epi8 (acc_second, *(__m128i*) low_bytes_first);
+    acc_second = _mm_add_epi8(acc_second, a_swapped);
+    return _mm_unpacklo_epi64(acc_first,acc_second);
+}
+#endif
+
+int16x8_t vmlaq_s16(int16x8_t a, int16x8_t b, int16x8_t c); // VMLA.I16 q0,q0,q0
+_NEON2SSE_INLINE int16x8_t vmlaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) // VMLA.I16 q0,q0,q0
+{
+    //a + b * c per 16-bit lane, keeping the low 16 bits of every product
+    return _mm_add_epi16 (_mm_mullo_epi16 (c, b), a);
+}
+
+int32x4_t vmlaq_s32(int32x4_t a, int32x4_t b, int32x4_t c); // VMLA.I32 q0,q0,q0
+_NEON2SSE_INLINE int32x4_t vmlaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) // VMLA.I32 q0,q0,q0
+{
+    //a + b * c per 32-bit lane, keeping the low 32 bits of every product
+    __m128i product = _MM_MULLO_EPI32 (c, b); //SSE4.1
+    return _mm_add_epi32 (product, a);
+}
+
+float32x4_t vmlaq_f32(float32x4_t a, float32x4_t b, float32x4_t c); // VMLA.F32 q0,q0,q0
+_NEON2SSE_INLINE float32x4_t vmlaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) // VMLA.F32 q0,q0,q0
+{
+    //a + b * c per float lane, done as a separate multiply and add (no fused multiply-add here)
+    return _mm_add_ps (a, _mm_mul_ps (c, b));
+}
+
+#if defined(USE_SSSE3)
+uint8x16_t vmlaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c); // VMLA.I8 q0,q0,q0
+_NEON2SSE_INLINE uint8x16_t vmlaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) // VMLA.I8 q0,q0,q0
+{
+    //a + b * c per byte. There is no 8-bit SIMD multiply: each half of b and c is widened to
+    //16 bits, multiplied, narrowed back by a byte shuffle and added to the matching half of a.
+    //The solution may be not optimal.
+    _NEON2SSE_ALIGN_16 int8_t low_bytes_first[16] = { 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 };
+    __m128i b_wide, c_wide, a_swapped, acc_first, acc_second;
+    //first 8 lanes
+    b_wide = _MM_CVTEPU8_EPI16 (b); // SSE 4.1
+    c_wide = _MM_CVTEPU8_EPI16 (c); // SSE 4.1
+    acc_first = _mm_mullo_epi16 (b_wide, c_wide);
+    acc_first = _mm_shuffle_epi8 (acc_first, *(__m128i*) low_bytes_first); //return to 8 bits
+    acc_first = _mm_add_epi8 (acc_first, a);
+    //swap hi and low part of a, b and c to process the remaining 8 lanes
+    a_swapped = _mm_shuffle_epi32 (a, _SWAP_HI_LOW32);
+    b_wide = _MM_CVTEPU8_EPI16 (_mm_shuffle_epi32 (b, _SWAP_HI_LOW32)); // SSE 4.1
+    c_wide = _MM_CVTEPU8_EPI16 (_mm_shuffle_epi32 (c, _SWAP_HI_LOW32)); // SSE 4.1
+    acc_second = _mm_mullo_epi16 (b_wide, c_wide);
+    acc_second = _mm_shuffle_epi8 (acc_second, *(__m128i*) low_bytes_first);
+    acc_second = _mm_add_epi8(acc_second, a_swapped);
+    return _mm_unpacklo_epi64(acc_first,acc_second);
+}
+#endif
+
+uint16x8_t vmlaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c); // VMLA.I16 q0,q0,q0
+//the unsigned variant is a plain alias of the signed implementation (same low-half multiply and wrapping add)
+#define vmlaq_u16 vmlaq_s16
+
+uint32x4_t vmlaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c); // VMLA.I32 q0,q0,q0
+//the unsigned variant is a plain alias of the signed implementation
+#define vmlaq_u32 vmlaq_s32
+
+//********************** Vector widening multiply accumulate (long multiply accumulate):
+// vmla -> Vr[i] := Va[i] + Vb[i] * Vc[i] **************
+//********************************************************************************************
+
+//******************** Vector multiply subtract: vmls -> Vr[i] := Va[i] - Vb[i] * Vc[i] ***************************************
+//********************************************************************************************
+
+#if defined(USE_SSSE3)
+int8x16_t vmlsq_s8(int8x16_t a, int8x16_t b, int8x16_t c); // VMLS.I8 q0,q0,q0
+_NEON2SSE_INLINE int8x16_t vmlsq_s8(int8x16_t a, int8x16_t b, int8x16_t c) // VMLS.I8 q0,q0,q0
+{ //per 8-bit lane: a - b * c; solution may be not optimal
+    // there is no 8-bit SIMD multiply, so each 8-lane half is widened to 16 bits, multiplied and narrowed back
+    _NEON2SSE_ALIGN_16 int8_t mask8_16_even_odd[16] = { 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 };
+    __m128i a_hi, b_hi, c_hi, b_wide, c_wide, acc_lo, acc_hi;
+
+    //first 8 lanes
+    b_wide = _MM_CVTEPI8_EPI16 (b); // SSE 4.1
+    c_wide = _MM_CVTEPI8_EPI16 (c); // SSE 4.1
+    acc_lo = _mm_mullo_epi16 (b_wide, c_wide);
+    acc_lo = _mm_shuffle_epi8 (acc_lo, *(__m128i*) mask8_16_even_odd); //even-indexed bytes (low byte of each product) gathered into the low 64 bits
+    acc_lo = _mm_sub_epi8 (a, acc_lo);
+
+    //remaining 8 lanes: swap the hi and low parts of a, b and c, then repeat
+    a_hi = _mm_shuffle_epi32 (a, _SWAP_HI_LOW32);
+    b_hi = _mm_shuffle_epi32 (b, _SWAP_HI_LOW32);
+    c_hi = _mm_shuffle_epi32 (c, _SWAP_HI_LOW32);
+    b_wide = _MM_CVTEPI8_EPI16 (b_hi); // SSE 4.1
+    c_wide = _MM_CVTEPI8_EPI16 (c_hi); // SSE 4.1
+    acc_hi = _mm_mullo_epi16 (b_wide, c_wide);
+    acc_hi = _mm_shuffle_epi8 (acc_hi, *(__m128i*) mask8_16_even_odd);
+    acc_hi = _mm_sub_epi8 (a_hi, acc_hi);
+
+    //only the low 64 bits of each partial result are kept
+    return _mm_unpacklo_epi64 (acc_lo, acc_hi);
+}
+#endif
+
+int16x8_t vmlsq_s16(int16x8_t a, int16x8_t b, int16x8_t c); // VMLS.I16 q0,q0,q0
+_NEON2SSE_INLINE int16x8_t vmlsq_s16(int16x8_t a, int16x8_t b, int16x8_t c) // VMLS.I16 q0,q0,q0
+{ //per 16-bit lane: a - b * c, keeping only the low 16 bits of the product
+    const __m128i prod = _mm_mullo_epi16 (c, b);
+    return _mm_sub_epi16 (a, prod);
+}
+
+int32x4_t vmlsq_s32(int32x4_t a, int32x4_t b, int32x4_t c); // VMLS.I32 q0,q0,q0
+_NEON2SSE_INLINE int32x4_t vmlsq_s32(int32x4_t a, int32x4_t b, int32x4_t c) // VMLS.I32 q0,q0,q0
+{ //per 32-bit lane: a - b * c, keeping only the low 32 bits of the product
+    const __m128i prod = _MM_MULLO_EPI32 (c, b); //SSE4.1
+    return _mm_sub_epi32 (a, prod);
+}
+
+float32x4_t vmlsq_f32(float32x4_t a, float32x4_t b, float32x4_t c); // VMLS.F32 q0,q0,q0
+_NEON2SSE_INLINE float32x4_t vmlsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) // VMLS.F32 q0,q0,q0
+{ //per lane: a - b * c, written as a separate multiply and subtract
+    const __m128 prod = _mm_mul_ps (c, b);
+    return _mm_sub_ps (a, prod);
+}
+
+#if defined(USE_SSSE3)
+uint8x16_t vmlsq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c); // VMLS.I8 q0,q0,q0
+_NEON2SSE_INLINE uint8x16_t vmlsq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) // VMLS.I8 q0,q0,q0
+{ //per 8-bit lane: a - b * c; solution may be not optimal
+    // there is no 8-bit SIMD multiply, so each 8-lane half is widened to 16 bits, multiplied and narrowed back
+    _NEON2SSE_ALIGN_16 int8_t mask8_16_even_odd[16] = { 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 };
+    __m128i a_hi, b_hi, c_hi, b_wide, c_wide, acc_lo, acc_hi;
+
+    //first 8 lanes
+    b_wide = _MM_CVTEPU8_EPI16 (b); // SSE 4.1
+    c_wide = _MM_CVTEPU8_EPI16 (c); // SSE 4.1
+    acc_lo = _mm_mullo_epi16 (b_wide, c_wide);
+    acc_lo = _mm_shuffle_epi8 (acc_lo, *(__m128i*) mask8_16_even_odd); //return to 8 bits: even-indexed bytes gathered into the low 64 bits
+    acc_lo = _mm_sub_epi8 (a, acc_lo);
+
+    //remaining 8 lanes: swap the hi and low parts of a, b and c, then repeat
+    a_hi = _mm_shuffle_epi32 (a, _SWAP_HI_LOW32);
+    b_hi = _mm_shuffle_epi32 (b, _SWAP_HI_LOW32);
+    c_hi = _mm_shuffle_epi32 (c, _SWAP_HI_LOW32);
+    b_wide = _MM_CVTEPU8_EPI16 (b_hi); // SSE 4.1
+    c_wide = _MM_CVTEPU8_EPI16 (c_hi); // SSE 4.1
+    acc_hi = _mm_mullo_epi16 (b_wide, c_wide);
+    acc_hi = _mm_shuffle_epi8 (acc_hi, *(__m128i*) mask8_16_even_odd);
+    acc_hi = _mm_sub_epi8 (a_hi, acc_hi);
+
+    //only the low 64 bits of each partial result are kept
+    return _mm_unpacklo_epi64 (acc_lo, acc_hi);
+}
+#endif
+
+uint16x8_t vmlsq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c); // VMLS.I16 q0,q0,q0
+//the unsigned variant is a plain alias of the signed implementation (same low-half multiply and wrapping subtract)
+#define vmlsq_u16 vmlsq_s16
+
+uint32x4_t vmlsq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c); // VMLS.I32 q0,q0,q0
+//the unsigned variant is a plain alias of the signed implementation
+#define vmlsq_u32 vmlsq_s32
+
+//******************** Vector multiply subtract long (widening multiply subtract) ************************************
+//*************************************************************************************************************
+
+//****** Vector saturating doubling multiply high **********************
+//*************************************************************************
+//VQDMULH returns the high half of the doubled product: Vr[i] := sat((2 * Va[i] * Vb[i]) >> lane_bits).
+//The only input that saturates is Va[i] == Vb[i] == the most negative lane value.
+
+int16x8_t vqdmulhq_s16(int16x8_t a, int16x8_t b); // VQDMULH.S16 q0,q0,q0
+_NEON2SSE_INLINE int16x8_t vqdmulhq_s16(int16x8_t a, int16x8_t b) // VQDMULH.S16 q0,q0,q0
+{
+    //Split the 32-bit product as hi * 2^16 + lo (hi signed, lo unsigned). Then
+    //    (2 * a * b) >> 16 == 2 * hi + (lo >> 15)
+    //Doubling hi alone drops the (lo >> 15) carry and is off by one, e.g. for 3 * 0x4000 (expected 1, not 0).
+    __m128i hi2, carry, res, overflow;
+    hi2 = _mm_mulhi_epi16 (a, b);
+    hi2 = _mm_slli_epi16 (hi2, 1); //2 * hi, modulo 2^16
+    carry = _mm_mullo_epi16 (a, b);
+    carry = _mm_srli_epi16 (carry, 15); //top bit of the low half: 0 or 1
+    res = _mm_add_epi16 (hi2, carry);
+    //0x8000 is never a valid result; it appears only when a == b == 0x8000 wrapped around
+    overflow = _mm_cmpeq_epi16 (res, _mm_set1_epi16 (-32768));
+    return _mm_xor_si128 (res, overflow); //0x8000 -> 0x7fff, every other lane unchanged
+}
+
+#if defined(USE_SSSE3)
+int32x4_t vqdmulhq_s32(int32x4_t a, int32x4_t b); // VQDMULH.S32 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x4_t vqdmulhq_s32(int32x4_t a, int32x4_t b), _NEON2SSE_REASON_SLOW_UNEFFECTIVE)
+{ // no multiply high 32 bit SIMD in IA32, may be not optimal compared with a serial solution for the SSSE3 target
+    //Returns sat((2 * a * b) >> 32) per lane. The 64-bit products are doubled BEFORE the high half is taken,
+    //so the bit carried up from the low half is not lost (doubling the high half afterwards would drop it).
+    __m128i ab, ba, prod01, prod23, res, overflow;
+    ab = _mm_unpacklo_epi32 (a, b); //a0, b0, a1,b1
+    ba = _mm_unpacklo_epi32 (b, a); //b0, a0, b1,a1
+    prod01 = _MM_MUL_EPI32(ab, ba); //uses 1rst and 3rd data lanes, the multiplication gives 64 bit result
+    prod01 = _mm_slli_epi64 (prod01, 1); //2 * a * b, modulo 2^64
+    ab = _mm_unpackhi_epi32 (a, b); //a2, b2, a3,b3
+    ba = _mm_unpackhi_epi32 (b, a); //b2, a2, b3,a3
+    prod23 = _MM_MUL_EPI32(ab, ba); //uses 1rst and 3rd data lanes, the multiplication gives 64 bit result
+    prod23 = _mm_slli_epi64 (prod23, 1); //2 * a * b, modulo 2^64
+    prod01 = _mm_shuffle_epi32 (prod01, 1 | (3 << 2) | (0 << 4) | (2 << 6)); //high 32 bits of both products moved into the low 64 bits
+    prod23 = _mm_shuffle_epi32 (prod23, 1 | (3 << 2) | (0 << 4) | (2 << 6)); //high 32 bits of both products moved into the low 64 bits
+    res = _mm_unpacklo_epi64 (prod01, prod23);
+    //0x80000000 is never a valid result; it appears only when a == b == 0x80000000 wrapped around
+    overflow = _mm_cmpeq_epi32 (res, _mm_set1_epi32 (0x80000000));
+    return _mm_xor_si128 (res, overflow); //0x80000000 -> 0x7fffffff, every other lane unchanged
+}
+#endif
+
+//********* Vector saturating rounding doubling multiply high ****************
+//****************************************************************************
+//If use _mm_mulhrs_xx functions the result may differ from NEON one a little due to different rounding rules and order
+
+#if defined(USE_SSSE3)
+int16x8_t vqrdmulhq_s16(int16x8_t a, int16x8_t b); // VQRDMULH.S16 q0,q0,q0
+_NEON2SSE_INLINE int16x8_t vqrdmulhq_s16(int16x8_t a, int16x8_t b) // VQRDMULH.S16 q0,q0,q0
+{
+    //_mm_mulhrs_epi16 returns ((a * b >> 14) + 1) >> 1, which is the same value as the
+    //VQRDMULH result (2 * a * b + 0x8000) >> 16, so the product must NOT be doubled again here.
+    __m128i res, overflow;
+    res = _mm_mulhrs_epi16 (a, b);
+    //0x8000 is never a valid result; it appears only when a == b == 0x8000 wrapped around
+    overflow = _mm_cmpeq_epi16 (res, _mm_set1_epi16 (-32768));
+    return _mm_xor_si128 (res, overflow); //0x8000 -> 0x7fff, every other lane unchanged
+}
+#endif
+
+#if defined(USE_SSSE3)
+int32x4_t vqrdmulhq_s32(int32x4_t a, int32x4_t b); // VQRDMULH.S32 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x4_t vqrdmulhq_s32(int32x4_t a, int32x4_t b), _NEON2SSE_REASON_SLOW_UNEFFECTIVE)
+{ // no multiply high 32 bit SIMD in IA32, may be not optimal compared with a serial solution for the SSSE3 target
+    //Returns sat((2 * a * b + 0x80000000) >> 32) per lane.
+    //The rounding is applied to the full 64-bit product: (2 * p + 2^31) >> 32 == ((p + 2^30) << 1) >> 32,
+    //so 2^30 is added to each product before it is doubled and the high half is extracted.
+    __m128i ab, ba, half_round, prod01, prod23, res, overflow;
+    half_round = _mm_set_epi32 (0, 0x40000000, 0, 0x40000000); //2^30 in each 64-bit lane
+    ab = _mm_unpacklo_epi32 (a, b); //a0, b0, a1,b1
+    ba = _mm_unpacklo_epi32 (b, a); //b0, a0, b1,a1
+    prod01 = _MM_MUL_EPI32(ab, ba); //uses 1rst and 3rd data lanes, the multiplication gives 64 bit result
+    prod01 = _mm_add_epi64 (prod01, half_round);
+    prod01 = _mm_slli_epi64 (prod01, 1); //2 * a * b + 2^31, modulo 2^64
+    ab = _mm_unpackhi_epi32 (a, b); //a2, b2, a3,b3
+    ba = _mm_unpackhi_epi32 (b, a); //b2, a2, b3,a3
+    prod23 = _MM_MUL_EPI32(ab, ba); //uses 1rst and 3rd data lanes, the multiplication gives 64 bit result
+    prod23 = _mm_add_epi64 (prod23, half_round);
+    prod23 = _mm_slli_epi64 (prod23, 1); //2 * a * b + 2^31, modulo 2^64
+    prod01 = _mm_shuffle_epi32 (prod01, 1 | (3 << 2) | (0 << 4) | (2 << 6)); //high 32 bits of both products moved into the low 64 bits
+    prod23 = _mm_shuffle_epi32 (prod23, 1 | (3 << 2) | (0 << 4) | (2 << 6)); //high 32 bits of both products moved into the low 64 bits
+    res = _mm_unpacklo_epi64 (prod01, prod23);
+    //0x80000000 is never a valid result; it appears only when a == b == 0x80000000 wrapped around
+    overflow = _mm_cmpeq_epi32 (res, _mm_set1_epi32 (0x80000000));
+    return _mm_xor_si128 (res, overflow); //0x80000000 -> 0x7fffffff, every other lane unchanged
+}
+#endif
+
+//*************Vector widening saturating doubling multiply accumulate (long saturating doubling multiply accumulate) *****
+//*************************************************************************************************************************
+
+//************************************************************************************
+//****************** Vector subtract ***********************************************
+//************************************************************************************
+
+//Lane-wise subtraction Vr[i] := Va[i] - Vb[i]. Every variant below is an object-like macro alias of the
+//SSE intrinsic with the matching lane width; signed and unsigned variants share the same intrinsic.
+int8x16_t vsubq_s8(int8x16_t a, int8x16_t b); // VSUB.I8 q0,q0,q0
+#define vsubq_s8 _mm_sub_epi8
+
+int16x8_t vsubq_s16(int16x8_t a, int16x8_t b); // VSUB.I16 q0,q0,q0
+#define vsubq_s16 _mm_sub_epi16
+
+int32x4_t vsubq_s32(int32x4_t a, int32x4_t b); // VSUB.I32 q0,q0,q0
+#define vsubq_s32 _mm_sub_epi32
+
+int64x2_t vsubq_s64(int64x2_t a, int64x2_t b); // VSUB.I64 q0,q0,q0
+#define vsubq_s64 _mm_sub_epi64
+
+float32x4_t vsubq_f32(float32x4_t a, float32x4_t b); // VSUB.F32 q0,q0,q0
+#define vsubq_f32 _mm_sub_ps
+
+uint8x16_t vsubq_u8(uint8x16_t a, uint8x16_t b); // VSUB.I8 q0,q0,q0
+#define vsubq_u8 _mm_sub_epi8
+
+uint16x8_t vsubq_u16(uint16x8_t a, uint16x8_t b); // VSUB.I16 q0,q0,q0
+#define vsubq_u16 _mm_sub_epi16
+
+uint32x4_t vsubq_u32(uint32x4_t a, uint32x4_t b); // VSUB.I32 q0,q0,q0
+#define vsubq_u32 _mm_sub_epi32
+
+uint64x2_t vsubq_u64(uint64x2_t a, uint64x2_t b); // VSUB.I64 q0,q0,q0
+#define vsubq_u64 _mm_sub_epi64
+
+//***************Vector long subtract: vsub -> Vr[i]:=Va[i]-Vb[i] ******************
+//***********************************************************************************
+//Va, Vb have equal lane sizes, result is a 128 bit vector of lanes that are twice the width.
+
+//***************** Vector wide subtract: vsub -> Vr[i]:=Va[i]-Vb[i] **********************************
+//*****************************************************************************************************
+
+//************************Vector saturating subtract *********************************
+//*************************************************************************************
+
+//signed saturating subtract for 8- and 16-bit lanes: aliased to the SSE signed-saturating subtract intrinsics
+int8x16_t vqsubq_s8(int8x16_t a, int8x16_t b); // VQSUB.S8 q0,q0,q0
+#define vqsubq_s8 _mm_subs_epi8
+
+int16x8_t vqsubq_s16(int16x8_t a, int16x8_t b); // VQSUB.S16 q0,q0,q0
+#define vqsubq_s16 _mm_subs_epi16
+
+int32x4_t vqsubq_s32(int32x4_t a, int32x4_t b); // VQSUB.S32 q0,q0,q0
+_NEON2SSE_INLINE int32x4_t vqsubq_s32(int32x4_t a, int32x4_t b)
+{ //no corresponding x86 SIMD solution, special tricks are necessary.
+    //Overflow is possible only if a and b have opposite signs and the difference has the opposite sign to a
+    __m128i diff, limit, overflow;
+    diff = _mm_sub_epi32(a, b); //wrapping difference
+    //saturation value selected by the sign of a: 0x7fffffff when a >= 0, 0x80000000 when a < 0
+    limit = _mm_srli_epi32(a, 31);
+    limit = _mm_add_epi32(limit, _mm_set1_epi32(0x7fffffff));
+    //sign bit is set where both (b ^ a) and (diff ^ a) are negative
+    overflow = _mm_and_si128(_mm_xor_si128(b, a), _mm_xor_si128(diff, a));
+    overflow = _mm_srai_epi32(overflow, 31); //propagate the sign bit: all ones where the lane overflowed, all zeros otherwise
+    //take the limit in overflowed lanes and the plain difference everywhere else
+    limit = _mm_and_si128(overflow, limit);
+    diff = _mm_andnot_si128(overflow, diff);
+    return _mm_or_si128(diff, limit);
+}
+
+int64x2_t vqsubq_s64(int64x2_t a, int64x2_t b); // VQSUB.S64 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x2_t vqsubq_s64(int64x2_t a, int64x2_t b), _NEON2SSE_REASON_SLOW_SERIAL) //no optimal SIMD solution
+{
+    //Serial signed saturating subtract of the two 64-bit lanes.
+    //All arithmetic is done on unsigned values: a signed a - b overflows (undefined behavior in C)
+    //in exactly the cases this function has to detect, while unsigned subtraction wraps by definition.
+    _NEON2SSE_ALIGN_16 uint64_t atmp[2], btmp[2], res[2];
+    int i;
+    _mm_store_si128((__m128i*)atmp, a);
+    _mm_store_si128((__m128i*)btmp, b);
+    for (i = 0; i < 2; i++) {
+        res[i] = atmp[i] - btmp[i]; //wrapping difference
+        //overflow <=> a and b have different signs and the difference has a different sign than a
+        if (((res[i] ^ atmp[i]) & (atmp[i] ^ btmp[i])) & (_SIGNBIT64)) {
+            //a >= 0: 0x7fff...ffff (max); a < 0: 0x7fff...ffff + 1 == 0x8000...0000 (min)
+            res[i] = (atmp[i] >> 63) + ~(_SIGNBIT64);
+        }
+    }
+    return _mm_load_si128((__m128i*)res);
+}
+
+//unsigned saturating subtract for 8- and 16-bit lanes: aliased to the SSE unsigned-saturating subtract intrinsics
+uint8x16_t vqsubq_u8(uint8x16_t a, uint8x16_t b); // VQSUB.U8 q0,q0,q0
+#define vqsubq_u8 _mm_subs_epu8
+
+uint16x8_t vqsubq_u16(uint16x8_t a, uint16x8_t b); // VQSUB.U16 q0,q0,q0
+#define vqsubq_u16 _mm_subs_epu16
+
+uint32x4_t vqsubq_u32(uint32x4_t a, uint32x4_t b); // VQSUB.U32 q0,q0,q0
+_NEON2SSE_INLINE uint32x4_t vqsubq_u32(uint32x4_t a, uint32x4_t b) // VQSUB.U32 q0,q0,q0
+{ //a - b where b <= a (unsigned), 0 in every other lane
+    __m128i smaller, b_le_a, diff;
+    diff = _mm_sub_epi32 (a, b); //wrapping difference
+    smaller = _MM_MIN_EPU32(a, b); //SSE4.1
+    b_le_a = _mm_cmpeq_epi32 (smaller, b); //min(a,b) == b  <=>  b <= a
+    return _mm_and_si128 (diff, b_le_a);
+}
+
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint64x2_t vqsubq_u64(uint64x2_t a, uint64x2_t b), _NEON2SSE_REASON_SLOW_SERIAL); // VQSUB.U64 q0,q0,q0
+#ifdef USE_SSE4
+    _NEON2SSE_INLINE uint64x2_t vqsubq_u64(uint64x2_t a, uint64x2_t b)
+    { //a - b where a > b (unsigned), 0 in every other lane
+        __m128i signbit, a_biased, b_biased, a_gt_b, diff;
+        signbit = _mm_set_epi32 (0x80000000, 0x0, 0x80000000, 0x0); //bit 63 of each 64-bit lane
+        diff = _mm_sub_epi64 (a, b);
+        //no unsigned comparison, need to go to signed: subtract the sign bit from both operands first
+        a_biased = _mm_sub_epi64 (a, signbit);
+        b_biased = _mm_sub_epi64 (b, signbit);
+        a_gt_b = _mm_cmpgt_epi64 (a_biased, b_biased); //SSE4.2!!!
+        return _mm_and_si128 (diff, a_gt_b); //saturation
+    }
+#else
+    _NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint64x2_t vqsubq_u64(uint64x2_t a, uint64x2_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+    { //serial fallback: each 64-bit lane is handled in scalar code
+        _NEON2SSE_ALIGN_16 uint64_t atmp[2], btmp[2], res[2];
+        int i;
+        _mm_store_si128((__m128i*)atmp, a);
+        _mm_store_si128((__m128i*)btmp, b);
+        for (i = 0; i < 2; i++) {
+            if (atmp[i] > btmp[i]) {
+                res[i] = atmp[i] - btmp[i];
+            } else {
+                res[i] = 0; //saturate at zero
+            }
+        }
+        return _mm_load_si128((__m128i*)(res));
+    }
+#endif
+
+//**********Vector halving subtract Vr[i]:=(Va[i]-Vb[i])>>1 ******************************************************
+//****************************************************************
+
+int8x16_t vhsubq_s8(int8x16_t a, int8x16_t b); // VHSUB.S8 q0,q0,q0
+_NEON2SSE_INLINE int8x16_t vhsubq_s8(int8x16_t a, int8x16_t b) // VHSUB.S8 q0,q0,q0
+{ //need to deal with the possibility of internal overflow:
+    //add 128 to both operands and delegate to the unsigned halving subtract
+    const __m128i bias = _mm_set1_epi8 (128);
+    const __m128i a_biased = _mm_add_epi8 (a, bias);
+    const __m128i b_biased = _mm_add_epi8 (b, bias);
+    return vhsubq_u8(a_biased, b_biased);
+}
+
+int16x8_t vhsubq_s16(int16x8_t a, int16x8_t b); // VHSUB.S16 q0,q0,q0
+_NEON2SSE_INLINE int16x8_t vhsubq_s16(int16x8_t a, int16x8_t b) // VHSUB.S16 q0,q0,q0
+{ //need to deal with the possibility of internal overflow:
+    //add 0x8000 to both operands and delegate to the unsigned halving subtract
+    const __m128i bias = _mm_set1_epi16(0x8000);
+    const __m128i a_biased = _mm_add_epi16 (a, bias);
+    const __m128i b_biased = _mm_add_epi16 (b, bias);
+    return vhsubq_u16(a_biased, b_biased);
+}
+
+int32x4_t vhsubq_s32(int32x4_t a, int32x4_t b); // VHSUB.S32 q0,q0,q0
+_NEON2SSE_INLINE int32x4_t vhsubq_s32(int32x4_t a, int32x4_t b) // VHSUB.S32 q0,q0,q0
+{ //need to deal with the possibility of internal overflow:
+    //halve both operands first (arithmetic shift), subtract, then correct for the discarded low bits
+    __m128i half_diff, borrow;
+    half_diff = _mm_sub_epi32 (_mm_srai_epi32 (a, 1), _mm_srai_epi32 (b, 1));
+    borrow = _mm_andnot_si128 (a, b); //!a and b
+    borrow = _mm_slli_epi32 (borrow, 31);
+    borrow = _mm_srli_epi32 (borrow, 31); //1 where bit 0 of b is set and bit 0 of a is clear, 0 otherwise
+    return _mm_sub_epi32 (half_diff, borrow);
+}
+
+uint8x16_t vhsubq_u8(uint8x16_t a, uint8x16_t b); // VHSUB.U8 q0,q0,q0
+_NEON2SSE_INLINE uint8x16_t vhsubq_u8(uint8x16_t a, uint8x16_t b) // VHSUB.U8 q0,q0,q0
+{ //halving subtract expressed through the unsigned average: a - avg(a, b)
+    const __m128i average = _mm_avg_epu8 (a, b);
+    return _mm_sub_epi8 (a, average);
+}
+
+uint16x8_t vhsubq_u16(uint16x8_t a, uint16x8_t b); // VHSUB.U16 q0,q0,q0
+_NEON2SSE_INLINE uint16x8_t vhsubq_u16(uint16x8_t a, uint16x8_t b) // VHSUB.U16 q0,q0,q0
+{ //halving subtract expressed through the unsigned average: a - avg(a, b)
+    const __m128i average = _mm_avg_epu16 (a, b);
+    return _mm_sub_epi16 (a, average);
+}
+
+uint32x4_t vhsubq_u32(uint32x4_t a, uint32x4_t b); // VHSUB.U32 q0,q0,q0
+_NEON2SSE_INLINE uint32x4_t vhsubq_u32(uint32x4_t a, uint32x4_t b) // VHSUB.U32 q0,q0,q0
+{ //need to deal with the possibility of internal overflow:
+    //halve both operands first (logical shift), subtract, then correct for the discarded low bits
+    __m128i half_diff, borrow;
+    half_diff = _mm_sub_epi32 (_mm_srli_epi32 (a, 1), _mm_srli_epi32 (b, 1));
+    borrow = _mm_andnot_si128 (a, b); //!a and b
+    borrow = _mm_slli_epi32 (borrow, 31);
+    borrow = _mm_srli_epi32 (borrow, 31); //1 where bit 0 of b is set and bit 0 of a is clear, 0 otherwise
+    return _mm_sub_epi32 (half_diff, borrow);
+}
+
+//******* Vector subtract high half (truncated) ** ************
+//************************************************************
+
+//************ Vector rounding subtract high half *********************
+//*********************************************************************
+
+//*********** Vector saturating doubling multiply subtract long ********************
+//************************************************************************************
+
+//****************** COMPARISON ***************************************
+//******************* Vector compare equal *************************************
+//****************************************************************************
+
+//integer equality compare: aliased directly to the SSE compare-equal intrinsic of the matching lane width
+uint8x16_t vceqq_s8(int8x16_t a, int8x16_t b); // VCEQ.I8 q0, q0, q0
+#define vceqq_s8 _mm_cmpeq_epi8
+
+uint16x8_t vceqq_s16(int16x8_t a, int16x8_t b); // VCEQ.I16 q0, q0, q0
+#define vceqq_s16 _mm_cmpeq_epi16
+
+uint32x4_t vceqq_s32(int32x4_t a, int32x4_t b); // VCEQ.I32 q0, q0, q0
+#define vceqq_s32 _mm_cmpeq_epi32
+
+uint32x4_t vceqq_f32(float32x4_t a, float32x4_t b); // VCEQ.F32 q0, q0, q0
+_NEON2SSE_INLINE uint32x4_t vceqq_f32(float32x4_t a, float32x4_t b)
+{ //float equality compare; the resulting mask is returned reinterpreted as an integer vector
+    __m128 eq_mask = _mm_cmpeq_ps(a,b);
+    return *(__m128i*)&eq_mask;
+}
+
+//equality does not depend on signedness, so the unsigned and polynomial variants use the same SSE intrinsics
+uint8x16_t vceqq_u8(uint8x16_t a, uint8x16_t b); // VCEQ.I8 q0, q0, q0
+#define vceqq_u8 _mm_cmpeq_epi8
+
+uint16x8_t vceqq_u16(uint16x8_t a, uint16x8_t b); // VCEQ.I16 q0, q0, q0
+#define vceqq_u16 _mm_cmpeq_epi16
+
+uint32x4_t vceqq_u32(uint32x4_t a, uint32x4_t b); // VCEQ.I32 q0, q0, q0
+#define vceqq_u32 _mm_cmpeq_epi32
+
+uint8x16_t vceqq_p8(poly8x16_t a, poly8x16_t b); // VCEQ.I8 q0, q0, q0
+#define vceqq_p8 _mm_cmpeq_epi8
+
+//******************Vector compare greater-than or equal*************************
+//*******************************************************************************
+//in IA SIMD no greater-than-or-equal comparison for integers,
+// there is greater-than available only, so we need the following tricks
+
+uint8x16_t vcgeq_s8(int8x16_t a, int8x16_t b); // VCGE.S8 q0, q0, q0
+_NEON2SSE_INLINE uint8x16_t vcgeq_s8(int8x16_t a, int8x16_t b) // VCGE.S8 q0, q0, q0
+{ //a >= b built as (a > b) | (a == b)
+    const __m128i gt = _mm_cmpgt_epi8 (a, b);
+    const __m128i eq = _mm_cmpeq_epi8 (a, b);
+    return _mm_or_si128 (gt, eq);
+}
+
+uint16x8_t vcgeq_s16(int16x8_t a, int16x8_t b); // VCGE.S16 q0, q0, q0
+_NEON2SSE_INLINE uint16x8_t vcgeq_s16(int16x8_t a, int16x8_t b) // VCGE.S16 q0, q0, q0
+{ //a >= b built as (a > b) | (a == b)
+    const __m128i gt = _mm_cmpgt_epi16 (a, b);
+    const __m128i eq = _mm_cmpeq_epi16 (a, b);
+    return _mm_or_si128 (gt, eq);
+}
+
+uint32x4_t vcgeq_s32(int32x4_t a, int32x4_t b); // VCGE.S32 q0, q0, q0
+_NEON2SSE_INLINE uint32x4_t vcgeq_s32(int32x4_t a, int32x4_t b) // VCGE.S32 q0, q0, q0
+{ //a >= b built as (a > b) | (a == b)
+    const __m128i gt = _mm_cmpgt_epi32 (a, b);
+    const __m128i eq = _mm_cmpeq_epi32 (a, b);
+    return _mm_or_si128 (gt, eq);
+}
+
+uint32x4_t vcgeq_f32(float32x4_t a, float32x4_t b); // VCGE.F32 q0, q0, q0
+_NEON2SSE_INLINE uint32x4_t vcgeq_f32(float32x4_t a, float32x4_t b)
+{ //float a >= b on all four lanes; the resulting mask is returned reinterpreted as an integer vector
+    __m128 ge_mask = _mm_cmpge_ps(a,b);
+    return *(__m128i*)&ge_mask;
+}
+
+uint8x16_t vcgeq_u8(uint8x16_t a, uint8x16_t b); // VCGE.U8 q0, q0, q0
+_NEON2SSE_INLINE uint8x16_t vcgeq_u8(uint8x16_t a, uint8x16_t b) // VCGE.U8 q0, q0, q0
+{ //no unsigned chars comparison, only signed available, so need the trick.
+    //_mm_max_epu8 is an SSE2 instruction (unlike the 16- and 32-bit unsigned max), so the max-based
+    //trick needs no USE_SSE4 guard and no slower sign-biased fallback.
+    __m128i larger;
+    larger = _mm_max_epu8(a, b);
+    return _mm_cmpeq_epi8(larger, a); //max(a,b) == a  <=>  a >= b
+}
+
+uint16x8_t vcgeq_u16(uint16x8_t a, uint16x8_t b); // VCGE.U16 q0, q0, q0
+_NEON2SSE_INLINE uint16x8_t vcgeq_u16(uint16x8_t a, uint16x8_t b) // VCGE.U16 q0, q0, q0
+{ //no unsigned shorts comparison, only signed available, so need the trick
+    #ifdef USE_SSE4
+    //max(a,b) == a  <=>  a >= b
+    const __m128i larger = _mm_max_epu16(a, b);
+    return _mm_cmpeq_epi16(larger, a);
+    #else
+    //subtract 0x8000 from both operands so that signed compares can be used, then a >= b is (a > b) | (a == b)
+    const __m128i bias = _mm_set1_epi16 (0x8000);
+    const __m128i a_biased = _mm_sub_epi16(a, bias);
+    const __m128i b_biased = _mm_sub_epi16(b, bias);
+    const __m128i gt = _mm_cmpgt_epi16(a_biased, b_biased);
+    const __m128i eq = _mm_cmpeq_epi16 (a_biased, b_biased);
+    return _mm_or_si128 (gt, eq);
+    #endif
+}
+
+uint32x4_t vcgeq_u32(uint32x4_t a, uint32x4_t b); // VCGE.U32 q0, q0, q0
+_NEON2SSE_INLINE uint32x4_t vcgeq_u32(uint32x4_t a, uint32x4_t b) // VCGE.U32 q0, q0, q0
+{ //no unsigned ints comparison, only signed available, so need the trick
+    #ifdef USE_SSE4
+    //max(a,b) == a  <=>  a >= b
+    const __m128i larger = _mm_max_epu32(a, b);
+    return _mm_cmpeq_epi32(larger, a);
+    #else
+    //serial solution may be faster
+    //subtract 0x80000000 from both operands so that signed compares can be used, then a >= b is (a > b) | (a == b)
+    const __m128i bias = _mm_set1_epi32 (0x80000000);
+    const __m128i a_biased = _mm_sub_epi32(a, bias);
+    const __m128i b_biased = _mm_sub_epi32(b, bias);
+    const __m128i gt = _mm_cmpgt_epi32 (a_biased, b_biased);
+    const __m128i eq = _mm_cmpeq_epi32 (a_biased, b_biased);
+    return _mm_or_si128 (gt, eq);
+    #endif
+}
+
+//**********************Vector compare less-than or equal******************************
+//***************************************************************************************
+//in IA SIMD no less-than-or-equal comparison for integers present, so we need the tricks
+
+uint8x16_t vcleq_s8(int8x16_t a, int8x16_t b); // VCGE.S8 q0, q0, q0
+_NEON2SSE_INLINE uint8x16_t vcleq_s8(int8x16_t a, int8x16_t b) // VCGE.S8 q0, q0, q0
+{ //a <= b is the bitwise complement of a > b
+    const __m128i gt = _mm_cmpgt_epi8 (a, b);
+    const __m128i all_ones = _mm_cmpeq_epi8 (a,a); //0xff....
+    return _mm_andnot_si128 (gt, all_ones); //~gt & all_ones
+}
+
+uint16x8_t vcleq_s16(int16x8_t a, int16x8_t b); // VCGE.S16 q0, q0, q0
+_NEON2SSE_INLINE uint16x8_t vcleq_s16(int16x8_t a, int16x8_t b) // VCGE.S16 q0, q0, q0
+{ //a <= b is the bitwise complement of a > b
+    const __m128i gt = _mm_cmpgt_epi16 (a, b);
+    const __m128i all_ones = _mm_cmpeq_epi16 (a,a); //0xff....
+    return _mm_andnot_si128 (gt, all_ones); //~gt & all_ones
+}
+
+uint32x4_t vcleq_s32(int32x4_t a, int32x4_t b); // VCGE.S32 q0, q0, q0
+_NEON2SSE_INLINE uint32x4_t vcleq_s32(int32x4_t a, int32x4_t b) // VCGE.S32 q0, q0, q0
+{ //a <= b is the bitwise complement of a > b
+    const __m128i gt = _mm_cmpgt_epi32 (a, b);
+    const __m128i all_ones = _mm_cmpeq_epi32 (a,a); //0xff....
+    return _mm_andnot_si128 (gt, all_ones); //~gt & all_ones
+}
+
+uint32x4_t vcleq_f32(float32x4_t a, float32x4_t b); // VCGE.F32 q0, q0, q0
+_NEON2SSE_INLINE uint32x4_t vcleq_f32(float32x4_t a, float32x4_t b)
+{ //float a <= b; the resulting mask is returned reinterpreted as an integer vector
+    __m128 le_mask = _mm_cmple_ps(a,b);
+    return *(__m128i*)&le_mask;
+}
+
+uint8x16_t vcleq_u8(uint8x16_t a, uint8x16_t b); // VCGE.U8 q0, q0, q0
+#ifdef USE_SSE4
+    _NEON2SSE_INLINE uint8x16_t vcleq_u8(uint8x16_t a, uint8x16_t b) // VCGE.U8 q0, q0, q0
+    { //no unsigned chars comparison in SSE, only signed available, so need the trick:
+        //min(a,b) == a  <=>  a <= b
+        const __m128i smaller = _mm_min_epu8(a, b);
+        return _mm_cmpeq_epi8(smaller, a);
+    }
+#else
+    //a <= b is evaluated as b >= a
+    #define vcleq_u8(a,b) vcgeq_u8(b,a)
+#endif
+
+uint16x8_t vcleq_u16(uint16x8_t a, uint16x8_t b); // VCGE.U16 q0, q0, q0
+#ifdef USE_SSE4
+    _NEON2SSE_INLINE uint16x8_t vcleq_u16(uint16x8_t a, uint16x8_t b) // VCGE.U16 q0, q0, q0
+    { //no unsigned shorts comparison in SSE, only signed available, so need the trick:
+        //min(a,b) == a  <=>  a <= b
+        const __m128i smaller = _mm_min_epu16(a, b);
+        return _mm_cmpeq_epi16(smaller, a);
+    }
+#else
+    //a <= b is evaluated as b >= a
+    #define vcleq_u16(a,b) vcgeq_u16(b,a)
+#endif
+
+uint32x4_t vcleq_u32(uint32x4_t a, uint32x4_t b); // VCGE.U32 q0, q0, q0
+#ifdef USE_SSE4
+    _NEON2SSE_INLINE uint32x4_t vcleq_u32(uint32x4_t a, uint32x4_t b) // VCGE.U32 q0, q0, q0
+    { //no unsigned ints comparison in SSE, only signed available, so need the trick:
+        //min(a,b) == a  <=>  a <= b
+        const __m128i smaller = _mm_min_epu32(a, b);
+        return _mm_cmpeq_epi32(smaller, a);
+    }
+#else
+//solution may be not optimal compared with the serial one
+    //a <= b is evaluated as b >= a
+    #define vcleq_u32(a,b) vcgeq_u32(b,a)
+#endif
+
+//****** Vector compare greater-than ******************************************
+//**************************************************************************
+
+//signed greater-than compare: aliased directly to the SSE signed compare intrinsic of the matching lane width
+uint8x16_t vcgtq_s8(int8x16_t a, int8x16_t b); // VCGT.S8 q0, q0, q0
+#define vcgtq_s8 _mm_cmpgt_epi8
+
+uint16x8_t vcgtq_s16(int16x8_t a, int16x8_t b); // VCGT.S16 q0, q0, q0
+#define vcgtq_s16 _mm_cmpgt_epi16
+
+uint32x4_t vcgtq_s32(int32x4_t a, int32x4_t b); // VCGT.S32 q0, q0, q0
+#define vcgtq_s32 _mm_cmpgt_epi32
+
+uint32x4_t vcgtq_f32(float32x4_t a, float32x4_t b); // VCGT.F32 q0, q0, q0
+_NEON2SSE_INLINE uint32x4_t vcgtq_f32(float32x4_t a, float32x4_t b)
+{ //float a > b on all four lanes; the resulting mask is returned reinterpreted as an integer vector
+    __m128 gt_mask = _mm_cmpgt_ps(a,b);
+    return *(__m128i*)&gt_mask;
+}
+
+uint8x16_t vcgtq_u8(uint8x16_t a, uint8x16_t b); // VCGT.U8 q0, q0, q0
+_NEON2SSE_INLINE uint8x16_t vcgtq_u8(uint8x16_t a, uint8x16_t b) // VCGT.U8 q0, q0, q0
+{ //no unsigned chars comparison, only signed available, so need the trick:
+    //subtract 128 from both operands, then compare as signed
+    const __m128i bias = _mm_set1_epi8 (128);
+    const __m128i a_biased = _mm_sub_epi8(a, bias);
+    const __m128i b_biased = _mm_sub_epi8(b, bias);
+    return _mm_cmpgt_epi8 (a_biased, b_biased);
+}
+
+uint16x8_t vcgtq_u16(uint16x8_t a, uint16x8_t b); // VCGT.U16 q0, q0, q0
+_NEON2SSE_INLINE uint16x8_t vcgtq_u16(uint16x8_t a, uint16x8_t b) // VCGT.U16 q0, q0, q0
+{ //no unsigned short comparison, only signed available, so need the trick:
+    //subtract 0x8000 from both operands, then compare as signed
+    const __m128i bias = _mm_set1_epi16 (0x8000);
+    const __m128i a_biased = _mm_sub_epi16(a, bias);
+    const __m128i b_biased = _mm_sub_epi16(b, bias);
+    return _mm_cmpgt_epi16 (a_biased, b_biased);
+}
+
+uint32x4_t vcgtq_u32(uint32x4_t a, uint32x4_t b); // VCGT.U32 q0, q0, q0
+_NEON2SSE_INLINE uint32x4_t vcgtq_u32(uint32x4_t a, uint32x4_t b) // VCGT.U32 q0, q0, q0
+{ //no unsigned int comparison, only signed available, so need the trick:
+    //subtract 0x80000000 from both operands, then compare as signed
+    const __m128i bias = _mm_set1_epi32 (0x80000000);
+    const __m128i a_biased = _mm_sub_epi32(a, bias);
+    const __m128i b_biased = _mm_sub_epi32(b, bias);
+    return _mm_cmpgt_epi32 (a_biased, b_biased);
+}
+
+//********************* Vector compare less-than **************************
+//*************************************************************************
+
+//a < b is evaluated as b > a: every macro below forwards to the matching greater-than with swapped arguments
+uint8x16_t vcltq_s8(int8x16_t a, int8x16_t b); // VCGT.S8 q0, q0, q0
+#define vcltq_s8(a,b) vcgtq_s8(b, a) //swap the arguments!!
+
+uint16x8_t vcltq_s16(int16x8_t a, int16x8_t b); // VCGT.S16 q0, q0, q0
+#define vcltq_s16(a,b) vcgtq_s16(b, a) //swap the arguments!!
+
+uint32x4_t vcltq_s32(int32x4_t a, int32x4_t b); // VCGT.S32 q0, q0, q0
+#define vcltq_s32(a,b) vcgtq_s32(b, a) //swap the arguments!!
+
+uint32x4_t vcltq_f32(float32x4_t a, float32x4_t b); // VCGT.F32 q0, q0, q0
+#define vcltq_f32(a,b) vcgtq_f32(b, a) //swap the arguments!!
+
+uint8x16_t vcltq_u8(uint8x16_t a, uint8x16_t b); // VCGT.U8 q0, q0, q0
+#define vcltq_u8(a,b) vcgtq_u8(b, a) //swap the arguments!!
+
+uint16x8_t vcltq_u16(uint16x8_t a, uint16x8_t b); // VCGT.U16 q0, q0, q0
+#define vcltq_u16(a,b) vcgtq_u16(b, a) //swap the arguments!!
+
+uint32x4_t vcltq_u32(uint32x4_t a, uint32x4_t b); // VCGT.U32 q0, q0, q0
+#define vcltq_u32(a,b) vcgtq_u32(b, a) //swap the arguments!!
+
+//*****************Vector compare absolute greater-than or equal ************
+//***************************************************************************
+
+uint32x4_t vcageq_f32(float32x4_t a, float32x4_t b); // VACGE.F32 q0, q0, q0
+_NEON2SSE_INLINE uint32x4_t vcageq_f32(float32x4_t a, float32x4_t b) // VACGE.F32 q0, q0, q0
+{ //|a| >= |b|: clear the sign bit of every lane of both operands, then compare
+    __m128i abs_mask = _mm_set1_epi32 (0x7fffffff);
+    __m128 abs_a = _mm_and_ps (a, *(__m128*)&abs_mask);
+    __m128 abs_b = _mm_and_ps (b, *(__m128*)&abs_mask);
+    __m128 cmp = _mm_cmpge_ps (abs_a, abs_b);
+    return (*(__m128i*)&cmp);
+}
+
+//********Vector compare absolute less-than or equal ******************
+//********************************************************************
+
+uint32x4_t vcaleq_f32(float32x4_t a, float32x4_t b); // VACGE.F32 q0, q0, q0
+_NEON2SSE_INLINE uint32x4_t vcaleq_f32(float32x4_t a, float32x4_t b) // VACGE.F32 q0, q0, q0
+{ //|a| <= |b|: clear the sign bit of every lane of both operands, then compare
+    __m128i abs_mask = _mm_set1_epi32 (0x7fffffff);
+    __m128 abs_a = _mm_and_ps (a, *(__m128*)&abs_mask);
+    __m128 abs_b = _mm_and_ps (b, *(__m128*)&abs_mask);
+    __m128 cmp = _mm_cmple_ps (abs_a, abs_b);
+    return (*(__m128i*)&cmp);
+}
+
+//******** Vector compare absolute greater-than ******************
+//******************************************************************
+
+uint32x4_t vcagtq_f32(float32x4_t a, float32x4_t b); // VACGT.F32 q0, q0, q0
+_NEON2SSE_INLINE uint32x4_t vcagtq_f32(float32x4_t a, float32x4_t b) // VACGT.F32 q0, q0, q0
+{ //|a| > |b|: clear the sign bit of every lane of both operands, then compare
+    __m128i abs_mask = _mm_set1_epi32 (0x7fffffff);
+    __m128 abs_a = _mm_and_ps (a, *(__m128*)&abs_mask);
+    __m128 abs_b = _mm_and_ps (b, *(__m128*)&abs_mask);
+    __m128 cmp = _mm_cmpgt_ps (abs_a, abs_b);
+    return (*(__m128i*)&cmp);
+}
+
+//***************Vector compare absolute less-than ***********************
+//*************************************************************************
+
+uint32x4_t vcaltq_f32(float32x4_t a, float32x4_t b); // VACGT.F32 q0, q0, q0
+_NEON2SSE_INLINE uint32x4_t vcaltq_f32(float32x4_t a, float32x4_t b) // VACGT.F32 q0, q0, q0
+{ //|a| < |b|: clear the sign bit of every lane of both operands, then compare
+    __m128i abs_mask = _mm_set1_epi32 (0x7fffffff);
+    __m128 abs_a = _mm_and_ps (a, *(__m128*)&abs_mask);
+    __m128 abs_b = _mm_and_ps (b, *(__m128*)&abs_mask);
+    __m128 cmp = _mm_cmplt_ps (abs_a, abs_b);
+    return (*(__m128i*)&cmp);
+}
+
+//*************************Vector test bits************************************
+//*****************************************************************************
+/*VTST (Vector Test Bits) takes each element in a vector, and bitwise logical ANDs them
+with the corresponding element of a second vector. If the result is not zero, the
+corresponding element in the destination vector is set to all ones. Otherwise, it is set to
+all zeros. */
+
+uint8x16_t vtstq_s8(int8x16_t a, int8x16_t b); // VTST.8 q0, q0, q0
+_NEON2SSE_INLINE uint8x16_t vtstq_s8(int8x16_t a, int8x16_t b) // VTST.8 q0, q0, q0
+{ //all ones in every 8-bit lane where (a & b) != 0, all zeros otherwise
+    const __m128i zero = _mm_setzero_si128 ();
+    const __m128i all_ones = _mm_cmpeq_epi8(zero,zero); //0xfff..ffff
+    const __m128i is_zero = _mm_cmpeq_epi8 (_mm_and_si128 (a, b), zero);
+    return _mm_xor_si128(is_zero, all_ones); //invert result
+}
+
+uint16x8_t vtstq_s16(int16x8_t a, int16x8_t b); // VTST.16 q0, q0, q0
+_NEON2SSE_INLINE uint16x8_t vtstq_s16(int16x8_t a, int16x8_t b) // VTST.16 q0, q0, q0
+{ //all ones in every 16-bit lane where (a & b) != 0, all zeros otherwise
+    const __m128i zero = _mm_setzero_si128 ();
+    const __m128i all_ones = _mm_cmpeq_epi8(zero,zero); //0xfff..ffff
+    const __m128i is_zero = _mm_cmpeq_epi16 (_mm_and_si128 (a, b), zero);
+    return _mm_xor_si128(is_zero, all_ones); //invert result
+}
+
+uint32x4_t vtstq_s32(int32x4_t a, int32x4_t b); // VTST.32 q0, q0, q0
+_NEON2SSE_INLINE uint32x4_t vtstq_s32(int32x4_t a, int32x4_t b) // VTST.32 q0, q0, q0
+{ //all ones in every 32-bit lane where (a & b) != 0, all zeros otherwise
+    const __m128i zero = _mm_setzero_si128 ();
+    const __m128i all_ones = _mm_cmpeq_epi8(zero,zero); //0xfff..ffff
+    const __m128i is_zero = _mm_cmpeq_epi32 (_mm_and_si128 (a, b), zero);
+    return _mm_xor_si128(is_zero, all_ones); //invert result
+}
+
+//the bit test does not depend on signedness: unsigned variants alias the signed ones, the polynomial variant aliases vtstq_u8
+uint8x16_t vtstq_u8(uint8x16_t a, uint8x16_t b); // VTST.8 q0, q0, q0
+#define vtstq_u8 vtstq_s8
+
+uint16x8_t vtstq_u16(uint16x8_t a, uint16x8_t b); // VTST.16 q0, q0, q0
+#define vtstq_u16 vtstq_s16
+
+uint32x4_t vtstq_u32(uint32x4_t a, uint32x4_t b); // VTST.32 q0, q0, q0
+#define vtstq_u32 vtstq_s32
+
+uint8x16_t vtstq_p8(poly8x16_t a, poly8x16_t b); // VTST.8 q0, q0, q0
+#define vtstq_p8 vtstq_u8
+
+//****************** Absolute difference ********************
+//*** Absolute difference between the arguments: Vr[i] = | Va[i] - Vb[i] |*****
+//************************************************************
+#if defined(USE_SSSE3)
+
+#endif
+
+#if defined(USE_SSSE3)
+int8x16_t vabdq_s8(int8x16_t a, int8x16_t b); // VABD.S8 q0,q0,q0
+_NEON2SSE_INLINE int8x16_t vabdq_s8(int8x16_t a, int8x16_t b) // VABD.S8 q0,q0,q0
+{ //need to deal with an intermediate overflow: abs(a - b) on the wrapped difference is wrong when the true
+    //difference does not fit the signed lane (e.g. 127 and -128 must give 0xff, not 1).
+    //Instead subtract the smaller operand from the larger one; that wrapped difference is the exact bit pattern.
+    __m128i a_gt_b, difab, difba;
+    a_gt_b = _mm_cmpgt_epi8 (a, b);
+    difab = _mm_sub_epi8 (a, b);
+    difba = _mm_sub_epi8 (b, a);
+    difab = _mm_and_si128 (a_gt_b, difab); //a - b where a > b
+    difba = _mm_andnot_si128 (a_gt_b, difba); //b - a elsewhere
+    return _mm_or_si128 (difab, difba);
+}
+#endif
+
+#if defined(USE_SSSE3)
+int16x8_t vabdq_s16(int16x8_t a, int16x8_t b); // VABD.S16 q0,q0,q0
+_NEON2SSE_INLINE int16x8_t vabdq_s16(int16x8_t a, int16x8_t b) // VABD.S16 q0,q0,q0
+{ //need to deal with an intermediate overflow: abs(a - b) on the wrapped difference is wrong when the true
+    //difference does not fit the signed lane (e.g. 32767 and -32768 must give 0xffff, not 1).
+    //Instead subtract the smaller operand from the larger one; that wrapped difference is the exact bit pattern.
+    __m128i a_gt_b, difab, difba;
+    a_gt_b = _mm_cmpgt_epi16 (a, b);
+    difab = _mm_sub_epi16 (a, b);
+    difba = _mm_sub_epi16 (b, a);
+    difab = _mm_and_si128 (a_gt_b, difab); //a - b where a > b
+    difba = _mm_andnot_si128 (a_gt_b, difba); //b - a elsewhere
+    return _mm_or_si128 (difab, difba);
+}
+#endif
+
+#if defined(USE_SSSE3)
+int32x4_t vabdq_s32(int32x4_t a, int32x4_t b); // VABD.S32 q0,q0,q0
+_NEON2SSE_INLINE int32x4_t vabdq_s32(int32x4_t a, int32x4_t b) // VABD.S32 q0,q0,q0
+{ //Vr[i] = | a[i] - b[i] | for every signed 32 bit lane
+ //abs(a - b) breaks once a - b wraps: INT32_MAX - INT32_MIN wraps to -1 and abs() yields 1 instead of 0xffffffff.
+ //Selecting a - b where a > b and b - a elsewhere never takes the absolute value of a wrapped difference.
+ __m128i a_gt_b, difab, difba;
+ a_gt_b = _mm_cmpgt_epi32 (a, b); //all ones in the lanes where a > b (signed compare)
+ difab = _mm_and_si128 (a_gt_b, _mm_sub_epi32 (a, b));
+ difba = _mm_andnot_si128 (a_gt_b, _mm_sub_epi32 (b, a));
+ return _mm_or_si128 (difab, difba);
+}
+#endif
+
+uint8x16_t vabdq_u8(uint8x16_t a, uint8x16_t b); // VABD.U8 q0,q0,q0
+_NEON2SSE_INLINE uint8x16_t vabdq_u8(uint8x16_t a, uint8x16_t b) //no abs for unsigned
+{ //| a - b | for unsigned bytes is the larger operand minus the smaller one
+ const __m128i larger = _mm_max_epu8 (a, b);
+ const __m128i smaller = _mm_min_epu8 (a, b);
+ return _mm_sub_epi8 (larger, smaller);
+}
+
+uint16x8_t vabdq_u16(uint16x8_t a, uint16x8_t b); // VABD.U16 q0,q0,q0
+_NEON2SSE_INLINE uint16x8_t vabdq_u16(uint16x8_t a, uint16x8_t b)
+{ //take a - b in the lanes where a > b (unsigned) and b - a in all other lanes
+ __m128i a_gt_b, pick_ab, pick_ba;
+ a_gt_b = vcgtq_u16(a,b);
+ pick_ab = _mm_and_si128(a_gt_b, _mm_sub_epi16(a,b));
+ pick_ba = _mm_andnot_si128(a_gt_b, _mm_sub_epi16(b,a));
+ return _mm_or_si128(pick_ab, pick_ba);
+}
+
+uint32x4_t vabdq_u32(uint32x4_t a, uint32x4_t b); // VABD.U32 q0,q0,q0
+_NEON2SSE_INLINE uint32x4_t vabdq_u32(uint32x4_t a, uint32x4_t b)
+{ //take a - b in the lanes where a > b (unsigned) and b - a in all other lanes
+ __m128i a_gt_b, pick_ab, pick_ba;
+ a_gt_b = vcgtq_u32(a,b);
+ pick_ab = _mm_and_si128(a_gt_b, _mm_sub_epi32(a,b));
+ pick_ba = _mm_andnot_si128(a_gt_b, _mm_sub_epi32(b,a));
+ return _mm_or_si128(pick_ab, pick_ba);
+}
+
+float32x4_t vabdq_f32(float32x4_t a, float32x4_t b); // VABD.F32 q0,q0,q0
+_NEON2SSE_INLINE float32x4_t vabdq_f32(float32x4_t a, float32x4_t b) // VABD.F32 q0,q0,q0
+{ //subtract, then clear the sign bit of every lane to get the absolute value
+ const __m128 no_sign = _mm_castsi128_ps (_mm_set1_epi32(0x7fffffff)); //every bit except the sign bit
+ return _mm_and_ps (_mm_sub_ps (a, b), no_sign);
+}
+
+//************ Absolute difference - long **************************
+//********************************************************************
+
+//**********Absolute difference and accumulate: Vr[i] = Va[i] + | Vb[i] - Vc[i] | *************
+//*********************************************************************************************
+
+#if defined(USE_SSSE3)
+int8x16_t vabaq_s8(int8x16_t a, int8x16_t b, int8x16_t c); // VABA.S8 q0,q0,q0
+_NEON2SSE_INLINE int8x16_t vabaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) // VABA.S8 q0,q0,q0
+{ //accumulate: a + | b - c |
+ return vaddq_s8( a, vabdq_s8(b, c));
+}
+#endif
+
+#if defined(USE_SSSE3)
+int16x8_t vabaq_s16(int16x8_t a, int16x8_t b, int16x8_t c); // VABA.S16 q0,q0,q0
+_NEON2SSE_INLINE int16x8_t vabaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) // VABA.S16 q0,q0,q0
+{ //accumulate: a + | b - c |
+ return vaddq_s16( a, vabdq_s16(b, c));
+}
+#endif
+
+#if defined(USE_SSSE3)
+int32x4_t vabaq_s32(int32x4_t a, int32x4_t b, int32x4_t c); // VABA.S32 q0,q0,q0
+_NEON2SSE_INLINE int32x4_t vabaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) // VABA.S32 q0,q0,q0
+{ //accumulate: a + | b - c |
+ return vaddq_s32( a, vabdq_s32(b, c));
+}
+#endif
+
+uint8x16_t vabaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c); // VABA.U8 q0,q0,q0
+_NEON2SSE_INLINE uint8x16_t vabaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c)
+{ //accumulate: a + | b - c |
+ return vaddq_u8( a, vabdq_u8(b, c));
+}
+
+uint16x8_t vabaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c); // VABA.U16 q0,q0,q0
+_NEON2SSE_INLINE uint16x8_t vabaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c)
+{ //accumulate: a + | b - c |
+ return vaddq_u16( a, vabdq_u16(b, c));
+}
+
+uint32x4_t vabaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c); // VABA.U32 q0,q0,q0
+_NEON2SSE_INLINE uint32x4_t vabaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c)
+{ //accumulate: a + | b - c |
+ return vaddq_u32( a, vabdq_u32(b, c));
+}
+
+//************** Absolute difference and accumulate - long ********************************
+//*************************************************************************************
+
+//***********************************************************************************
+//**************** Maximum and minimum operations **********************************
+//***********************************************************************************
+//************* Maximum: vmax -> Vr[i] := (Va[i] >= Vb[i]) ? Va[i] : Vb[i] *******
+//***********************************************************************************
+
+//Every vmaxq_* is an alias: the prototype records the NEON signature and the #define forwards the name
+//either to an SSE intrinsic (_mm_*) or to one of the file's _MM_* wrappers (marked SSE4.1 below).
+int8x16_t vmaxq_s8(int8x16_t a, int8x16_t b); // VMAX.S8 q0,q0,q0
+#define vmaxq_s8 _MM_MAX_EPI8 //SSE4.1
+
+int16x8_t vmaxq_s16(int16x8_t a, int16x8_t b); // VMAX.S16 q0,q0,q0
+#define vmaxq_s16 _mm_max_epi16
+
+int32x4_t vmaxq_s32(int32x4_t a, int32x4_t b); // VMAX.S32 q0,q0,q0
+#define vmaxq_s32 _MM_MAX_EPI32 //SSE4.1
+
+uint8x16_t vmaxq_u8(uint8x16_t a, uint8x16_t b); // VMAX.U8 q0,q0,q0
+#define vmaxq_u8 _mm_max_epu8
+
+uint16x8_t vmaxq_u16(uint16x8_t a, uint16x8_t b); // VMAX.U16 q0,q0,q0
+#define vmaxq_u16 _MM_MAX_EPU16 //SSE4.1
+
+uint32x4_t vmaxq_u32(uint32x4_t a, uint32x4_t b); // VMAX.U32 q0,q0,q0
+#define vmaxq_u32 _MM_MAX_EPU32 //SSE4.1
+
+float32x4_t vmaxq_f32(float32x4_t a, float32x4_t b); // VMAX.F32 q0,q0,q0
+#define vmaxq_f32 _mm_max_ps
+
+//*************** Minimum: vmin -> Vr[i] := (Va[i] >= Vb[i]) ? Vb[i] : Va[i] ********************************
+//***********************************************************************************************************
+
+//Every vminq_* is an alias: the prototype records the NEON signature and the #define forwards the name
+//either to an SSE intrinsic (_mm_*) or to one of the file's _MM_* wrappers (marked SSE4.1 below).
+int8x16_t vminq_s8(int8x16_t a, int8x16_t b); // VMIN.S8 q0,q0,q0
+#define vminq_s8 _MM_MIN_EPI8 //SSE4.1
+
+int16x8_t vminq_s16(int16x8_t a, int16x8_t b); // VMIN.S16 q0,q0,q0
+#define vminq_s16 _mm_min_epi16
+
+int32x4_t vminq_s32(int32x4_t a, int32x4_t b); // VMIN.S32 q0,q0,q0
+#define vminq_s32 _MM_MIN_EPI32 //SSE4.1
+
+uint8x16_t vminq_u8(uint8x16_t a, uint8x16_t b); // VMIN.U8 q0,q0,q0
+#define vminq_u8 _mm_min_epu8
+
+uint16x8_t vminq_u16(uint16x8_t a, uint16x8_t b); // VMIN.U16 q0,q0,q0
+#define vminq_u16 _MM_MIN_EPU16 //SSE4.1
+
+uint32x4_t vminq_u32(uint32x4_t a, uint32x4_t b); // VMIN.U32 q0,q0,q0
+#define vminq_u32 _MM_MIN_EPU32 //SSE4.1
+
+float32x4_t vminq_f32(float32x4_t a, float32x4_t b); // VMIN.F32 q0,q0,q0
+#define vminq_f32 _mm_min_ps
+
+//************* Pairwise addition operations. **************************************
+//************************************************************************************
+//Pairwise add - adds adjacent pairs of elements of two vectors, and places the results in the destination vector
+
+//************************** Long pairwise add **********************************
+//*********************************************************************************
+//Adds adjacent pairs of elements of a vector, sign- or zero-extends the results to twice their original width,
+// and places the final results in the destination vector.
+
+#if defined(USE_SSSE3)
+int16x8_t vpaddlq_s8(int8x16_t a); // VPADDL.S8 q0,q0
+_NEON2SSE_INLINE int16x8_t vpaddlq_s8(int8x16_t a) // VPADDL.S8 q0,q0
+{ //no 8 bit hadd in IA32: convert both halves to 16 bit lanes, then add adjacent lanes horizontally
+ __m128i lo16, hi16;
+ lo16 = _MM_CVTEPI8_EPI16 (a); // SSE 4.1
+ hi16 = _MM_CVTEPI8_EPI16 (_mm_shuffle_epi32 (a, _SWAP_HI_LOW32)); //the swap exposes the remaining half to the conversion
+ return _mm_hadd_epi16 (lo16, hi16);
+}
+#endif
+
+#if defined(USE_SSSE3)
+int32x4_t vpaddlq_s16(int16x8_t a); // VPADDL.S16 q0,q0
+_NEON2SSE_INLINE int32x4_t vpaddlq_s16(int16x8_t a) // VPADDL.S16 q0,q0
+{ //convert both halves to 32 bit lanes first, then add adjacent lanes horizontally
+ __m128i lo32, hi32;
+ lo32 = _MM_CVTEPI16_EPI32 (a);
+ hi32 = _MM_CVTEPI16_EPI32 (_mm_shuffle_epi32 (a, _SWAP_HI_LOW32)); //the swap exposes the remaining half to the conversion
+ return _mm_hadd_epi32 (lo32, hi32);
+}
+#endif
+
+int64x2_t vpaddlq_s32(int32x4_t a); // VPADDL.S32 q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x2_t vpaddlq_s32(int32x4_t a), _NEON2SSE_REASON_SLOW_SERIAL) // VPADDL.S32 q0,q0
+{ //serial: spill the four lanes, add each adjacent pair in 64 bit precision, reload
+ _NEON2SSE_ALIGN_16 int32_t lanes[4];
+ _NEON2SSE_ALIGN_16 int64_t sums[2];
+ int i;
+ _mm_store_si128((__m128i*)lanes, a);
+ for (i = 0; i < 2; i++) {
+  sums[i] = (int64_t)lanes[2 * i] + (int64_t)lanes[2 * i + 1];
+ }
+ return _mm_load_si128((__m128i*)sums);
+}
+
+#if defined(USE_SSSE3)
+uint16x8_t vpaddlq_u8(uint8x16_t a); // VPADDL.U8 q0,q0
+_NEON2SSE_INLINE uint16x8_t vpaddlq_u8(uint8x16_t a) // VPADDL.U8 q0,q0
+{ //no 8 bit hadd in IA32: convert both halves to 16 bit lanes, then add adjacent lanes horizontally
+ __m128i lo16, hi16;
+ lo16 = _MM_CVTEPU8_EPI16 (a);
+ hi16 = _MM_CVTEPU8_EPI16 (_mm_shuffle_epi32 (a, _SWAP_HI_LOW32)); //the swap exposes the remaining half to the conversion
+ return _mm_hadd_epi16 (lo16, hi16);
+}
+#endif
+
+uint32x4_t vpaddlq_u16(uint16x8_t a); // VPADDL.U16 q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x4_t vpaddlq_u16(uint16x8_t a), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial solution looks faster than a SIMD one: spill, add each adjacent pair in 32 bit, reload
+ _NEON2SSE_ALIGN_16 uint16_t lanes[8];
+ _NEON2SSE_ALIGN_16 uint32_t sums[4];
+ int i;
+ _mm_store_si128((__m128i*)lanes, a);
+ for (i = 0; i < 4; i++) {
+  sums[i] = (uint32_t)lanes[2 * i] + (uint32_t)lanes[2 * i + 1];
+ }
+ return _mm_load_si128((__m128i*)sums);
+}
+
+uint64x2_t vpaddlq_u32(uint32x4_t a); // VPADDL.U32 q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint64x2_t vpaddlq_u32(uint32x4_t a), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial: spill the four lanes, add each adjacent pair in 64 bit precision, reload
+ _NEON2SSE_ALIGN_16 uint32_t lanes[4];
+ _NEON2SSE_ALIGN_16 uint64_t sums[2];
+ int i;
+ _mm_store_si128((__m128i*)lanes, a);
+ for (i = 0; i < 2; i++) {
+  sums[i] = (uint64_t)lanes[2 * i] + (uint64_t)lanes[2 * i + 1];
+ }
+ return _mm_load_si128((__m128i*)sums);
+}
+
+//************************ Long pairwise add and accumulate **************************
+//****************************************************************************************
+//VPADAL (Vector Pairwise Add and Accumulate Long) adds adjacent pairs of elements of a vector,
+// and accumulates the values of the results into the elements of the destination (wide) vector
+
+#if defined(USE_SSSE3)
+int16x8_t vpadalq_s8(int16x8_t a, int8x16_t b); // VPADAL.S8 q0,q0
+_NEON2SSE_INLINE int16x8_t vpadalq_s8(int16x8_t a, int8x16_t b) // VPADAL.S8 q0,q0
+{ //accumulate the widened pairwise sums of b into a
+ return _mm_add_epi16 (a, vpaddlq_s8(b));
+}
+#endif
+
+#if defined(USE_SSSE3)
+int32x4_t vpadalq_s16(int32x4_t a, int16x8_t b); // VPADAL.S16 q0,q0
+_NEON2SSE_INLINE int32x4_t vpadalq_s16(int32x4_t a, int16x8_t b) // VPADAL.S16 q0,q0
+{ //accumulate the widened pairwise sums of b into a
+ return _mm_add_epi32 (a, vpaddlq_s16(b));
+}
+#endif
+
+int64x2_t vpadalq_s32(int64x2_t a, int32x4_t b); // VPADAL.S32 q0,q0
+_NEON2SSE_INLINE int64x2_t vpadalq_s32(int64x2_t a, int32x4_t b)
+{ //accumulate the widened pairwise sums of b into a
+ return _mm_add_epi64 (a, vpaddlq_s32(b));
+}
+
+#if defined(USE_SSSE3)
+uint16x8_t vpadalq_u8(uint16x8_t a, uint8x16_t b); // VPADAL.U8 q0,q0
+_NEON2SSE_INLINE uint16x8_t vpadalq_u8(uint16x8_t a, uint8x16_t b) // VPADAL.U8 q0,q0
+{ //accumulate the widened pairwise sums of b into a
+ return _mm_add_epi16 (a, vpaddlq_u8(b));
+}
+#endif
+
+uint32x4_t vpadalq_u16(uint32x4_t a, uint16x8_t b); // VPADAL.U16 q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x4_t vpadalq_u16(uint32x4_t a, uint16x8_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //no optimal SIMD solution, serial is faster: the pairwise sums come from the serial vpaddlq_u16
+ return _mm_add_epi32 (a, vpaddlq_u16(b));
+}
+
+uint64x2_t vpadalq_u32(uint64x2_t a, uint32x4_t b); // VPADAL.U32 q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint64x2_t vpadalq_u32(uint64x2_t a, uint32x4_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //no optimal SIMD solution, serial is faster: the pairwise sums come from the serial vpaddlq_u32
+ return _mm_add_epi64 (a, vpaddlq_u32(b));
+}
+
+//********** Folding maximum *************************************
+//*******************************************************************
+//VPMAX (Vector Pairwise Maximum) compares adjacent pairs of elements in two vectors,
+//and copies the larger of each pair into the corresponding element in the destination
+// no corresponding functionality in IA32 SIMD, so we need to do the vertical comparison
+
+// ***************** Folding minimum ****************************
+// **************************************************************
+//vpmin -> takes minimum of adjacent pairs
+
+//***************************************************************
+//*********** Reciprocal/Sqrt ************************************
+//***************************************************************
+//****************** Reciprocal estimate *******************************
+
+//the ARM NEON and x86 SIMD results may be slightly different
+
+//Alias: the float reciprocal estimate forwards directly to the SSE intrinsic _mm_rcp_ps.
+float32x4_t vrecpeq_f32(float32x4_t a); // VRECPE.F32 q0,q0
+//the ARM NEON and x86 SIMD results may be slightly different
+#define vrecpeq_f32 _mm_rcp_ps
+
+uint32x4_t vrecpeq_u32(uint32x4_t a); // VRECPE.U32 q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x4_t vrecpeq_u32(uint32x4_t a), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //no reciprocal for ints in IA32 available, neither for unsigned int to float 4 lanes conversion, so serial solution looks faster
+ //All four lanes are computed: filling only res[0] and res[1] would return indeterminate data in the upper two lanes.
+ //NOTE(review): 1 / x is a plain integer division (1 for x == 1, 0 for larger x); confirm whether the
+ //fixed-point estimate of the NEON instruction is wanted here instead.
+ _NEON2SSE_ALIGN_16 uint32_t atmp[4], res[4];
+ int i;
+ _mm_store_si128((__m128i*)atmp, a);
+ for (i = 0; i < 4; i++) {
+  res[i] = (atmp[i]) ? 1 / atmp[i] : 0xffffffff; //a zero lane maps to all ones instead of dividing by zero
+ }
+ return _mm_load_si128((__m128i*)res);
+}
+
+//**********Reciprocal square root estimate ****************
+//**********************************************************
+//no reciprocal square root for ints in IA32 available, neither for unsigned int to float4 lanes conversion, so a serial solution looks faster
+
+//Aliases: both reciprocal square root estimates forward to the SSE intrinsic _mm_rsqrt_ps.
+float32x4_t vrsqrteq_f32(float32x4_t a); // VRSQRTE.F32 q0,q0
+//the ARM NEON and x86 SIMD results may be slightly different
+#define vrsqrteq_f32 _mm_rsqrt_ps
+
+//NOTE(review): the u32 form hands _M128(a) to the float estimate and casts the float result back to integer
+//lanes; _M128 is defined outside this section - confirm it converts values rather than reinterpreting bits.
+uint32x4_t vrsqrteq_u32(uint32x4_t a); // VRSQRTE.U32 q0,q0
+#define vrsqrteq_u32(a) _mm_castps_si128(_mm_rsqrt_ps(_M128(a)) )
+
+//************ Reciprocal estimate/step and 1/sqrt estimate/step ***************************
+//******************************************************************************************
+//******VRECPS (Vector Reciprocal Step) ***************************************************
+//multiplies the elements of one vector by the corresponding elements of another vector,
+//subtracts each of the results from 2, and places the final results into the elements of the destination vector.
+
+float32x4_t vrecpsq_f32(float32x4_t a, float32x4_t b); // VRECPS.F32 q0, q0, q0
+_NEON2SSE_INLINE float32x4_t vrecpsq_f32(float32x4_t a, float32x4_t b) // VRECPS.F32 q0, q0, q0
+{ //reciprocal step: 2 - a * b in every lane
+ const __m128 two = _mm_set1_ps(2.0f);
+ return _mm_sub_ps(two, _mm_mul_ps(a,b));
+}
+
+//*****************VRSQRTS (Vector Reciprocal Square Root Step) *****************************
+//multiplies the elements of one vector by the corresponding elements of another vector,
+//subtracts each of the results from 3, divides these results by two, and places the final results into the elements of the destination vector.
+
+float32x4_t vrsqrtsq_f32(float32x4_t a, float32x4_t b); // VRSQRTS.F32 q0, q0, q0
+_NEON2SSE_INLINE float32x4_t vrsqrtsq_f32(float32x4_t a, float32x4_t b) // VRSQRTS.F32 q0, q0, q0
+{ //reciprocal square root step: (3 - a * b) * 0.5 in every lane
+ const __m128 three = _mm_set1_ps(3.0f);
+ const __m128 half = _mm_set1_ps(0.5f);
+ const __m128 three_minus_ab = _mm_sub_ps(three, _mm_mul_ps(a,b));
+ return _mm_mul_ps (three_minus_ab, half);
+}
+//********************************************************************************************
+//***************************** Shifts by signed variable ***********************************
+//********************************************************************************************
+//***** Vector shift left: Vr[i] := Va[i] << Vb[i] (negative values shift right) ***********************
+//********************************************************************************************
+//No such operations in IA32 SIMD unfortunately, constant shift only available, so need to do the serial solution
+//helper macro. It matches ARM implementation for big shifts
+//SERIAL_SHIFT(TYPE, INTERNAL_TYPE, LENMAX, LEN) expands to a complete function body. It expects __m128i
+//arguments named a (values, lanes of TYPE) and b (shift counts, lanes of INTERNAL_TYPE) in the enclosing function.
+//Both are stored to aligned arrays of LENMAX elements and the first LEN lanes are processed:
+//  count >= lane width or count <= -lane width -> 0
+//  count >= 0                                  -> a << count
+//  count <  0                                  -> a >> (-count)
+//The macro ends with the return statement that reloads the result array.
+//NOTE(review): a signed lane shifted right by the lane width or more yields 0 here even when the lane is negative;
+//confirm against the NEON VSHL definition whether sign fill (-1) is expected in that case.
+#define SERIAL_SHIFT(TYPE, INTERNAL_TYPE, LENMAX, LEN) \
+ _NEON2SSE_ALIGN_16 TYPE atmp[LENMAX], res[LENMAX]; _NEON2SSE_ALIGN_16 INTERNAL_TYPE btmp[LENMAX]; int i, lanesize = sizeof(INTERNAL_TYPE) << 3; \
+ _mm_store_si128((__m128i*)atmp, a); _mm_store_si128((__m128i*)btmp, b); \
+ for (i = 0; i<LEN; i++) { \
+ if( (btmp[i] >= lanesize)||(btmp[i] <= -lanesize) ) res[i] = 0; \
+ else res[i] = (btmp[i] >=0) ? atmp[i] << btmp[i] : atmp[i] >> (-btmp[i]); } \
+ return _mm_load_si128((__m128i*)res);
+
+int8x16_t vshlq_s8(int8x16_t a, int8x16_t b); // VSHL.S8 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int8x16_t vshlq_s8(int8x16_t a, int8x16_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial per-lane shift: 16 int8_t lanes of a, each shifted by the matching int8_t lane of b
+ SERIAL_SHIFT(int8_t, int8_t, 16, 16)
+}
+
+int16x8_t vshlq_s16(int16x8_t a, int16x8_t b); // VSHL.S16 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int16x8_t vshlq_s16(int16x8_t a, int16x8_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial per-lane shift: 8 int16_t lanes of a, each shifted by the matching int16_t lane of b
+ SERIAL_SHIFT(int16_t, int16_t, 8, 8)
+}
+
+int32x4_t vshlq_s32(int32x4_t a, int32x4_t b); // VSHL.S32 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x4_t vshlq_s32(int32x4_t a, int32x4_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial per-lane shift: 4 int32_t lanes of a, each shifted by the matching int32_t lane of b
+ SERIAL_SHIFT(int32_t, int32_t, 4, 4)
+}
+
+int64x2_t vshlq_s64(int64x2_t a, int64x2_t b); // VSHL.S64 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x2_t vshlq_s64(int64x2_t a, int64x2_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial per-lane shift: 2 int64_t lanes of a, each shifted by the matching int64_t lane of b
+ SERIAL_SHIFT(int64_t, int64_t, 2, 2)
+}
+
+uint8x16_t vshlq_u8(uint8x16_t a, int8x16_t b); // VSHL.U8 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint8x16_t vshlq_u8(uint8x16_t a, int8x16_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial per-lane shift: 16 uint8_t lanes of a, shift counts read as signed int8_t lanes of b
+ SERIAL_SHIFT(uint8_t, int8_t, 16, 16)
+}
+
+uint16x8_t vshlq_u16(uint16x8_t a, int16x8_t b); // VSHL.U16 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint16x8_t vshlq_u16(uint16x8_t a, int16x8_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial per-lane shift: 8 uint16_t lanes of a, shift counts read as signed int16_t lanes of b
+ SERIAL_SHIFT(uint16_t, int16_t, 8, 8)
+}
+
+uint32x4_t vshlq_u32(uint32x4_t a, int32x4_t b); // VSHL.U32 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x4_t vshlq_u32(uint32x4_t a, int32x4_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial per-lane shift: 4 uint32_t lanes of a, shift counts read as signed int32_t lanes of b
+ SERIAL_SHIFT(uint32_t, int32_t, 4, 4)
+}
+
+uint64x2_t vshlq_u64(uint64x2_t a, int64x2_t b); // VSHL.U64 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING( uint64x2_t vshlq_u64(uint64x2_t a, int64x2_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial per-lane shift: 2 uint64_t lanes of a, shift counts read as signed int64_t lanes of b
+ SERIAL_SHIFT(uint64_t, int64_t, 2, 2)
+}
+
+//*********** Vector saturating shift left: (negative values shift right) **********************
+//********************************************************************************************
+//No such operations in IA32 SIMD available yet, constant shift only available, so need to do the serial solution
+//SERIAL_SATURATING_SHIFT_SIGNED(TYPE, LENMAX, LEN) expands to a complete function body for signed VQSHL.
+//It expects __m128i arguments named a (values) and b (signed per-lane shift counts) in the enclosing function.
+//Per lane: count > 0 shifts left and saturates to the TYPE minimum/maximum on overflow, count < 0 shifts right,
+//count == 0 (or a zero value) returns the lane unchanged.
+//The zero count is handled up front because the saturation limit 1 << (lanesize_1 - 0) does not fit in TYPE,
+//which would make every non-zero lane look like an overflow. A right shift by the lane width or more returns
+//the sign fill (a >> lanesize_1) instead of executing an over-wide, undefined C shift.
+#define SERIAL_SATURATING_SHIFT_SIGNED(TYPE, LENMAX, LEN) \
+ _NEON2SSE_ALIGN_16 TYPE atmp[LENMAX], res[LENMAX], btmp[LENMAX]; TYPE limit; int i; \
+ int lanesize_1 = (sizeof(TYPE) << 3) - 1; \
+ _mm_store_si128((__m128i*)atmp, a); _mm_store_si128((__m128i*)btmp, b); \
+ for (i = 0; i<LEN; i++) { \
+ if ((atmp[i] ==0)||(btmp[i] ==0)) res[i] = atmp[i]; \
+ else{ \
+ if(btmp[i] <0) res[i] = (btmp[i] < -lanesize_1) ? (atmp[i] >> lanesize_1) : (atmp[i] >> (-btmp[i])); \
+ else{ \
+ if (btmp[i]>lanesize_1) { \
+ res[i] = ((_UNSIGNED_T(TYPE))atmp[i] >> lanesize_1 ) + ((TYPE)1 << lanesize_1) - 1; \
+ }else{ \
+ limit = (TYPE)1 << (lanesize_1 - btmp[i]); \
+ if((atmp[i] >= limit)||(atmp[i] <= -limit)) \
+ res[i] = ((_UNSIGNED_T(TYPE))atmp[i] >> lanesize_1 ) + ((TYPE)1 << lanesize_1) - 1; \
+ else res[i] = atmp[i] << btmp[i]; }}}} \
+ return _mm_load_si128((__m128i*)res);
+
+//SERIAL_SATURATING_SHIFT_UNSIGNED(TYPE, LENMAX, LEN) expands to a complete function body for unsigned VQSHL.
+//TYPE is the SIGNED lane type of the shift counts; the value lanes use _UNSIGNED_T(TYPE).
+//It expects __m128i arguments named a (values) and b (signed per-lane shift counts) in the enclosing function.
+//Per lane: count > 0 shifts left and saturates to all ones on overflow, count < 0 shifts right,
+//count == 0 (or a zero value) returns the lane unchanged.
+//The zero count is handled up front because the limit 1 << (lanesize - 0) truncates to 0 in the lane type,
+//which would saturate every non-zero lane. A right shift by the lane width or more returns 0 instead of
+//executing an over-wide, undefined C shift, and the limit is built in the unsigned lane type.
+#define SERIAL_SATURATING_SHIFT_UNSIGNED(TYPE, LENMAX, LEN) \
+ _NEON2SSE_ALIGN_16 _UNSIGNED_T(TYPE) atmp[LENMAX], res[LENMAX]; _NEON2SSE_ALIGN_16 TYPE btmp[LENMAX]; _UNSIGNED_T(TYPE) limit; int i; \
+ TYPE lanesize = (sizeof(TYPE) << 3); \
+ _mm_store_si128((__m128i*)atmp, a); _mm_store_si128((__m128i*)btmp, b); \
+ for (i = 0; i<LEN; i++) { \
+ if ((atmp[i] ==0)||(btmp[i] ==0)) {res[i] = atmp[i]; \
+ }else{ \
+ if(btmp[i] < 0) res[i] = (btmp[i] <= -lanesize) ? 0 : (atmp[i] >> (-btmp[i])); \
+ else{ \
+ if (btmp[i]>lanesize) res[i] = ~((TYPE)0); \
+ else{ \
+ limit = (_UNSIGNED_T(TYPE)) 1 << (lanesize - btmp[i]); \
+ res[i] = ( atmp[i] >= limit) ? ~((TYPE)0) : atmp[i] << btmp[i]; }}}} \
+ return _mm_load_si128((__m128i*)res);
+
+int8x16_t vqshlq_s8(int8x16_t a, int8x16_t b); // VQSHL.S8 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int8x16_t vqshlq_s8(int8x16_t a, int8x16_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial saturating shift over 16 int8_t lanes
+ SERIAL_SATURATING_SHIFT_SIGNED(int8_t, 16, 16)
+}
+
+int16x8_t vqshlq_s16(int16x8_t a, int16x8_t b); // VQSHL.S16 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int16x8_t vqshlq_s16(int16x8_t a, int16x8_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial saturating shift over 8 int16_t lanes
+ SERIAL_SATURATING_SHIFT_SIGNED(int16_t, 8, 8)
+}
+
+int32x4_t vqshlq_s32(int32x4_t a, int32x4_t b); // VQSHL.S32 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x4_t vqshlq_s32(int32x4_t a, int32x4_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial saturating shift over 4 int32_t lanes
+ SERIAL_SATURATING_SHIFT_SIGNED(int32_t, 4, 4)
+}
+
+int64x2_t vqshlq_s64(int64x2_t a, int64x2_t b); // VQSHL.S64 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x2_t vqshlq_s64(int64x2_t a, int64x2_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial saturating shift over 2 int64_t lanes
+ SERIAL_SATURATING_SHIFT_SIGNED(int64_t, 2, 2)
+}
+
+uint8x16_t vqshlq_u8(uint8x16_t a, int8x16_t b); // VQSHL.U8 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint8x16_t vqshlq_u8(uint8x16_t a, int8x16_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial saturating shift over 16 lanes; the macro takes the signed count type and derives the unsigned value type itself
+ SERIAL_SATURATING_SHIFT_UNSIGNED(int8_t, 16, 16)
+}
+
+uint16x8_t vqshlq_u16(uint16x8_t a, int16x8_t b); // VQSHL.U16 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint16x8_t vqshlq_u16(uint16x8_t a, int16x8_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial saturating shift over 8 lanes; the macro takes the signed count type and derives the unsigned value type itself
+ SERIAL_SATURATING_SHIFT_UNSIGNED(int16_t, 8, 8)
+}
+
+uint32x4_t vqshlq_u32(uint32x4_t a, int32x4_t b); // VQSHL.U32 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x4_t vqshlq_u32(uint32x4_t a, int32x4_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial saturating shift over 4 lanes; the macro takes the signed count type and derives the unsigned value type itself
+ SERIAL_SATURATING_SHIFT_UNSIGNED(int32_t, 4, 4)
+}
+
+uint64x2_t vqshlq_u64(uint64x2_t a, int64x2_t b); // VQSHL.U64 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint64x2_t vqshlq_u64(uint64x2_t a, int64x2_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial saturating shift over 2 lanes; the macro takes the signed count type and derives the unsigned value type itself
+ SERIAL_SATURATING_SHIFT_UNSIGNED(int64_t, 2, 2)
+}
+
+//******** Vector rounding shift left: (negative values shift right) **********
+//****************************************************************************
+//No such operations in IA32 SIMD available yet, constant shift only available, so need to do the serial solution
+//rounding makes sense for right shifts only.
+//SERIAL_ROUNDING_SHIFT(TYPE, INTERNAL_TYPE, LENMAX, LEN) expands to a complete function body. It expects __m128i
+//arguments named a (values, lanes of TYPE) and b (shift counts, lanes of INTERNAL_TYPE) in the enclosing function.
+//Per lane:
+//  count >= lane width        -> 0
+//  0 <= count < lane width    -> a << count
+//  count <  -lane width       -> 0
+//  count == -lane width       -> only the rounding bit: bit (lane width - 1) of a moved down to bit 0
+//  otherwise                  -> (a >> -count) plus the last bit shifted out (round half up)
+//The loop index and lanesize are declared with INTERNAL_TYPE, not int.
+//NOTE(review): for signed TYPE the "count == -lane width" branch extracts the sign bit, so a negative lane yields
+//a non-zero result; confirm against the NEON VRSHL definition whether 0 is expected there.
+#define SERIAL_ROUNDING_SHIFT(TYPE, INTERNAL_TYPE, LENMAX, LEN) \
+ _NEON2SSE_ALIGN_16 TYPE atmp[LENMAX], res[LENMAX]; _NEON2SSE_ALIGN_16 INTERNAL_TYPE btmp[LENMAX]; INTERNAL_TYPE i, lanesize = sizeof(INTERNAL_TYPE) << 3; \
+ _mm_store_si128((__m128i*)atmp, a); _mm_store_si128((__m128i*)btmp, b); \
+ for (i = 0; i<LEN; i++) { \
+ if( btmp[i] >= 0) { \
+ if(btmp[i] >= lanesize) res[i] = 0; \
+ else res[i] = (atmp[i] << btmp[i]); \
+ }else{ \
+ res[i] = (btmp[i] < -lanesize) ? res[i] = 0 : \
+ (btmp[i] == -lanesize) ? (atmp[i] & ((INTERNAL_TYPE)1 << (-btmp[i] - 1))) >> (-btmp[i] - 1) : \
+ (atmp[i] >> (-btmp[i])) + ( (atmp[i] & ((INTERNAL_TYPE)1 << (-btmp[i] - 1))) >> (-btmp[i] - 1) ); }} \
+ return _mm_load_si128((__m128i*)res);
+
+int8x16_t vrshlq_s8(int8x16_t a, int8x16_t b); // VRSHL.S8 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int8x16_t vrshlq_s8(int8x16_t a, int8x16_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial rounding shift: 16 int8_t lanes of a, counts from the matching int8_t lanes of b
+ SERIAL_ROUNDING_SHIFT(int8_t, int8_t, 16, 16)
+}
+
+int16x8_t vrshlq_s16(int16x8_t a, int16x8_t b); // VRSHL.S16 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int16x8_t vrshlq_s16(int16x8_t a, int16x8_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial rounding shift: 8 int16_t lanes of a, counts from the matching int16_t lanes of b
+ SERIAL_ROUNDING_SHIFT(int16_t, int16_t, 8, 8)
+}
+
+int32x4_t vrshlq_s32(int32x4_t a, int32x4_t b); // VRSHL.S32 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x4_t vrshlq_s32(int32x4_t a, int32x4_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial rounding shift: 4 int32_t lanes of a, counts from the matching int32_t lanes of b
+ SERIAL_ROUNDING_SHIFT(int32_t, int32_t, 4, 4)
+}
+
+int64x2_t vrshlq_s64(int64x2_t a, int64x2_t b); // VRSHL.S64 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x2_t vrshlq_s64(int64x2_t a, int64x2_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial rounding shift: 2 int64_t lanes of a, counts from the matching int64_t lanes of b
+ SERIAL_ROUNDING_SHIFT(int64_t, int64_t, 2, 2)
+}
+
+uint8x16_t vrshlq_u8(uint8x16_t a, int8x16_t b); // VRSHL.U8 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint8x16_t vrshlq_u8(uint8x16_t a, int8x16_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial rounding shift: 16 uint8_t lanes of a, counts read as signed int8_t lanes of b
+ SERIAL_ROUNDING_SHIFT(uint8_t, int8_t, 16, 16)
+}
+
+uint16x8_t vrshlq_u16(uint16x8_t a, int16x8_t b); // VRSHL.U16 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint16x8_t vrshlq_u16(uint16x8_t a, int16x8_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial rounding shift: 8 uint16_t lanes of a, counts read as signed int16_t lanes of b
+ SERIAL_ROUNDING_SHIFT(uint16_t, int16_t, 8, 8)
+}
+
+uint32x4_t vrshlq_u32(uint32x4_t a, int32x4_t b); // VRSHL.U32 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x4_t vrshlq_u32(uint32x4_t a, int32x4_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial rounding shift: 4 uint32_t lanes of a, counts read as signed int32_t lanes of b
+ SERIAL_ROUNDING_SHIFT(uint32_t, int32_t, 4, 4)
+}
+
+uint64x2_t vrshlq_u64(uint64x2_t a, int64x2_t b); // VRSHL.U64 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint64x2_t vrshlq_u64(uint64x2_t a, int64x2_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial rounding shift: 2 uint64_t lanes of a, counts read as signed int64_t lanes of b
+ SERIAL_ROUNDING_SHIFT(uint64_t, int64_t, 2, 2)
+}
+
+//********** Vector saturating rounding shift left: (negative values shift right) ****************
+//*************************************************************************************************
+//No such operations in IA32 SIMD unfortunately, constant shift only available, so need to do the serial solution
+//Saturation happens for left shifts only while rounding makes sense for right shifts only.
+//SERIAL_SATURATING_ROUNDING_SHIFT_SIGNED(TYPE, LENMAX, LEN) expands to a complete function body for signed VQRSHL.
+//It expects __m128i arguments named a (values) and b (signed per-lane shift counts) in the enclosing function.
+//Per lane: count > 0 shifts left and saturates to the TYPE minimum/maximum on overflow; count < 0 shifts right
+//and adds the last bit shifted out (0 once the count reaches the lane width); count == 0 (or a zero value)
+//returns the lane unchanged.
+//The zero count is handled up front because the saturation limit 1 << (lanesize_1 - 0) does not fit in TYPE,
+//which would make every non-zero lane look like an overflow.
+#define SERIAL_SATURATING_ROUNDING_SHIFT_SIGNED(TYPE, LENMAX, LEN) \
+ _NEON2SSE_ALIGN_16 TYPE atmp[LENMAX], res[LENMAX], btmp[LENMAX]; TYPE limit; int i; \
+ int lanesize_1 = (sizeof(TYPE) << 3) - 1; \
+ _mm_store_si128((__m128i*)atmp, a); _mm_store_si128((__m128i*)btmp, b); \
+ for (i = 0; i<LEN; i++) { \
+ if ((atmp[i] ==0)||(btmp[i] ==0)) res[i] = atmp[i]; \
+ else{ \
+ if(btmp[i] <0) res[i] = (btmp[i] < (-lanesize_1)) ? 0 : (atmp[i] >> (-btmp[i])) + ( (atmp[i] & ((TYPE)1 << (-btmp[i] - 1))) >> (-btmp[i] - 1) ); \
+ else{ \
+ if (btmp[i]>lanesize_1) { \
+ res[i] = ((_UNSIGNED_T(TYPE))atmp[i] >> lanesize_1 ) + ((TYPE)1 << lanesize_1) - 1; \
+ }else{ \
+ limit = (TYPE)1 << (lanesize_1 - btmp[i]); \
+ if((atmp[i] >= limit)||(atmp[i] <= -limit)) \
+ res[i] = ((_UNSIGNED_T(TYPE))atmp[i] >> lanesize_1 ) + ((TYPE)1 << lanesize_1) - 1; \
+ else res[i] = atmp[i] << btmp[i]; }}}} \
+ return _mm_load_si128((__m128i*)res);
+
+//SERIAL_SATURATING_ROUNDING_SHIFT_UNSIGNED(TYPE, LENMAX, LEN) expands to a complete function body for unsigned VQRSHL.
+//TYPE is the SIGNED lane type of the shift counts; the value lanes use _UNSIGNED_T(TYPE).
+//It expects __m128i arguments named a (values) and b (signed per-lane shift counts) in the enclosing function.
+//Per lane: count > 0 shifts left and saturates to all ones on overflow; count < 0 shifts right and adds the last
+//bit shifted out; count == 0 (or a zero value) returns the lane unchanged.
+//The zero count is handled up front because the limit 1 << (lanesize - 0) truncates to 0 in the lane type, which
+//would saturate every non-zero lane. A right shift by exactly the lane width keeps only the rounding bit
+//(the top bit of a), so no over-wide, undefined C shift is executed; the limit is built in the unsigned lane type.
+#define SERIAL_SATURATING_ROUNDING_SHIFT_UNSIGNED(TYPE, LENMAX, LEN) \
+ _NEON2SSE_ALIGN_16 _UNSIGNED_T(TYPE) atmp[LENMAX], res[LENMAX]; _NEON2SSE_ALIGN_16 TYPE btmp[LENMAX]; _UNSIGNED_T(TYPE) limit; int i; \
+ int lanesize = (sizeof(TYPE) << 3); \
+ _mm_store_si128((__m128i*)atmp, a); _mm_store_si128((__m128i*)btmp, b); \
+ for (i = 0; i<LEN; i++) { \
+ if ((atmp[i] ==0)||(btmp[i] ==0)) {res[i] = atmp[i]; \
+ }else{ \
+ if(btmp[i] < 0) res[i] = (btmp[i] < (-lanesize)) ? 0 : \
+ (btmp[i] == (-lanesize)) ? (atmp[i] >> (lanesize - 1)) : \
+ (atmp[i] >> (-btmp[i])) + ( (atmp[i] & ((TYPE)1 << (-btmp[i] - 1))) >> (-btmp[i] - 1) ); \
+ else{ \
+ if (btmp[i]>lanesize) res[i] = ~((TYPE)0); \
+ else{ \
+ limit = (_UNSIGNED_T(TYPE)) 1 << (lanesize - btmp[i]); \
+ res[i] = ( atmp[i] >= limit) ? ~((TYPE)0) : atmp[i] << btmp[i]; }}}} \
+ return _mm_load_si128((__m128i*)res);
+
+int8x16_t vqrshlq_s8(int8x16_t a, int8x16_t b); // VQRSHL.S8 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int8x16_t vqrshlq_s8(int8x16_t a, int8x16_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial saturating rounding shift over 16 int8_t lanes
+ SERIAL_SATURATING_ROUNDING_SHIFT_SIGNED(int8_t, 16, 16)
+}
+
+int16x8_t vqrshlq_s16(int16x8_t a, int16x8_t b); // VQRSHL.S16 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int16x8_t vqrshlq_s16(int16x8_t a, int16x8_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial saturating rounding shift over 8 int16_t lanes
+ SERIAL_SATURATING_ROUNDING_SHIFT_SIGNED(int16_t, 8, 8)
+}
+
+int32x4_t vqrshlq_s32(int32x4_t a, int32x4_t b); // VQRSHL.S32 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x4_t vqrshlq_s32(int32x4_t a, int32x4_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial saturating rounding shift over 4 int32_t lanes
+ SERIAL_SATURATING_ROUNDING_SHIFT_SIGNED(int32_t, 4, 4)
+}
+
+int64x2_t vqrshlq_s64(int64x2_t a, int64x2_t b); // VQRSHL.S64 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x2_t vqrshlq_s64(int64x2_t a, int64x2_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial saturating rounding shift over 2 int64_t lanes
+ SERIAL_SATURATING_ROUNDING_SHIFT_SIGNED(int64_t, 2, 2)
+}
+
+uint8x16_t vqrshlq_u8(uint8x16_t a, int8x16_t b); // VQRSHL.U8 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint8x16_t vqrshlq_u8(uint8x16_t a, int8x16_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial saturating rounding shift over 16 lanes; the macro takes the signed count type and derives the unsigned value type itself
+ SERIAL_SATURATING_ROUNDING_SHIFT_UNSIGNED(int8_t, 16, 16)
+}
+
+uint16x8_t vqrshlq_u16(uint16x8_t a, int16x8_t b); // VQRSHL.U16 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint16x8_t vqrshlq_u16(uint16x8_t a, int16x8_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial saturating rounding shift over 8 lanes; the macro takes the signed count type and derives the unsigned value type itself
+ SERIAL_SATURATING_ROUNDING_SHIFT_UNSIGNED(int16_t, 8, 8)
+}
+
+uint32x4_t vqrshlq_u32(uint32x4_t a, int32x4_t b); // VQRSHL.U32 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x4_t vqrshlq_u32(uint32x4_t a, int32x4_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial saturating rounding shift over 4 lanes; the macro takes the signed count type and derives the unsigned value type itself
+ SERIAL_SATURATING_ROUNDING_SHIFT_UNSIGNED(int32_t, 4, 4)
+}
+
+uint64x2_t vqrshlq_u64(uint64x2_t a, int64x2_t b); // VQRSHL.U64 q0,q0,q0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint64x2_t vqrshlq_u64(uint64x2_t a, int64x2_t b), _NEON2SSE_REASON_SLOW_SERIAL)
+{ //serial saturating rounding shift over 2 lanes; the macro takes the signed count type and derives the unsigned value type itself
+ SERIAL_SATURATING_ROUNDING_SHIFT_UNSIGNED(int64_t, 2, 2)
+}
+
+// *********************************************************************************
+// ***************************** Shifts by a constant *****************************
+// *********************************************************************************
+//**************** Vector shift right by constant*************************************
+//************************************************************************************
+
+int8x16_t vshrq_n_s8(int8x16_t a, __constrange(1,8) int b); // VSHR.S8 q0,q0,#8
+_NEON2SSE_INLINE int8x16_t vshrq_n_s8(int8x16_t a, __constrange(1,8) int b) // VSHR.S8 q0,q0,#8
+{ //no 8 bit shift available: shift as 16 bit lanes, then repair the top b bits of every low byte
+ _NEON2SSE_ALIGN_16 int16_t spoiled_bits[9] = {0x0000, 0x0080, 0x00c0, 0x00e0, 0x00f0, 0x00f8, 0x00fc, 0x00fe, 0x00ff};
+ __m128i spoiled, negative, shifted, sign_fill;
+ spoiled = _mm_set1_epi16(spoiled_bits[b]); //low-byte bits that receive bits of the high byte in the 16 bit shift
+ shifted = _mm_srai_epi16 (a, b);
+ negative = _mm_cmpgt_epi8 (_mm_setzero_si128(), a); //ff if a<0 or zero if a>0
+ sign_fill = _mm_and_si128 (spoiled, negative); //sign bits to put in place of the spoiled ones
+ shifted = _mm_andnot_si128 (spoiled, shifted);
+ return _mm_or_si128 (shifted, sign_fill);
+}
+
+//Aliases: the 16 and 32 bit signed right shifts forward directly to the SSE arithmetic shift intrinsics.
+int16x8_t vshrq_n_s16(int16x8_t a, __constrange(1,16) int b); // VSHR.S16 q0,q0,#16
+#define vshrq_n_s16 _mm_srai_epi16
+
+int32x4_t vshrq_n_s32(int32x4_t a, __constrange(1,32) int b); // VSHR.S32 q0,q0,#32
+#define vshrq_n_s32 _mm_srai_epi32
+
+int64x2_t vshrq_n_s64(int64x2_t a, __constrange(1,64) int b); // VSHR.S64 q0,q0,#64
+_NEON2SSE_INLINE int64x2_t vshrq_n_s64(int64x2_t a, __constrange(1,64) int b)
+{ //SIMD implementation may be not optimal due to 64 bit arithmetic shift absence in x86 SIMD
+ //logical shift right, then OR the b highest bits back in for the lanes whose sign bit is set
+ __m128i all_ones, top_bit, is_negative, sign_fill;
+ all_ones = _mm_cmpeq_epi32(a,a); //0xffffffffffffffff
+ top_bit = _mm_slli_epi64 (all_ones, 63); //0x8000000000000000 in both lanes
+ sign_fill = _mm_slli_epi64 (all_ones, (64 - b)); //the b highest bits
+ is_negative = _mm_or_si128(a, top_bit); //equals a only where the sign bit of a is already set
+ #ifdef USE_SSE4
+ is_negative = _mm_cmpeq_epi64 (a, is_negative); //SSE4.1
+ #else
+ is_negative = _mm_cmpeq_epi32 (a, is_negative);
+ is_negative = _mm_shuffle_epi32 (is_negative, 1 | (1 << 2) | (3 << 4) | (3 << 6)); //copy the information from hi to low part of the 64 bit data
+ #endif
+ sign_fill = _mm_and_si128(is_negative, sign_fill);
+ return _mm_or_si128(_mm_srli_epi64 (a, b), sign_fill);
+}
+
+uint8x16_t vshrq_n_u8(uint8x16_t a, __constrange(1,8) int b); // VSHR.U8 q0,q0,#8
+_NEON2SSE_INLINE uint8x16_t vshrq_n_u8(uint8x16_t a, __constrange(1,8) int b) // VSHR.U8 q0,q0,#8
+{
+    //Logical right shift of every byte by b.
+    //A 16-bit shift is used, then the top b bits of each low byte (filled from the high byte) are cleared.
+    _NEON2SSE_ALIGN_16 uint16_t keep_bits[9] = {0xffff, 0xff7f, 0xff3f, 0xff1f, 0xff0f, 0xff07, 0xff03, 0xff01, 0xff00};
+    __m128i keep, shifted;
+    shifted = _mm_srli_epi16(a, b);
+    keep = _mm_set1_epi16(keep_bits[b]); //zeros exactly at the bit positions spoiled by the 16-bit shift
+    return _mm_and_si128(shifted, keep);
+}
+
+//Logical (zero-filling) right shift of each 16-bit lane: maps one-to-one onto the SSE2 intrinsic
+uint16x8_t vshrq_n_u16(uint16x8_t a, __constrange(1,16) int b); // VSHR.U16 q0,q0,#16
+#define vshrq_n_u16 _mm_srli_epi16
+
+//Logical (zero-filling) right shift of each 32-bit lane: maps one-to-one onto the SSE2 intrinsic
+uint32x4_t vshrq_n_u32(uint32x4_t a, __constrange(1,32) int b); // VSHR.U32 q0,q0,#32
+#define vshrq_n_u32 _mm_srli_epi32
+
+//Logical (zero-filling) right shift of each 64-bit lane: maps one-to-one onto the SSE2 intrinsic
+uint64x2_t vshrq_n_u64(uint64x2_t a, __constrange(1,64) int b); // VSHR.U64 q0,q0,#64
+#define vshrq_n_u64 _mm_srli_epi64
+
+//*************************** Vector shift left by constant *************************
+//*********************************************************************************
+
+//Left shift does not depend on signedness, so the signed byte variant reuses the unsigned implementation
+int8x16_t vshlq_n_s8(int8x16_t a, __constrange(0,7) int b); // VSHL.I8 q0,q0,#0
+#define vshlq_n_s8 vshlq_n_u8
+
+//Left shift of each 16-bit lane: maps one-to-one onto the SSE2 intrinsic
+int16x8_t vshlq_n_s16(int16x8_t a, __constrange(0,15) int b); // VSHL.I16 q0,q0,#0
+#define vshlq_n_s16 _mm_slli_epi16
+
+//Left shift of each 32-bit lane: maps one-to-one onto the SSE2 intrinsic
+int32x4_t vshlq_n_s32(int32x4_t a, __constrange(0,31) int b); // VSHL.I32 q0,q0,#0
+#define vshlq_n_s32 _mm_slli_epi32
+
+//Left shift of each 64-bit lane: maps one-to-one onto the SSE2 intrinsic
+int64x2_t vshlq_n_s64(int64x2_t a, __constrange(0,63) int b); // VSHL.I64 q0,q0,#0
+#define vshlq_n_s64 _mm_slli_epi64
+
+uint8x16_t vshlq_n_u8(uint8x16_t a, __constrange(0,7) int b); // VSHL.I8 q0,q0,#0
+_NEON2SSE_INLINE uint8x16_t vshlq_n_u8(uint8x16_t a, __constrange(0,7) int b)
+{
+    //Left shift of every byte by b.
+    //A 16-bit shift is used, then the low b bits of each high byte (received from the low byte) are cleared.
+    _NEON2SSE_ALIGN_16 uint16_t keep_bits[9] = {0xffff, 0xfeff, 0xfcff, 0xf8ff, 0xf0ff, 0xe0ff, 0xc0ff, 0x80ff, 0xff};
+    __m128i keep, shifted;
+    shifted = _mm_slli_epi16(a, b);
+    keep = _mm_set1_epi16(keep_bits[b]); //zeros exactly at the bit positions spoiled by the 16-bit shift
+    return _mm_and_si128(shifted, keep);
+}
+
+//Unsigned left shifts produce the same bits as the signed ones, so they forward to the signed names
+uint16x8_t vshlq_n_u16(uint16x8_t a, __constrange(0,15) int b); // VSHL.I16 q0,q0,#0
+#define vshlq_n_u16 vshlq_n_s16
+
+uint32x4_t vshlq_n_u32(uint32x4_t a, __constrange(0,31) int b); // VSHL.I32 q0,q0,#0
+#define vshlq_n_u32 vshlq_n_s32
+
+uint64x2_t vshlq_n_u64(uint64x2_t a, __constrange(0,63) int b); // VSHL.I64 q0,q0,#0
+#define vshlq_n_u64 vshlq_n_s64
+
+//************* Vector rounding shift right by constant ******************
+//*************************************************************************
+//No corresponding x86 intrinsics exist, need to do some tricks
+
+int8x16_t vrshrq_n_s8(int8x16_t a, __constrange(1,8) int b); // VRSHR.S8 q0,q0,#8
+_NEON2SSE_INLINE int8x16_t vrshrq_n_s8(int8x16_t a, __constrange(1,8) int b) // VRSHR.S8 q0,q0,#8
+{
+    //Rounding arithmetic right shift of every signed byte:
+    //truncating shift plus bit (b-1) of the source, the highest bit shifted out.
+    _NEON2SSE_ALIGN_16 uint16_t round_bit[9] = {0x0000, 0x0101, 0x0202, 0x0404, 0x0808, 0x1010, 0x2020, 0x4040, 0x8080}; //bit (b-1) set in both bytes
+    __m128i truncated, rounding;
+    rounding = _mm_and_si128(a, _mm_set1_epi16(round_bit[b])); //isolate bit (b-1) of every byte
+    rounding = _mm_srli_epi16(rounding, b - 1); //move it down to bit 0: 1 or 0 per byte
+    truncated = vshrq_n_s8(a, b);
+    return _mm_add_epi8(truncated, rounding); //actual rounding
+}
+
+int16x8_t vrshrq_n_s16(int16x8_t a, __constrange(1,16) int b); // VRSHR.S16 q0,q0,#16
+_NEON2SSE_INLINE int16x8_t vrshrq_n_s16(int16x8_t a, __constrange(1,16) int b) // VRSHR.S16 q0,q0,#16
+{
+    //Rounding arithmetic right shift: truncating shift plus bit (b-1) of the source
+    __m128i round_bit, truncated;
+    round_bit = _mm_srli_epi16(_mm_slli_epi16(a, (16 - b)), 15); //bit (b-1) pushed to the top, then down to bit 0: 1 or 0
+    truncated = _mm_srai_epi16(a, b);
+    return _mm_add_epi16(truncated, round_bit);
+}
+
+int32x4_t vrshrq_n_s32(int32x4_t a, __constrange(1,32) int b); // VRSHR.S32 q0,q0,#32
+_NEON2SSE_INLINE int32x4_t vrshrq_n_s32(int32x4_t a, __constrange(1,32) int b) // VRSHR.S32 q0,q0,#32
+{
+    //Rounding arithmetic right shift: truncating shift plus bit (b-1) of the source
+    __m128i round_bit, truncated;
+    round_bit = _mm_srli_epi32(_mm_slli_epi32(a, (32 - b)), 31); //bit (b-1) pushed to the top, then down to bit 0: 1 or 0
+    truncated = _mm_srai_epi32(a, b);
+    return _mm_add_epi32(truncated, round_bit);
+}
+
+int64x2_t vrshrq_n_s64(int64x2_t a, __constrange(1,64) int b); // VRSHR.S64 q0,q0,#64
+_NEON2SSE_INLINE int64x2_t vrshrq_n_s64(int64x2_t a, __constrange(1,64) int b)
+{
+    //Rounding arithmetic right shift: truncating shift plus bit (b-1) of the source.
+    //The solution may be not optimal compared with a serial one.
+    __m128i round_bit;
+    int64x2_t truncated;
+    round_bit = _mm_srli_epi64(_mm_slli_epi64(a, (64 - b)), 63); //bit (b-1) pushed to the top, then down to bit 0: 1 or 0
+    truncated = vshrq_n_s64(a, b);
+    return _mm_add_epi64(truncated, round_bit);
+}
+
+uint8x16_t vrshrq_n_u8(uint8x16_t a, __constrange(1,8) int b); // VRSHR.U8 q0,q0,#8
+_NEON2SSE_INLINE uint8x16_t vrshrq_n_u8(uint8x16_t a, __constrange(1,8) int b) // VRSHR.U8 q0,q0,#8
+{
+    //Rounding logical right shift of every byte:
+    //truncating shift plus bit (b-1) of the source, the highest bit shifted out.
+    _NEON2SSE_ALIGN_16 uint16_t round_bit[9] = {0x0000, 0x0101, 0x0202, 0x0404, 0x0808, 0x1010, 0x2020, 0x4040, 0x8080}; //bit (b-1) set in both bytes
+    __m128i truncated, rounding;
+    rounding = _mm_and_si128(a, _mm_set1_epi16(round_bit[b])); //isolate bit (b-1) of every byte
+    rounding = _mm_srli_epi16(rounding, b - 1); //move it down to bit 0: 1 or 0 per byte
+    truncated = vshrq_n_u8(a, b);
+    return _mm_add_epi8(truncated, rounding); //actual rounding
+}
+
+uint16x8_t vrshrq_n_u16(uint16x8_t a, __constrange(1,16) int b); // VRSHR.U16 q0,q0,#16
+_NEON2SSE_INLINE uint16x8_t vrshrq_n_u16(uint16x8_t a, __constrange(1,16) int b) // VRSHR.U16 q0,q0,#16
+{
+    //Rounding logical right shift: truncating shift plus bit (b-1) of the source
+    __m128i round_bit, truncated;
+    round_bit = _mm_srli_epi16(_mm_slli_epi16(a, (16 - b)), 15); //bit (b-1) pushed to the top, then down to bit 0: 1 or 0
+    truncated = _mm_srli_epi16(a, b);
+    return _mm_add_epi16(truncated, round_bit);
+}
+
+uint32x4_t vrshrq_n_u32(uint32x4_t a, __constrange(1,32) int b); // VRSHR.U32 q0,q0,#32
+_NEON2SSE_INLINE uint32x4_t vrshrq_n_u32(uint32x4_t a, __constrange(1,32) int b) // VRSHR.U32 q0,q0,#32
+{
+    //Rounding logical right shift: truncating shift plus bit (b-1) of the source
+    __m128i round_bit, truncated;
+    round_bit = _mm_srli_epi32(_mm_slli_epi32(a, (32 - b)), 31); //bit (b-1) pushed to the top, then down to bit 0: 1 or 0
+    truncated = _mm_srli_epi32(a, b);
+    return _mm_add_epi32(truncated, round_bit);
+}
+
+uint64x2_t vrshrq_n_u64(uint64x2_t a, __constrange(1,64) int b); // VRSHR.U64 q0,q0,#64
+_NEON2SSE_INLINE uint64x2_t vrshrq_n_u64(uint64x2_t a, __constrange(1,64) int b)
+{
+    //Rounding logical right shift: truncating shift plus bit (b-1) of the source.
+    //The solution may be not optimal compared with a serial one.
+    __m128i round_bit, truncated;
+    round_bit = _mm_srli_epi64(_mm_slli_epi64(a, (64 - b)), 63); //bit (b-1) pushed to the top, then down to bit 0: 1 or 0
+    truncated = _mm_srli_epi64(a, b);
+    return _mm_add_epi64(truncated, round_bit);
+}
+
+//************* Vector shift right by constant and accumulate *********
+//*********************************************************************
+
+int8x16_t vsraq_n_s8(int8x16_t a, int8x16_t b, __constrange(1,8) int c); // VSRA.S8 q0,q0,#8
+_NEON2SSE_INLINE int8x16_t vsraq_n_s8(int8x16_t a, int8x16_t b, __constrange(1,8) int c) // VSRA.S8 q0,q0,#8
+{
+    //accumulate: a + (b >> c), arithmetic shift per signed byte
+    return vaddq_s8(a, vshrq_n_s8(b, c));
+}
+
+int16x8_t vsraq_n_s16(int16x8_t a, int16x8_t b, __constrange(1,16) int c); // VSRA.S16 q0,q0,#16
+_NEON2SSE_INLINE int16x8_t vsraq_n_s16(int16x8_t a, int16x8_t b, __constrange(1,16) int c) // VSRA.S16 q0,q0,#16
+{
+    //accumulate: a + (b >> c), arithmetic shift per signed 16-bit lane
+    return vaddq_s16(a, vshrq_n_s16(b, c));
+}
+
+int32x4_t vsraq_n_s32(int32x4_t a, int32x4_t b, __constrange(1,32) int c); // VSRA.S32 q0,q0,#32
+_NEON2SSE_INLINE int32x4_t vsraq_n_s32(int32x4_t a, int32x4_t b, __constrange(1,32) int c) // VSRA.S32 q0,q0,#32
+{
+    //accumulate: a + (b >> c), arithmetic shift per signed 32-bit lane
+    return vaddq_s32(a, vshrq_n_s32(b, c));
+}
+
+int64x2_t vsraq_n_s64(int64x2_t a, int64x2_t b, __constrange(1,64) int c); // VSRA.S64 q0,q0,#64
+_NEON2SSE_INLINE int64x2_t vsraq_n_s64(int64x2_t a, int64x2_t b, __constrange(1,64) int c) // VSRA.S64 q0,q0,#64
+{
+    //accumulate: a + (b >> c), arithmetic shift per signed 64-bit lane
+    return vaddq_s64(a, vshrq_n_s64(b, c));
+}
+
+uint8x16_t vsraq_n_u8(uint8x16_t a, uint8x16_t b, __constrange(1,8) int c); // VSRA.U8 q0,q0,#8
+_NEON2SSE_INLINE uint8x16_t vsraq_n_u8(uint8x16_t a, uint8x16_t b, __constrange(1,8) int c) // VSRA.U8 q0,q0,#8
+{
+    //accumulate: a + (b >> c), logical shift per unsigned byte
+    return vaddq_u8(a, vshrq_n_u8(b, c));
+}
+
+uint16x8_t vsraq_n_u16(uint16x8_t a, uint16x8_t b, __constrange(1,16) int c); // VSRA.U16 q0,q0,#16
+_NEON2SSE_INLINE uint16x8_t vsraq_n_u16(uint16x8_t a, uint16x8_t b, __constrange(1,16) int c) // VSRA.U16 q0,q0,#16
+{
+    //accumulate: a + (b >> c), logical shift per unsigned 16-bit lane
+    return vaddq_u16(a, vshrq_n_u16(b, c));
+}
+
+uint32x4_t vsraq_n_u32(uint32x4_t a, uint32x4_t b, __constrange(1,32) int c); // VSRA.U32 q0,q0,#32
+_NEON2SSE_INLINE uint32x4_t vsraq_n_u32(uint32x4_t a, uint32x4_t b, __constrange(1,32) int c) // VSRA.U32 q0,q0,#32
+{
+    //accumulate: a + (b >> c), logical shift per unsigned 32-bit lane
+    return vaddq_u32(a, vshrq_n_u32(b, c));
+}
+
+uint64x2_t vsraq_n_u64(uint64x2_t a, uint64x2_t b, __constrange(1,64) int c); // VSRA.U64 q0,q0,#64
+_NEON2SSE_INLINE uint64x2_t vsraq_n_u64(uint64x2_t a, uint64x2_t b, __constrange(1,64) int c) // VSRA.U64 q0,q0,#64
+{
+    //accumulate: a + (b >> c), logical shift per unsigned 64-bit lane
+    return vaddq_u64(a, vshrq_n_u64(b, c));
+}
+
+//************* Vector rounding shift right by constant and accumulate ****************************
+//************************************************************************************************
+
+int8x16_t vrsraq_n_s8(int8x16_t a, int8x16_t b, __constrange(1,8) int c); // VRSRA.S8 q0,q0,#8
+_NEON2SSE_INLINE int8x16_t vrsraq_n_s8(int8x16_t a, int8x16_t b, __constrange(1,8) int c) // VRSRA.S8 q0,q0,#8
+{
+    //accumulate: a + rounding_shift_right(b, c) per signed byte
+    return vaddq_s8(a, vrshrq_n_s8(b, c));
+}
+
+int16x8_t vrsraq_n_s16(int16x8_t a, int16x8_t b, __constrange(1,16) int c); // VRSRA.S16 q0,q0,#16
+_NEON2SSE_INLINE int16x8_t vrsraq_n_s16(int16x8_t a, int16x8_t b, __constrange(1,16) int c) // VRSRA.S16 q0,q0,#16
+{
+    //accumulate: a + rounding_shift_right(b, c) per signed 16-bit lane
+    return vaddq_s16(a, vrshrq_n_s16(b, c));
+}
+
+int32x4_t vrsraq_n_s32(int32x4_t a, int32x4_t b, __constrange(1,32) int c); // VRSRA.S32 q0,q0,#32
+_NEON2SSE_INLINE int32x4_t vrsraq_n_s32(int32x4_t a, int32x4_t b, __constrange(1,32) int c) // VRSRA.S32 q0,q0,#32
+{
+    //accumulate: a + rounding_shift_right(b, c) per signed 32-bit lane
+    return vaddq_s32(a, vrshrq_n_s32(b, c));
+}
+
+int64x2_t vrsraq_n_s64(int64x2_t a, int64x2_t b, __constrange(1,64) int c); // VRSRA.S64 q0,q0,#64
+_NEON2SSE_INLINE int64x2_t vrsraq_n_s64(int64x2_t a, int64x2_t b, __constrange(1,64) int c)
+{
+    //accumulate: a + rounding_shift_right(b, c) per signed 64-bit lane
+    return vaddq_s64(a, vrshrq_n_s64(b, c));
+}
+
+uint8x16_t vrsraq_n_u8(uint8x16_t a, uint8x16_t b, __constrange(1,8) int c); // VRSRA.U8 q0,q0,#8
+_NEON2SSE_INLINE uint8x16_t vrsraq_n_u8(uint8x16_t a, uint8x16_t b, __constrange(1,8) int c) // VRSRA.U8 q0,q0,#8
+{
+    //accumulate: a + rounding_shift_right(b, c) per unsigned byte
+    return vaddq_u8(a, vrshrq_n_u8(b, c));
+}
+
+uint16x8_t vrsraq_n_u16(uint16x8_t a, uint16x8_t b, __constrange(1,16) int c); // VRSRA.U16 q0,q0,#16
+_NEON2SSE_INLINE uint16x8_t vrsraq_n_u16(uint16x8_t a, uint16x8_t b, __constrange(1,16) int c) // VRSRA.U16 q0,q0,#16
+{
+    //accumulate: a + rounding_shift_right(b, c) per unsigned 16-bit lane
+    return vaddq_u16(a, vrshrq_n_u16(b, c));
+}
+
+uint32x4_t vrsraq_n_u32(uint32x4_t a, uint32x4_t b, __constrange(1,32) int c); // VRSRA.U32 q0,q0,#32
+_NEON2SSE_INLINE uint32x4_t vrsraq_n_u32(uint32x4_t a, uint32x4_t b, __constrange(1,32) int c) // VRSRA.U32 q0,q0,#32
+{
+    //accumulate: a + rounding_shift_right(b, c) per unsigned 32-bit lane
+    return vaddq_u32(a, vrshrq_n_u32(b, c));
+}
+
+uint64x2_t vrsraq_n_u64(uint64x2_t a, uint64x2_t b, __constrange(1,64) int c); // VRSRA.U64 q0,q0,#64
+_NEON2SSE_INLINE uint64x2_t vrsraq_n_u64(uint64x2_t a, uint64x2_t b, __constrange(1,64) int c)
+{
+    //accumulate: a + rounding_shift_right(b, c) per unsigned 64-bit lane
+    return vaddq_u64(a, vrshrq_n_u64(b, c));
+}
+
+//**********************Vector saturating shift left by constant *****************************
+//********************************************************************************************
+//we don't check const ranges assuming they are met
+
+int8x16_t vqshlq_n_s8(int8x16_t a, __constrange(0,7) int b); // VQSHL.S8 q0,q0,#0
+_NEON2SSE_INLINE int8x16_t vqshlq_n_s8(int8x16_t a, __constrange(0,7) int b) // VQSHL.S8 q0,q0,#0
+{
+    //Saturating left shift of signed bytes: widen to 16 bit, shift there (no overflow possible for b<=7),
+    //and let the signed pack instruction perform the saturation back to 8 bit.
+    __m128i half, lo16, hi16;
+    half = _MM_CVTEPI8_EPI16 (a); //SSE 4.1, lower 8 bytes widened
+    lo16 = _mm_slli_epi16 (half, b);
+    half = _mm_shuffle_epi32 (a, _SWAP_HI_LOW32); //bring the upper 8 bytes down for the second conversion
+    half = _MM_CVTEPI8_EPI16 (half);
+    hi16 = _mm_slli_epi16 (half, b);
+    return _mm_packs_epi16 (lo16, hi16); //saturated s8
+}
+
+int16x8_t vqshlq_n_s16(int16x8_t a, __constrange(0,15) int b); // VQSHL.S16 q0,q0,#0
+_NEON2SSE_INLINE int16x8_t vqshlq_n_s16(int16x8_t a, __constrange(0,15) int b) // VQSHL.S16 q0,q0,#0
+{
+    //Saturating left shift of signed 16-bit lanes: widen to 32 bit, shift there (no overflow possible for b<=15),
+    //and let the signed pack instruction perform the saturation back to 16 bit.
+    //A manual saturation solution looks LESS optimal than this 32 bit conversion one.
+    __m128i half, lo32, hi32;
+    half = _MM_CVTEPI16_EPI32 (a); //SSE 4.1, lower 4 lanes widened
+    lo32 = _mm_slli_epi32 (half, b);
+    half = _mm_shuffle_epi32 (a, _SWAP_HI_LOW32); //bring the upper 4 lanes down for the second conversion
+    half = _MM_CVTEPI16_EPI32 (half);
+    hi32 = _mm_slli_epi32 (half, b);
+    return _mm_packs_epi32 (lo32, hi32); //saturated s16
+}
+
+//Saturating left shift of each signed 32-bit lane by b.
+//Lanes whose shifted value would not fit are replaced by 0x7fffffff (positive) or 0x80000000 (negative).
+int32x4_t vqshlq_n_s32(int32x4_t a, __constrange(0,31) int b); // VQSHL.S32 q0,q0,#0
+_NEON2SSE_INLINE int32x4_t vqshlq_n_s32(int32x4_t a, __constrange(0,31) int b) // VQSHL.S32 q0,q0,#0
+{ // no 64 bit saturation option available, special tricks necessary
+ __m128i c1, maskA, saturation_mask, c7ffffff_mask, shift_res, shift_res_mask;
+ c1 = _mm_cmpeq_epi32(a,a); //0xff..ff
+ maskA = _mm_srli_epi32(c1, b + 1); //largest positive input that still fits: (b+1) zeros followed by (31-b) ones
+ saturation_mask = _mm_cmpgt_epi32 (a, maskA); //0xff...ff if we need saturation, 0 otherwise
+ c7ffffff_mask = _mm_srli_epi32(saturation_mask, 1); //saturated to 0x7f..ff when needed and zeros if not
+ shift_res = _mm_slli_epi32 (a, b);
+ shift_res_mask = _mm_andnot_si128(saturation_mask, shift_res); //plain shift result kept only for non-saturated lanes
+ //result with positive numbers saturated
+ shift_res = _mm_or_si128 (c7ffffff_mask, shift_res_mask);
+ //treat negative numbers
+ maskA = _mm_slli_epi32(c1, 31 - b); //smallest negative input that still fits: (b+1) ones followed by (31-b) zeros
+ saturation_mask = _mm_cmpgt_epi32 (maskA,a); //0xff...ff if we need saturation, 0 otherwise
+ c7ffffff_mask = _mm_slli_epi32(saturation_mask, 31); //saturated to 0x80..00 when needed and zeros if not (variable name is reused)
+ shift_res_mask = _mm_andnot_si128(saturation_mask, shift_res);
+ return _mm_or_si128 (c7ffffff_mask, shift_res_mask);
+}
+
+int64x2_t vqshlq_n_s64(int64x2_t a, __constrange(0,63) int b); // VQSHL.S64 q0,q0,#0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int64x2_t vqshlq_n_s64(int64x2_t a, __constrange(0,63) int b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    //Saturating left shift of each signed 64-bit lane by b (0..63); no effective SIMD solution here, done serially.
+    //A lane saturates to the largest/smallest int64 value when the shifted result would not be representable.
+    //The input limits are derived with right shifts so that b = 0 is handled correctly:
+    //the former "1 << (63 - b)" overflowed for b = 0 and made every lane saturate.
+    _NEON2SSE_ALIGN_16 int64_t atmp[2], res[2];
+    int64_t max_in, min_in;
+    int i;
+    max_in = (int64_t)((~(uint64_t)0 >> 1) >> b); //largest input whose shifted value still fits: INT64_MAX >> b
+    min_in = -max_in - 1; //smallest input whose shifted value still fits: -(2^(63-b))
+    _mm_store_si128((__m128i*)atmp, a);
+    for (i = 0; i<2; i++) {
+        if (atmp[i] > max_in) {
+            res[i] = ~(_SIGNBIT64); //positive saturation
+        } else if (atmp[i] < min_in) {
+            res[i] = _SIGNBIT64; //negative saturation
+        } else {
+            res[i] = (int64_t)((uint64_t)atmp[i] << b); //shift on the unsigned representation, well defined for negative inputs too
+        }
+    }
+    return _mm_load_si128((__m128i*)res);
+}
+
+uint8x16_t vqshlq_n_u8(uint8x16_t a, __constrange(0,7) int b); // VQSHL.U8 q0,q0,#0
+_NEON2SSE_INLINE uint8x16_t vqshlq_n_u8(uint8x16_t a, __constrange(0,7) int b) // VQSHL.U8 q0,q0,#0
+{
+    //Saturating left shift of unsigned bytes: zero-extend to 16 bit, shift there,
+    //and let the unsigned pack instruction perform the saturation back to 8 bit.
+    __m128i half, lo16, hi16;
+    half = _MM_CVTEPU8_EPI16 (a); //SSE 4.1, lower 8 bytes widened
+    lo16 = _mm_slli_epi16 (half, b);
+    half = _mm_shuffle_epi32 (a, _SWAP_HI_LOW32); //bring the upper 8 bytes down for the second conversion
+    half = _MM_CVTEPU8_EPI16 (half);
+    hi16 = _mm_slli_epi16 (half, b);
+    return _mm_packus_epi16 (lo16, hi16); //saturated u8
+}
+
+uint16x8_t vqshlq_n_u16(uint16x8_t a, __constrange(0,15) int b); // VQSHL.U16 q0,q0,#0
+_NEON2SSE_INLINE uint16x8_t vqshlq_n_u16(uint16x8_t a, __constrange(0,15) int b) // VQSHL.U16 q0,q0,#0
+{
+    //Saturating left shift of unsigned 16-bit lanes with manual saturation
+    //(looks more optimal than the 32 bits conversion one).
+    //SSE offers only a signed 16-bit compare, so both the value and the limit are biased by 0x8000 first.
+    __m128i bias, limit, biased, overflow;
+    bias = _mm_set1_epi16 (0x8000);
+    limit = _mm_set1_epi16((1 << (16 - b)) - 1 - 0x8000 ); //largest input that does not overflow, biased
+    biased = _mm_sub_epi16(a, bias); //go to signed
+    overflow = _mm_cmpgt_epi16 (biased, limit); //all ones in the lanes that must saturate
+    return _mm_or_si128 (_mm_slli_epi16 (a, b), overflow); //or-ing with all ones yields 0xffff
+}
+
+uint32x4_t vqshlq_n_u32(uint32x4_t a, __constrange(0,31) int b); // VQSHL.U32 q0,q0,#0
+_NEON2SSE_INLINE uint32x4_t vqshlq_n_u32(uint32x4_t a, __constrange(0,31) int b) // VQSHL.U32 q0,q0,#0
+{
+    //Saturating left shift of each unsigned 32-bit lane by b (0..31): lanes that would overflow become 0xffffffff.
+    //Manual saturation solution, no 64 bit saturation option, the serial version may be faster.
+    //The overflow limit is computed as 0xffffffff >> b rather than (1 << (32 - b)) - 1, because
+    //"1 << 32" is undefined for b = 0 and made the comparison saturate lanes that need no saturation.
+    __m128i cb, c80000000, a_signed, saturation_mask, shift_res;
+    cb = _mm_set1_epi32((int)((0xffffffffu >> b) ^ 0x80000000u)); //largest non-overflowing input, biased by 0x80000000
+    c80000000 = _mm_set1_epi32 ((int)0x80000000u);
+    //no unsigned ints comparison in SSE, only signed available, so need the trick
+    a_signed = _mm_sub_epi32(a, c80000000); //go to signed
+    saturation_mask = _mm_cmpgt_epi32 (a_signed, cb); //all ones in the lanes that must saturate
+    shift_res = _mm_slli_epi32 (a, b);
+    return _mm_or_si128 (shift_res, saturation_mask);
+}
+
+uint64x2_t vqshlq_n_u64(uint64x2_t a, __constrange(0,63) int b); // VQSHL.U64 q0,q0,#0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint64x2_t vqshlq_n_u64(uint64x2_t a, __constrange(0,63) int b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    //Saturating left shift of each unsigned 64-bit lane by b (0..63); no effective SIMD solution here, done serially.
+    //Lanes that would overflow become 0xffffffffffffffff.
+    //The limit is the largest input that still fits, obtained with a right shift; this avoids the
+    //undefined "1 << 64" of the previous formulation when b = 0 (for b = 0 no lane can exceed the limit).
+    _NEON2SSE_ALIGN_16 uint64_t atmp[2], res[2];
+    uint64_t limit;
+    int i;
+    limit = ~((uint64_t)0) >> b;
+    _mm_store_si128((__m128i*)atmp, a);
+    for (i = 0; i<2; i++) {
+        res[i] = (atmp[i] > limit) ? 0xffffffffffffffff : atmp[i] << b;
+    }
+    return _mm_load_si128((__m128i*)res);
+}
+
+//**************Vector signed->unsigned saturating shift left by constant *************
+//*************************************************************************************
+
+uint8x16_t vqshluq_n_s8(int8x16_t a, __constrange(0,7) int b); // VQSHLU.S8 q0,q0,#0
+_NEON2SSE_INLINE uint8x16_t vqshluq_n_s8(int8x16_t a, __constrange(0,7) int b) // VQSHLU.S8 q0,q0,#0
+{
+    //Signed input, unsigned saturated output: sign-extend to 16 bit, shift there,
+    //then the unsigned pack clamps negative values to 0 and too large ones to 0xff.
+    __m128i half, lo16, hi16;
+    half = _MM_CVTEPI8_EPI16 (a); //SSE 4.1, lower 8 bytes widened
+    lo16 = _mm_slli_epi16 (half, b);
+    half = _mm_shuffle_epi32 (a, _SWAP_HI_LOW32); //bring the upper 8 bytes down for the second conversion
+    half = _MM_CVTEPI8_EPI16 (half);
+    hi16 = _mm_slli_epi16 (half, b);
+    return _mm_packus_epi16 (lo16, hi16); //saturated u8
+}
+
+#if defined(USE_SSSE3)
+uint16x8_t vqshluq_n_s16(int16x8_t a, __constrange(0,15) int b); // VQSHLU.S16 q0,q0,#0
+_NEON2SSE_INLINE uint16x8_t vqshluq_n_s16(int16x8_t a, __constrange(0,15) int b) // VQSHLU.S16 q0,q0,#0
+{
+    //Signed input, unsigned saturated output: sign-extend to 32 bit, shift there,
+    //then the unsigned pack clamps the result into the u16 range.
+    //A manual saturation solution looks LESS optimal than this 32 bit conversion one.
+    __m128i half, lo32, hi32;
+    half = _MM_CVTEPI16_EPI32 (a); //SSE 4.1, lower 4 lanes widened
+    lo32 = _mm_slli_epi32 (half, b);
+    half = _mm_shuffle_epi32 (a, _SWAP_HI_LOW32); //bring the upper 4 lanes down for the second conversion
+    half = _MM_CVTEPI16_EPI32 (half);
+    hi32 = _mm_slli_epi32 (half, b);
+    return _MM_PACKUS_EPI32 (lo32, hi32); //saturated u16
+}
+#endif
+
+uint32x4_t vqshluq_n_s32(int32x4_t a, __constrange(0,31) int b); // VQSHLU.S32 q0,q0,#0
+_NEON2SSE_INLINE uint32x4_t vqshluq_n_s32(int32x4_t a, __constrange(0,31) int b) // VQSHLU.S32 q0,q0,#0
+{
+    //Signed input, unsigned saturated output: negative lanes give 0, lanes that would overflow give 0xffffffff.
+    //The solution may be not optimal compared with the serial one.
+    __m128i zero, positive, clamped, high_bits, overflow;
+    zero = _mm_setzero_si128();
+    //saturate negative numbers to zero
+    positive = _mm_cmpgt_epi32 (a, zero); //0xffffffff for positive lanes, zero otherwise
+    clamped = _mm_and_si128 (a, positive); //negative lanes are zeros now
+    //saturate positive numbers to 0xffffffff: overflow happens when any of the top b bits is set
+    high_bits = _mm_slli_epi32(_mm_cmpeq_epi32(a, a), (32 - b)); // b ones and (32-b) zeros
+    overflow = _mm_and_si128 (clamped, high_bits);
+    overflow = _mm_cmpgt_epi32 (overflow, zero); //0xffffffff if saturation necessary, 0 otherwise
+    return _mm_or_si128 (_mm_slli_epi32 (clamped, b), overflow); //actual saturation
+}
+
+uint64x2_t vqshluq_n_s64(int64x2_t a, __constrange(0,63) int b); // VQSHLU.S64 q0,q0,#0
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint64x2_t vqshluq_n_s64(int64x2_t a, __constrange(0,63) int b), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+    //Signed input, unsigned saturated output for each 64-bit lane; no effective SIMD solution here, serial execution looks faster.
+    //Lanes <= 0 give 0, lanes whose shifted value exceeds 64 bits give 0xffffffffffffffff.
+    //The limit is the largest input that still fits, obtained with a right shift; the previous
+    //"1 << (64 - b)" was undefined for b = 0 and could saturate lanes that need no saturation.
+    _NEON2SSE_ALIGN_16 int64_t atmp[2];
+    _NEON2SSE_ALIGN_16 uint64_t res[2];
+    uint64_t limit, value;
+    int i;
+    limit = ~((uint64_t)0) >> b;
+    _mm_store_si128((__m128i*)atmp, a);
+    for (i = 0; i<2; i++) {
+        if (atmp[i]<=0) {
+            res[i] = 0;
+        } else {
+            value = (uint64_t)atmp[i]; //shift as unsigned: the result may legitimately use bit 63
+            res[i] = (value > limit) ? ~((uint64_t)0) : value << b;
+        }
+    }
+    return _mm_load_si128((__m128i*)res);
+}
+
+//************** Vector narrowing shift right by constant **************
+//**********************************************************************
+
+//************** Vector signed->unsigned narrowing saturating shift right by constant ********
+//*********************************************************************************************
+
+//**** Vector signed->unsigned rounding narrowing saturating shift right by constant *****
+
+//***** Vector narrowing saturating shift right by constant ******
+//*****************************************************************
+
+//********* Vector rounding narrowing shift right by constant *************************
+//****************************************************************************************
+
+//************* Vector rounding narrowing saturating shift right by constant ************
+//****************************************************************************************
+
+//************** Vector widening shift left by constant ****************
+//************************************************************************
+
+//************************************************************************************
+//**************************** Shifts with insert ************************************
+//************************************************************************************
+//takes each element in a vector, shifts them by an immediate value,
+//and inserts the results in the destination vector. Bits shifted out of the each element are lost.
+
+//**************** Vector shift right and insert ************************************
+//Actually the "c" left bits from "a" are the only bits remained from "a" after the shift.
+//All other bits are taken from b shifted.
+
+int8x16_t vsriq_n_s8(int8x16_t a, int8x16_t b, __constrange(1,8) int c); // VSRI.8 q0,q0,#8
+_NEON2SSE_INLINE int8x16_t vsriq_n_s8(int8x16_t a, int8x16_t b, __constrange(1,8) int c) // VSRI.8 q0,q0,#8
+{
+    //Shift right and insert per byte: the top c bits come from a, the remaining bits from b >> c
+    _NEON2SSE_ALIGN_16 uint8_t keep_top[9] = {0x0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff}; //"a" bits mask, entry 0 not used
+    __m128i kept_a;
+    uint8x16_t inserted;
+    inserted = vshrq_n_u8( b, c); //logical shift leaves c zeros on the left of b
+    kept_a = _mm_and_si128 (a, _mm_set1_epi8(keep_top[c])); //c ones and (8-c) zeros applied to a
+    return _mm_or_si128 (kept_a, inserted); //combine (insert b into a)
+}
+
+int16x8_t vsriq_n_s16(int16x8_t a, int16x8_t b, __constrange(1,16) int c); // VSRI.16 q0,q0,#16
+_NEON2SSE_INLINE int16x8_t vsriq_n_s16(int16x8_t a, int16x8_t b, __constrange(1,16) int c) // VSRI.16 q0,q0,#16
+{
+    //Shift right and insert per 16-bit lane: the top c bits come from a, the remaining bits from b >> c.
+    //The top c bits of a are isolated by shifting right by (16-c) and back left by the same amount.
+    uint16x8_t kept_a, inserted;
+    kept_a = vshrq_n_u16( a, (16 - c));
+    kept_a = _mm_slli_epi16(kept_a, (16 - c)); //low (16-c) bits of a are zeros now
+    inserted = vshrq_n_u16( b, c); //logical shift leaves c zeros on the left of b
+    return _mm_or_si128 (kept_a, inserted); //combine (insert b into a)
+}
+
+int32x4_t vsriq_n_s32(int32x4_t a, int32x4_t b, __constrange(1,32) int c); // VSRI.32 q0,q0,#32
+_NEON2SSE_INLINE int32x4_t vsriq_n_s32(int32x4_t a, int32x4_t b, __constrange(1,32) int c) // VSRI.32 q0,q0,#32
+{
+    //Shift right and insert per 32-bit lane: the top c bits come from a, the remaining bits from b >> c.
+    //The top c bits of a are isolated by shifting right by (32-c) and back left by the same amount.
+    uint32x4_t kept_a, inserted;
+    kept_a = vshrq_n_u32( a, (32 - c));
+    kept_a = _mm_slli_epi32(kept_a, (32 - c)); //low (32-c) bits of a are zeros now
+    inserted = vshrq_n_u32( b, c); //logical shift leaves c zeros on the left of b
+    return _mm_or_si128 (kept_a, inserted); //combine (insert b into a)
+}
+
+int64x2_t vsriq_n_s64(int64x2_t a, int64x2_t b, __constrange(1,64) int c); // VSRI.64 q0,q0,#64
+_NEON2SSE_INLINE int64x2_t vsriq_n_s64(int64x2_t a, int64x2_t b, __constrange(1,64) int c)
+{
+    //Shift right and insert per 64-bit lane: the top c bits come from a, the remaining bits from b >> c.
+    //The top c bits of a are isolated by shifting right by (64-c) and back left by the same amount.
+    //A serial solution may be faster.
+    uint64x2_t kept_a, inserted;
+    kept_a = _mm_srli_epi64(a, (64 - c));
+    kept_a = _mm_slli_epi64(kept_a, (64 - c)); //low (64-c) bits of a are zeros now
+    inserted = _mm_srli_epi64(b, c); //logical shift leaves c zeros on the left of b
+    return _mm_or_si128 (kept_a, inserted); //combine (insert b into a)
+}
+
+//Shift-right-and-insert works on raw bits, so the unsigned and polynomial variants
+//forward to the signed implementation of the same lane width
+uint8x16_t vsriq_n_u8(uint8x16_t a, uint8x16_t b, __constrange(1,8) int c); // VSRI.8 q0,q0,#8
+#define vsriq_n_u8 vsriq_n_s8
+
+uint16x8_t vsriq_n_u16(uint16x8_t a, uint16x8_t b, __constrange(1,16) int c); // VSRI.16 q0,q0,#16
+#define vsriq_n_u16 vsriq_n_s16
+
+uint32x4_t vsriq_n_u32(uint32x4_t a, uint32x4_t b, __constrange(1,32) int c); // VSRI.32 q0,q0,#32
+#define vsriq_n_u32 vsriq_n_s32
+
+uint64x2_t vsriq_n_u64(uint64x2_t a, uint64x2_t b, __constrange(1,64) int c); // VSRI.64 q0,q0,#64
+#define vsriq_n_u64 vsriq_n_s64
+
+poly8x16_t vsriq_n_p8(poly8x16_t a, poly8x16_t b, __constrange(1,8) int c); // VSRI.8 q0,q0,#8
+#define vsriq_n_p8 vsriq_n_u8
+
+poly16x8_t vsriq_n_p16(poly16x8_t a, poly16x8_t b, __constrange(1,16) int c); // VSRI.16 q0,q0,#16
+#define vsriq_n_p16 vsriq_n_u16
+
+//***** Vector shift left and insert *********************************************
+//*********************************************************************************
+//The "c" rightmost bits of "a" are the only bits of "a" that remain after the operation.
+//All other bits are taken from b shifted left; the shift fills the low bits of b with zeros. We need to combine "a" and "b shifted".
+
+int8x16_t vsliq_n_s8(int8x16_t a, int8x16_t b, __constrange(0,7) int c); // VSLI.8 q0,q0,#0
+_NEON2SSE_INLINE int8x16_t vsliq_n_s8(int8x16_t a, int8x16_t b, __constrange(0,7) int c) // VSLI.8 q0,q0,#0
+{
+    //Shift left and insert per byte: the low c bits come from a, the remaining bits from b << c
+    _NEON2SSE_ALIGN_16 uint8_t keep_low[8] = {0x0, 0x1, 0x3, 0x7, 0x0f, 0x1f, 0x3f, 0x7f}; //"a" bits mask
+    __m128i kept_a;
+    int8x16_t inserted;
+    kept_a = _mm_and_si128 (a, _mm_set1_epi8(keep_low[c])); //(8-c) zeros and c ones applied to a
+    inserted = vshlq_n_s8( b, c);
+    return _mm_or_si128 (inserted, kept_a); //combine (insert b into a)
+}
+
+int16x8_t vsliq_n_s16(int16x8_t a, int16x8_t b, __constrange(0,15) int c); // VSLI.16 q0,q0,#0
+_NEON2SSE_INLINE int16x8_t vsliq_n_s16(int16x8_t a, int16x8_t b, __constrange(0,15) int c) // VSLI.16 q0,q0,#0
+{
+    //Shift left and insert per 16-bit lane: the low c bits come from a, the remaining bits from b << c.
+    //The low c bits of a are isolated by shifting left by (16-c) and logically back right by the same amount.
+    int16x8_t kept_a, inserted;
+    kept_a = vshlq_n_s16( a, (16 - c));
+    kept_a = _mm_srli_epi16(kept_a, (16 - c)); //top (16-c) bits of a are zeros now
+    inserted = vshlq_n_s16( b, c);
+    return _mm_or_si128 (inserted, kept_a); //combine (insert b into a)
+}
+
+int32x4_t vsliq_n_s32(int32x4_t a, int32x4_t b, __constrange(0,31) int c); // VSLI.32 q0,q0,#0
+_NEON2SSE_INLINE int32x4_t vsliq_n_s32(int32x4_t a, int32x4_t b, __constrange(0,31) int c) // VSLI.32 q0,q0,#0
+{
+    //Shift left and insert per 32-bit lane: the low c bits come from a, the remaining bits from b << c.
+    //The low c bits of a are isolated by shifting left by (32-c) and logically back right by the same amount.
+    //The solution may be not optimal compared with the serial one.
+    int32x4_t kept_a, inserted;
+    kept_a = vshlq_n_s32( a, (32 - c));
+    kept_a = _mm_srli_epi32(kept_a, (32 - c)); //top (32-c) bits of a are zeros now
+    inserted = vshlq_n_s32( b, c);
+    return _mm_or_si128 (inserted, kept_a); //combine (insert b into a)
+}
+
+int64x2_t vsliq_n_s64(int64x2_t a, int64x2_t b, __constrange(0,63) int c); // VSLI.64 q0,q0,#0
+_NEON2SSE_INLINE int64x2_t vsliq_n_s64(int64x2_t a, int64x2_t b, __constrange(0,63) int c) // VSLI.64 q0,q0,#0
+{
+    //Shift left and insert per 64-bit lane: the low c bits come from a, the remaining bits from b << c.
+    //The low c bits of a are isolated by shifting left by (64-c) and logically back right by the same amount.
+    //The solution may be not optimal compared with the serial one.
+    int64x2_t kept_a, inserted;
+    kept_a = vshlq_n_s64( a, (64 - c));
+    kept_a = _mm_srli_epi64(kept_a, (64 - c)); //top (64-c) bits of a are zeros now
+    inserted = vshlq_n_s64( b, c);
+    return _mm_or_si128 (inserted, kept_a); //combine (insert b into a)
+}
+
+//Shift-left-and-insert works on raw bits, so the unsigned and polynomial variants
+//forward to the signed implementation of the same lane width
+uint8x16_t vsliq_n_u8(uint8x16_t a, uint8x16_t b, __constrange(0,7) int c); // VSLI.8 q0,q0,#0
+#define vsliq_n_u8 vsliq_n_s8
+
+uint16x8_t vsliq_n_u16(uint16x8_t a, uint16x8_t b, __constrange(0,15) int c); // VSLI.16 q0,q0,#0
+#define vsliq_n_u16 vsliq_n_s16
+
+uint32x4_t vsliq_n_u32(uint32x4_t a, uint32x4_t b, __constrange(0,31) int c); // VSLI.32 q0,q0,#0
+#define vsliq_n_u32 vsliq_n_s32
+
+uint64x2_t vsliq_n_u64(uint64x2_t a, uint64x2_t b, __constrange(0,63) int c); // VSLI.64 q0,q0,#0
+#define vsliq_n_u64 vsliq_n_s64
+
+poly8x16_t vsliq_n_p8(poly8x16_t a, poly8x16_t b, __constrange(0,7) int c); // VSLI.8 q0,q0,#0
+#define vsliq_n_p8 vsliq_n_u8
+
+poly16x8_t vsliq_n_p16(poly16x8_t a, poly16x8_t b, __constrange(0,15) int c); // VSLI.16 q0,q0,#0
+#define vsliq_n_p16 vsliq_n_u16
+
+// ***********************************************************************************************
+// ****************** Loads and stores of a single vector ***************************************
+// ***********************************************************************************************
+//Performs loads and stores of a single vector of some type.
+//******************************* Loads ********************************************************
+// ***********************************************************************************************
+//We assume ptr is NOT aligned in general case and use __m128i _mm_loadu_si128 ((__m128i*) ptr);.
+//also for SSE3 supporting systems the __m128i _mm_lddqu_si128 (__m128i const* p) usage for unaligned access may be advantageous.
+// it loads a 32-byte block aligned on a 16-byte boundary and extracts the 16 bytes corresponding to the unaligned access
+//If the ptr is aligned then could use __m128i _mm_load_si128 ((__m128i*) ptr) instead;
+//Loads 16 bytes from ptr into an __m128i: the aligned load is used when the address is a multiple of 16,
+//the unaligned load otherwise. The macro expands to one fully parenthesized expression without a trailing
+//semicolon, so it can be used as a function argument, inside larger expressions and in return statements.
+//Note: ptr is evaluated more than once, so it must not have side effects.
+#define LOAD_SI128(ptr) \
+ ( ( ((unsigned long)(ptr) & 15) == 0 ) ? _mm_load_si128((__m128i*)(ptr)) : _mm_loadu_si128((__m128i*)(ptr)) )
+
+//All integer 128-bit vector types share the same register representation,
+//so every integer vld1q variant forwards to the LOAD_SI128 macro
+uint8x16_t vld1q_u8(__transfersize(16) uint8_t const * ptr); // VLD1.8 {d0, d1}, [r0]
+#define vld1q_u8 LOAD_SI128
+
+uint16x8_t vld1q_u16(__transfersize(8) uint16_t const * ptr); // VLD1.16 {d0, d1}, [r0]
+#define vld1q_u16 LOAD_SI128
+
+uint32x4_t vld1q_u32(__transfersize(4) uint32_t const * ptr); // VLD1.32 {d0, d1}, [r0]
+#define vld1q_u32 LOAD_SI128
+
+uint64x2_t vld1q_u64(__transfersize(2) uint64_t const * ptr); // VLD1.64 {d0, d1}, [r0]
+#define vld1q_u64 LOAD_SI128
+
+int8x16_t vld1q_s8(__transfersize(16) int8_t const * ptr); // VLD1.8 {d0, d1}, [r0]
+#define vld1q_s8 LOAD_SI128
+
+int16x8_t vld1q_s16(__transfersize(8) int16_t const * ptr); // VLD1.16 {d0, d1}, [r0]
+#define vld1q_s16 LOAD_SI128
+
+int32x4_t vld1q_s32(__transfersize(4) int32_t const * ptr); // VLD1.32 {d0, d1}, [r0]
+#define vld1q_s32 LOAD_SI128
+
+int64x2_t vld1q_s64(__transfersize(2) int64_t const * ptr); // VLD1.64 {d0, d1}, [r0]
+#define vld1q_s64 LOAD_SI128
+
+//Prototype only: the draft implementation below is commented out, so this section provides no definition
+float16x8_t vld1q_f16(__transfersize(8) __fp16 const * ptr); // VLD1.16 {d0, d1}, [r0]
+// IA32 SIMD doesn't work with 16bit floats currently, so need to go to 32 bit and then work with two 128bit registers
+/* _NEON2SSE_INLINE float16x8_t vld1q_f16(__transfersize(8) __fp16 const * ptr)// VLD1.16 {d0, d1}, [r0]
+{__m128 f1 = _mm_set_ps (ptr[3], ptr[2], ptr[1], ptr[0]);
+__m128 f2;
+f2 = _mm_set_ps (ptr[7], ptr[6], ptr[5], ptr[4]);
+}*/
+
+float32x4_t vld1q_f32(__transfersize(4) float32_t const * ptr); // VLD1.32 {d0, d1}, [r0]
+_NEON2SSE_INLINE float32x4_t vld1q_f32(__transfersize(4) float32_t const * ptr)
+{
+    //Loads four floats: the aligned load is used when the address is a multiple of 16 bytes,
+    //the unaligned load otherwise
+    return ( (((unsigned long)(ptr)) & 15 ) == 0 ) ? _mm_load_ps(ptr) : _mm_loadu_ps(ptr);
+}
+
+// Polynomial-typed full-vector loads: same redirection to LOAD_SI128 as the integer variants.
+poly8x16_t vld1q_p8(__transfersize(16) poly8_t const * ptr); // VLD1.8 {d0, d1}, [r0]
+#define vld1q_p8 LOAD_SI128
+
+poly16x8_t vld1q_p16(__transfersize(8) poly16_t const * ptr); // VLD1.16 {d0, d1}, [r0]
+#define vld1q_p16 LOAD_SI128
+
+// IA32 SIMD doesn't work with 16bit floats currently, so need to go to 32 bit like _mm_set_ps (ptr[3], ptr[2], ptr[1], ptr[0]);
+
+//***********************************************************************************************************
+//******* Lane load functions - insert the data at vector's given position (lane) *************************
+//***********************************************************************************************************
+// Lane loads: read one element through ptr and insert it into vec at position lane via the matching
+// _MM_INSERT_EPIxx helper (defined elsewhere in this file). The macro result is the updated vector.
+uint8x16_t vld1q_lane_u8(__transfersize(1) uint8_t const * ptr, uint8x16_t vec, __constrange(0,15) int lane); // VLD1.8 {d0[0]}, [r0]
+#define vld1q_lane_u8(ptr, vec, lane) _MM_INSERT_EPI8(vec, *(ptr), lane)
+
+uint16x8_t vld1q_lane_u16(__transfersize(1) uint16_t const * ptr, uint16x8_t vec, __constrange(0,7) int lane); // VLD1.16 {d0[0]}, [r0]
+#define vld1q_lane_u16(ptr, vec, lane) _MM_INSERT_EPI16(vec, *(ptr), lane)
+
+uint32x4_t vld1q_lane_u32(__transfersize(1) uint32_t const * ptr, uint32x4_t vec, __constrange(0,3) int lane); // VLD1.32 {d0[0]}, [r0]
+#define vld1q_lane_u32(ptr, vec, lane) _MM_INSERT_EPI32(vec, *(ptr), lane)
+
+uint64x2_t vld1q_lane_u64(__transfersize(1) uint64_t const * ptr, uint64x2_t vec, __constrange(0,1) int lane); // VLD1.64 {d0}, [r0]
+// Inserts the 64-bit value read through ptr into vec at position lane.
+// No trailing ';' in the expansion, so the macro is a plain expression like its s64 counterpart.
+#define vld1q_lane_u64(ptr, vec, lane) _MM_INSERT_EPI64(vec, *(ptr), lane)
+
+// Signed lane loads: identical expansion to the unsigned variants (insert *(ptr) into vec at lane).
+int8x16_t vld1q_lane_s8(__transfersize(1) int8_t const * ptr, int8x16_t vec, __constrange(0,15) int lane); // VLD1.8 {d0[0]}, [r0]
+#define vld1q_lane_s8(ptr, vec, lane) _MM_INSERT_EPI8(vec, *(ptr), lane)
+
+int16x8_t vld1q_lane_s16(__transfersize(1) int16_t const * ptr, int16x8_t vec, __constrange(0,7) int lane); // VLD1.16 {d0[0]}, [r0]
+#define vld1q_lane_s16(ptr, vec, lane) _MM_INSERT_EPI16(vec, *(ptr), lane)
+
+int32x4_t vld1q_lane_s32(__transfersize(1) int32_t const * ptr, int32x4_t vec, __constrange(0,3) int lane); // VLD1.32 {d0[0]}, [r0]
+#define vld1q_lane_s32(ptr, vec, lane) _MM_INSERT_EPI32(vec, *(ptr), lane)
+
+//current IA SIMD doesn't support float16
+
+float32x4_t vld1q_lane_f32(__transfersize(1) float32_t const * ptr, float32x4_t vec, __constrange(0,3) int lane); // VLD1.32 {d0[0]}, [r0]
+// Reads one float through ptr and writes it into position lane of vec; the other lanes are taken from vec.
+_NEON2SSE_INLINE float32x4_t vld1q_lane_f32(__transfersize(1) float32_t const * ptr, float32x4_t vec, __constrange(0,3) int lane)
+{
+    // scalar dereference + broadcast, so ptr needs no 16-byte alignment
+    __m128 splat = _mm_set1_ps(*(ptr));
+    return _MM_INSERT_PS(vec, splat, _INSERTPS_NDX(0, lane)); //take element 0 of splat, place it at lane
+}
+
+// Remaining lane loads (signed 64-bit and polynomial types): insert *(ptr) into vec at lane.
+int64x2_t vld1q_lane_s64(__transfersize(1) int64_t const * ptr, int64x2_t vec, __constrange(0,1) int lane); // VLD1.64 {d0}, [r0]
+#define vld1q_lane_s64(ptr, vec, lane) _MM_INSERT_EPI64(vec, *(ptr), lane)
+
+poly8x16_t vld1q_lane_p8(__transfersize(1) poly8_t const * ptr, poly8x16_t vec, __constrange(0,15) int lane); // VLD1.8 {d0[0]}, [r0]
+#define vld1q_lane_p8(ptr, vec, lane) _MM_INSERT_EPI8(vec, *(ptr), lane)
+
+poly16x8_t vld1q_lane_p16(__transfersize(1) poly16_t const * ptr, poly16x8_t vec, __constrange(0,7) int lane); // VLD1.16 {d0[0]}, [r0]
+#define vld1q_lane_p16(ptr, vec, lane) _MM_INSERT_EPI16(vec, *(ptr), lane)
+
+//serial solution may be faster
+
+//current IA SIMD doesn't support float16
+
+// ****************** Load single value ( set all lanes of vector with same value from memory)**********************
+// ******************************************************************************************************************
+// Duplicating loads: read one element through ptr and broadcast it to every lane with _mm_set1_epiNN.
+uint8x16_t vld1q_dup_u8(__transfersize(1) uint8_t const * ptr); // VLD1.8 {d0[]}, [r0]
+#define vld1q_dup_u8(ptr) _mm_set1_epi8(*(ptr))
+
+uint16x8_t vld1q_dup_u16(__transfersize(1) uint16_t const * ptr); // VLD1.16 {d0[]}, [r0]
+#define vld1q_dup_u16(ptr) _mm_set1_epi16(*(ptr))
+
+uint32x4_t vld1q_dup_u32(__transfersize(1) uint32_t const * ptr); // VLD1.32 {d0[]}, [r0]
+#define vld1q_dup_u32(ptr) _mm_set1_epi32(*(ptr))
+
+uint64x2_t vld1q_dup_u64(__transfersize(1) uint64_t const * ptr); // VLD1.64 {d0}, [r0]
+// Broadcasts the 64-bit value at ptr into both halves of the result by staging it in an aligned two-element array.
+_NEON2SSE_INLINE uint64x2_t vld1q_dup_u64(__transfersize(1) uint64_t const * ptr)
+{
+    _NEON2SSE_ALIGN_16 uint64_t twice[2];
+    twice[0] = *(ptr);
+    twice[1] = *(ptr);
+    return LOAD_SI128(twice);
+}
+
+// Signed duplicating loads: broadcast *(ptr) to every lane, same expansion as the unsigned variants.
+int8x16_t vld1q_dup_s8(__transfersize(1) int8_t const * ptr); // VLD1.8 {d0[]}, [r0]
+#define vld1q_dup_s8(ptr) _mm_set1_epi8(*(ptr))
+
+int16x8_t vld1q_dup_s16(__transfersize(1) int16_t const * ptr); // VLD1.16 {d0[]}, [r0]
+#define vld1q_dup_s16(ptr) _mm_set1_epi16 (*(ptr))
+
+int32x4_t vld1q_dup_s32(__transfersize(1) int32_t const * ptr); // VLD1.32 {d0[]}, [r0]
+#define vld1q_dup_s32(ptr) _mm_set1_epi32 (*(ptr))
+
+int64x2_t vld1q_dup_s64(__transfersize(1) int64_t const * ptr); // VLD1.64 {d0}, [r0]
+// Signed variant forwards to vld1q_dup_u64. The argument is parenthesised so that expression arguments
+// (e.g. cond ? p : q) are cast as a whole, and the cast keeps the const qualifier of the prototype.
+#define vld1q_dup_s64(ptr) vld1q_dup_u64((uint64_t const *)(ptr))
+
+float16x8_t vld1q_dup_f16(__transfersize(1) __fp16 const * ptr); // VLD1.16 {d0[]}, [r0]
+//current IA SIMD doesn't support float16, need to go to 32 bits
+
+// Float and polynomial duplicating loads: broadcast *(ptr) to every lane.
+float32x4_t vld1q_dup_f32(__transfersize(1) float32_t const * ptr); // VLD1.32 {d0[]}, [r0]
+#define vld1q_dup_f32(ptr) _mm_set1_ps (*(ptr))
+
+poly8x16_t vld1q_dup_p8(__transfersize(1) poly8_t const * ptr); // VLD1.8 {d0[]}, [r0]
+#define vld1q_dup_p8(ptr) _mm_set1_epi8(*(ptr))
+
+poly16x8_t vld1q_dup_p16(__transfersize(1) poly16_t const * ptr); // VLD1.16 {d0[]}, [r0]
+#define vld1q_dup_p16(ptr) _mm_set1_epi16 (*(ptr))
+
+//current IA SIMD doesn't support float16
+
+//*************************************************************************************
+//********************************* Store **********************************************
+//*************************************************************************************
+// If ptr is 16-byte aligned and you need to store data without cache pollution then use void _mm_stream_si128 ((__m128i*)ptr, val);
+//here we assume that ptr may NOT be 16-byte aligned. If it is aligned, _mm_store_si128 can be used, as shown in the following macro
+// STORE_SI128(ptr, val): store the 128-bit val to ptr, using the aligned store when the low 4 address bits
+// are zero and the unaligned store otherwise. ptr is evaluated more than once, so it must not have side effects.
+// The expansion is one parenthesised (void) expression with no trailing ';', so it behaves like a function call
+// in un-braced if/else and comma expressions.
+#define STORE_SI128(ptr, val) \
+    ( (((unsigned long)(ptr) & 15) == 0) ? _mm_store_si128((__m128i*)(ptr), (val)) : _mm_storeu_si128((__m128i*)(ptr), (val)) )
+
+// Full-vector (128-bit) stores for all integer element types. Each prototype documents the NEON signature;
+// the macro of the same name redirects every call to STORE_SI128.
+void vst1q_u8(__transfersize(16) uint8_t * ptr, uint8x16_t val); // VST1.8 {d0, d1}, [r0]
+#define vst1q_u8 STORE_SI128
+
+void vst1q_u16(__transfersize(8) uint16_t * ptr, uint16x8_t val); // VST1.16 {d0, d1}, [r0]
+#define vst1q_u16 STORE_SI128
+
+void vst1q_u32(__transfersize(4) uint32_t * ptr, uint32x4_t val); // VST1.32 {d0, d1}, [r0]
+#define vst1q_u32 STORE_SI128
+
+void vst1q_u64(__transfersize(2) uint64_t * ptr, uint64x2_t val); // VST1.64 {d0, d1}, [r0]
+#define vst1q_u64 STORE_SI128
+
+void vst1q_s8(__transfersize(16) int8_t * ptr, int8x16_t val); // VST1.8 {d0, d1}, [r0]
+#define vst1q_s8 STORE_SI128
+
+void vst1q_s16(__transfersize(8) int16_t * ptr, int16x8_t val); // VST1.16 {d0, d1}, [r0]
+#define vst1q_s16 STORE_SI128
+
+void vst1q_s32(__transfersize(4) int32_t * ptr, int32x4_t val); // VST1.32 {d0, d1}, [r0]
+#define vst1q_s32 STORE_SI128
+
+void vst1q_s64(__transfersize(2) int64_t * ptr, int64x2_t val); // VST1.64 {d0, d1}, [r0]
+#define vst1q_s64 STORE_SI128
+
+void vst1q_f16(__transfersize(8) __fp16 * ptr, float16x8_t val); // VST1.16 {d0, d1}, [r0]
+// IA32 SIMD doesn't work with 16bit floats currently
+
+void vst1q_f32(__transfersize(4) float32_t * ptr, float32x4_t val); // VST1.32 {d0, d1}, [r0]
+// Writes the four floats of val to ptr; uses the aligned store only when ptr sits on a 16-byte boundary.
+_NEON2SSE_INLINE void vst1q_f32(__transfersize(4) float32_t * ptr, float32x4_t val)
+{
+    if( ((unsigned long)(ptr) & 15) != 0 ) { //not 16-byte aligned
+        _mm_storeu_ps (ptr, val);
+        return;
+    }
+    _mm_store_ps (ptr, val);
+}
+
+// Polynomial-typed stores are aliases of the unsigned stores of the same element width.
+void vst1q_p8(__transfersize(16) poly8_t * ptr, poly8x16_t val); // VST1.8 {d0, d1}, [r0]
+#define vst1q_p8 vst1q_u8
+
+void vst1q_p16(__transfersize(8) poly16_t * ptr, poly16x8_t val); // VST1.16 {d0, d1}, [r0]
+#define vst1q_p16 vst1q_u16
+
+//current IA SIMD doesn't support float16
+
+//***********Store a lane of a vector into memory (extract given lane) *********************
+//******************************************************************************************
+// Lane stores: extract element lane from val with the matching _MM_EXTRACT_EPIxx helper and assign it through ptr.
+// NOTE(review): each macro expands to a bare, unparenthesised assignment expression, so it is only safe
+// as a stand-alone statement (not inside ?: or other larger expressions).
+void vst1q_lane_u8(__transfersize(1) uint8_t * ptr, uint8x16_t val, __constrange(0,15) int lane); // VST1.8 {d0[0]}, [r0]
+#define vst1q_lane_u8(ptr, val, lane) *(ptr) = _MM_EXTRACT_EPI8 (val, lane)
+
+void vst1q_lane_u16(__transfersize(1) uint16_t * ptr, uint16x8_t val, __constrange(0,7) int lane); // VST1.16 {d0[0]}, [r0]
+#define vst1q_lane_u16(ptr, val, lane) *(ptr) = _MM_EXTRACT_EPI16 (val, lane)
+
+void vst1q_lane_u32(__transfersize(1) uint32_t * ptr, uint32x4_t val, __constrange(0,3) int lane); // VST1.32 {d0[0]}, [r0]
+#define vst1q_lane_u32(ptr, val, lane) *(ptr) = _MM_EXTRACT_EPI32 (val, lane)
+
+void vst1q_lane_u64(__transfersize(1) uint64_t * ptr, uint64x2_t val, __constrange(0,1) int lane); // VST1.64 {d0}, [r0]
+#define vst1q_lane_u64(ptr, val, lane) *(ptr) = _MM_EXTRACT_EPI64 (val, lane)
+
+void vst1q_lane_s8(__transfersize(1) int8_t * ptr, int8x16_t val, __constrange(0,15) int lane); // VST1.8 {d0[0]}, [r0]
+#define vst1q_lane_s8(ptr, val, lane) *(ptr) = _MM_EXTRACT_EPI8 (val, lane)
+
+void vst1q_lane_s16(__transfersize(1) int16_t * ptr, int16x8_t val, __constrange(0,7) int lane); // VST1.16 {d0[0]}, [r0]
+#define vst1q_lane_s16(ptr, val, lane) *(ptr) = _MM_EXTRACT_EPI16 (val, lane)
+
+void vst1q_lane_s32(__transfersize(1) int32_t * ptr, int32x4_t val, __constrange(0,3) int lane); // VST1.32 {d0[0]}, [r0]
+#define vst1q_lane_s32(ptr, val, lane) *(ptr) = _MM_EXTRACT_EPI32 (val, lane)
+
+void vst1q_lane_s64(__transfersize(1) int64_t * ptr, int64x2_t val, __constrange(0,1) int lane); // VST1.64 {d0}, [r0]
+#define vst1q_lane_s64(ptr, val, lane) *(ptr) = _MM_EXTRACT_EPI64 (val, lane)
+
+void vst1q_lane_f16(__transfersize(1) __fp16 * ptr, float16x8_t val, __constrange(0,7) int lane); // VST1.16 {d0[0]}, [r0]
+//current IA SIMD doesn't support float16
+
+void vst1q_lane_f32(__transfersize(1) float32_t * ptr, float32x4_t val, __constrange(0,3) int lane); // VST1.32 {d0[0]}, [r0]
+// Stores element lane of val to *ptr.
+// _MM_EXTRACT_PS hands the selected lane back as a 32-bit integer; those bits are reinterpreted as a float
+// through a union. The previous *(float*)&int form violates strict-aliasing rules and may be miscompiled
+// when the compiler optimises on that assumption.
+_NEON2SSE_INLINE void vst1q_lane_f32(__transfersize(1) float32_t * ptr, float32x4_t val, __constrange(0,3) int lane)
+{
+    union {
+        int32_t bits;
+        float value;
+    } lane_bits;
+    lane_bits.bits = _MM_EXTRACT_PS(val,lane);
+    *(ptr) = lane_bits.value;
+}
+
+// Polynomial lane stores reuse the integer macros of the same element width.
+// NOTE(review): p8 aliases the unsigned macro while p16 aliases the signed one; both integer macros have
+// the same expansion, so this is only a naming inconsistency.
+void vst1q_lane_p8(__transfersize(1) poly8_t * ptr, poly8x16_t val, __constrange(0,15) int lane); // VST1.8 {d0[0]}, [r0]
+#define vst1q_lane_p8 vst1q_lane_u8
+
+void vst1q_lane_p16(__transfersize(1) poly16_t * ptr, poly16x8_t val, __constrange(0,7) int lane); // VST1.16 {d0[0]}, [r0]
+#define vst1q_lane_p16 vst1q_lane_s16
+
+//current IA SIMD doesn't support float16
+
+//***********************************************************************************************
+//**************** Loads and stores of an N-element structure **********************************
+//***********************************************************************************************
+//These intrinsics load or store an n-element structure. The array structures are defined in the beginning
+//We assume ptr is NOT aligned in general case, for more details see "Loads and stores of a single vector functions"
+//****************** 2 elements load *********************************************
+uint8x16x2_t vld2q_u8(__transfersize(32) uint8_t const * ptr); // VLD2.8 {d0, d2}, [r0]
+// Reads two consecutive 16-byte vectors from ptr and hands them to vuzpq_s8 to separate the two interleaved streams.
+_NEON2SSE_INLINE uint8x16x2_t vld2q_u8(__transfersize(32) uint8_t const * ptr) // VLD2.8 {d0, d2}, [r0]
+{
+    uint8x16_t first16, next16;
+    first16 = vld1q_u8(ptr);
+    next16 = vld1q_u8((ptr + 16));
+    return vuzpq_s8(first16, next16);
+}
+
+#if defined(USE_SSSE3)
+uint16x8x2_t vld2q_u16(__transfersize(16) uint16_t const * ptr); // VLD2.16 {d0, d2}, [r0]
+// Reads two consecutive 8-element vectors from ptr and hands them to vuzpq_s16 to separate the two interleaved streams.
+_NEON2SSE_INLINE uint16x8x2_t vld2q_u16(__transfersize(16) uint16_t const * ptr) // VLD2.16 {d0, d2}, [r0]
+{
+    uint16x8_t first8, next8;
+    first8 = vld1q_u16( ptr);
+    next8 = vld1q_u16( (ptr + 8));
+    return vuzpq_s16(first8, next8);
+}
+#endif
+
+uint32x4x2_t vld2q_u32(__transfersize(8) uint32_t const * ptr); // VLD2.32 {d0, d2}, [r0]
+// Reads two consecutive 4-element vectors from ptr and hands them to vuzpq_s32 to separate the two interleaved streams.
+_NEON2SSE_INLINE uint32x4x2_t vld2q_u32(__transfersize(8) uint32_t const * ptr) // VLD2.32 {d0, d2}, [r0]
+{
+    uint32x4_t first4, next4;
+    first4 = vld1q_u32 ( ptr);
+    next4 = vld1q_u32 ( (ptr + 4));
+    return vuzpq_s32(first4, next4);
+}
+
+// Signed 2-element structure loads forward to the unsigned implementations of the same element width.
+// The macro argument is parenthesised (as the vld3q_s* macros already do) so that expression arguments are
+// cast as a whole, and the cast keeps the const qualifier of the prototypes.
+int8x16x2_t vld2q_s8(__transfersize(32) int8_t const * ptr);
+#define vld2q_s8(ptr) vld2q_u8((uint8_t const *) (ptr))
+
+#if defined(USE_SSSE3)
+int16x8x2_t vld2q_s16(__transfersize(16) int16_t const * ptr); // VLD2.16 {d0, d2}, [r0]
+#define vld2q_s16(ptr) vld2q_u16((uint16_t const *) (ptr))
+#endif
+
+int32x4x2_t vld2q_s32(__transfersize(8) int32_t const * ptr); // VLD2.32 {d0, d2}, [r0]
+#define vld2q_s32(ptr) vld2q_u32((uint32_t const *) (ptr))
+
+float16x8x2_t vld2q_f16(__transfersize(16) __fp16 const * ptr); // VLD2.16 {d0, d2}, [r0]
+// IA32 SIMD doesn't work with 16bit floats currently, so need to go to 32 bit and then work with two 128bit registers. See vld1q_f16 for example
+
+float32x4x2_t vld2q_f32(__transfersize(8) float32_t const * ptr); // VLD2.32 {d0, d2}, [r0]
+// Reads two consecutive 4-float vectors from ptr and hands them to vuzpq_f32 to separate the two interleaved streams.
+_NEON2SSE_INLINE float32x4x2_t vld2q_f32(__transfersize(8) float32_t const * ptr) // VLD2.32 {d0, d2}, [r0]
+{
+    float32x4_t first4, next4;
+    first4 = vld1q_f32 (ptr);
+    next4 = vld1q_f32 ((ptr + 4));
+    return vuzpq_f32(first4, next4);
+}
+
+// Polynomial 2-element structure loads are aliases of the unsigned loads of the same element width.
+poly8x16x2_t vld2q_p8(__transfersize(32) poly8_t const * ptr); // VLD2.8 {d0, d2}, [r0]
+#define vld2q_p8 vld2q_u8
+
+#if defined(USE_SSSE3)
+poly16x8x2_t vld2q_p16(__transfersize(16) poly16_t const * ptr); // VLD2.16 {d0, d2}, [r0]
+#define vld2q_p16 vld2q_u16
+#endif
+
+#if defined(USE_SSSE3)
+uint8x8x2_t vld2_u8(__transfersize(16) uint8_t const * ptr); // VLD2.8 {d0, d1}, [r0]
+// Loads 16 interleaved bytes, gathers the even-indexed bytes into the low half and the odd-indexed bytes into
+// the high half, then returns the low half as val[0] and the halves swapped as val[1].
+_NEON2SSE_INLINE uint8x8x2_t vld2_u8(__transfersize(16) uint8_t const * ptr)
+{
+    _NEON2SSE_ALIGN_16 int8_t even_then_odd[16] = { 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15};
+    uint8x8x2_t result;
+    __m128i packed, split;
+    packed = vld1q_u8(ptr); //both 64-bit halves in one 128-bit register
+    split = _mm_shuffle_epi8(packed, *(__m128i*)even_then_odd);
+    result.val[0] = split;
+    result.val[1] = _mm_shuffle_epi32(split, _SWAP_HI_LOW32);
+    return result;
+}
+#endif
+
+#if defined(USE_SSSE3)
+uint16x4x2_t vld2_u16(__transfersize(8) uint16_t const * ptr); // VLD2.16 {d0, d1}, [r0]
+// Loads 8 interleaved 16-bit elements, gathers the even-indexed ones into the low half and the odd-indexed ones
+// into the high half, then returns the low half as val[0] and the halves swapped as val[1].
+_NEON2SSE_INLINE uint16x4x2_t vld2_u16(__transfersize(8) uint16_t const * ptr)
+{
+    _NEON2SSE_ALIGN_16 int8_t even_then_odd[16] = { 0,1, 4,5, 8,9, 12,13, 2,3, 6,7, 10,11, 14,15};
+    uint16x4x2_t result;
+    __m128i packed, split;
+    packed = vld1q_u16(ptr); //both 64-bit halves in one 128-bit register
+    split = _mm_shuffle_epi8(packed, *(__m128i*)even_then_odd);
+    result.val[0] = split;
+    result.val[1] = _mm_shuffle_epi32(split, _SWAP_HI_LOW32);
+    return result;
+}
+#endif
+
+uint32x2x2_t vld2_u32(__transfersize(4) uint32_t const * ptr); // VLD2.32 {d0, d1}, [r0]
+// Loads 4 interleaved 32-bit elements m0..m3, reorders them to m0,m2,m1,m3, then returns that as val[0]
+// and the same register with its halves swapped as val[1].
+_NEON2SSE_INLINE uint32x2x2_t vld2_u32(__transfersize(4) uint32_t const * ptr)
+{
+    uint32x2x2_t result;
+    __m128i packed, split;
+    packed = vld1q_u32(ptr); //both 64-bit halves in one 128-bit register
+    split = _mm_shuffle_epi32(packed, 0 | (2 << 2) | (1 << 4) | (3 << 6));
+    result.val[0] = split;
+    result.val[1] = _mm_shuffle_epi32(split, _SWAP_HI_LOW32);
+    return result;
+}
+
+uint64x1x2_t vld2_u64(__transfersize(2) uint64_t const * ptr); // VLD1.64 {d0, d1}, [r0]
+// Loads two 64-bit elements at once; val[0] is the loaded register, val[1] is the same register with its halves swapped.
+_NEON2SSE_INLINE uint64x1x2_t vld2_u64(__transfersize(2) uint64_t const * ptr)
+{
+    uint64x1x2_t result;
+    __m128i both;
+    both = vld1q_u64(ptr);
+    result.val[0] = both;
+    result.val[1] = _mm_shuffle_epi32(both, _SWAP_HI_LOW32);
+    return result;
+}
+
+// Signed 64-bit-vector 2-element loads forward to the unsigned implementations of the same element width.
+// The macro argument is parenthesised so that expression arguments are cast as a whole, and the cast keeps
+// the const qualifier of the prototypes.
+#if defined(USE_SSSE3)
+int8x8x2_t vld2_s8(__transfersize(16) int8_t const * ptr); // VLD2.8 {d0, d1}, [r0]
+#define vld2_s8(ptr) vld2_u8((uint8_t const *)(ptr))
+
+int16x4x2_t vld2_s16(__transfersize(8) int16_t const * ptr); // VLD2.16 {d0, d1}, [r0]
+#define vld2_s16(ptr) vld2_u16((uint16_t const *)(ptr))
+#endif
+
+int32x2x2_t vld2_s32(__transfersize(4) int32_t const * ptr); // VLD2.32 {d0, d1}, [r0]
+#define vld2_s32(ptr) vld2_u32((uint32_t const *)(ptr))
+
+int64x1x2_t vld2_s64(__transfersize(2) int64_t const * ptr); // VLD1.64 {d0, d1}, [r0]
+#define vld2_s64(ptr) vld2_u64((uint64_t const *)(ptr))
+
+float16x4x2_t vld2_f16(__transfersize(8) __fp16 const * ptr); // VLD2.16 {d0, d1}, [r0]
+
+float32x2x2_t vld2_f32(__transfersize(4) float32_t const * ptr); // VLD2.32 {d0, d1}, [r0]
+// Loads 4 interleaved floats m0..m3, reorders them to m0,m2,m1,m3, then returns that as val[0]
+// and its upper half moved down as val[1].
+_NEON2SSE_INLINE float32x2x2_t vld2_f32(__transfersize(4) float32_t const * ptr)
+{
+    float32x2x2_t result;
+    __m128 quad;
+    quad = vld1q_f32(ptr);
+    quad = _mm_shuffle_ps(quad, quad, _MM_SHUFFLE(3,1, 2, 0));
+    result.val[0] = quad;
+    result.val[1] = _mm_movehl_ps(quad,quad);
+    return result;
+}
+
+// Polynomial 64-bit-vector 2-element loads are aliases of the unsigned loads of the same element width.
+#if defined(USE_SSSE3)
+poly8x8x2_t vld2_p8(__transfersize(16) poly8_t const * ptr); // VLD2.8 {d0, d1}, [r0]
+#define vld2_p8 vld2_u8
+
+poly16x4x2_t vld2_p16(__transfersize(8) poly16_t const * ptr); // VLD2.16 {d0, d1}, [r0]
+#define vld2_p16 vld2_u16
+#endif
+
+//******************** Triplets ***************************************
+//*********************************************************************
+#if defined(USE_SSSE3)
+uint8x16x3_t vld3q_u8(__transfersize(48) uint8_t const * ptr); // VLD3.8 {d0, d2, d4}, [r0]
+// De-interleaving load of 48 bytes. Three 16-byte vectors a, b, c are loaded from ptr, ptr+16, ptr+32.
+// Each is first permuted so that the bytes belonging to the same output stream are contiguous; the three
+// outputs are then assembled from those runs with byte shifts and ORs. Statement order matters: tmp0..tmp2
+// are reused by all three output computations and tmp0 is overwritten only at the very end.
+_NEON2SSE_INLINE uint8x16x3_t vld3q_u8(__transfersize(48) uint8_t const * ptr) // VLD3.8 {d0, d2, d4}, [r0]
+{ //a0,a1,a2,a3,...a7,a8,...a15, b0,b1,b2,...b7,b8,...b15, c0,c1,c2,...c7,c8,...c15 ->
+ //a:0,3,6,9,12,15,b:2,5,8,11,14, c:1,4,7,10,13
+ //a:1,4,7,10,13, b:0,3,6,9,12,15,c:2,5,8,11,14,
+ //a:2,5,8,11,14, b:1,4,7,10,13, c:0,3,6,9,12,15
+ uint8x16x3_t v;
+ __m128i tmp0, tmp1,tmp2, tmp3;
+ _NEON2SSE_ALIGN_16 int8_t mask8_0[16] = {0,3,6,9,12,15,1,4,7,10,13,2,5,8,11,14};
+ _NEON2SSE_ALIGN_16 int8_t mask8_1[16] = {2,5,8,11,14,0,3,6,9,12,15,1,4,7,10,13};
+ _NEON2SSE_ALIGN_16 int8_t mask8_2[16] = {1,4,7,10,13,2,5,8,11,14,0,3,6,9,12,15};
+
+ v.val[0] = vld1q_u8 (ptr); //a0,a1,a2,a3,...a7, ...a15
+ v.val[1] = vld1q_u8 ((ptr + 16)); //b0,b1,b2,b3...b7, ...b15
+ v.val[2] = vld1q_u8 ((ptr + 32)); //c0,c1,c2,c3,...c7,...c15
+
+ tmp0 = _mm_shuffle_epi8(v.val[0], *(__m128i*)mask8_0); //a:0,3,6,9,12,15,1,4,7,10,13,2,5,8,11,14
+ tmp1 = _mm_shuffle_epi8(v.val[1], *(__m128i*)mask8_1); //b:2,5,8,11,14,0,3,6,9,12,15,1,4,7,10,13
+ tmp2 = _mm_shuffle_epi8(v.val[2], *(__m128i*)mask8_2); //c:1,4,7,10,13,2,5,8,11,14,0,3,6,9,12,15
+
+ // ---- output 0: a(6 bytes) | b(5 bytes) | c(5 bytes)
+ tmp3 = _mm_slli_si128(tmp0,10); //0,0,0,0,0,0,0,0,0,0,a0,a3,a6,a9,a12,a15
+ tmp3 = _mm_alignr_epi8(tmp1,tmp3, 10); //a:0,3,6,9,12,15,b:2,5,8,11,14,x,x,x,x,x
+ tmp3 = _mm_slli_si128(tmp3, 5); //0,0,0,0,0,a:0,3,6,9,12,15,b:2,5,8,11,14,
+ tmp3 = _mm_srli_si128(tmp3, 5); //a:0,3,6,9,12,15,b:2,5,8,11,14,:0,0,0,0,0
+ v.val[0] = _mm_slli_si128(tmp2, 11); //0,0,0,0,0,0,0,0,0,0,0, c:1,4,7,10,13,
+ v.val[0] = _mm_or_si128(v.val[0],tmp3) ;//a:0,3,6,9,12,15,b:2,5,8,11,14,c:1,4,7,10,13,
+
+ // ---- output 1: a(5 bytes) | b(6 bytes) | c(5 bytes)
+ tmp3 = _mm_slli_si128(tmp0, 5);//0,0,0,0,0,a:0,3,6,9,12,15,1,4,7,10,13,
+ tmp3 = _mm_srli_si128(tmp3, 11); //a:1,4,7,10,13, 0,0,0,0,0,0,0,0,0,0,0
+ v.val[1] = _mm_srli_si128(tmp1,5); //b:0,3,6,9,12,15,b:1,4,7,10,13, 0,0,0,0,0
+ v.val[1] = _mm_slli_si128(v.val[1], 5);//0,0,0,0,0,b:0,3,6,9,12,15,b:1,4,7,10,13,
+ v.val[1] = _mm_or_si128(v.val[1],tmp3);//a:1,4,7,10,13,b:0,3,6,9,12,15,b:1,4,7,10,13,
+ v.val[1] = _mm_slli_si128(v.val[1],5);//0,0,0,0,0,a:1,4,7,10,13,b:0,3,6,9,12,15,
+ v.val[1] = _mm_srli_si128(v.val[1], 5);//a:1,4,7,10,13,b:0,3,6,9,12,15,0,0,0,0,0
+ tmp3 = _mm_srli_si128(tmp2,5); //c:2,5,8,11,14,0,3,6,9,12,15,0,0,0,0,0
+ tmp3 = _mm_slli_si128(tmp3,11);//0,0,0,0,0,0,0,0,0,0,0,c:2,5,8,11,14,
+ v.val[1] = _mm_or_si128(v.val[1],tmp3);//a:1,4,7,10,13,b:0,3,6,9,12,15,c:2,5,8,11,14,
+
+ // ---- output 2: a(5 bytes) | b(5 bytes) | c(6 bytes)
+ tmp3 = _mm_srli_si128(tmp2,10); //c:0,3,6,9,12,15, 0,0,0,0,0,0,0,0,0,0,
+ tmp3 = _mm_slli_si128(tmp3,10); //0,0,0,0,0,0,0,0,0,0, c:0,3,6,9,12,15,
+ v.val[2] = _mm_srli_si128(tmp1,11); //b:1,4,7,10,13,0,0,0,0,0,0,0,0,0,0,0
+ v.val[2] = _mm_slli_si128(v.val[2],5);//0,0,0,0,0,b:1,4,7,10,13, 0,0,0,0,0,0
+ v.val[2] = _mm_or_si128(v.val[2],tmp3);//0,0,0,0,0,b:1,4,7,10,13,c:0,3,6,9,12,15,
+ tmp0 = _mm_srli_si128(tmp0, 11); //a:2,5,8,11,14, 0,0,0,0,0,0,0,0,0,0,0,
+ v.val[2] = _mm_or_si128(v.val[2],tmp0);//a:2,5,8,11,14,b:1,4,7,10,13,c:0,3,6,9,12,15,
+ return v;
+}
+#endif
+
+#if defined(USE_SSSE3)
+uint16x8x3_t vld3q_u16(__transfersize(24) uint16_t const * ptr); // VLD3.16 {d0, d2, d4}, [r0]
+// De-interleaving load of 24 16-bit elements. Three 8-element vectors a, b, c are loaded from ptr, ptr+8, ptr+16.
+// Each is first permuted (byte shuffle with 2-byte granularity) so that the elements belonging to the same
+// output stream are contiguous; the three outputs are then assembled from those runs with byte shifts and ORs.
+// Statement order matters: tmp0..tmp2 are reused by all three output computations.
+_NEON2SSE_INLINE uint16x8x3_t vld3q_u16(__transfersize(24) uint16_t const * ptr) // VLD3.16 {d0, d2, d4}, [r0]
+{ //a0, a1,a2,a3,...a7, b0,b1,b2,b3,...b7, c0,c1,c2,c3...c7 -> a0,a3,a6,b1,b4,b7,c2,c5, a1,a4,a7,b2,b5,c0,c3,c6, a2,a5,b0,b3,b6,c1,c4,c7
+ uint16x8x3_t v;
+ __m128i tmp0, tmp1,tmp2, tmp3;
+ _NEON2SSE_ALIGN_16 int8_t mask16_0[16] = {0,1, 6,7, 12,13, 2,3, 8,9, 14,15, 4,5, 10,11};
+ _NEON2SSE_ALIGN_16 int8_t mask16_1[16] = {2,3, 8,9, 14,15, 4,5, 10,11, 0,1, 6,7, 12,13};
+ _NEON2SSE_ALIGN_16 int8_t mask16_2[16] = {4,5, 10,11, 0,1, 6,7, 12,13, 2,3, 8,9, 14,15};
+
+ v.val[0] = vld1q_u16 (ptr); //a0,a1,a2,a3,...a7,
+ v.val[1] = vld1q_u16 ((ptr + 8)); //b0,b1,b2,b3...b7
+ v.val[2] = vld1q_u16 ((ptr + 16)); //c0,c1,c2,c3,...c7
+
+ tmp0 = _mm_shuffle_epi8(v.val[0], *(__m128i*)mask16_0); //a0,a3,a6,a1,a4,a7,a2,a5,
+ tmp1 = _mm_shuffle_epi8(v.val[1], *(__m128i*)mask16_1); //b1,b4,b7,b2,b5,b0,b3,b6
+ tmp2 = _mm_shuffle_epi8(v.val[2], *(__m128i*)mask16_2); //c2,c5, c0,c3,c6, c1,c4,c7
+
+ // ---- output 0: a(3 elements) | b(3 elements) | c(2 elements)
+ tmp3 = _mm_slli_si128(tmp0,10); //0,0,0,0,0,a0,a3,a6,
+ tmp3 = _mm_alignr_epi8(tmp1,tmp3, 10); //a0,a3,a6,b1,b4,b7,x,x
+ tmp3 = _mm_slli_si128(tmp3, 4); //0,0, a0,a3,a6,b1,b4,b7
+ tmp3 = _mm_srli_si128(tmp3, 4); //a0,a3,a6,b1,b4,b7,0,0
+ v.val[0] = _mm_slli_si128(tmp2, 12); //0,0,0,0,0,0, c2,c5,
+ v.val[0] = _mm_or_si128(v.val[0],tmp3);//a0,a3,a6,b1,b4,b7,c2,c5
+
+ // ---- output 1: a(3 elements) | b(2 elements) | c(3 elements)
+ tmp3 = _mm_slli_si128(tmp0, 4);//0,0,a0,a3,a6,a1,a4,a7
+ tmp3 = _mm_srli_si128(tmp3,10); //a1,a4,a7, 0,0,0,0,0
+ v.val[1] = _mm_srli_si128(tmp1,6); //b2,b5,b0,b3,b6,0,0,0
+ v.val[1] = _mm_slli_si128(v.val[1], 6); //0,0,0,b2,b5,b0,b3,b6,
+ v.val[1] = _mm_or_si128(v.val[1],tmp3);//a1,a4,a7,b2,b5,b0,b3,b6,
+ v.val[1] = _mm_slli_si128(v.val[1],6);//0,0,0,a1,a4,a7,b2,b5,
+ v.val[1] = _mm_srli_si128(v.val[1], 6);//a1,a4,a7,b2,b5,0,0,0,
+ tmp3 = _mm_srli_si128(tmp2,4); //c0,c3,c6, c1,c4,c7,0,0
+ tmp3 = _mm_slli_si128(tmp3,10); //0,0,0,0,0,c0,c3,c6,
+ v.val[1] = _mm_or_si128(v.val[1],tmp3); //a1,a4,a7,b2,b5,c0,c3,c6,
+
+ // ---- output 2: a(2 elements) | b(3 elements) | c(3 elements)
+ tmp3 = _mm_srli_si128(tmp2,10); //c1,c4,c7, 0,0,0,0,0
+ tmp3 = _mm_slli_si128(tmp3,10); //0,0,0,0,0, c1,c4,c7,
+ v.val[2] = _mm_srli_si128(tmp1,10); //b0,b3,b6,0,0, 0,0,0
+ v.val[2] = _mm_slli_si128(v.val[2],4);//0,0, b0,b3,b6,0,0,0
+ v.val[2] = _mm_or_si128(v.val[2],tmp3);//0,0, b0,b3,b6,c1,c4,c7,
+ tmp0 = _mm_srli_si128(tmp0, 12); //a2,a5,0,0,0,0,0,0
+ v.val[2] = _mm_or_si128(v.val[2],tmp0);//a2,a5,b0,b3,b6,c1,c4,c7,
+ return v;
+}
+#endif
+
+uint32x4x3_t vld3q_u32(__transfersize(12) uint32_t const * ptr); // VLD3.32 {d0, d2, d4}, [r0]
+// De-interleaving load of 12 32-bit elements: three 4-element vectors a, b, c are loaded from ptr, ptr+4, ptr+8,
+// pre-permuted, and recombined with 32/64-bit unpacks into the three output streams.
+_NEON2SSE_INLINE uint32x4x3_t vld3q_u32(__transfersize(12) uint32_t const * ptr) // VLD3.32 {d0, d2, d4}, [r0]
+{//a0,a1,a2,a3, b0,b1,b2,b3, c0,c1,c2,c3 -> a0,a3,b2,c1, a1,b0,b3,c2, a2,b1,c0,c3,
+    uint32x4x3_t out;
+    __m128i a, b, c, bc_lo, ab_hi;
+    a = vld1q_u32 (ptr); //a0,a1,a2,a3,
+    b = vld1q_u32 ((ptr + 4)); //b0,b1,b2,b3
+    c = vld1q_u32 ((ptr + 8)); //c0,c1,c2,c3,
+
+    a = _mm_shuffle_epi32(a, 0 | (3 << 2) | (1 << 4) | (2 << 6)); //a0,a3,a1,a2
+    b = _mm_shuffle_epi32(b, _SWAP_HI_LOW32); //b2,b3,b0,b1
+    c = _mm_shuffle_epi32(c, 1 | (2 << 2) | (0 << 4) | (3 << 6)); //c1,c2, c0,c3
+
+    bc_lo = _mm_unpacklo_epi32(b, c); //b2,c1, b3,c2
+    ab_hi = _mm_unpackhi_epi32(a, b); //a1,b0, a2,b1
+
+    out.val[0] = _mm_unpacklo_epi64(a, bc_lo); //a0,a3,b2,c1
+    out.val[1] = _mm_unpackhi_epi64(_mm_shuffle_epi32(ab_hi, _SWAP_HI_LOW32 ), bc_lo); //(a2,b1, a1,b0) + (b3,c2) -> a1,b0, b3,c2
+    out.val[2] = _mm_unpackhi_epi64(ab_hi, c); //a2,b1, c0,c3
+    return out;
+}
+
+// Signed 3-element structure loads forward to the unsigned implementations of the same element width.
+// NOTE(review): the casts drop the const qualifier of the prototype parameter; harmless because the callee
+// takes a pointer to const, but it may trigger cast-qualifier warnings.
+#if defined(USE_SSSE3)
+int8x16x3_t vld3q_s8(__transfersize(48) int8_t const * ptr); // VLD3.8 {d0, d2, d4}, [r0]
+#define vld3q_s8(ptr) vld3q_u8((uint8_t*) (ptr))
+
+int16x8x3_t vld3q_s16(__transfersize(24) int16_t const * ptr); // VLD3.16 {d0, d2, d4}, [r0]
+#define vld3q_s16(ptr) vld3q_u16((uint16_t*) (ptr))
+#endif
+
+int32x4x3_t vld3q_s32(__transfersize(12) int32_t const * ptr); // VLD3.32 {d0, d2, d4}, [r0]
+#define vld3q_s32(ptr) vld3q_u32((uint32_t*) (ptr))
+
+float16x8x3_t vld3q_f16(__transfersize(24) __fp16 const * ptr); // VLD3.16 {d0, d2, d4}, [r0]
+// IA32 SIMD doesn't work with 16bit floats currently, so need to go to 32 bit and then work with two 128bit registers. See vld1q_f16 for example
+
+float32x4x3_t vld3q_f32(__transfersize(12) float32_t const * ptr); // VLD3.32 {d0, d2, d4}, [r0]
+// De-interleaving load of 12 floats: three 4-float vectors a, b, c are loaded from ptr, ptr+4, ptr+8,
+// pre-permuted (through the integer shuffle), and recombined with unpack/move operations into the three output streams.
+_NEON2SSE_INLINE float32x4x3_t vld3q_f32(__transfersize(12) float32_t const * ptr) // VLD3.32 {d0, d2, d4}, [r0]
+{ //a0,a1,a2,a3, b0,b1,b2,b3, c0,c1,c2,c3 -> a0,a3,b2,c1, a1,b0,b3,c2, a2,b1,c0,c3,
+    float32x4x3_t out;
+    __m128 a, b, c, bc_lo, ab_hi, ab_hi_swapped;
+    a = vld1q_f32 (ptr); //a0,a1,a2,a3,
+    b = vld1q_f32 ((ptr + 4)); //b0,b1,b2,b3
+    c = vld1q_f32 ((ptr + 8)); //c0,c1,c2,c3,
+
+    a = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(a), 0 | (3 << 2) | (1 << 4) | (2 << 6))); //a0,a3,a1,a2
+    b = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(b), _SWAP_HI_LOW32)); //b2,b3,b0,b1
+    c = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(c), 1 | (2 << 2) | (0 << 4) | (3 << 6))); //c1,c2, c0,c3
+
+    bc_lo = _mm_unpacklo_ps(b, c); //b2,c1, b3,c2
+    ab_hi = _mm_unpackhi_ps(a, b); //a1,b0, a2,b1
+    ab_hi_swapped = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(ab_hi), _SWAP_HI_LOW32 )); //a2,b1, a1,b0,
+
+    out.val[0] = _mm_movelh_ps(a,bc_lo); //a0,a3,b2,c1
+    out.val[1] = _mm_movehl_ps(bc_lo,ab_hi_swapped); //a1,b0, b3,c2
+    out.val[2] = _mm_movehl_ps(c,ab_hi); //a2,b1, c0,c3
+    return out;
+}
+
+// Polynomial 3-element structure loads are aliases of the unsigned loads of the same element width.
+#if defined(USE_SSSE3)
+poly8x16x3_t vld3q_p8(__transfersize(48) poly8_t const * ptr); // VLD3.8 {d0, d2, d4}, [r0]
+#define vld3q_p8 vld3q_u8
+
+poly16x8x3_t vld3q_p16(__transfersize(24) poly16_t const * ptr); // VLD3.16 {d0, d2, d4}, [r0]
+#define vld3q_p16 vld3q_u16
+#endif
+
+#if defined(USE_SSSE3)
+uint8x8x3_t vld3_u8(__transfersize(24) uint8_t const * ptr); // VLD3.8 {d0, d1, d2}, [r0]
+// De-interleaving load of 24 bytes into three 8-byte streams (each returned in the low 64 bits of its register).
+// The first 16 bytes (groups a and b) come from one 128-bit load; the last 8 bytes (group c) are loaded
+// separately from ptr+16 with a 64-bit load. Previously group c was never loaded and the shuffle read the
+// uninitialised v.val[2], so every result contained garbage in the c positions.
+_NEON2SSE_INLINE uint8x8x3_t vld3_u8(__transfersize(24) uint8_t const * ptr) // VLD3.8 {d0, d1, d2}, [r0]
+{ //a0, a1,a2,a3,...a7, b0,b1,b2,b3,...b7, c0,c1,c2,c3...c7 -> a0,a3,a6,b1,b4,b7,c2,c5, a1,a4,a7,b2,b5,c0,c3,c6, a2,a5,b0,b3,b6,c1,c4,c7
+    uint8x8x3_t v;
+    __m128i ab, c, tmp0, tmp1;
+    _NEON2SSE_ALIGN_16 int8_t mask8_0[16] = {0,3,6,9,12,15, 1,4,7,10,13, 2,5,8,11,14};
+    _NEON2SSE_ALIGN_16 int8_t mask8_1[16] = {2,5, 0,3,6, 1,4,7, 0,0,0,0,0,0,0,0};
+    ab = vld1q_u8 (ptr); //a0,a1,a2,a3,...a7, b0,b1,b2,b3...b7
+    c = _mm_loadl_epi64((__m128i const*)(ptr + 16)); //c0,c1,c2,c3,...c7, 0,0,0,0,0,0,0,0
+
+    tmp0 = _mm_shuffle_epi8(ab, *(__m128i*)mask8_0); //a0,a3,a6,b1,b4,b7, a1,a4,a7,b2,b5, a2,a5,b0,b3,b6,
+    tmp1 = _mm_shuffle_epi8(c, *(__m128i*)mask8_1); //c2,c5, c0,c3,c6, c1,c4,c7,x,x,x,x,x,x,x,x
+
+    // ---- stream 0
+    v.val[0] = _mm_slli_si128(tmp0,10);
+    v.val[0] = _mm_srli_si128(v.val[0],10); //a0,a3,a6,b1,b4,b7, 0,0,0,0,0,0,0,0,0,0
+    v.val[2] = _mm_slli_si128(tmp1,6); //0,0,0,0,0,0,c2,c5,x,x,x,x,x,x,x,x
+    v.val[0] = _mm_or_si128(v.val[0],v.val[2]); //a0,a3,a6,b1,b4,b7,c2,c5 x,x,x,x,x,x,x,x
+
+    // ---- stream 1
+    v.val[1] = _mm_slli_si128(tmp0,5); //0,0,0,0,0,a0,a3,a6,b1,b4,b7, a1,a4,a7,b2,b5,
+    v.val[1] = _mm_srli_si128(v.val[1],11); //a1,a4,a7,b2,b5,0,0,0,0,0,0,0,0,0,0,0,
+    v.val[2] = _mm_srli_si128(tmp1,2); //c0,c3,c6,c1,c4,c7,x,x,x,x,x,x,x,x,0,0
+    v.val[2] = _mm_slli_si128(v.val[2],5); //0,0,0,0,0,c0,c3,c6,x,x,x,x,x,x,x,x
+    v.val[1] = _mm_or_si128(v.val[1],v.val[2]); //a1,a4,a7,b2,b5,c0,c3,c6,x,x,x,x,x,x,x,x
+
+    // ---- stream 2 (overwrites the v.val[2] scratch uses above)
+    tmp0 = _mm_srli_si128(tmp0,11); //a2,a5,b0,b3,b6,0,0,0,0,0,0,0,0,0,0,0,
+    v.val[2] = _mm_srli_si128(tmp1,5); //c1,c4,c7,x,x,x,x,x,x,x,x,0,0,0,0,0
+    v.val[2] = _mm_slli_si128(v.val[2],5); //0,0,0,0,0,c1,c4,c7,x,x,x,x,x,x,x,x
+    v.val[2] = _mm_or_si128(tmp0, v.val[2]); //a2,a5,b0,b3,b6,c1,c4,c7,x,x,x,x,x,x,x,x
+    return v;
+}
+#endif
+
+#if defined(USE_SSSE3)
+uint16x4x3_t vld3_u16(__transfersize(12) uint16_t const * ptr); // VLD3.16 {d0, d1, d2}, [r0]
+// De-interleaving load of 12 16-bit elements into three 4-element streams (each returned in the low 64 bits
+// of its register). The first 8 elements (groups a and b) come from one 128-bit load; the last 4 (group c)
+// are loaded separately from ptr+8 with a 64-bit load. Previously group c was never loaded and the shuffle
+// read the uninitialised v.val[2].
+_NEON2SSE_INLINE uint16x4x3_t vld3_u16(__transfersize(12) uint16_t const * ptr) // VLD3.16 {d0, d1, d2}, [r0]
+{ //a0,a1,a2,a3, b0,b1,b2,b3, c0,c1,c2,c3 -> a0,a3,b2,c1, a1,b0,b3,c2, a2,b1,c0,c3,
+    uint16x4x3_t v;
+    __m128i ab, c, tmp0, tmp1;
+    _NEON2SSE_ALIGN_16 int8_t mask16[16] = {0,1, 6,7, 12,13, 2,3, 8,9, 14,15, 4,5, 10,11};
+    ab = vld1q_u16 (ptr); //a0,a1,a2,a3, b0,b1,b2,b3
+    c = _mm_loadl_epi64((__m128i const*)(ptr + 8)); //c0,c1,c2,c3, 0,0,0,0
+
+    tmp0 = _mm_shuffle_epi8(ab, *(__m128i*)mask16); //a0, a3, b2,a1, b0, b3, a2, b1
+    tmp1 = _mm_shufflelo_epi16(c, 201); //11 00 10 01 : c1, c2, c0, c3, 0,0,0,0
+
+    // ---- stream 0
+    v.val[0] = _mm_slli_si128(tmp0,10);
+    v.val[0] = _mm_srli_si128(v.val[0],10); //a0, a3, b2, 0,0, 0,0,0
+    v.val[2] = _mm_slli_si128(tmp1,14); //0,0,0,0,0,0,0,c1
+    v.val[2] = _mm_srli_si128(v.val[2],8); //0,0,0,c1,0,0,0,0
+    v.val[0] = _mm_or_si128(v.val[0],v.val[2]); //a0, a3, b2, c1, x,x,x,x
+
+    // ---- stream 1
+    v.val[1] = _mm_slli_si128(tmp0,4); //0,0,a0,a3,b2,a1, b0, b3
+    v.val[1] = _mm_srli_si128(v.val[1],10); //a1, b0, b3, 0,0, 0,0,0
+    v.val[2] = _mm_srli_si128(tmp1,2); //c2, c0, c3, 0,0,0,0,0
+    v.val[2] = _mm_slli_si128(v.val[2],6); //0,0,0,c2, c0,c3,0,0
+    v.val[1] = _mm_or_si128(v.val[1],v.val[2]); //a1, b0, b3, c2, x,x,x,x
+
+    // ---- stream 2 (overwrites the v.val[2] scratch uses above)
+    tmp0 = _mm_srli_si128(tmp0,12); //a2, b1,0,0,0,0,0,0
+    tmp1 = _mm_srli_si128(tmp1,4);
+    tmp1 = _mm_slli_si128(tmp1,4); //0,0,c0, c3,
+    v.val[2] = _mm_or_si128(tmp0, tmp1); //a2, b1, c0, c3,
+    return v;
+}
+#endif
+
+uint32x2x3_t vld3_u32(__transfersize(6) uint32_t const * ptr); // VLD3.32 {d0, d1, d2}, [r0]
+// De-interleaving load of 6 32-bit elements into three 2-element streams (each returned in the low 64 bits
+// of its register). The first 4 elements come from one 128-bit load; the last 2 (c0,c1) are loaded
+// separately from ptr+4 with a 64-bit load. Previously c0,c1 were never loaded and the shift read the
+// uninitialised v.val[2].
+_NEON2SSE_INLINE uint32x2x3_t vld3_u32(__transfersize(6) uint32_t const * ptr) // VLD3.32 {d0, d1, d2}, [r0]
+{ //a0,a1, b0,b1, c0,c1, -> a0,b1, a1,c0, b0,c1
+    uint32x2x3_t v;
+    __m128i c;
+    v.val[0] = vld1q_u32 (ptr); //a0,a1, b0,b1,
+    c = _mm_loadl_epi64((__m128i const*)(ptr + 4)); //c0,c1, 0,0
+
+    v.val[0] = _mm_shuffle_epi32(v.val[0], 0 | (3 << 2) | (1 << 4) | (2 << 6)); //a0,b1, a1, b0
+    c = _mm_slli_si128(c, 8); //0, 0,c0,c1,
+    v.val[1] = _mm_unpackhi_epi32(v.val[0],c); //a1,c0, b0, c1
+    v.val[2] = _mm_srli_si128(v.val[1], 8); //b0, c1, 0, 0,
+    return v;
+}
+uint64x1x3_t vld3_u64(__transfersize(3) uint64_t const * ptr); // VLD1.64 {d0, d1, d2}, [r0]
+// Loads three consecutive 64-bit elements, one per output vector (each in the low 64 bits of its register).
+// The first two come from one 128-bit load; the third is loaded from ptr+2 with a 64-bit load.
+// Previously v.val[2] was never assigned and was returned uninitialised.
+_NEON2SSE_INLINE uint64x1x3_t vld3_u64(__transfersize(3) uint64_t const * ptr) // VLD1.64 {d0, d1, d2}, [r0]
+{
+    uint64x1x3_t v;
+    v.val[0] = vld1q_u64 (ptr); //e0, e1
+    v.val[1] = _mm_shuffle_epi32(v.val[0], _SWAP_HI_LOW32); //e1, e0
+    v.val[2] = _mm_loadl_epi64((__m128i const*)(ptr + 2)); //e2, 0
+    return v;
+}
+
+// Signed 64-bit-vector 3-element loads forward to the unsigned implementations of the same element width.
+// The macro argument is parenthesised (as the vld3q_s* macros already do) so that expression arguments are
+// cast as a whole, and the cast keeps the const qualifier of the prototypes.
+#if defined(USE_SSSE3)
+int8x8x3_t vld3_s8(__transfersize(24) int8_t const * ptr); // VLD3.8 {d0, d1, d2}, [r0]
+#define vld3_s8(ptr) vld3_u8((uint8_t const *)(ptr))
+
+int16x4x3_t vld3_s16(__transfersize(12) int16_t const * ptr); // VLD3.16 {d0, d1, d2}, [r0]
+#define vld3_s16(ptr) vld3_u16((uint16_t const *)(ptr))
+#endif
+
+int32x2x3_t vld3_s32(__transfersize(6) int32_t const * ptr); // VLD3.32 {d0, d1, d2}, [r0]
+#define vld3_s32(ptr) vld3_u32((uint32_t const *)(ptr))
+
+int64x1x3_t vld3_s64(__transfersize(3) int64_t const * ptr); // VLD1.64 {d0, d1, d2}, [r0]
+#define vld3_s64(ptr) vld3_u64((uint64_t const *)(ptr))
+
+float16x4x3_t vld3_f16(__transfersize(12) __fp16 const * ptr); // VLD3.16 {d0, d1, d2}, [r0]
+// IA32 SIMD doesn't work with 16bit floats currently, so need to go to 32 bit and then work with two 128bit registers. See vld1q_f16 for example
+
+float32x2x3_t vld3_f32(__transfersize(6) float32_t const * ptr); // VLD3.32 {d0, d1, d2}, [r0]
+// De-interleaving load of 6 floats into three 2-float streams (each returned in the low 64 bits of its register).
+// The first 4 floats come from one 128-bit load; the last 2 (c0,c1) are loaded separately from ptr+4 with a
+// 64-bit load. Previously c0,c1 were never loaded and _mm_movelh_ps read the uninitialised v.val[2].
+_NEON2SSE_INLINE float32x2x3_t vld3_f32(__transfersize(6) float32_t const * ptr)
+{ //a0,a1, b0,b1, c0,c1, -> a0,b1, a1,c0, b0,c1
+    float32x2x3_t v;
+    __m128 c;
+    v.val[0] = vld1q_f32 (ptr); //a0,a1, b0,b1,
+    c = _mm_castsi128_ps(_mm_loadl_epi64((__m128i const*)(ptr + 4))); //c0,c1, 0,0 (bit-exact 64-bit load)
+
+    v.val[0] = _mm_shuffle_ps(v.val[0],v.val[0], _MM_SHUFFLE(2,1, 3, 0)); //a0,b1, a1, b0
+    c = _mm_movelh_ps(c, c); //c0,c1,c0,c1,
+    v.val[1] = _mm_unpackhi_ps(v.val[0],c); //a1,c0, b0, c1
+    v.val[2] = _mm_movehl_ps(v.val[1], v.val[1]); //b0, c1, b0, c1,
+    return v;
+}
+
+// Polynomial 64-bit-vector 3-element loads are aliases of the unsigned loads of the same element width.
+#if defined(USE_SSSE3)
+poly8x8x3_t vld3_p8(__transfersize(24) poly8_t const * ptr); // VLD3.8 {d0, d1, d2}, [r0]
+#define vld3_p8 vld3_u8
+
+poly16x4x3_t vld3_p16(__transfersize(12) poly16_t const * ptr); // VLD3.16 {d0, d1, d2}, [r0]
+#define vld3_p16 vld3_u16
+#endif
+
+//*************** Quadruples load ********************************
+//*****************************************************************
+uint8x16x4_t vld4q_u8(__transfersize(64) uint8_t const * ptr); // VLD4.8 {d0, d2, d4, d6}, [r0]
+//De-interleaving load of 64 bytes: val[k] gathers ptr[k], ptr[k+4], ... ptr[k+60] for k = 0..3.
+//In the lane comments a, b, c, d are the four consecutive 16-byte rows of the input.
+_NEON2SSE_INLINE uint8x16x4_t vld4q_u8(__transfersize(64) uint8_t const * ptr) // VLD4.8 {d0, d2, d4, d6}, [r0]
+{
+    uint8x16x4_t res;
+    __m128i row_a, row_b, row_c, row_d;
+    __m128i ab_lo, ab_hi, cd_lo, cd_hi;
+    __m128i ab_03, ab_47, cd_03, cd_47;
+    __m128i q01, q23, q45, q67;
+
+    row_a = vld1q_u8 (ptr); //a0,a1,...a15
+    row_b = vld1q_u8 ((ptr + 16)); //b0,b1,...b15
+    row_c = vld1q_u8 ((ptr + 32)); //c0,c1,...c15
+    row_d = vld1q_u8 ((ptr + 48)); //d0,d1,...d15
+
+    //pass 1: interleave neighbouring rows byte by byte
+    ab_lo = _mm_unpacklo_epi8(row_a, row_b); //a0,b0, a1,b1, ... a7,b7
+    cd_lo = _mm_unpacklo_epi8(row_c, row_d); //c0,d0, c1,d1, ... c7,d7
+    ab_hi = _mm_unpackhi_epi8(row_a, row_b); //a8,b8, a9,b9, ... a15,b15
+    cd_hi = _mm_unpackhi_epi8(row_c, row_d); //c8,d8, c9,d9, ... c15,d15
+
+    //pass 2: bring elements i and i+8 of the same row next to each other
+    ab_03 = _mm_unpacklo_epi8(ab_lo, ab_hi); //a0,a8, b0,b8, a1,a9, b1,b9, ... a3,a11, b3,b11
+    ab_47 = _mm_unpackhi_epi8(ab_lo, ab_hi); //a4,a12, b4,b12, ... a7,a15, b7,b15
+    cd_03 = _mm_unpacklo_epi8(cd_lo, cd_hi); //c0,c8, d0,d8, ... c3,c11, d3,d11
+    cd_47 = _mm_unpackhi_epi8(cd_lo, cd_hi); //c4,c12, d4,d12, ... c7,c15, d7,d15
+
+    //pass 3: merge the a,b groups with the c,d groups in 32-bit units
+    q01 = _mm_unpacklo_epi32(ab_03, cd_03); //a0,a8, b0,b8, c0,c8, d0,d8, a1,a9, b1,b9, c1,c9, d1,d9
+    q23 = _mm_unpackhi_epi32(ab_03, cd_03); //a2,a10, b2,b10, c2,c10, d2,d10, a3,a11, b3,b11, c3,c11, d3,d11
+    q45 = _mm_unpacklo_epi32(ab_47, cd_47); //a4,a12, b4,b12, c4,c12, d4,d12, a5,a13, b5,b13, c5,c13, d5,d13
+    q67 = _mm_unpackhi_epi32(ab_47, cd_47); //a6,a14, b6,b14, c6,c14, d6,d14, a7,a15, b7,b15, c7,c15, d7,d15
+
+    //pass 4: final byte interleave yields the stride-4 sequences
+    res.val[0] = _mm_unpacklo_epi8(q01, q45); //a0,a4,a8,a12, b0,b4,b8,b12, c0,c4,c8,c12, d0,d4,d8,d12
+    res.val[1] = _mm_unpackhi_epi8(q01, q45); //a1,a5,a9,a13, b1,b5,b9,b13, c1,c5,c9,c13, d1,d5,d9,d13
+    res.val[2] = _mm_unpacklo_epi8(q23, q67); //a2,a6,a10,a14, b2,b6,b10,b14, c2,c6,c10,c14, d2,d6,d10,d14
+    res.val[3] = _mm_unpackhi_epi8(q23, q67); //a3,a7,a11,a15, b3,b7,b11,b15, c3,c7,c11,c15, d3,d7,d11,d15
+    return res;
+}
+
+uint16x8x4_t vld4q_u16(__transfersize(32) uint16_t const * ptr); // VLD4.16 {d0, d2, d4, d6}, [r0]
+//De-interleaving load of 32 halfwords: val[k] gathers ptr[k], ptr[k+4], ... ptr[k+28] for k = 0..3.
+//In the lane comments a, b, c, d are the four consecutive 8-element rows of the input.
+_NEON2SSE_INLINE uint16x8x4_t vld4q_u16(__transfersize(32) uint16_t const * ptr) // VLD4.16 {d0, d2, d4, d6}, [r0]
+{
+    uint16x8x4_t res;
+    __m128i row_a, row_b, row_c, row_d;
+    __m128i ab_lo, ab_hi, cd_lo, cd_hi;
+    __m128i ab_01, ab_23, cd_01, cd_23;
+
+    row_a = vld1q_u16 (ptr); //a0,a1,...a7
+    row_b = vld1q_u16 ((ptr + 8)); //b0,b1,...b7
+    row_c = vld1q_u16 ((ptr + 16)); //c0,c1,...c7
+    row_d = vld1q_u16 ((ptr + 24)); //d0,d1,...d7
+
+    //pass 1: interleave neighbouring rows
+    ab_lo = _mm_unpacklo_epi16(row_a, row_b); //a0,b0, a1,b1, a2,b2, a3,b3
+    cd_lo = _mm_unpacklo_epi16(row_c, row_d); //c0,d0, c1,d1, c2,d2, c3,d3
+    ab_hi = _mm_unpackhi_epi16(row_a, row_b); //a4,b4, a5,b5, a6,b6, a7,b7
+    cd_hi = _mm_unpackhi_epi16(row_c, row_d); //c4,d4, c5,d5, c6,d6, c7,d7
+
+    //pass 2: pair element i with element i+4 of the same row
+    ab_01 = _mm_unpacklo_epi16(ab_lo, ab_hi); //a0,a4, b0,b4, a1,a5, b1,b5
+    ab_23 = _mm_unpackhi_epi16(ab_lo, ab_hi); //a2,a6, b2,b6, a3,a7, b3,b7
+    cd_01 = _mm_unpacklo_epi16(cd_lo, cd_hi); //c0,c4, d0,d4, c1,c5, d1,d5
+    cd_23 = _mm_unpackhi_epi16(cd_lo, cd_hi); //c2,c6, d2,d6, c3,c7, d3,d7
+
+    //pass 3: glue the a,b halves to the c,d halves
+    res.val[0] = _mm_unpacklo_epi64(ab_01, cd_01); //a0,a4, b0,b4, c0,c4, d0,d4
+    res.val[1] = _mm_unpackhi_epi64(ab_01, cd_01); //a1,a5, b1,b5, c1,c5, d1,d5
+    res.val[2] = _mm_unpacklo_epi64(ab_23, cd_23); //a2,a6, b2,b6, c2,c6, d2,d6
+    res.val[3] = _mm_unpackhi_epi64(ab_23, cd_23); //a3,a7, b3,b7, c3,c7, d3,d7
+    return res;
+}
+
+uint32x4x4_t vld4q_u32(__transfersize(16) uint32_t const * ptr); // VLD4.32 {d0, d2, d4, d6}, [r0]
+//De-interleaving load of 16 words, i.e. a 4x4 transpose: val[k] = ptr[k], ptr[k+4], ptr[k+8], ptr[k+12] for k = 0..3.
+//In the lane comments a, b, c, d are the four consecutive 4-element rows of the input.
+_NEON2SSE_INLINE uint32x4x4_t vld4q_u32(__transfersize(16) uint32_t const * ptr) // VLD4.32 {d0, d2, d4, d6}, [r0]
+{
+    uint32x4x4_t res;
+    __m128i row_a, row_b, row_c, row_d;
+    __m128i ab_lo, ab_hi, cd_lo, cd_hi;
+
+    row_a = vld1q_u32 (ptr); //a0,a1,a2,a3
+    row_b = vld1q_u32 ((ptr + 4)); //b0,b1,b2,b3
+    row_c = vld1q_u32 ((ptr + 8)); //c0,c1,c2,c3
+    row_d = vld1q_u32 ((ptr + 12)); //d0,d1,d2,d3
+
+    ab_lo = _mm_unpacklo_epi32(row_a, row_b); //a0,b0, a1,b1
+    cd_lo = _mm_unpacklo_epi32(row_c, row_d); //c0,d0, c1,d1
+    ab_hi = _mm_unpackhi_epi32(row_a, row_b); //a2,b2, a3,b3
+    cd_hi = _mm_unpackhi_epi32(row_c, row_d); //c2,d2, c3,d3
+
+    res.val[0] = _mm_unpacklo_epi64(ab_lo, cd_lo); //a0,b0,c0,d0
+    res.val[1] = _mm_unpackhi_epi64(ab_lo, cd_lo); //a1,b1,c1,d1
+    res.val[2] = _mm_unpacklo_epi64(ab_hi, cd_hi); //a2,b2,c2,d2
+    res.val[3] = _mm_unpackhi_epi64(ab_hi, cd_hi); //a3,b3,c3,d3
+    return res;
+}
+
+//signed variants forward to the unsigned implementations; the macro argument is parenthesized so that an arbitrary pointer expression is cast as a whole
+int8x16x4_t vld4q_s8(__transfersize(64) int8_t const * ptr); // VLD4.8 {d0, d2, d4, d6}, [r0]
+#define vld4q_s8(ptr) vld4q_u8((uint8_t*)(ptr))
+
+int16x8x4_t vld4q_s16(__transfersize(32) int16_t const * ptr); // VLD4.16 {d0, d2, d4, d6}, [r0]
+#define vld4q_s16(ptr) vld4q_u16((uint16_t*)(ptr))
+
+int32x4x4_t vld4q_s32(__transfersize(16) int32_t const * ptr); // VLD4.32 {d0, d2, d4, d6}, [r0]
+#define vld4q_s32(ptr) vld4q_u32((uint32_t*)(ptr))
+
+float16x8x4_t vld4q_f16(__transfersize(32) __fp16 const * ptr); // VLD4.16 {d0, d2, d4, d6}, [r0]
+// IA32 SIMD doesn't work with 16bit floats currently, so need to go to 32 bit and then work with two 128bit registers. See vld1q_f16 for example
+
+float32x4x4_t vld4q_f32(__transfersize(16) float32_t const * ptr); // VLD4.32 {d0, d2, d4, d6}, [r0]
+//De-interleaving load of 16 floats, i.e. a 4x4 transpose: val[k] = ptr[k], ptr[k+4], ptr[k+8], ptr[k+12] for k = 0..3.
+//In the lane comments a, b, c, d are the four consecutive 4-element rows of the input.
+_NEON2SSE_INLINE float32x4x4_t vld4q_f32(__transfersize(16) float32_t const * ptr) // VLD4.32 {d0, d2, d4, d6}, [r0]
+{
+    float32x4x4_t res;
+    __m128 row_a, row_b, row_c, row_d;
+    __m128 ab_lo, ab_hi, cd_lo, cd_hi;
+
+    row_a = vld1q_f32 ((float*) ptr); //a0,a1,a2,a3
+    row_b = vld1q_f32 ((float*) (ptr + 4)); //b0,b1,b2,b3
+    row_c = vld1q_f32 ((float*) (ptr + 8)); //c0,c1,c2,c3
+    row_d = vld1q_f32 ((float*) (ptr + 12)); //d0,d1,d2,d3
+
+    ab_lo = _mm_unpacklo_ps(row_a, row_b); //a0,b0, a1,b1
+    cd_lo = _mm_unpacklo_ps(row_c, row_d); //c0,d0, c1,d1
+    ab_hi = _mm_unpackhi_ps(row_a, row_b); //a2,b2, a3,b3
+    cd_hi = _mm_unpackhi_ps(row_c, row_d); //c2,d2, c3,d3
+
+    //movelh joins the low halves, movehl (note the swapped operand order) joins the high halves
+    res.val[0] = _mm_movelh_ps(ab_lo, cd_lo); //a0,b0,c0,d0
+    res.val[1] = _mm_movehl_ps(cd_lo, ab_lo); //a1,b1,c1,d1
+    res.val[2] = _mm_movelh_ps(ab_hi, cd_hi); //a2,b2,c2,d2
+    res.val[3] = _mm_movehl_ps(cd_hi, ab_hi); //a3,b3,c3,d3
+    return res;
+}
+
+poly8x16x4_t vld4q_p8(__transfersize(64) poly8_t const * ptr); // VLD4.8 {d0, d2, d4, d6}, [r0]
+#define vld4q_p8 vld4q_u8 //plain alias: every use of vld4q_p8 is replaced by vld4q_u8
+
+poly16x8x4_t vld4q_p16(__transfersize(32) poly16_t const * ptr); // VLD4.16 {d0, d2, d4, d6}, [r0]
+#define vld4q_p16 vld4q_s16 //alias of the vld4q_s16 macro, which in turn casts the pointer and calls vld4q_u16
+
+#if defined(USE_SSSE3)
+uint8x8x4_t vld4_u8(__transfersize(32) uint8_t const * ptr); // VLD4.8 {d0, d1, d2, d3}, [r0]
+//De-interleaving load of 32 bytes: the low 64 bits of val[k] hold ptr[k], ptr[k+4], ... ptr[k+28] for k = 0..3.
+//Needs SSSE3 because of the byte shuffle.
+_NEON2SSE_INLINE uint8x8x4_t vld4_u8(__transfersize(32) uint8_t const * ptr) // VLD4.8 {d0, d1, d2, d3}, [r0]
+{
+    uint8x8x4_t res;
+    __m128i first16, second16, grouped0, grouped1;
+    //gathers every 4th byte of a register into four 32-bit groups
+    _NEON2SSE_ALIGN_16 int8_t mask4_8[16] = {0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15};
+
+    first16 = vld1q_u8(( ptr)); //bytes 0..15
+    second16 = vld1q_u8(( ptr + 16)); //bytes 16..31
+
+    grouped0 = _mm_shuffle_epi8(first16, *(__m128i*)mask4_8); //0,4,8,12, 1,5,9,13, 2,6,10,14, 3,7,11,15
+    grouped1 = _mm_shuffle_epi8(second16, *(__m128i*)mask4_8); //16,20,24,28, 17,21,25,29, 18,22,26,30, 19,23,27,31
+    res.val[0] = _mm_unpacklo_epi32(grouped0, grouped1); //0,4,8,12,16,20,24,28, 1,5,9,13,17,21,25,29
+    res.val[2] = _mm_unpackhi_epi32(grouped0, grouped1); //2,6,10,14,18,22,26,30, 3,7,11,15,19,23,27,31
+    //the odd results sit in the high halves of the even ones - swap the halves to expose them
+    res.val[1] = _mm_shuffle_epi32(res.val[0], _SWAP_HI_LOW32);
+    res.val[3] = _mm_shuffle_epi32(res.val[2], _SWAP_HI_LOW32);
+
+    return res;
+}
+#endif
+
+#if defined(USE_SSSE3)
+uint16x4x4_t vld4_u16(__transfersize(16) uint16_t const * ptr); // VLD4.16 {d0, d1, d2, d3}, [r0]
+//De-interleaving load of 16 halfwords: the low 64 bits of val[k] hold ptr[k], ptr[k+4], ptr[k+8], ptr[k+12] for k = 0..3.
+//Needs SSSE3 because of the byte shuffle.
+_NEON2SSE_INLINE uint16x4x4_t vld4_u16(__transfersize(16) uint16_t const * ptr) // VLD4.16 {d0, d1, d2, d3}, [r0]
+{
+    uint16x4x4_t res;
+    __m128i first8, second8, paired0, paired1;
+    //byte mask that reorders the eight 16-bit elements to 0, 4, 1, 5, 2, 6, 3, 7
+    _NEON2SSE_ALIGN_16 int8_t mask4_16[16] = {0,1, 8,9, 2,3, 10,11, 4,5, 12,13, 6,7, 14,15};
+
+    first8 = vld1q_u16 ( (ptr)); //elements 0..7
+    second8 = vld1q_u16 ( (ptr + 8)); //elements 8..15
+    paired0 = _mm_shuffle_epi8(first8, *(__m128i*)mask4_16); //0,4, 1,5, 2,6, 3,7
+    paired1 = _mm_shuffle_epi8(second8, *(__m128i*)mask4_16); //8,12, 9,13, 10,14, 11,15
+    res.val[0] = _mm_unpacklo_epi32(paired0, paired1); //0,4,8,12, 1,5,9,13
+    res.val[2] = _mm_unpackhi_epi32(paired0, paired1); //2,6,10,14, 3,7,11,15
+    //the odd results sit in the high halves of the even ones - swap the halves to expose them
+    res.val[1] = _mm_shuffle_epi32(res.val[0], _SWAP_HI_LOW32);
+    res.val[3] = _mm_shuffle_epi32(res.val[2], _SWAP_HI_LOW32);
+    return res;
+}
+#endif
+
+uint32x2x4_t vld4_u32(__transfersize(8) uint32_t const * ptr); // VLD4.32 {d0, d1, d2, d3}, [r0]
+//De-interleaving load of 8 words: the low 64 bits of val[k] hold ptr[k], ptr[k+4] for k = 0..3.
+_NEON2SSE_INLINE uint32x2x4_t vld4_u32(__transfersize(8) uint32_t const * ptr)
+{ //a0,a1, b0,b1, c0,c1, d0,d1 -> a0,c0, a1,c1, b0,d0, b1,d1
+    uint32x2x4_t res; //same type as the declared return type
+    __m128i ab, cd;
+    ab = vld1q_u32 (ptr); //a0,a1, b0,b1,
+    cd = vld1q_u32 ((ptr + 4)); //c0,c1, d0,d1
+    res.val[0] = _mm_unpacklo_epi32(ab, cd); //a0, c0, a1,c1,
+    res.val[2] = _mm_unpackhi_epi32(ab, cd); //b0,d0, b1, d1
+    res.val[1] = _mm_shuffle_epi32(res.val[0],_SWAP_HI_LOW32); //a1,c1, a0, c0,
+    res.val[3] = _mm_shuffle_epi32(res.val[2],_SWAP_HI_LOW32); //b1, d1,b0,d0,
+    return res;
+}
+
+uint64x1x4_t vld4_u64(__transfersize(4) uint64_t const * ptr); // VLD1.64 {d0, d1, d2, d3}, [r0]
+//Loads four consecutive 64-bit values: ptr[k] ends up in the low 64 bits of val[k] for k = 0..3
+_NEON2SSE_INLINE uint64x1x4_t vld4_u64(__transfersize(4) uint64_t const * ptr) // VLD1.64 {d0, d1, d2, d3}, [r0]
+{
+    uint64x1x4_t v;
+    v.val[0] = vld1q_u64( (ptr)); //ptr[0], ptr[1]
+    v.val[1] = _mm_shuffle_epi32(v.val[0], _SWAP_HI_LOW32); //ptr[1], ptr[0] - the 128-bit load alone does not fill val[1]
+    v.val[2] = vld1q_u64( (ptr + 2)); //ptr[2], ptr[3]
+    v.val[3] = _mm_shuffle_epi32(v.val[2], _SWAP_HI_LOW32); //ptr[3], ptr[2]
+    return v;
+}
+
+#if defined(USE_SSSE3)
+//signed variants forward to the unsigned implementations; the macro argument is parenthesized so that an arbitrary pointer expression is cast as a whole
+int8x8x4_t vld4_s8(__transfersize(32) int8_t const * ptr); // VLD4.8 {d0, d1, d2, d3}, [r0]
+#define vld4_s8(ptr) vld4_u8((uint8_t*)(ptr))
+
+int16x4x4_t vld4_s16(__transfersize(16) int16_t const * ptr); // VLD4.16 {d0, d1, d2, d3}, [r0]
+#define vld4_s16(ptr) vld4_u16((uint16_t*)(ptr))
+#endif
+
+int32x2x4_t vld4_s32(__transfersize(8) int32_t const * ptr); // VLD4.32 {d0, d1, d2, d3}, [r0]
+#define vld4_s32(ptr) vld4_u32((uint32_t*)(ptr))
+
+int64x1x4_t vld4_s64(__transfersize(4) int64_t const * ptr); // VLD1.64 {d0, d1, d2, d3}, [r0]
+#define vld4_s64(ptr) vld4_u64((uint64_t*)(ptr))
+
+float16x4x4_t vld4_f16(__transfersize(16) __fp16 const * ptr); // VLD4.16 {d0, d1, d2, d3}, [r0]
+// IA32 SIMD doesn't work with 16bit floats currently, so need to go to 32 bit and then work with two 128bit registers. See vld1q_f16 for example
+
+float32x2x4_t vld4_f32(__transfersize(8) float32_t const * ptr); // VLD4.32 {d0, d1, d2, d3}, [r0]
+//De-interleaving load of 8 floats: the low 64 bits of val[k] hold ptr[k], ptr[k+4] for k = 0..3.
+_NEON2SSE_INLINE float32x2x4_t vld4_f32(__transfersize(8) float32_t const * ptr) // VLD4.32 {d0, d1, d2, d3}, [r0]
+{ //a0,a1, b0,b1, c0,c1, d0,d1 -> a0,c0, a1,c1, b0,d0, b1,d1
+    float32x2x4_t res;
+    __m128 ab, cd;
+    ab = vld1q_f32 ((float*) ptr); //a0,a1, b0,b1
+    cd = vld1q_f32 ((float*) (ptr + 4)); //c0,c1, d0,d1
+    res.val[0] = _mm_unpacklo_ps(ab, cd); //a0,c0, a1,c1
+    res.val[2] = _mm_unpackhi_ps(ab, cd); //b0,d0, b1,d1
+    //movehl with identical operands copies the high half into both halves
+    res.val[1] = _mm_movehl_ps(res.val[0], res.val[0]); //a1,c1, a1,c1
+    res.val[3] = _mm_movehl_ps(res.val[2], res.val[2]); //b1,d1, b1,d1
+    return res;
+}
+
+#if defined(USE_SSSE3)
+poly8x8x4_t vld4_p8(__transfersize(32) poly8_t const * ptr); // VLD4.8 {d0, d1, d2, d3}, [r0]
+#define vld4_p8 vld4_u8 //plain alias: every use of vld4_p8 is replaced by vld4_u8
+
+poly16x4x4_t vld4_p16(__transfersize(16) poly16_t const * ptr); // VLD4.16 {d0, d1, d2, d3}, [r0]
+#define vld4_p16 vld4_u16 //plain alias: every use of vld4_p16 is replaced by vld4_u16
+#endif //USE_SSSE3
+
+//************* Duplicate (or propagate) ptr[0] to all val[0] lanes and ptr[1] to all val[1] lanes *******************
+//*******************************************************************************************************************
+uint8x8x2_t vld2_dup_u8(__transfersize(2) uint8_t const * ptr); // VLD2.8 {d0[], d1[]}, [r0]
+//Broadcasts ptr[0] to the eight low bytes of val[0] and ptr[1] to the eight low bytes of val[1].
+//NOTE(review): LOAD_SI128 presumably fetches 16 bytes although only 2 are consumed - confirm the source buffer is large enough.
+_NEON2SSE_INLINE uint8x8x2_t vld2_dup_u8(__transfersize(2) uint8_t const * ptr) // VLD2.8 {d0[], d1[]}, [r0]
+{
+    uint8x8x2_t res;
+    __m128i raw, twice, four_times;
+    raw = LOAD_SI128(ptr); //0,1,x,x, x,x,x,x, x,x,x,x, x,x,x,x
+    //each self-unpack doubles the run length of every element
+    twice = _mm_unpacklo_epi8(raw, raw); //0,0,1,1, x,x,x,x, ...
+    four_times = _mm_unpacklo_epi16(twice, twice); //0,0,0,0, 1,1,1,1, x,x,x,x, x,x,x,x
+    res.val[0] = _mm_unpacklo_epi32(four_times, four_times); //0 x8, 1 x8
+    res.val[1] = _mm_shuffle_epi32(res.val[0], _SWAP_HI_LOW32); //1 x8, 0 x8
+    return res;
+}
+
+uint16x4x2_t vld2_dup_u16(__transfersize(2) uint16_t const * ptr); // VLD2.16 {d0[], d1[]}, [r0]
+//Broadcasts ptr[0] to the four low halfwords of val[0] and ptr[1] to the four low halfwords of val[1].
+//NOTE(review): LOAD_SI128 presumably fetches 16 bytes although only 4 are consumed - confirm the source buffer is large enough.
+_NEON2SSE_INLINE uint16x4x2_t vld2_dup_u16(__transfersize(2) uint16_t const * ptr) // VLD2.16 {d0[], d1[]}, [r0]
+{
+    uint16x4x2_t res;
+    __m128i raw;
+    raw = LOAD_SI128(ptr); //0,1,x,x, x,x,x,x
+    res.val[0] = _mm_shufflelo_epi16(raw, 0x00); //selector 00 00 00 00: element 0 everywhere in the low half
+    res.val[1] = _mm_shufflelo_epi16(raw, 0x55); //selector 01 01 01 01: element 1 everywhere in the low half
+    return res;
+}
+
+uint32x2x2_t vld2_dup_u32(__transfersize(2) uint32_t const * ptr); // VLD2.32 {d0[], d1[]}, [r0]
+//Broadcasts ptr[0] to the two low words of val[0] and ptr[1] to the two low words of val[1].
+//NOTE(review): LOAD_SI128 presumably fetches 16 bytes although only 8 are consumed - confirm the source buffer is large enough.
+_NEON2SSE_INLINE uint32x2x2_t vld2_dup_u32(__transfersize(2) uint32_t const * ptr) // VLD2.32 {d0[], d1[]}, [r0]
+{
+    uint32x2x2_t res;
+    __m128i raw;
+    raw = LOAD_SI128(ptr); //0,1,x,x
+    res.val[0] = _mm_shuffle_epi32(raw, _MM_SHUFFLE(1, 1, 0, 0)); //0,0,1,1
+    res.val[1] = _mm_srli_si128(res.val[0], 8); //1,1,0x0,0x0 - byte shift brings the upper pair down and zero-fills
+    return res;
+}
+
+uint64x1x2_t vld2_dup_u64(__transfersize(2) uint64_t const * ptr); // VLD1.64 {d0, d1}, [r0]
+#define vld2_dup_u64 vld2_u64 //plain alias of the ordinary two-element load
+
+//signed variants forward to the unsigned implementations; the macro argument is parenthesized so that an arbitrary pointer expression is cast as a whole
+int8x8x2_t vld2_dup_s8(__transfersize(2) int8_t const * ptr); // VLD2.8 {d0[], d1[]}, [r0]
+#define vld2_dup_s8(ptr) vld2_dup_u8((uint8_t*)(ptr))
+
+int16x4x2_t vld2_dup_s16(__transfersize(2) int16_t const * ptr); // VLD2.16 {d0[], d1[]}, [r0]
+#define vld2_dup_s16(ptr) vld2_dup_u16((uint16_t*)(ptr))
+
+int32x2x2_t vld2_dup_s32(__transfersize(2) int32_t const * ptr); // VLD2.32 {d0[], d1[]}, [r0]
+#define vld2_dup_s32(ptr) vld2_dup_u32((uint32_t*)(ptr))
+
+int64x1x2_t vld2_dup_s64(__transfersize(2) int64_t const * ptr); // VLD1.64 {d0, d1}, [r0]
+#define vld2_dup_s64(ptr) vld2_dup_u64((uint64_t*)(ptr))
+
+float16x4x2_t vld2_dup_f16(__transfersize(2) __fp16 const * ptr); // VLD2.16 {d0[], d1[]}, [r0]
+// IA32 SIMD doesn't work with 16bit floats currently, so need to go to 32 bit and then work with two 128bit registers. See vld1q_f16 for example
+
+float32x2x2_t vld2_dup_f32(__transfersize(2) float32_t const * ptr); // VLD2.32 {d0[], d1[]}, [r0]
+//Broadcasts ptr[0] to the two low floats of val[0] and ptr[1] to the two low floats of val[1].
+_NEON2SSE_INLINE float32x2x2_t vld2_dup_f32(__transfersize(2) float32_t const * ptr) // VLD2.32 {d0[], d1[]}, [r0]
+{
+    float32x2x2_t res;
+    __m128 raw;
+    raw = vld1q_f32(ptr); //0,1,x,x
+    res.val[0] = _mm_moveldup_ps(raw); //0,0,x,x - even elements duplicated
+    res.val[1] = _mm_movehdup_ps(raw); //1,1,x,x - odd elements duplicated
+    return res;
+}
+
+poly8x8x2_t vld2_dup_p8(__transfersize(2) poly8_t const * ptr); // VLD2.8 {d0[], d1[]}, [r0]
+#define vld2_dup_p8 vld2_dup_u8 //plain alias: every use of vld2_dup_p8 is replaced by vld2_dup_u8
+
+poly16x4x2_t vld2_dup_p16(__transfersize(2) poly16_t const * ptr); // VLD2.16 {d0[], d1[]}, [r0]
+#define vld2_dup_p16 vld2_dup_s16 //alias of the vld2_dup_s16 macro, which in turn casts the pointer and calls vld2_dup_u16
+
+//************* Duplicate (or propagate)triplets: *******************
+//********************************************************************
+//ptr[0] to all val[0] lanes, ptr[1] to all val[1] lanes and ptr[2] to all val[2] lanes
+uint8x8x3_t vld3_dup_u8(__transfersize(3) uint8_t const * ptr); // VLD3.8 {d0[], d1[], d2[]}, [r0]
+//Broadcasts ptr[k] to the eight low bytes of val[k] for k = 0..2.
+//NOTE(review): LOAD_SI128 presumably fetches 16 bytes although only 3 are consumed - confirm the source buffer is large enough.
+_NEON2SSE_INLINE uint8x8x3_t vld3_dup_u8(__transfersize(3) uint8_t const * ptr) // VLD3.8 {d0[], d1[], d2[]}, [r0]
+{
+    uint8x8x3_t res;
+    __m128i raw, twice, four_times;
+    raw = LOAD_SI128(ptr); //0,1,2,x, x,x,x,x, x,x,x,x, x,x,x,x
+    //each self-unpack doubles the run length of every element
+    twice = _mm_unpacklo_epi8(raw, raw); //0,0,1,1, 2,2,x,x, ...
+    four_times = _mm_unpacklo_epi16(twice, twice); //0,0,0,0, 1,1,1,1, 2,2,2,2, x,x,x,x
+    res.val[0] = _mm_unpacklo_epi32(four_times, four_times); //0 x8, 1 x8
+    res.val[2] = _mm_unpackhi_epi32(four_times, four_times); //2 x8, x x8
+    res.val[1] = _mm_shuffle_epi32(res.val[0], _SWAP_HI_LOW32); //1 x8, 0 x8
+    return res;
+}
+
+uint16x4x3_t vld3_dup_u16(__transfersize(3) uint16_t const * ptr); // VLD3.16 {d0[], d1[], d2[]}, [r0]
+//Broadcasts ptr[k] to the four low halfwords of val[k] for k = 0..2.
+//NOTE(review): LOAD_SI128 presumably fetches 16 bytes although only 6 are consumed - confirm the source buffer is large enough.
+_NEON2SSE_INLINE uint16x4x3_t vld3_dup_u16(__transfersize(3) uint16_t const * ptr) // VLD3.16 {d0[], d1[], d2[]}, [r0]
+{
+    uint16x4x3_t res;
+    __m128i raw;
+    raw = LOAD_SI128(ptr); //0,1,2,x, x,x,x,x
+    res.val[0] = _mm_shufflelo_epi16(raw, 0x00); //selector 00 00 00 00 (all 0)
+    res.val[1] = _mm_shufflelo_epi16(raw, 0x55); //selector 01 01 01 01 (all 1)
+    res.val[2] = _mm_shufflelo_epi16(raw, 0xAA); //selector 10 10 10 10 (all 2)
+    return res;
+}
+
+uint32x2x3_t vld3_dup_u32(__transfersize(3) uint32_t const * ptr); // VLD3.32 {d0[], d1[], d2[]}, [r0]
+//Broadcasts ptr[k] to the two low words of val[k] for k = 0..2.
+//NOTE(review): LOAD_SI128 presumably fetches 16 bytes although only 12 are consumed - confirm the source buffer is large enough.
+_NEON2SSE_INLINE uint32x2x3_t vld3_dup_u32(__transfersize(3) uint32_t const * ptr) // VLD3.32 {d0[], d1[], d2[]}, [r0]
+{
+    uint32x2x3_t res;
+    __m128i raw;
+    raw = LOAD_SI128(ptr); //0,1,2,x
+    res.val[0] = _mm_shuffle_epi32(raw, _MM_SHUFFLE(2, 2, 0, 0)); //0,0,2,2
+    res.val[1] = _mm_shuffle_epi32(raw, _MM_SHUFFLE(2, 2, 1, 1)); //1,1,2,2
+    res.val[2] = _mm_srli_si128(res.val[0], 8); //2,2,0x0,0x0 - byte shift brings the upper pair down and zero-fills
+    return res;
+}
+
+uint64x1x3_t vld3_dup_u64(__transfersize(3) uint64_t const * ptr); // VLD1.64 {d0, d1, d2}, [r0]
+//Loads three consecutive 64-bit values: ptr[k] ends up in the low 64 bits of val[k] for k = 0..2
+_NEON2SSE_INLINE uint64x1x3_t vld3_dup_u64(__transfersize(3) uint64_t const * ptr) // VLD1.64 {d0, d1, d2}, [r0]
+{
+    uint64x1x3_t v;
+    v.val[0] = LOAD_SI128(ptr);//0,1,
+    v.val[1] = _mm_shuffle_epi32(v.val[0], _SWAP_HI_LOW32); //1,0
+    v.val[2] = _mm_loadl_epi64((__m128i const*)(ptr + 2)); //2,0x0 - a 64-bit load, so the element after the third one is never touched
+    return v;
+}
+
+//signed variants forward to the unsigned implementations; the macro argument is parenthesized so that an arbitrary pointer expression is cast as a whole
+int8x8x3_t vld3_dup_s8(__transfersize(3) int8_t const * ptr); // VLD3.8 {d0[], d1[], d2[]}, [r0]
+#define vld3_dup_s8(ptr) vld3_dup_u8((uint8_t*)(ptr))
+
+int16x4x3_t vld3_dup_s16(__transfersize(3) int16_t const * ptr); // VLD3.16 {d0[], d1[], d2[]}, [r0]
+#define vld3_dup_s16(ptr) vld3_dup_u16((uint16_t*)(ptr))
+
+int32x2x3_t vld3_dup_s32(__transfersize(3) int32_t const * ptr); // VLD3.32 {d0[], d1[], d2[]}, [r0]
+#define vld3_dup_s32(ptr) vld3_dup_u32((uint32_t*)(ptr))
+
+int64x1x3_t vld3_dup_s64(__transfersize(3) int64_t const * ptr); // VLD1.64 {d0, d1, d2}, [r0]
+#define vld3_dup_s64(ptr) vld3_dup_u64((uint64_t*)(ptr))
+
+float16x4x3_t vld3_dup_f16(__transfersize(3) __fp16 const * ptr); // VLD3.16 {d0[], d1[], d2[]}, [r0]
+// IA32 SIMD doesn't work with 16bit floats currently, so need to go to 32 bit and then work with two 128bit registers. See vld1q_f16 for example
+
+float32x2x3_t vld3_dup_f32(__transfersize(3) float32_t const * ptr); // VLD3.32 {d0[], d1[], d2[]}, [r0]
+//Broadcasts ptr[k] to the two low floats of val[k] for k = 0..2.
+_NEON2SSE_INLINE float32x2x3_t vld3_dup_f32(__transfersize(3) float32_t const * ptr) // VLD3.32 {d0[], d1[], d2[]}, [r0]
+{
+    float32x2x3_t res;
+    __m128 raw;
+    raw = vld1q_f32(ptr); //0,1,2,x
+    res.val[0] = _mm_moveldup_ps(raw); //0,0,2,2 - even elements duplicated
+    res.val[1] = _mm_movehdup_ps(raw); //1,1,x,x - odd elements duplicated
+    res.val[2] = _mm_movehl_ps(res.val[0], res.val[0]); //2,2,2,2 - high half of val[0] copied down
+    return res;
+}
+
+poly8x8x3_t vld3_dup_p8(__transfersize(3) poly8_t const * ptr); // VLD3.8 {d0[], d1[], d2[]}, [r0]
+#define vld3_dup_p8 vld3_dup_u8 //plain alias: every use of vld3_dup_p8 is replaced by vld3_dup_u8
+
+poly16x4x3_t vld3_dup_p16(__transfersize(3) poly16_t const * ptr); // VLD3.16 {d0[], d1[], d2[]}, [r0]
+#define vld3_dup_p16 vld3_dup_s16 //alias of the vld3_dup_s16 macro, which in turn casts the pointer and calls vld3_dup_u16
+
+//************* Duplicate (or propagate) quadruples: *******************
+//***********************************************************************
+//ptr[0] to all val[0] lanes, ptr[1] to all val[1] lanes, ptr[2] to all val[2] lanes and ptr[3] to all val[3] lanes
+uint8x8x4_t vld4_dup_u8(__transfersize(4) uint8_t const * ptr); // VLD4.8 {d0[], d1[], d2[], d3[]}, [r0]
+//Broadcasts ptr[k] to the eight low bytes of val[k] for k = 0..3.
+//NOTE(review): LOAD_SI128 presumably fetches 16 bytes although only 4 are consumed - confirm the source buffer is large enough.
+_NEON2SSE_INLINE uint8x8x4_t vld4_dup_u8(__transfersize(4) uint8_t const * ptr) // VLD4.8 {d0[], d1[], d2[], d3[]}, [r0]
+{
+    uint8x8x4_t res;
+    __m128i raw, twice, four_times;
+    raw = LOAD_SI128(ptr); //0,1,2,3, x,x,x,x, x,x,x,x, x,x,x,x
+    //each self-unpack doubles the run length of every element
+    twice = _mm_unpacklo_epi8(raw, raw); //0,0,1,1, 2,2,3,3, x,...
+    four_times = _mm_unpacklo_epi16(twice, twice); //0,0,0,0, 1,1,1,1, 2,2,2,2, 3,3,3,3
+    res.val[0] = _mm_unpacklo_epi32(four_times, four_times); //0 x8, 1 x8
+    res.val[2] = _mm_unpackhi_epi32(four_times, four_times); //2 x8, 3 x8
+    //the odd results sit in the high halves of the even ones - swap the halves to expose them
+    res.val[1] = _mm_shuffle_epi32(res.val[0], _SWAP_HI_LOW32); //1 x8, 0 x8
+    res.val[3] = _mm_shuffle_epi32(res.val[2], _SWAP_HI_LOW32); //3 x8, 2 x8
+    return res;
+}
+
+uint16x4x4_t vld4_dup_u16(__transfersize(4) uint16_t const * ptr); // VLD4.16 {d0[], d1[], d2[], d3[]}, [r0]
+//Broadcasts ptr[k] to the four low halfwords of val[k] for k = 0..3.
+//NOTE(review): LOAD_SI128 presumably fetches 16 bytes although only 8 are consumed - confirm the source buffer is large enough.
+_NEON2SSE_INLINE uint16x4x4_t vld4_dup_u16(__transfersize(4) uint16_t const * ptr) // VLD4.16 {d0[], d1[], d2[], d3[]}, [r0]
+{
+    uint16x4x4_t res;
+    __m128i raw;
+    raw = LOAD_SI128(ptr); //0,1,2,3, x,x,x,x
+    res.val[0] = _mm_shufflelo_epi16(raw, 0x00); //selector 00 00 00 00 (all 0)
+    res.val[1] = _mm_shufflelo_epi16(raw, 0x55); //selector 01 01 01 01 (all 1)
+    res.val[2] = _mm_shufflelo_epi16(raw, 0xAA); //selector 10 10 10 10 (all 2)
+    res.val[3] = _mm_shufflelo_epi16(raw, 0xFF); //selector 11 11 11 11 (all 3)
+    return res;
+}
+
+uint32x2x4_t vld4_dup_u32(__transfersize(4) uint32_t const * ptr); // VLD4.32 {d0[], d1[], d2[], d3[]}, [r0]
+//Broadcasts ptr[k] to the two low words of val[k] for k = 0..3; the upper words are leftovers of the shuffle.
+_NEON2SSE_INLINE uint32x2x4_t vld4_dup_u32(__transfersize(4) uint32_t const * ptr) // VLD4.32 {d0[], d1[], d2[], d3[]}, [r0]
+{
+    uint32x2x4_t res;
+    __m128i raw;
+    raw = LOAD_SI128(ptr) ; //0,1,2,3
+    res.val[0] = _mm_shuffle_epi32(raw, _MM_SHUFFLE(3, 2, 0, 0)); //0,0,2,3
+    res.val[1] = _mm_shuffle_epi32(raw, _MM_SHUFFLE(3, 2, 1, 1)); //1,1,2,3
+    res.val[2] = _mm_shuffle_epi32(raw, _MM_SHUFFLE(3, 3, 2, 2)); //2,2,3,3
+    res.val[3] = _mm_shuffle_epi32(raw, _MM_SHUFFLE(3, 3, 3, 3)); //3,3,3,3
+    return res;
+}
+
+uint64x1x4_t vld4_dup_u64(__transfersize(4) uint64_t const * ptr); // VLD1.64 {d0, d1, d2, d3}, [r0]
+//Loads four consecutive 64-bit values: ptr[k] ends up in the low 64 bits of val[k] for k = 0..3
+_NEON2SSE_INLINE uint64x1x4_t vld4_dup_u64(__transfersize(4) uint64_t const * ptr) // VLD1.64 {d0, d1, d2, d3}, [r0]
+{
+    uint64x1x4_t res;
+    //two 128-bit loads cover all four elements
+    res.val[0] = LOAD_SI128(ptr); //0,1
+    res.val[2] = LOAD_SI128((ptr + 2)); //2,3
+    //the odd elements are exposed by swapping the 64-bit halves
+    res.val[1] = _mm_shuffle_epi32(res.val[0], _SWAP_HI_LOW32); //1,0
+    res.val[3] = _mm_shuffle_epi32(res.val[2], _SWAP_HI_LOW32); //3,2
+    return res;
+}
+
+//signed variants forward to the unsigned implementations; the macro argument is parenthesized so that an arbitrary pointer expression is cast as a whole
+int8x8x4_t vld4_dup_s8(__transfersize(4) int8_t const * ptr); // VLD4.8 {d0[], d1[], d2[], d3[]}, [r0]
+#define vld4_dup_s8(ptr) vld4_dup_u8((uint8_t*)(ptr))
+
+int16x4x4_t vld4_dup_s16(__transfersize(4) int16_t const * ptr); // VLD4.16 {d0[], d1[], d2[], d3[]}, [r0]
+#define vld4_dup_s16(ptr) vld4_dup_u16((uint16_t*)(ptr))
+
+int32x2x4_t vld4_dup_s32(__transfersize(4) int32_t const * ptr); // VLD4.32 {d0[], d1[], d2[], d3[]}, [r0]
+#define vld4_dup_s32(ptr) vld4_dup_u32((uint32_t*)(ptr))
+
+int64x1x4_t vld4_dup_s64(__transfersize(4) int64_t const * ptr); // VLD1.64 {d0, d1, d2, d3}, [r0]
+#define vld4_dup_s64(ptr) vld4_dup_u64((uint64_t*)(ptr))
+
+float16x4x4_t vld4_dup_f16(__transfersize(4) __fp16 const * ptr); // VLD4.16 {d0[], d1[], d2[], d3[]}, [r0]
+// IA32 SIMD doesn't work with 16bit floats currently, so need to go to 32 bit and then work with two 128bit registers. See vld1q_f16 for example
+
+float32x2x4_t vld4_dup_f32(__transfersize(4) float32_t const * ptr); // VLD4.32 {d0[], d1[], d2[], d3[]}, [r0]
+//Broadcasts ptr[k] to the two low floats of val[k] for k = 0..3.
+_NEON2SSE_INLINE float32x2x4_t vld4_dup_f32(__transfersize(4) float32_t const * ptr) // VLD4.32 {d0[], d1[], d2[], d3[]}, [r0]
+{
+    float32x2x4_t res;
+    __m128 raw, evens, odds;
+    raw = vld1q_f32(ptr); //0,1,2,3
+    evens = _mm_moveldup_ps(raw); //0,0,2,2
+    odds = _mm_movehdup_ps(raw); //1,1,3,3
+    res.val[0] = evens;
+    res.val[1] = odds;
+    //movehl with identical operands copies the high half into both halves
+    res.val[2] = _mm_movehl_ps(evens, evens); //2,2,2,2
+    res.val[3] = _mm_movehl_ps(odds, odds); //3,3,3,3
+    return res;
+}
+
+poly8x8x4_t vld4_dup_p8(__transfersize(4) poly8_t const * ptr); // VLD4.8 {d0[], d1[], d2[], d3[]}, [r0]
+#define vld4_dup_p8 vld4_dup_u8 //plain alias: every use of vld4_dup_p8 is replaced by vld4_dup_u8
+
+poly16x4x4_t vld4_dup_p16(__transfersize(4) poly16_t const * ptr); // VLD4.16 {d0[], d1[], d2[], d3[]}, [r0]
+#define vld4_dup_p16 vld4_dup_u16 //plain alias: every use of vld4_dup_p16 is replaced by vld4_dup_u16
+
+//**********************************************************************************
+//*******************Lane loads for an N-element structures ***********************
+//**********************************************************************************
+//********************** Lane pairs ************************************************
+//does vld1_lane_xx ptr[0] to src->val[0] at lane position and ptr[1] to src->val[1] at lane position
+//we assume src is 16-byte aligned
+
+//!!!!!! The Microsoft compiler does not allow xxxxxx_2t function arguments passed by value; it fails with the "formal parameter with __declspec(align('16')) won't be aligned" error.
+//To work around it, all the functions below take xxxxxx_2t pointers, and the corresponding original functions are redefined as macros that pass the address of src
+
+//uint16x8x2_t vld2q_lane_u16(__transfersize(2) uint16_t const * ptr, uint16x8x2_t src,__constrange(0,7) int lane);// VLD2.16 {d0[0], d2[0]}, [r0]
+//Returns a copy of *src with element "lane" of val[0] taken from ptr[0] and element "lane" of val[1] taken from ptr[1]; *src is not modified
+_NEON2SSE_INLINE uint16x8x2_t vld2q_lane_u16_ptr(__transfersize(2) uint16_t const * ptr, uint16x8x2_t* src,__constrange(0,7) int lane) // VLD2.16 {d0[0], d2[0]}, [r0]
+{
+    uint16x8x2_t res = *src; //start from the caller's registers, then patch one lane in each
+    res.val[0] = vld1q_lane_s16 (ptr, res.val[0], lane);
+    res.val[1] = vld1q_lane_s16 ((ptr + 1), res.val[1], lane);
+    return res;
+}
+#define vld2q_lane_u16(ptr, src, lane) vld2q_lane_u16_ptr(ptr, &src, lane)
+
+//uint32x4x2_t vld2q_lane_u32(__transfersize(2) uint32_t const * ptr, uint32x4x2_t src,__constrange(0,3) int lane);// VLD2.32 {d0[0], d2[0]}, [r0]
+//Returns a copy of *src with element "lane" of val[0] taken from ptr[0] and element "lane" of val[1] taken from ptr[1]; *src is not modified
+_NEON2SSE_INLINE uint32x4x2_t vld2q_lane_u32_ptr(__transfersize(2) uint32_t const * ptr, uint32x4x2_t* src,__constrange(0,3) int lane) // VLD2.32 {d0[0], d2[0]}, [r0]
+{
+    uint32x4x2_t res = *src; //start from the caller's registers, then patch one lane in each
+    res.val[0] = _MM_INSERT_EPI32 (res.val[0], ptr[0], lane);
+    res.val[1] = _MM_INSERT_EPI32 (res.val[1], ptr[1], lane);
+    return res;
+}
+#define vld2q_lane_u32(ptr, src, lane) vld2q_lane_u32_ptr(ptr, &src, lane)
+
+//int16x8x2_t vld2q_lane_s16(__transfersize(2) int16_t const * ptr, int16x8x2_t src, __constrange(0,7)int lane);// VLD2.16 {d0[0], d2[0]}, [r0]
+//Returns a copy of *src with element "lane" of val[0] taken from ptr[0] and element "lane" of val[1] taken from ptr[1]; *src is not modified
+_NEON2SSE_INLINE int16x8x2_t vld2q_lane_s16_ptr(__transfersize(2) int16_t const * ptr, int16x8x2_t* src, __constrange(0,7) int lane)
+{
+    int16x8x2_t res = *src; //start from the caller's registers, then patch one lane in each
+    res.val[0] = vld1q_lane_s16 (ptr, res.val[0], lane);
+    res.val[1] = vld1q_lane_s16 ((ptr + 1), res.val[1], lane);
+    return res;
+}
+#define vld2q_lane_s16(ptr, src, lane) vld2q_lane_s16_ptr(ptr, &src, lane)
+
+//int32x4x2_t vld2q_lane_s32(__transfersize(2) int32_t const * ptr, int32x4x2_t src, __constrange(0,3)int lane);// VLD2.32 {d0[0], d2[0]}, [r0]
+//Returns a copy of *src with element "lane" of val[0] taken from ptr[0] and element "lane" of val[1] taken from ptr[1]; *src is not modified
+_NEON2SSE_INLINE int32x4x2_t vld2q_lane_s32_ptr(__transfersize(2) int32_t const * ptr, int32x4x2_t* src, __constrange(0,3) int lane)
+{
+    int32x4x2_t res = *src; //start from the caller's registers, then patch one lane in each
+    res.val[0] = _MM_INSERT_EPI32 (res.val[0], ptr[0], lane);
+    res.val[1] = _MM_INSERT_EPI32 (res.val[1], ptr[1], lane);
+    return res;
+}
+#define vld2q_lane_s32(ptr, src, lane) vld2q_lane_s32_ptr(ptr, &src, lane)
+
+//float16x8x2_t vld2q_lane_f16(__transfersize(2) __fp16 const * ptr, float16x8x2_t src, __constrange(0,7)int lane);// VLD2.16 {d0[0], d2[0]}, [r0]
+//current IA SIMD doesn't support float16
+
+//float32x4x2_t vld2q_lane_f32(__transfersize(2) float32_t const * ptr, float32x4x2_t src,__constrange(0,3) int lane);// VLD2.32 {d0[0], d2[0]}, [r0]
+//Returns a copy of *src with element "lane" of val[0] taken from ptr[0] and element "lane" of val[1] taken from ptr[1]; *src is not modified
+_NEON2SSE_INLINE float32x4x2_t vld2q_lane_f32_ptr(__transfersize(2) float32_t const * ptr, float32x4x2_t* src,__constrange(0,3) int lane) // VLD2.32 {d0[0], d2[0]}, [r0]
+{
+    float32x4x2_t res = *src; //start from the caller's registers, then patch one lane in each
+    res.val[0] = vld1q_lane_f32(ptr, res.val[0], lane);
+    res.val[1] = vld1q_lane_f32((ptr + 1), res.val[1], lane);
+    return res;
+}
+#define vld2q_lane_f32(ptr, src, lane) vld2q_lane_f32_ptr(ptr, &src, lane)
+
+//poly16x8x2_t vld2q_lane_p16(__transfersize(2) poly16_t const * ptr, poly16x8x2_t src,__constrange(0,7) int lane);// VLD2.16 {d0[0], d2[0]}, [r0]
+#define vld2q_lane_p16 vld2q_lane_u16 //alias of the vld2q_lane_u16 macro, so src is likewise passed by address to vld2q_lane_u16_ptr
+
+//uint8x8x2_t vld2_lane_u8(__transfersize(2) uint8_t const * ptr, uint8x8x2_t src, __constrange(0,7) int lane);// VLD2.8 {d0[0], d1[0]}, [r0]
+//Returns a copy of *src with byte "lane" of val[0] taken from ptr[0] and byte "lane" of val[1] taken from ptr[1]; *src is not modified
+_NEON2SSE_INLINE uint8x8x2_t vld2_lane_u8_ptr(__transfersize(2) uint8_t const * ptr, uint8x8x2_t* src, __constrange(0,7) int lane) // VLD2.8 {d0[0], d1[0]}, [r0]
+{
+    uint8x8x2_t res = *src; //start from the caller's registers, then patch one lane in each
+    res.val[0] = _MM_INSERT_EPI8 (res.val[0], (int)ptr[0], lane);
+    res.val[1] = _MM_INSERT_EPI8 (res.val[1], (int)ptr[1], lane);
+    return res;
+}
+#define vld2_lane_u8(ptr, src, lane) vld2_lane_u8_ptr(ptr, &src, lane)
+
+//The 64-bit lane loads below are aliases of the 128-bit ones or of the unsigned variants.
+//In the casting macros the pointer argument is parenthesized so that an arbitrary pointer expression is cast as a whole.
+//uint16x4x2_t vld2_lane_u16(__transfersize(2) uint16_t const * ptr, uint16x4x2_t src, __constrange(0,3)int lane);// VLD2.16 {d0[0], d1[0]}, [r0]
+#define vld2_lane_u16 vld2q_lane_u16
+
+//uint32x2x2_t vld2_lane_u32(__transfersize(2) uint32_t const * ptr, uint32x2x2_t src, __constrange(0,1)int lane);// VLD2.32 {d0[0], d1[0]}, [r0]
+#define vld2_lane_u32 vld2q_lane_u32
+
+//int8x8x2_t vld2_lane_s8(__transfersize(2) int8_t const * ptr, int8x8x2_t src, __constrange(0,7) int lane);// VLD2.8 {d0[0], d1[0]}, [r0]
+int8x8x2_t vld2_lane_s8_ptr(__transfersize(2) int8_t const * ptr, int8x8x2_t * src, __constrange(0,7) int lane); // VLD2.8 {d0[0], d1[0]}, [r0]
+#define vld2_lane_s8(ptr, src, lane) vld2_lane_u8((uint8_t*)(ptr), src, lane)
+
+//int16x4x2_t vld2_lane_s16(__transfersize(2) int16_t const * ptr, int16x4x2_t src, __constrange(0,3) int lane);// VLD2.16 {d0[0], d1[0]}, [r0]
+int16x4x2_t vld2_lane_s16_ptr(__transfersize(2) int16_t const * ptr, int16x4x2_t * src, __constrange(0,3) int lane); // VLD2.16 {d0[0], d1[0]}, [r0]
+#define vld2_lane_s16(ptr, src, lane) vld2_lane_u16((uint16_t*)(ptr), src, lane)
+
+//int32x2x2_t vld2_lane_s32(__transfersize(2) int32_t const * ptr, int32x2x2_t src, __constrange(0,1) int lane);// VLD2.32 {d0[0], d1[0]}, [r0]
+int32x2x2_t vld2_lane_s32_ptr(__transfersize(2) int32_t const * ptr, int32x2x2_t * src, __constrange(0,1) int lane); // VLD2.32 {d0[0], d1[0]}, [r0]
+#define vld2_lane_s32(ptr, src, lane) vld2_lane_u32((uint32_t*)(ptr), src, lane)
+
+//float16x4x2_t vld2_lane_f16(__transfersize(2) __fp16 const * ptr, float16x4x2_t src, __constrange(0,3) int lane); // VLD2.16 {d0[0], d1[0]}, [r0]
+//current IA SIMD doesn't support float16
+
+float32x2x2_t vld2_lane_f32_ptr(__transfersize(2) float32_t const * ptr, float32x2x2_t * src,__constrange(0,1) int lane); // VLD2.32 {d0[0], d1[0]}, [r0]
+#define vld2_lane_f32 vld2q_lane_f32
+
+//poly8x8x2_t vld2_lane_p8(__transfersize(2) poly8_t const * ptr, poly8x8x2_t src, __constrange(0,7) int lane);// VLD2.8 {d0[0], d1[0]}, [r0]
+poly8x8x2_t vld2_lane_p8_ptr(__transfersize(2) poly8_t const * ptr, poly8x8x2_t * src, __constrange(0,7) int lane); // VLD2.8 {d0[0], d1[0]}, [r0]
+#define vld2_lane_p8 vld2_lane_u8
+
+//poly16x4x2_t vld2_lane_p16(__transfersize(2) poly16_t const * ptr, poly16x4x2_t src, __constrange(0,3)int lane);// VLD2.16 {d0[0], d1[0]}, [r0]
+poly16x4x2_t vld2_lane_p16_ptr(__transfersize(2) poly16_t const * ptr, poly16x4x2_t * src, __constrange(0,3) int lane); // VLD2.16 {d0[0], d1[0]}, [r0]
+#define vld2_lane_p16 vld2_lane_u16
+
+//*********** Lane triplets **********************
+//*************************************************
+//does vld1_lane_xx ptr[0] to src->val[0], ptr[1] to src->val[1] and ptr[2] to src->val[2] at lane position
+//we assume src is 16-byte aligned
+
+//uint16x8x3_t vld3q_lane_u16(__transfersize(3) uint16_t const * ptr, uint16x8x3_t src,__constrange(0,7) int lane);// VLD3.16 {d0[0], d2[0], d4[0]}, [r0]
+//Returns a copy of *src with element "lane" of val[k] taken from ptr[k] for k = 0..2; *src is not modified
+_NEON2SSE_INLINE uint16x8x3_t vld3q_lane_u16_ptr(__transfersize(3) uint16_t const * ptr, uint16x8x3_t* src,__constrange(0,7) int lane) // VLD3.16 {d0[0], d2[0], d4[0]}, [r0]
+{
+    uint16x8x3_t res = *src; //start from the caller's registers, then patch one lane in each
+    res.val[0] = _MM_INSERT_EPI16 ( res.val[0], ptr[0], lane);
+    res.val[1] = _MM_INSERT_EPI16 ( res.val[1], ptr[1], lane);
+    res.val[2] = _MM_INSERT_EPI16 ( res.val[2], ptr[2], lane);
+    return res;
+}
+#define vld3q_lane_u16(ptr, src, lane) vld3q_lane_u16_ptr(ptr, &src, lane)
+
+//uint32x4x3_t vld3q_lane_u32(__transfersize(3) uint32_t const * ptr, uint32x4x3_t src,__constrange(0,3) int lane);// VLD3.32 {d0[0], d2[0], d4[0]}, [r0]
+//Returns a copy of *src with element "lane" of val[k] taken from ptr[k] for k = 0..2; *src is not modified
+_NEON2SSE_INLINE uint32x4x3_t vld3q_lane_u32_ptr(__transfersize(3) uint32_t const * ptr, uint32x4x3_t* src,__constrange(0,3) int lane) // VLD3.32 {d0[0], d2[0], d4[0]}, [r0]
+{
+    uint32x4x3_t res = *src; //start from the caller's registers, then patch one lane in each
+    res.val[0] = _MM_INSERT_EPI32 ( res.val[0], ptr[0], lane);
+    res.val[1] = _MM_INSERT_EPI32 ( res.val[1], ptr[1], lane);
+    res.val[2] = _MM_INSERT_EPI32 ( res.val[2], ptr[2], lane);
+    return res;
+}
+#define vld3q_lane_u32(ptr, src, lane) vld3q_lane_u32_ptr(ptr, &src, lane)
+
+//int16x8x3_t vld3q_lane_s16(__transfersize(3) int16_t const * ptr, int16x8x3_t src, __constrange(0,7)int lane);// VLD3.16 {d0[0], d2[0], d4[0]}, [r0]
+//Returns a copy of *src with element "lane" of val[k] taken from ptr[k] for k = 0..2; *src is not modified
+_NEON2SSE_INLINE int16x8x3_t vld3q_lane_s16_ptr(__transfersize(3) int16_t const * ptr, int16x8x3_t* src, __constrange(0,7) int lane) // VLD3.16 {d0[0], d2[0], d4[0]}, [r0]
+{
+    int16x8x3_t res = *src; //start from the caller's registers, then patch one lane in each
+    res.val[0] = _MM_INSERT_EPI16 ( res.val[0], ptr[0], lane);
+    res.val[1] = _MM_INSERT_EPI16 ( res.val[1], ptr[1], lane);
+    res.val[2] = _MM_INSERT_EPI16 ( res.val[2], ptr[2], lane);
+    return res;
+}
+#define vld3q_lane_s16(ptr, src, lane) vld3q_lane_s16_ptr(ptr, &src, lane)
+
+//int32x4x3_t vld3q_lane_s32(__transfersize(3) int32_t const * ptr, int32x4x3_t src, __constrange(0,3)int lane);// VLD3.32 {d0[0], d2[0], d4[0]}, [r0]
+//Returns a copy of *src with element "lane" of val[k] taken from ptr[k] for k = 0..2; *src is not modified
+_NEON2SSE_INLINE int32x4x3_t vld3q_lane_s32_ptr(__transfersize(3) int32_t const * ptr, int32x4x3_t* src, __constrange(0,3) int lane) // VLD3.32 {d0[0], d2[0], d4[0]}, [r0]
+{
+    int32x4x3_t res = *src; //start from the caller's registers, then patch one lane in each
+    res.val[0] = _MM_INSERT_EPI32 ( res.val[0], ptr[0], lane);
+    res.val[1] = _MM_INSERT_EPI32 ( res.val[1], ptr[1], lane);
+    res.val[2] = _MM_INSERT_EPI32 ( res.val[2], ptr[2], lane);
+    return res;
+}
+#define vld3q_lane_s32(ptr, src, lane) vld3q_lane_s32_ptr(ptr, &src, lane)
+
+//float16 variant: only the _ptr prototype is declared here and no definition is visible in this part of the file,
+//so using the vld3q_lane_f16 macro presumably fails at link time - TODO confirm
+float16x8x3_t vld3q_lane_f16_ptr(__transfersize(3) __fp16 const * ptr, float16x8x3_t * src, __constrange(0,7) int lane); // VLD3.16 {d0[0], d2[0], d4[0]}, [r0]
+//current IA SIMD doesn't support float16
+#define vld3q_lane_f16(ptr, src, lane) vld3q_lane_f16_ptr(ptr, &src, lane)
+
+//float32x4x3_t vld3q_lane_f32(__transfersize(3) float32_t const * ptr, float32x4x3_t src,__constrange(0,3) int lane);// VLD3.32 {d0[0], d2[0], d4[0]}, [r0]
+//Applies vld1q_lane_f32 three times: ptr[0], ptr[1], ptr[2] go to position "lane" of src->val[0], src->val[1], src->val[2]
+_NEON2SSE_INLINE float32x4x3_t vld3q_lane_f32_ptr(__transfersize(3) float32_t const * ptr, float32x4x3_t* src,__constrange(0,3) int lane) // VLD3.32 {d0[0], d2[0], d4[0]}, [r0]
+{
+    float32x4x3_t res;
+    res.val[0] = vld1q_lane_f32(ptr, src->val[0], lane);
+    res.val[1] = vld1q_lane_f32((ptr + 1), src->val[1], lane);
+    res.val[2] = vld1q_lane_f32((ptr + 2), src->val[2], lane);
+    return res;
+}
+#define vld3q_lane_f32(ptr, src, lane) vld3q_lane_f32_ptr(ptr, &src, lane)
+
+//poly16 variant: the _ptr prototype below is declared only; the public name is an alias of the unsigned 16-bit implementation
+poly16x8x3_t vld3q_lane_p16_ptr(__transfersize(3) poly16_t const * ptr, poly16x8x3_t * src,__constrange(0,7) int lane); // VLD3.16 {d0[0], d2[0], d4[0]}, [r0]
+#define vld3q_lane_p16 vld3q_lane_u16
+
+//uint8x8x3_t vld3_lane_u8(__transfersize(3) uint8_t const * ptr, uint8x8x3_t src, __constrange(0,7) int lane);// VLD3.8 {d0[0], d1[0], d2[0]}, [r0]
+//Returns a copy of *src in which element "lane" of val[0], val[1], val[2] is replaced by ptr[0], ptr[1], ptr[2] respectively
+_NEON2SSE_INLINE uint8x8x3_t vld3_lane_u8_ptr(__transfersize(3) uint8_t const * ptr, uint8x8x3_t* src, __constrange(0,7) int lane) // VLD3.8 {d0[0], d1[0], d2[0]}, [r0]
+{
+    uint8x8x3_t res = *src; //untouched lanes come straight from the source vectors
+    res.val[0] = _MM_INSERT_EPI8 (res.val[0], ptr[0], lane);
+    res.val[1] = _MM_INSERT_EPI8 (res.val[1], ptr[1], lane);
+    res.val[2] = _MM_INSERT_EPI8 (res.val[2], ptr[2], lane);
+    return res;
+}
+#define vld3_lane_u8(ptr, src, lane) vld3_lane_u8_ptr(ptr, &src, lane)
+
+//uint16x4x3_t vld3_lane_u16(__transfersize(3) uint16_t const * ptr, uint16x4x3_t src, __constrange(0,3)int lane);// VLD3.16 {d0[0], d1[0], d2[0]}, [r0]
+//Returns a copy of *src in which element "lane" of val[0], val[1], val[2] is replaced by ptr[0], ptr[1], ptr[2] respectively
+_NEON2SSE_INLINE uint16x4x3_t vld3_lane_u16_ptr(__transfersize(3) uint16_t const * ptr, uint16x4x3_t* src, __constrange(0,3) int lane) // VLD3.16 {d0[0], d1[0], d2[0]}, [r0]
+{
+    uint16x4x3_t res = *src; //untouched lanes come straight from the source vectors
+    res.val[0] = _MM_INSERT_EPI16 (res.val[0], ptr[0], lane);
+    res.val[1] = _MM_INSERT_EPI16 (res.val[1], ptr[1], lane);
+    res.val[2] = _MM_INSERT_EPI16 (res.val[2], ptr[2], lane);
+    return res;
+}
+#define vld3_lane_u16(ptr, src, lane) vld3_lane_u16_ptr(ptr, &src, lane)
+
+//uint32x2x3_t vld3_lane_u32(__transfersize(3) uint32_t const * ptr, uint32x2x3_t src, __constrange(0,1)int lane);// VLD3.32 {d0[0], d1[0], d2[0]}, [r0]
+//Returns a copy of *src in which element "lane" of val[0], val[1], val[2] is replaced by ptr[0], ptr[1], ptr[2] respectively
+_NEON2SSE_INLINE uint32x2x3_t vld3_lane_u32_ptr(__transfersize(3) uint32_t const * ptr, uint32x2x3_t* src, __constrange(0,1) int lane) // VLD3.32 {d0[0], d1[0], d2[0]}, [r0]
+{ //need to merge into 128 bit anyway
+    uint32x2x3_t res = *src; //untouched lanes come straight from the source vectors
+    res.val[0] = _MM_INSERT_EPI32 (res.val[0], ptr[0], lane);
+    res.val[1] = _MM_INSERT_EPI32 (res.val[1], ptr[1], lane);
+    res.val[2] = _MM_INSERT_EPI32 (res.val[2], ptr[2], lane);
+    return res;
+}
+#define vld3_lane_u32(ptr, src, lane) vld3_lane_u32_ptr(ptr, &src, lane)
+
+//Signed 64-bit-vector variants: the _ptr prototypes are declared only; the public macros forward to the unsigned implementations.
+//Macro arguments are parenthesized so that compound expressions (e.g. p + 1 or c ? a : b) expand correctly,
+//and the pointer cast keeps the const qualifier of the source buffer.
+int8x8x3_t vld3_lane_s8_ptr(__transfersize(3) int8_t const * ptr, int8x8x3_t * src, __constrange(0,7) int lane); // VLD3.8 {d0[0], d1[0], d2[0]}, [r0]
+#define vld3_lane_s8(ptr, src, lane) vld3_lane_u8_ptr((uint8_t const *)(ptr), &(src), lane)
+
+int16x4x3_t vld3_lane_s16_ptr(__transfersize(3) int16_t const * ptr, int16x4x3_t * src, __constrange(0,3) int lane); // VLD3.16 {d0[0], d1[0], d2[0]}, [r0]
+#define vld3_lane_s16(ptr, src, lane) vld3_lane_u16_ptr((uint16_t const *)(ptr), &(src), lane)
+
+int32x2x3_t vld3_lane_s32_ptr(__transfersize(3) int32_t const * ptr, int32x2x3_t * src, __constrange(0,1) int lane); // VLD3.32 {d0[0], d1[0], d2[0]}, [r0]
+#define vld3_lane_s32(ptr, src, lane) vld3_lane_u32_ptr((uint32_t const *)(ptr), &(src), lane)
+
+float16x4x3_t vld3_lane_f16_ptr(__transfersize(3) __fp16 const * ptr, float16x4x3_t * src, __constrange(0,3) int lane); // VLD3.16 {d0[0], d1[0], d2[0]}, [r0]
+//current IA SIMD doesn't support float16
+
+//float32x2x3_t vld3_lane_f32(__transfersize(3) float32_t const * ptr, float32x2x3_t src,__constrange(0,1) int lane);// VLD3.32 {d0[0], d1[0], d2[0]}, [r0]
+//Loads ptr[0], ptr[1], ptr[2] into position "lane" of src->val[0], src->val[1], src->val[2] and returns the three updated vectors.
+//All three members are filled; previously only val[0] was written and val[1], val[2] were returned uninitialized.
+_NEON2SSE_INLINE float32x2x3_t vld3_lane_f32_ptr(__transfersize(3) float32_t const * ptr, float32x2x3_t* src,__constrange(0,1) int lane) // VLD3.32 {d0[0], d1[0], d2[0]}, [r0]
+{
+    float32x2x3_t v;
+    v.val[0] = vld1q_lane_f32(ptr, src->val[0], lane);
+    v.val[1] = vld1q_lane_f32((ptr + 1), src->val[1], lane);
+    v.val[2] = vld1q_lane_f32((ptr + 2), src->val[2], lane);
+    return v;
+}
+#define vld3_lane_f32(ptr, src, lane) vld3_lane_f32_ptr(ptr, &src, lane)
+
+//poly8x8x3_t vld3_lane_p8_ptr(__transfersize(3) poly8_t const * ptr, poly8x8x3_t * src, __constrange(0,7) int lane); // VLD3.8 {d0[0], d1[0], d2[0]}, [r0]
+//poly8 variant is an alias of the unsigned 8-bit implementation
+#define vld3_lane_p8 vld3_lane_u8
+
+//poly16x4x3_t vld3_lane_p16(__transfersize(3) poly16_t const * ptr, poly16x4x3_t * src, __constrange(0,3) int lane); // VLD3.16 {d0[0], d1[0], d2[0]}, [r0]
+//poly16 variant is an alias of the unsigned 16-bit implementation
+#define vld3_lane_p16 vld3_lane_u16
+
+//******************* Lane Quadruples load ***************************
+//*********************************************************************
+//does vld1_lane_xx ptr[0] to src->val[0], ptr[1] to src->val[1], ptr[2] to src->val[2] and ptr[3] to src->val[3] at lane position
+//we assume src is 16 bit aligned
+
+//uint16x8x4_t vld4q_lane_u16(__transfersize(4) uint16_t const * ptr, uint16x8x4_t src,__constrange(0,7) int lane)// VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0]
+//Returns a copy of *src in which element "lane" of val[0]..val[3] is replaced by ptr[0]..ptr[3] respectively
+_NEON2SSE_INLINE uint16x8x4_t vld4q_lane_u16_ptr(__transfersize(4) uint16_t const * ptr, uint16x8x4_t* src,__constrange(0,7) int lane)
+{
+    uint16x8x4_t res = *src; //untouched lanes come straight from the source vectors
+    res.val[0] = _MM_INSERT_EPI16 (res.val[0], ptr[0], lane);
+    res.val[1] = _MM_INSERT_EPI16 (res.val[1], ptr[1], lane);
+    res.val[2] = _MM_INSERT_EPI16 (res.val[2], ptr[2], lane);
+    res.val[3] = _MM_INSERT_EPI16 (res.val[3], ptr[3], lane);
+    return res;
+}
+#define vld4q_lane_u16(ptr, src, lane) vld4q_lane_u16_ptr(ptr, &src, lane)
+
+//uint32x4x4_t vld4q_lane_u32(__transfersize(4) uint32_t const * ptr, uint32x4x4_t src,__constrange(0,3) int lane)// VLD4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0]
+//Returns a copy of *src in which element "lane" of val[0]..val[3] is replaced by ptr[0]..ptr[3] respectively
+_NEON2SSE_INLINE uint32x4x4_t vld4q_lane_u32_ptr(__transfersize(4) uint32_t const * ptr, uint32x4x4_t* src,__constrange(0,3) int lane)
+{
+    uint32x4x4_t res = *src; //untouched lanes come straight from the source vectors
+    res.val[0] = _MM_INSERT_EPI32 (res.val[0], ptr[0], lane);
+    res.val[1] = _MM_INSERT_EPI32 (res.val[1], ptr[1], lane);
+    res.val[2] = _MM_INSERT_EPI32 (res.val[2], ptr[2], lane);
+    res.val[3] = _MM_INSERT_EPI32 (res.val[3], ptr[3], lane);
+    return res;
+}
+#define vld4q_lane_u32(ptr, src, lane) vld4q_lane_u32_ptr(ptr, &src, lane)
+
+//Signed 128-bit-vector variants: the _ptr prototypes are declared only; the public macros forward to the unsigned macros.
+//The ptr argument is parenthesized inside the cast so compound expressions expand correctly, and the cast keeps const.
+//int16x8x4_t vld4q_lane_s16(__transfersize(4) int16_t const * ptr, int16x8x4_t src, __constrange(0,7)int lane);// VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0]
+int16x8x4_t vld4q_lane_s16_ptr(__transfersize(4) int16_t const * ptr, int16x8x4_t * src, __constrange(0,7) int lane); // VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0]
+#define vld4q_lane_s16(ptr, src, lane) vld4q_lane_u16((uint16_t const *)(ptr), src, lane)
+
+//int32x4x4_t vld4q_lane_s32(__transfersize(4) int32_t const * ptr, int32x4x4_t src, __constrange(0,3)int lane);// VLD4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0]
+int32x4x4_t vld4q_lane_s32_ptr(__transfersize(4) int32_t const * ptr, int32x4x4_t * src, __constrange(0,3) int lane); // VLD4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0]
+#define vld4q_lane_s32(ptr, src, lane) vld4q_lane_u32((uint32_t const *)(ptr), src, lane)
+
+//float16x8x4_t vld4q_lane_f16(__transfersize(4) __fp16 const * ptr, float16x8x4_t src, __constrange(0,7)int lane);// VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0]
+float16x8x4_t vld4q_lane_f16_ptr(__transfersize(4) __fp16 const * ptr, float16x8x4_t * src, __constrange(0,7) int lane); // VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0]
+//current IA SIMD doesn't support float16
+
+//float32x4x4_t vld4q_lane_f32(__transfersize(4) float32_t const * ptr, float32x4x4_t src,__constrange(0,3) int lane)// VLD4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0]
+//Applies vld1q_lane_f32 four times: ptr[0]..ptr[3] go to position "lane" of src->val[0]..src->val[3]
+_NEON2SSE_INLINE float32x4x4_t vld4q_lane_f32_ptr(__transfersize(4) float32_t const * ptr, float32x4x4_t* src,__constrange(0,3) int lane)
+{
+    float32x4x4_t res;
+    res.val[0] = vld1q_lane_f32(ptr, src->val[0], lane);
+    res.val[1] = vld1q_lane_f32((ptr + 1), src->val[1], lane);
+    res.val[2] = vld1q_lane_f32((ptr + 2), src->val[2], lane);
+    res.val[3] = vld1q_lane_f32((ptr + 3), src->val[3], lane);
+    return res;
+}
+#define vld4q_lane_f32(ptr, src, lane) vld4q_lane_f32_ptr(ptr, &src, lane)
+
+//poly16x8x4_t vld4q_lane_p16(__transfersize(4) poly16_t const * ptr, poly16x8x4_t src,__constrange(0,7) int lane);// VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0]
+//the _ptr prototype is declared only; the public name is an alias of the unsigned 16-bit implementation
+poly16x8x4_t vld4q_lane_p16_ptr(__transfersize(4) poly16_t const * ptr, poly16x8x4_t * src,__constrange(0,7) int lane); // VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0]
+#define vld4q_lane_p16 vld4q_lane_u16
+
+//uint8x8x4_t vld4_lane_u8(__transfersize(4) uint8_t const * ptr, uint8x8x4_t src, __constrange(0,7) int lane)// VLD4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+//Returns a copy of *src in which element "lane" of val[0]..val[3] is replaced by ptr[0]..ptr[3] respectively
+_NEON2SSE_INLINE uint8x8x4_t vld4_lane_u8_ptr(__transfersize(4) uint8_t const * ptr, uint8x8x4_t* src, __constrange(0,7) int lane)
+{
+    uint8x8x4_t res = *src; //untouched lanes come straight from the source vectors
+    res.val[0] = _MM_INSERT_EPI8 (res.val[0], ptr[0], lane);
+    res.val[1] = _MM_INSERT_EPI8 (res.val[1], ptr[1], lane);
+    res.val[2] = _MM_INSERT_EPI8 (res.val[2], ptr[2], lane);
+    res.val[3] = _MM_INSERT_EPI8 (res.val[3], ptr[3], lane);
+    return res;
+}
+#define vld4_lane_u8(ptr, src, lane) vld4_lane_u8_ptr(ptr, &src, lane)
+
+//uint16x4x4_t vld4_lane_u16(__transfersize(4) uint16_t const * ptr, uint16x4x4_t src, __constrange(0,3)int lane)// VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+//Returns a copy of *src in which element "lane" of val[0]..val[3] is replaced by ptr[0]..ptr[3] respectively
+_NEON2SSE_INLINE uint16x4x4_t vld4_lane_u16_ptr(__transfersize(4) uint16_t const * ptr, uint16x4x4_t* src, __constrange(0,3) int lane)
+{
+    uint16x4x4_t res = *src; //untouched lanes come straight from the source vectors
+    res.val[0] = _MM_INSERT_EPI16 (res.val[0], ptr[0], lane);
+    res.val[1] = _MM_INSERT_EPI16 (res.val[1], ptr[1], lane);
+    res.val[2] = _MM_INSERT_EPI16 (res.val[2], ptr[2], lane);
+    res.val[3] = _MM_INSERT_EPI16 (res.val[3], ptr[3], lane);
+    return res;
+}
+#define vld4_lane_u16(ptr, src, lane) vld4_lane_u16_ptr(ptr, &src, lane)
+
+//uint32x2x4_t vld4_lane_u32(__transfersize(4) uint32_t const * ptr, uint32x2x4_t src, __constrange(0,1)int lane)// VLD4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+//Returns a copy of *src in which element "lane" of val[0]..val[3] is replaced by ptr[0]..ptr[3] respectively
+_NEON2SSE_INLINE uint32x2x4_t vld4_lane_u32_ptr(__transfersize(4) uint32_t const * ptr, uint32x2x4_t* src, __constrange(0,1) int lane)
+{
+    uint32x2x4_t res = *src; //untouched lanes come straight from the source vectors
+    res.val[0] = _MM_INSERT_EPI32 (res.val[0], ptr[0], lane);
+    res.val[1] = _MM_INSERT_EPI32 (res.val[1], ptr[1], lane);
+    res.val[2] = _MM_INSERT_EPI32 (res.val[2], ptr[2], lane);
+    res.val[3] = _MM_INSERT_EPI32 (res.val[3], ptr[3], lane);
+    return res;
+}
+#define vld4_lane_u32(ptr, src, lane) vld4_lane_u32_ptr(ptr, &src, lane)
+
+//Signed 64-bit-vector variants: the _ptr prototypes are declared only; the public macros forward to the unsigned macros.
+//The ptr argument is parenthesized inside the cast so compound expressions expand correctly, and the cast keeps const.
+//int8x8x4_t vld4_lane_s8(__transfersize(4) int8_t const * ptr, int8x8x4_t src, __constrange(0,7) int lane);// VLD4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+int8x8x4_t vld4_lane_s8_ptr(__transfersize(4) int8_t const * ptr, int8x8x4_t * src, __constrange(0,7) int lane);
+#define vld4_lane_s8(ptr,src,lane) vld4_lane_u8((uint8_t const *)(ptr),src,lane)
+
+//int16x4x4_t vld4_lane_s16(__transfersize(4) int16_t const * ptr, int16x4x4_t src, __constrange(0,3) int lane);// VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+int16x4x4_t vld4_lane_s16_ptr(__transfersize(4) int16_t const * ptr, int16x4x4_t * src, __constrange(0,3) int lane);
+#define vld4_lane_s16(ptr,src,lane) vld4_lane_u16((uint16_t const *)(ptr),src,lane)
+
+//int32x2x4_t vld4_lane_s32(__transfersize(4) int32_t const * ptr, int32x2x4_t src, __constrange(0,1) int lane);// VLD4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+int32x2x4_t vld4_lane_s32_ptr(__transfersize(4) int32_t const * ptr, int32x2x4_t * src, __constrange(0,1) int lane);
+#define vld4_lane_s32(ptr,src,lane) vld4_lane_u32((uint32_t const *)(ptr),src,lane)
+
+//float16x4x4_t vld4_lane_f16(__transfersize(4) __fp16 const * ptr, float16x4x4_t src, __constrange(0,3)int lane);// VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+float16x4x4_t vld4_lane_f16_ptr(__transfersize(4) __fp16 const * ptr, float16x4x4_t * src, __constrange(0,3) int lane);
+//current IA SIMD doesn't support float16
+
+//float32x2x4_t vld4_lane_f32(__transfersize(4) float32_t const * ptr, float32x2x4_t src,__constrange(0,1) int lane)// VLD4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+//Loads ptr[0]..ptr[3] into position "lane" of src->val[0]..src->val[3] and returns the four updated vectors.
+//The body used to be empty and returned an uninitialized structure; it now performs the four lane loads
+//in the same way as the other *_lane_f32 functions of this file.
+_NEON2SSE_INLINE float32x2x4_t vld4_lane_f32_ptr(__transfersize(4) float32_t const * ptr, float32x2x4_t* src,__constrange(0,1) int lane)
+{ //serial solution may be faster
+    float32x2x4_t v;
+    v.val[0] = vld1q_lane_f32(ptr, src->val[0], lane);
+    v.val[1] = vld1q_lane_f32((ptr + 1), src->val[1], lane);
+    v.val[2] = vld1q_lane_f32((ptr + 2), src->val[2], lane);
+    v.val[3] = vld1q_lane_f32((ptr + 3), src->val[3], lane);
+    return v;
+}
+#define vld4_lane_f32(ptr, src, lane) vld4_lane_f32_ptr(ptr, &src, lane)
+
+//poly8x8x4_t vld4_lane_p8(__transfersize(4) poly8_t const * ptr, poly8x8x4_t src, __constrange(0,7) int lane);// VLD4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+//the _ptr prototype is declared only; the public name is an alias of the unsigned 8-bit implementation
+poly8x8x4_t vld4_lane_p8_ptr(__transfersize(4) poly8_t const * ptr, poly8x8x4_t * src, __constrange(0,7) int lane);
+#define vld4_lane_p8 vld4_lane_u8
+
+//poly16x4x4_t vld4_lane_p16(__transfersize(4) poly16_t const * ptr, poly16x4x4_t src, __constrange(0,3)int lane);// VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+//the _ptr prototype is declared only; the public name is an alias of the unsigned 16-bit implementation
+poly16x4x4_t vld4_lane_p16_ptr(__transfersize(4) poly16_t const * ptr, poly16x4x4_t * src, __constrange(0,3) int lane);
+#define vld4_lane_p16 vld4_lane_u16
+
+//******************* Store duplets *********************************************
+//********************************************************************************
+//here we assume the ptr is 16-byte aligned. If not we need to use _mm_storeu_si128 like shown in vst1q_u8 function
+//If necessary you need to modify all store functions accordingly. See more comments to "Store single" functions
+//void vst2q_u8(__transfersize(32) uint8_t * ptr, uint8x16x2_t val)// VST2.8 {d0, d2}, [r0]
+//Interleaves val->val[0] and val->val[1] element by element and writes the 32 resulting bytes to ptr
+_NEON2SSE_INLINE void vst2q_u8_ptr(__transfersize(32) uint8_t * ptr, uint8x16x2_t* val)
+{
+    __m128i lo_pairs, hi_pairs;
+    lo_pairs = _mm_unpacklo_epi8(val->val[0], val->val[1]); //pairs built from elements 0..7
+    hi_pairs = _mm_unpackhi_epi8(val->val[0], val->val[1]); //pairs built from elements 8..15
+    vst1q_u8 (ptr, lo_pairs);
+    vst1q_u8 ((ptr + 16), hi_pairs);
+}
+#define vst2q_u8(ptr, val) vst2q_u8_ptr(ptr, &val)
+
+//void vst2q_u16(__transfersize(16) uint16_t * ptr, uint16x8x2_t val)// VST2.16 {d0, d2}, [r0]
+//Interleaves val->val[0] and val->val[1] element by element and writes the 16 resulting values to ptr
+_NEON2SSE_INLINE void vst2q_u16_ptr(__transfersize(16) uint16_t * ptr, uint16x8x2_t* val)
+{
+    __m128i lo_pairs, hi_pairs;
+    lo_pairs = _mm_unpacklo_epi16(val->val[0], val->val[1]); //pairs built from elements 0..3
+    hi_pairs = _mm_unpackhi_epi16(val->val[0], val->val[1]); //pairs built from elements 4..7
+    vst1q_u16 (ptr, lo_pairs);
+    vst1q_u16 ((ptr + 8), hi_pairs);
+}
+#define vst2q_u16(ptr, val) vst2q_u16_ptr(ptr, &val)
+
+//void vst2q_u32(__transfersize(8) uint32_t * ptr, uint32x4x2_t val)// VST2.32 {d0, d2}, [r0]
+//Interleaves val->val[0] and val->val[1] element by element and writes the 8 resulting values to ptr
+_NEON2SSE_INLINE void vst2q_u32_ptr(__transfersize(8) uint32_t* ptr, uint32x4x2_t* val)
+{
+    __m128i lo_pairs, hi_pairs;
+    lo_pairs = _mm_unpacklo_epi32(val->val[0], val->val[1]); //pairs built from elements 0..1
+    hi_pairs = _mm_unpackhi_epi32(val->val[0], val->val[1]); //pairs built from elements 2..3
+    vst1q_u32 (ptr, lo_pairs);
+    vst1q_u32 ((ptr + 4), hi_pairs);
+}
+#define vst2q_u32(ptr, val) vst2q_u32_ptr(ptr, &val)
+
+//Signed variants: the _ptr prototypes are declared only; each public macro casts the pointer and forwards to the unsigned macro of the same element width
+//void vst2q_s8(__transfersize(32) int8_t * ptr, int8x16x2_t val); // VST2.8 {d0, d2}, [r0]
+void vst2q_s8_ptr(__transfersize(32) int8_t * ptr, int8x16x2_t * val);
+#define vst2q_s8(ptr, val) vst2q_u8((uint8_t*)(ptr), val)
+
+//void vst2q_s16(__transfersize(16) int16_t * ptr, int16x8x2_t val);// VST2.16 {d0, d2}, [r0]
+void vst2q_s16_ptr(__transfersize(16) int16_t * ptr, int16x8x2_t * val);
+#define vst2q_s16(ptr, val) vst2q_u16((uint16_t*)(ptr), val)
+
+//void vst2q_s32(__transfersize(8) int32_t * ptr, int32x4x2_t val);// VST2.32 {d0, d2}, [r0]
+void vst2q_s32_ptr(__transfersize(8) int32_t * ptr, int32x4x2_t * val);
+#define vst2q_s32(ptr, val) vst2q_u32((uint32_t*)(ptr), val)
+
+//void vst2q_f16(__transfersize(16) __fp16 * ptr, float16x8x2_t val);// VST2.16 {d0, d2}, [r0]
+//prototype only: no vst2q_f16 macro is provided
+void vst2q_f16_ptr(__transfersize(16) __fp16 * ptr, float16x8x2_t * val);
+// IA32 SIMD doesn't work with 16bit floats currently
+
+//void vst2q_f32(__transfersize(8) float32_t * ptr, float32x4x2_t val)// VST2.32 {d0, d2}, [r0]
+//Interleaves val->val[0] and val->val[1] element by element and writes the 8 resulting floats to ptr
+_NEON2SSE_INLINE void vst2q_f32_ptr(__transfersize(8) float32_t* ptr, float32x4x2_t* val)
+{
+    __m128 lo_pairs, hi_pairs;
+    lo_pairs = _mm_unpacklo_ps(val->val[0], val->val[1]); //pairs built from elements 0..1
+    hi_pairs = _mm_unpackhi_ps(val->val[0], val->val[1]); //pairs built from elements 2..3
+    vst1q_f32 (ptr, lo_pairs);
+    vst1q_f32 ((ptr + 4), hi_pairs);
+}
+#define vst2q_f32(ptr, val) vst2q_f32_ptr(ptr, &val)
+
+//void vst2q_p8(__transfersize(32) poly8_t * ptr, poly8x16x2_t val);// VST2.8 {d0, d2}, [r0]
+//the _ptr prototype is declared only; the public name is an alias of the unsigned 8-bit implementation
+void vst2q_p8_ptr(__transfersize(32) poly8_t * ptr, poly8x16x2_t * val);
+#define vst2q_p8 vst2q_u8
+
+//void vst2q_p16(__transfersize(16) poly16_t * ptr, poly16x8x2_t val);// VST2.16 {d0, d2}, [r0]
+//the _ptr prototype is declared only; the public name is an alias of the unsigned 16-bit implementation
+void vst2q_p16_ptr(__transfersize(16) poly16_t * ptr, poly16x8x2_t * val);
+#define vst2q_p16 vst2q_u16
+
+//void vst2_u8(__transfersize(16) uint8_t * ptr, uint8x8x2_t val);// VST2.8 {d0, d1}, [r0]
+//Interleaves the low 8 bytes of val->val[0] and val->val[1] and writes the 16 resulting bytes to ptr with one 128-bit store
+_NEON2SSE_INLINE void vst2_u8_ptr(__transfersize(16) uint8_t * ptr, uint8x8x2_t* val)
+{
+    __m128i interleaved = _mm_unpacklo_epi8(val->val[0], val->val[1]);
+    vst1q_u8 (ptr, interleaved);
+}
+#define vst2_u8(ptr, val) vst2_u8_ptr(ptr, &val)
+
+//void vst2_u16(__transfersize(8) uint16_t * ptr, uint16x4x2_t val);// VST2.16 {d0, d1}, [r0]
+//Interleaves the low 4 elements of val->val[0] and val->val[1] and writes the 8 resulting values to ptr with one 128-bit store
+_NEON2SSE_INLINE void vst2_u16_ptr(__transfersize(8) uint16_t * ptr, uint16x4x2_t* val)
+{
+    __m128i interleaved = _mm_unpacklo_epi16(val->val[0], val->val[1]);
+    vst1q_u16 (ptr, interleaved);
+}
+#define vst2_u16(ptr, val) vst2_u16_ptr(ptr, &val)
+
+//void vst2_u32(__transfersize(4) uint32_t * ptr, uint32x2x2_t val);// VST2.32 {d0, d1}, [r0]
+//Interleaves the low 2 elements of val->val[0] and val->val[1] and writes the 4 resulting values to ptr with one 128-bit store
+_NEON2SSE_INLINE void vst2_u32_ptr(__transfersize(4) uint32_t * ptr, uint32x2x2_t* val)
+{
+    __m128i interleaved = _mm_unpacklo_epi32(val->val[0], val->val[1]);
+    vst1q_u32 (ptr, interleaved);
+}
+#define vst2_u32(ptr, val) vst2_u32_ptr(ptr, &val)
+
+//void vst2_u64(__transfersize(2) uint64_t * ptr, uint64x1x2_t val);// VST1.64 {d0, d1}, [r0]
+//Packs the low 64 bits of val->val[0] and val->val[1] side by side and writes both to ptr with one 128-bit store
+void vst2_u64_ptr(__transfersize(2) uint64_t * ptr, uint64x1x2_t * val);
+_NEON2SSE_INLINE void vst2_u64_ptr(__transfersize(2) uint64_t * ptr, uint64x1x2_t* val)
+{
+    __m128i both = _mm_unpacklo_epi64(val->val[0], val->val[1]);
+    vst1q_u64(ptr, both);
+}
+#define vst2_u64(ptr, val) vst2_u64_ptr(ptr, &val)
+
+//Signed variants forward to the unsigned macro of the same element width.
+//The ptr argument is parenthesized inside the cast (as the vst2q_sXX macros already do) so compound expressions expand correctly.
+//void vst2_s8(__transfersize(16) int8_t * ptr, int8x8x2_t val);// VST2.8 {d0, d1}, [r0]
+#define vst2_s8(ptr, val) vst2_u8((uint8_t*)(ptr), val)
+
+//void vst2_s16(__transfersize(8) int16_t * ptr, int16x4x2_t val); // VST2.16 {d0, d1}, [r0]
+#define vst2_s16(ptr,val) vst2_u16((uint16_t*)(ptr), val)
+
+//void vst2_s32(__transfersize(4) int32_t * ptr, int32x2x2_t val); // VST2.32 {d0, d1}, [r0]
+#define vst2_s32(ptr,val) vst2_u32((uint32_t*)(ptr), val)
+
+//void vst2_s64(__transfersize(2) int64_t * ptr, int64x1x2_t val);
+#define vst2_s64(ptr,val) vst2_u64((uint64_t*)(ptr),val)
+
+//void vst2_f16(__transfersize(8) __fp16 * ptr, float16x4x2_t val); // VST2.16 {d0, d1}, [r0]
+//current IA SIMD doesn't support float16
+
+//Interleaves the low 2 floats of val->val[0] and val->val[1] and writes the 4 resulting values to ptr with one 128-bit store
+void vst2_f32_ptr(__transfersize(4) float32_t * ptr, float32x2x2_t * val); // VST2.32 {d0, d1}, [r0]
+_NEON2SSE_INLINE void vst2_f32_ptr(__transfersize(4) float32_t* ptr, float32x2x2_t* val)
+{
+    __m128 interleaved = _mm_unpacklo_ps(val->val[0], val->val[1]);
+    vst1q_f32 (ptr, interleaved);
+}
+#define vst2_f32(ptr, val) vst2_f32_ptr(ptr, &val)
+
+//void vst2_p8_ptr(__transfersize(16) poly8_t * ptr, poly8x8x2_t * val); // VST2.8 {d0, d1}, [r0]
+//poly8 variant is an alias of the unsigned 8-bit implementation
+#define vst2_p8 vst2_u8
+
+//void vst2_p16_ptr(__transfersize(8) poly16_t * ptr, poly16x4x2_t * val); // VST2.16 {d0, d1}, [r0]
+//poly16 variant is an alias of the unsigned 16-bit implementation
+#define vst2_p16 vst2_u16
+
+//******************** Triplets store *****************************************
+//******************************************************************************
+//void vst3q_u8(__transfersize(48) uint8_t * ptr, uint8x16x3_t val)// VST3.8 {d0, d2, d4}, [r0]
+//Stores the three 16-byte vectors of *val interleaved as a0,b0,c0, a1,b1,c1, ... (a = val[0], b = val[1], c = val[2]), 48 bytes in total.
+//Approach: interleave a and b first, then for each 16-byte output chunk shuffle the a/b bytes leaving every third byte as a "hole",
+//shuffle the c bytes into exactly those holes, and blend the two. The numbers in the comments below are output byte positions.
+#if defined(USE_SSSE3)
+_NEON2SSE_INLINE void vst3q_u8_ptr(__transfersize(48) uint8_t * ptr, uint8x16x3_t* val)
+{
+ uint8x16x3_t v;
+ __m128i v0,v1,v2, cff, bldmask;
+ //mask0, mask1, mask2: shuffle controls for the a/b bytes of output chunks 0, 1, 2; 0xff marks a hole (pshufb writes zero there)
+ _NEON2SSE_ALIGN_16 uint8_t mask0[16] = {0, 1, 0xff, 2, 3,0xff, 4, 5,0xff, 6,7,0xff, 8,9,0xff, 10};
+ _NEON2SSE_ALIGN_16 uint8_t mask1[16] = {0, 0xff, 1, 2, 0xff, 3, 4, 0xff, 5, 6, 0xff, 7,8,0xff, 9,10};
+ _NEON2SSE_ALIGN_16 uint8_t mask2[16] = {0xff, 6, 7, 0xff, 8, 9,0xff, 10, 11,0xff, 12,13,0xff, 14,15,0xff};
+ //mask2lo, mask2med, mask2hi: shuffle controls that place c0..c4, c5..c9, c10..c15 into the holes of chunks 0, 1, 2
+ _NEON2SSE_ALIGN_16 uint8_t mask2lo[16] = {0xff,0xff, 0, 0xff,0xff, 1, 0xff,0xff, 2, 0xff,0xff, 3, 0xff,0xff, 4, 0xff};
+ _NEON2SSE_ALIGN_16 uint8_t mask2med[16] = {0xff, 5, 0xff, 0xff, 6, 0xff,0xff, 7, 0xff,0xff, 8, 0xff,0xff, 9, 0xff, 0xff};
+ _NEON2SSE_ALIGN_16 uint8_t mask2hi[16] = {10, 0xff,0xff, 11, 0xff,0xff, 12, 0xff,0xff, 13, 0xff,0xff, 14, 0xff, 0xff, 15};
+
+ v0 = _mm_unpacklo_epi8(val->val[0], val->val[1]); //0,1, 3,4, 6,7, 9,10, 12,13, 15,16, 18,19, 21,22
+ v2 = _mm_unpackhi_epi8(val->val[0], val->val[1]); //24,25, 27,28, 30,31, 33,34, 36,37, 39,40, 42,43, 45,46
+ v1 = _mm_alignr_epi8(v2, v0, 11); //16, 18,19, 21,22, 24,25, 27,28, 30,31, 33,34, 36,37, 39
+ v.val[0] = _mm_shuffle_epi8(v0, *(__m128i*)mask0); //make holes for the v.val[2] data embedding
+ v.val[2] = _mm_shuffle_epi8(val->val[2], *(__m128i*)mask2lo); //make plugs for the v.val[2] data embedding
+ cff = _mm_cmpeq_epi8(v0, v0); //all ff
+ bldmask = _mm_cmpeq_epi8(*(__m128i*)mask0, cff); //0xff exactly at the hole positions, so the blend takes the plug bytes there
+ v.val[0] = _MM_BLENDV_EPI8(v.val[0], v.val[2], bldmask);
+ vst1q_u8(ptr, v.val[0]); //output bytes 0..15
+ v.val[0] = _mm_shuffle_epi8(v1, *(__m128i*)mask1); //make holes for the v.val[2] data embedding
+ v.val[2] = _mm_shuffle_epi8(val->val[2], *(__m128i*)mask2med); //make plugs for the v.val[2] data embedding
+ bldmask = _mm_cmpeq_epi8(*(__m128i*)mask1, cff);
+ v.val[1] = _MM_BLENDV_EPI8(v.val[0],v.val[2], bldmask);
+ vst1q_u8((ptr + 16), v.val[1]); //output bytes 16..31
+ v.val[0] = _mm_shuffle_epi8(v2, *(__m128i*)mask2); //make holes for the v.val[2] data embedding
+ v.val[2] = _mm_shuffle_epi8(val->val[2], *(__m128i*)mask2hi); //make plugs for the v.val[2] data embedding
+ bldmask = _mm_cmpeq_epi8(*(__m128i*)mask2, cff);
+ v.val[2] = _MM_BLENDV_EPI8(v.val[0],v.val[2], bldmask );
+ vst1q_u8((ptr + 32), v.val[2]); //output bytes 32..47
+}
+#define vst3q_u8(ptr, val) vst3q_u8_ptr(ptr, &val)
+#endif
+
+#if defined(USE_SSSE3)
+//void vst3q_u16(__transfersize(24) uint16_t * ptr, uint16x8x3_t val)// VST3.16 {d0, d2, d4}, [r0]
+//Stores the three 8-element vectors of *val interleaved as a0,b0,c0, a1,b1,c1, ... (a = val[0], b = val[1], c = val[2]), 24 elements in total.
+//Same hole/plug/blend scheme as the 8-bit version, with byte-level shuffle masks moving 16-bit elements.
+//The numbers in the comments below are output element positions.
+_NEON2SSE_INLINE void vst3q_u16_ptr(__transfersize(24) uint16_t * ptr, uint16x8x3_t* val)
+{
+ uint16x8x3_t v;
+ __m128i v0,v1,v2, cff, bldmask;
+ //mask0, mask1, mask2: shuffle controls for the a/b elements of output chunks 0, 1, 2; a 0xff,0xff pair marks a hole
+ _NEON2SSE_ALIGN_16 uint8_t mask0[16] = {0,1, 2,3, 0xff,0xff, 4,5, 6,7,0xff,0xff, 8,9,10,11};
+ _NEON2SSE_ALIGN_16 uint8_t mask1[16] = {0xff, 0xff, 0,1, 2,3, 0xff,0xff, 4,5, 6,7, 0xff,0xff, 8,9};
+ _NEON2SSE_ALIGN_16 uint8_t mask2[16] = {6,7,0xff,0xff, 8,9,10,11, 0xff, 0xff, 12,13,14,15, 0xff, 0xff};
+ //mask2lo, mask2med, mask2hi: shuffle controls that place c0..c1, c2..c4, c5..c7 into the holes of chunks 0, 1, 2
+ _NEON2SSE_ALIGN_16 uint8_t mask2lo[16] = {0xff,0xff, 0xff,0xff, 0,1, 0xff,0xff, 0xff,0xff, 2,3, 0xff,0xff, 0xff,0xff};
+ _NEON2SSE_ALIGN_16 uint8_t mask2med[16] = {4,5, 0xff,0xff,0xff,0xff, 6,7, 0xff, 0xff,0xff,0xff, 8,9, 0xff, 0xff};
+ _NEON2SSE_ALIGN_16 uint8_t mask2hi[16] = {0xff, 0xff, 10,11, 0xff, 0xff, 0xff, 0xff, 12,13, 0xff, 0xff, 0xff, 0xff,14,15};
+
+ v0 = _mm_unpacklo_epi16(val->val[0], val->val[1]); //0,1, 3,4, 6,7, 9,10
+ v2 = _mm_unpackhi_epi16(val->val[0], val->val[1]); //12,13, 15,16, 18,19, 21,22,
+ v1 = _mm_alignr_epi8(v2, v0, 12); //9,10, 12,13, 15,16, 18,19
+ v.val[0] = _mm_shuffle_epi8(v0, *(__m128i*)mask0); //make holes for the v.val[2] data embedding
+ v.val[2] = _mm_shuffle_epi8(val->val[2], *(__m128i*)mask2lo); //make plugs for the v.val[2] data embedding
+ cff = _mm_cmpeq_epi16(v0, v0); //all ff
+ bldmask = _mm_cmpeq_epi16(*(__m128i*)mask0, cff); //0xffff exactly at the hole positions, so the blend takes the plug elements there
+ v.val[0] = _MM_BLENDV_EPI8(v.val[0], v.val[2], bldmask);
+ vst1q_u16(ptr, v.val[0]); //output elements 0..7
+ v.val[0] = _mm_shuffle_epi8(v1, *(__m128i*)mask1); //make holes for the v.val[2] data embedding
+ v.val[2] = _mm_shuffle_epi8(val->val[2], *(__m128i*)mask2med); //make plugs for the v.val[2] data embedding
+ bldmask = _mm_cmpeq_epi16(*(__m128i*)mask1, cff);
+ v.val[1] = _MM_BLENDV_EPI8(v.val[0],v.val[2], bldmask);
+ vst1q_u16((ptr + 8), v.val[1]); //output elements 8..15
+ v.val[0] = _mm_shuffle_epi8(v2, *(__m128i*)mask2); //make holes for the v.val[2] data embedding
+ v.val[2] = _mm_shuffle_epi8(val->val[2], *(__m128i*)mask2hi); //make plugs for the v.val[2] data embedding
+ bldmask = _mm_cmpeq_epi16(*(__m128i*)mask2, cff);
+ v.val[2] = _MM_BLENDV_EPI8(v.val[0],v.val[2], bldmask );
+ vst1q_u16((ptr + 16), v.val[2]); //output elements 16..23
+}
+#define vst3q_u16(ptr, val) vst3q_u16_ptr(ptr, &val)
+#endif
+
+//void vst3q_u32(__transfersize(12) uint32_t * ptr, uint32x4x3_t val)// VST3.32 {d0, d2, d4}, [r0]
+//Stores a = val->val[0], b = val->val[1], c = val->val[2] interleaved:
+//a0,a1,a2,a3, b0,b1,b2,b3, c0,c1,c2,c3 -> a0,b0,c0,a1, b1,c1,a2,b2, c2,a3,b3,c3
+_NEON2SSE_INLINE void vst3q_u32_ptr(__transfersize(12) uint32_t * ptr, uint32x4x3_t* val)
+{
+    __m128i ab_lo, ab_hi, bc_lo, bac, out0, out1, out2;
+    ab_lo = _mm_unpacklo_epi32(val->val[0], val->val[1]); //a0,b0,a1,b1
+    ab_hi = _mm_unpackhi_epi32(val->val[0], val->val[1]); //a2,b2,a3,b3
+    bc_lo = _mm_unpacklo_epi32(val->val[1], val->val[2]); //b0,c0,b1,c1
+    bac = _mm_unpacklo_epi32(bc_lo, val->val[0]); //b0,a0,c0,a1
+
+    //first chunk: low half of ab_lo followed by high half of bac
+    out0 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(ab_lo), _mm_castsi128_ps(bac), _MM_SHUFFLE(3,2,1,0))); //a0,b0,c0,a1
+    //second chunk: high half of bc_lo followed by low half of ab_hi
+    out1 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(bc_lo), _mm_castsi128_ps(ab_hi), _MM_SHUFFLE(1,0,3,2))); //b1,c1,a2,b2
+    //third chunk: gather the high halves, then rotate c2 to the front
+    out2 = _mm_unpackhi_epi64(ab_hi, val->val[2]); //a3,b3,c2,c3
+    out2 = _mm_shuffle_epi32(out2, _MM_SHUFFLE(3,1,0,2)); //c2,a3,b3,c3
+
+    vst1q_u32(ptr, out0);
+    vst1q_u32((ptr + 4), out1);
+    vst1q_u32((ptr + 8), out2);
+}
+#define vst3q_u32(ptr, val) vst3q_u32_ptr(ptr, &val)
+
+//Signed variants: the _ptr prototypes are declared only; each public macro casts the pointer and forwards to the unsigned macro of the same element width.
+//The 8- and 16-bit forms are only available under USE_SSSE3, like the unsigned implementations they forward to.
+#if defined(USE_SSSE3)
+//void vst3q_s8(__transfersize(48) int8_t * ptr, int8x16x3_t val);
+void vst3q_s8_ptr(__transfersize(48) int8_t * ptr, int8x16x3_t * val);
+#define vst3q_s8(ptr, val) vst3q_u8((uint8_t*)(ptr), val)
+
+//void vst3q_s16(__transfersize(24) int16_t * ptr, int16x8x3_t val);
+void vst3q_s16_ptr(__transfersize(24) int16_t * ptr, int16x8x3_t * val);
+#define vst3q_s16(ptr, val) vst3q_u16((uint16_t*)(ptr), val)
+#endif
+
+//void vst3q_s32(__transfersize(12) int32_t * ptr, int32x4x3_t val);
+void vst3q_s32_ptr(__transfersize(12) int32_t * ptr, int32x4x3_t * val);
+#define vst3q_s32(ptr, val) vst3q_u32((uint32_t*)(ptr), val)
+
+//void vst3q_f16(__transfersize(24) __fp16 * ptr, float16x8x3_t val);// VST3.16 {d0, d2, d4}, [r0]
+//prototype only: no vst3q_f16 macro is provided
+void vst3q_f16_ptr(__transfersize(24) __fp16 * ptr, float16x8x3_t * val);
+// IA32 SIMD doesn't work with 16bit floats currently
+
+//void vst3q_f32(__transfersize(12) float32_t * ptr, float32x4x3_t val)// VST3.32 {d0, d2, d4}, [r0]
+//Stores a = val->val[0], b = val->val[1], c = val->val[2] interleaved:
+//a0,a1,a2,a3, b0,b1,b2,b3, c0,c1,c2,c3 -> a0,b0,c0,a1, b1,c1,a2,b2, c2,a3,b3,c3
+_NEON2SSE_INLINE void vst3q_f32_ptr(__transfersize(12) float32_t * ptr, float32x4x3_t* val)
+{
+    __m128 ab_lo, ab_hi, bc_lo, bac, out0, out1, out2;
+    ab_lo = _mm_unpacklo_ps(val->val[0], val->val[1]); //a0,b0,a1,b1
+    ab_hi = _mm_unpackhi_ps(val->val[0], val->val[1]); //a2,b2,a3,b3
+    bc_lo = _mm_unpacklo_ps(val->val[1], val->val[2]); //b0,c0,b1,c1
+    bac = _mm_unpacklo_ps(bc_lo, val->val[0]); //b0,a0,c0,a1
+
+    out0 = _mm_shuffle_ps(ab_lo, bac, _MM_SHUFFLE(3,2,1,0)); //a0,b0,c0,a1
+    out1 = _mm_shuffle_ps(bc_lo, ab_hi, _MM_SHUFFLE(1,0,3,2)); //b1,c1,a2,b2
+    out2 = _mm_movehl_ps(val->val[2], ab_hi); //a3,b3,c2,c3
+    out2 = _mm_shuffle_ps(out2, out2, _MM_SHUFFLE(3,1,0,2)); //c2,a3,b3,c3
+
+    vst1q_f32( ptr, out0);
+    vst1q_f32( (ptr + 4), out1);
+    vst1q_f32( (ptr + 8), out2);
+}
+#define vst3q_f32(ptr, val) vst3q_f32_ptr(ptr, &val)
+
+//Polynomial variants are aliases of the unsigned implementations and share their USE_SSSE3 requirement; the _ptr prototypes are declared only
+#if defined(USE_SSSE3)
+//void vst3q_p8(__transfersize(48) poly8_t * ptr, poly8x16x3_t val);// VST3.8 {d0, d2, d4}, [r0]
+void vst3q_p8_ptr(__transfersize(48) poly8_t * ptr, poly8x16x3_t * val);
+#define vst3q_p8 vst3q_u8
+
+//void vst3q_p16(__transfersize(24) poly16_t * ptr, poly16x8x3_t val);// VST3.16 {d0, d2, d4}, [r0]
+void vst3q_p16_ptr(__transfersize(24) poly16_t * ptr, poly16x8x3_t * val);
+#define vst3q_p16 vst3q_u16
+#endif
+
+//void vst3_u8(__transfersize(24) uint8_t * ptr, uint8x8x3_t val)// VST3.8 {d0, d1, d2}, [r0]
+//Stores the low 8 bytes of a = val->val[0], b = val->val[1], c = val->val[2] interleaved as a0,b0,c0, a1,b1,c1, ..., a7,b7,c7 (24 bytes).
+//The first 16 bytes are written with a 128-bit store, the remaining 8 bytes with a 64-bit store at ptr + 16
+//(the second part used to be computed but never written to memory).
+#if defined(USE_SSSE3)
+_NEON2SSE_INLINE void vst3_u8_ptr(__transfersize(24) uint8_t * ptr, uint8x8x3_t* val)
+{
+    uint8x8x3_t v;
+    __m128i tmp, sh0, sh1;
+    //mask0/mask1: source byte index for each output byte; indexes 0..7 address a, 8..15 address b, 16..23 address c
+    _NEON2SSE_ALIGN_16 int8_t mask0[16] = { 0, 8, 16, 1, 9, 17, 2, 10, 18, 3, 11, 19, 4, 12, 20, 5};
+    _NEON2SSE_ALIGN_16 int8_t mask1[16] = {13, 21, 6, 14, 22, 7, 15, 23, 0,0,0,0,0,0,0,0};
+    //mask0_sel/mask1_sel: 0xff where the output byte must come from c (sh1), 0 where it comes from a/b (sh0)
+    _NEON2SSE_ALIGN_16 int8_t mask0_sel[16] = {0, 0, 0xff, 0, 0, 0xff, 0, 0, 0xff, 0, 0, 0xff, 0, 0, 0xff, 0};
+    _NEON2SSE_ALIGN_16 int8_t mask1_sel[16] = {0, 0xff, 0, 0, 0xff, 0, 0, 0xff, 0,0,0,0,0,0,0,0};
+    tmp = _mm_unpacklo_epi64(val->val[0], val->val[1]); //a0..a7, b0..b7
+    sh0 = _mm_shuffle_epi8(tmp, *(__m128i*)mask0); //only the low 4 index bits are used, so an index bi>15 wraps (bi-=16)
+    sh1 = _mm_shuffle_epi8(val->val[2], *(__m128i*)mask0); //indexes 16..23 wrap to 0..7 and pick c0..c7
+    v.val[0] = _MM_BLENDV_EPI8(sh0, sh1, *(__m128i*)mask0_sel);
+    vst1q_u8(ptr, v.val[0]); //store as 128 bit structure: output bytes 0..15
+    sh0 = _mm_shuffle_epi8(tmp, *(__m128i*)mask1); //only the low 4 index bits are used, so an index bi>15 wraps (bi-=16)
+    sh1 = _mm_shuffle_epi8(val->val[2], *(__m128i*)mask1);
+    v.val[1] = _MM_BLENDV_EPI8(sh0, sh1, *(__m128i*)mask1_sel);
+    _mm_storel_epi64((__m128i*)(ptr + 16), v.val[1]); //output bytes 16..23: only the low 64 bits are meaningful
+}
+#define vst3_u8(ptr, val) vst3_u8_ptr(ptr, &val)
+#endif
+
+//void vst3_u16(__transfersize(12) uint16_t * ptr, uint16x4x3_t val)// VST3.16 {d0, d1, d2}, [r0]
+//Stores the low 4 elements of a = val->val[0], b = val->val[1], c = val->val[2] interleaved as a0,b0,c0, ..., a3,b3,c3 (12 elements).
+//The first 8 elements are written with a 128-bit store, the remaining 4 with a 64-bit store at ptr + 8
+//(the second part used to be computed but never written to memory).
+#if defined(USE_SSSE3)
+_NEON2SSE_INLINE void vst3_u16_ptr(__transfersize(12) uint16_t * ptr, uint16x4x3_t* val)
+{
+    uint16x4x3_t v;
+    __m128i tmp;
+    //mask0/mask1: source byte index for each output byte; bytes 0..7 address a, 8..15 address b, 16..23 address c (wrapped by pshufb to 0..7)
+    _NEON2SSE_ALIGN_16 int8_t mask0[16] = {0,1, 8,9, 16,17, 2,3, 10,11, 18,19, 4,5, 12,13};
+    _NEON2SSE_ALIGN_16 int8_t mask1[16] = {20,21, 6,7, 14,15, 22,23, 0,0,0,0,0,0,0,0};
+    _NEON2SSE_ALIGN_16 uint16_t mask0f[8] = {0xffff, 0xffff, 0, 0xffff, 0xffff, 0, 0xffff, 0xffff}; //if all ones we take the result from v.val[0] otherwise from v.val[1]
+    _NEON2SSE_ALIGN_16 uint16_t mask1f[8] = {0xffff, 0, 0, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}; //if all ones we take the result from v.val[1] otherwise from v.val[0]
+    tmp = _mm_unpacklo_epi64(val->val[0], val->val[1]); //a0..a3, b0..b3
+    v.val[0] = _mm_shuffle_epi8(tmp, *(__m128i*)mask0);
+    v.val[1] = _mm_shuffle_epi8(val->val[2], *(__m128i*)mask0);
+    v.val[0] = _MM_BLENDV_EPI8(v.val[1], v.val[0], *(__m128i*)mask0f);
+    vst1q_u16(ptr, v.val[0]); //store as 128 bit structure: output elements 0..7
+    v.val[0] = _mm_shuffle_epi8(tmp, *(__m128i*)mask1);
+    v.val[1] = _mm_shuffle_epi8(val->val[2], *(__m128i*)mask1);
+    v.val[1] = _MM_BLENDV_EPI8(v.val[0], v.val[1], *(__m128i*)mask1f); //change the operands order
+    _mm_storel_epi64((__m128i*)(ptr + 8), v.val[1]); //output elements 8..11: only the low 64 bits are meaningful
+}
+#define vst3_u16(ptr, val) vst3_u16_ptr(ptr, &val)
+#endif
+
+//void vst3_u32(__transfersize(6) uint32_t * ptr, uint32x2x3_t val)// VST3.32 {d0, d1, d2}, [r0]
+//Stores the low 2 elements of val->val[0], val->val[1], val->val[2] interleaved (6 elements).
+//Output elements 0..3 are written with a 128-bit store, elements 4..5 with a 64-bit store at ptr + 4
+//(the second part used to be computed but never written to memory).
+_NEON2SSE_INLINE void vst3_u32_ptr(__transfersize(6) uint32_t * ptr, uint32x2x3_t* val)
+{ //numbers are output positions: val->val[0]:0,3,val->val[1]:1,4; val->val[2]:2,5,x,x;
+    uint32x2x3_t res;
+    res.val[0] = _mm_unpacklo_epi64(val->val[1], val->val[2]); //val[0]: 1,4,2,5
+    res.val[0] = _mm_shuffle_epi32(res.val[0], 0 | (2 << 2) | (1 << 4) | (3 << 6)); //1,2,4,5
+    res.val[1] = _mm_srli_si128(res.val[0], 8); //4,5, x,x
+    res.val[0] = _mm_unpacklo_epi32(val->val[0], res.val[0]); //0,1,3,2
+    res.val[0] = _mm_shuffle_epi32(res.val[0], 0 | (1 << 2) | (3 << 4) | (2 << 6)); //0,1,2, 3
+    vst1q_u32(ptr, res.val[0]); //store as 128 bit structure
+    _mm_storel_epi64((__m128i*)(ptr + 4), res.val[1]); //4,5: only the low 64 bits are meaningful
+}
+#define vst3_u32(ptr, val) vst3_u32_ptr(ptr, &val)
+
+//void vst3_u64(__transfersize(3) uint64_t * ptr, uint64x1x3_t val)// VST1.64 {d0, d1, d2}, [r0]
+//Stores the low 64 bits of val->val[0], val->val[1], val->val[2] consecutively at ptr (3 elements).
+//The first two are written with one 128-bit store, the third with a 64-bit store at ptr + 2
+//(the third element used to be skipped).
+_NEON2SSE_INLINE void vst3_u64_ptr(__transfersize(3) uint64_t * ptr, uint64x1x3_t* val)
+{
+    __m128i tmp;
+    tmp = _mm_unpacklo_epi64(val->val[0], val->val[1]);
+    vst1q_u64(ptr, tmp); //store as 128 bit structure
+    _mm_storel_epi64((__m128i*)(ptr + 2), val->val[2]); //third element
+}
+#define vst3_u64(ptr, val) vst3_u64_ptr(ptr, &val)
+
+//Signed variants forward to the unsigned implementations of the same element width.
+//Macro arguments are parenthesized so that compound expressions (e.g. p + 1 or c ? a : b) expand correctly.
+#if defined(USE_SSSE3)
+//void vst3_s8(__transfersize(24) int8_t * ptr, int8x8x3_t val) // VST3.8 {d0, d1, d2}, [r0]
+#define vst3_s8(ptr, val) vst3_u8_ptr((uint8_t*)(ptr), &(val))
+
+//void vst3_s16(__transfersize(12) int16_t * ptr, int16x4x3_t val) // VST3.16 {d0, d1, d2}, [r0]
+#define vst3_s16(ptr, val) vst3_u16_ptr((uint16_t*)(ptr), &(val))
+#endif
+
+//void vst3_s32(__transfersize(6) int32_t * ptr, int32x2x3_t val); // VST3.32 {d0, d1, d2}, [r0]
+#define vst3_s32(ptr, val) vst3_u32_ptr((uint32_t*)(ptr), &(val))
+
+//void vst3_s64(__transfersize(3) int64_t * ptr, int64x1x3_t val) // VST1.64 {d0, d1, d2}, [r0]
+#define vst3_s64(ptr, val) vst3_u64_ptr((uint64_t*)(ptr), &(val))
+
+//void vst3_f16(__transfersize(12) __fp16 * ptr, float16x4x3_t val);// VST3.16 {d0, d1, d2}, [r0]
+void vst3_f16_ptr(__transfersize(12) __fp16 * ptr, float16x4x3_t * val); // VST3.16 {d0, d1, d2}, [r0]
+// IA32 SIMD doesn't work with 16bit floats currently, so need to go to 32 bit and then work with two 128bit registers. See vld1q_f16 for example
+
+//void vst3_f32(__transfersize(6) float32_t * ptr, float32x2x3_t val)// VST3.32 {d0, d1, d2}, [r0]
+//Interleaves three 2-lane float vectors and writes six floats to ptr in the order
+//val[0][0], val[1][0], val[2][0], val[0][1], val[1][1], val[2][1].
+//Only the two low lanes of each val->val[i] are used. Elements 0..3 go out as one 128-bit store,
+//elements 4,5 as a separate 64-bit store.
+_NEON2SSE_INLINE void vst3_f32_ptr(__transfersize(6) float32_t * ptr, float32x2x3_t* val)
+{ //val->val[0]:0,3,val->val[1]:1,4; val->val[2]:2,5,x,x;
+ float32x2x3_t res;
+ res.val[0] = _mm_castsi128_ps(_mm_unpacklo_epi64(_mm_castps_si128(val->val[1]), _mm_castps_si128(val->val[2])) ); //1,4,2,5
+ res.val[0] = _mm_shuffle_ps(res.val[0],res.val[0], _MM_SHUFFLE(3,1,2,0)); //1,2,4,5
+ res.val[1] = _mm_shuffle_ps(res.val[0],res.val[0], _MM_SHUFFLE(1,0,3,2)); //4,5, 1,2
+ _mm_storel_epi64((__m128i*)(ptr + 4), _mm_castps_si128(res.val[1])); //write elements 4,5 - previously computed but never stored
+ res.val[0] = _mm_unpacklo_ps(val->val[0], res.val[0]); //0,1,3, 2
+ res.val[0] = _mm_shuffle_ps(res.val[0],res.val[0], _MM_SHUFFLE(2,3,1,0)); //0,1,2, 3
+ vst1q_f32(ptr, res.val[0]); //write elements 0..3 as 128 bit structure
+}
+#define vst3_f32(ptr, val) vst3_f32_ptr(ptr, &val)
+
+//Polynomial vst3 variants are pure name aliases (p8 -> vst3_u8, p16 -> vst3_s16) and, like their
+//targets, are only available when USE_SSSE3 is defined. The *_ptr prototypes have no body here.
+#if defined(USE_SSSE3)
+//void vst3_p8(__transfersize(24) poly8_t * ptr, poly8x8x3_t val);// VST3.8 {d0, d1, d2}, [r0]
+void vst3_p8_ptr(__transfersize(24) poly8_t * ptr, poly8x8x3_t * val);
+#define vst3_p8 vst3_u8
+
+//void vst3_p16(__transfersize(12) poly16_t * ptr, poly16x4x3_t val);// VST3.16 {d0, d1, d2}, [r0]
+void vst3_p16_ptr(__transfersize(12) poly16_t * ptr, poly16x4x3_t * val);
+#define vst3_p16 vst3_s16
+#endif
+
+//*************** Quadruples store ********************************
+//*********************************************************************
+//void vst4q_u8(__transfersize(64) uint8_t * ptr, uint8x16x4_t val)// VST4.8 {d0, d2, d4, d6}, [r0]
+//4-way byte interleave: with a,b,c,d = val->val[0..3] the 64 output bytes are
+//a0,b0,c0,d0, a1,b1,c1,d1, ... a15,b15,c15,d15, written as four consecutive 128-bit stores.
+_NEON2SSE_INLINE void vst4q_u8_ptr(__transfersize(64) uint8_t * ptr, uint8x16x4_t* val)
+{
+ __m128i ab, cd, quad;
+ //lower eight bytes of every source register
+ ab = _mm_unpacklo_epi8(val->val[0], val->val[1]); //a0,b0, a1,b1, ... a7,b7
+ cd = _mm_unpacklo_epi8(val->val[2], val->val[3]); //c0,d0, c1,d1, ... c7,d7
+ quad = _mm_unpacklo_epi16(ab, cd); //a0,b0,c0,d0 ... a3,b3,c3,d3
+ vst1q_u8(ptr, quad);
+ quad = _mm_unpackhi_epi16(ab, cd); //a4,b4,c4,d4 ... a7,b7,c7,d7
+ vst1q_u8((ptr + 16), quad);
+ //upper eight bytes of every source register
+ ab = _mm_unpackhi_epi8(val->val[0], val->val[1]); //a8,b8, ... a15,b15
+ cd = _mm_unpackhi_epi8(val->val[2], val->val[3]); //c8,d8, ... c15,d15
+ quad = _mm_unpacklo_epi16(ab, cd); //a8,b8,c8,d8 ... a11,b11,c11,d11
+ vst1q_u8((ptr + 32), quad);
+ quad = _mm_unpackhi_epi16(ab, cd); //a12,b12,c12,d12 ... a15,b15,c15,d15
+ vst1q_u8((ptr + 48), quad);
+}
+#define vst4q_u8(ptr, val) vst4q_u8_ptr(ptr, &val)
+
+//void vst4q_u16(__transfersize(32) uint16_t * ptr, uint16x8x4_t val)// VST4.16 {d0, d2, d4, d6}, [r0]
+//4-way 16-bit interleave: with a,b,c,d = val->val[0..3] the 32 output elements are
+//a0,b0,c0,d0, a1,b1,c1,d1, ... a7,b7,c7,d7, written as four consecutive 128-bit stores.
+_NEON2SSE_INLINE void vst4q_u16_ptr(__transfersize(32) uint16_t * ptr, uint16x8x4_t* val)
+{
+ __m128i ab_lo = _mm_unpacklo_epi16(val->val[0], val->val[1]); //a0,b0, a1,b1, a2,b2, a3,b3
+ __m128i cd_lo = _mm_unpacklo_epi16(val->val[2], val->val[3]); //c0,d0, c1,d1, c2,d2, c3,d3
+ __m128i ab_hi = _mm_unpackhi_epi16(val->val[0], val->val[1]); //a4,b4, a5,b5, a6,b6, a7,b7
+ __m128i cd_hi = _mm_unpackhi_epi16(val->val[2], val->val[3]); //c4,d4, c5,d5, c6,d6, c7,d7
+ __m128i out0 = _mm_unpacklo_epi32(ab_lo, cd_lo); //a0,b0,c0,d0, a1,b1,c1,d1
+ __m128i out1 = _mm_unpackhi_epi32(ab_lo, cd_lo); //a2,b2,c2,d2, a3,b3,c3,d3
+ __m128i out2 = _mm_unpacklo_epi32(ab_hi, cd_hi); //a4,b4,c4,d4, a5,b5,c5,d5
+ __m128i out3 = _mm_unpackhi_epi32(ab_hi, cd_hi); //a6,b6,c6,d6, a7,b7,c7,d7
+ vst1q_u16(ptr, out0);
+ vst1q_u16((ptr + 8), out1);
+ vst1q_u16((ptr + 16), out2);
+ vst1q_u16((ptr + 24), out3);
+}
+#define vst4q_u16(ptr, val) vst4q_u16_ptr(ptr, &val)
+
+//void vst4q_u32(__transfersize(16) uint32_t * ptr, uint32x4x4_t val)// VST4.32 {d0, d2, d4, d6}, [r0]
+//4-way 32-bit interleave: with a,b,c,d = val->val[0..3] the 16 output elements are
+//a0,b0,c0,d0, a1,b1,c1,d1, a2,b2,c2,d2, a3,b3,c3,d3, written as four consecutive 128-bit stores.
+_NEON2SSE_INLINE void vst4q_u32_ptr(__transfersize(16) uint32_t * ptr, uint32x4x4_t* val)
+{
+ uint32x4x4_t v; //was declared uint16x8x4_t; use the struct type that matches the element width
+ __m128i tmp1, tmp2;
+ tmp1 = _mm_unpacklo_epi32(val->val[0], val->val[1]); //a0,b0, a1,b1
+ tmp2 = _mm_unpacklo_epi32(val->val[2], val->val[3]); //c0,d0, c1,d1
+ v.val[0] = _mm_unpacklo_epi64(tmp1, tmp2); //a0,b0,c0,d0
+ v.val[1] = _mm_unpackhi_epi64(tmp1, tmp2); //a1,b1,c1,d1
+ tmp1 = _mm_unpackhi_epi32(val->val[0], val->val[1]); //a2,b2, a3,b3
+ tmp2 = _mm_unpackhi_epi32(val->val[2], val->val[3]); //c2,d2, c3,d3
+ v.val[2] = _mm_unpacklo_epi64(tmp1, tmp2); //a2,b2,c2,d2
+ v.val[3] = _mm_unpackhi_epi64(tmp1, tmp2); //a3,b3,c3,d3
+ vst1q_u32(ptr, v.val[0]);
+ vst1q_u32((ptr + 4), v.val[1]);
+ vst1q_u32((ptr + 8), v.val[2]);
+ vst1q_u32((ptr + 12), v.val[3]);
+}
+#define vst4q_u32(ptr, val) vst4q_u32_ptr(ptr, &val)
+
+//Signed vst4q variants: each macro casts the destination pointer to the unsigned element type and
+//expands to the unsigned vst4q_* macro. The *_ptr prototypes below have no body in this section.
+//void vst4q_s8(__transfersize(64) int8_t * ptr, int8x16x4_t val);
+void vst4q_s8_ptr(__transfersize(64) int8_t * ptr, int8x16x4_t * val);
+#define vst4q_s8(ptr, val) vst4q_u8((uint8_t*)(ptr), val)
+
+//void vst4q_s16(__transfersize(32) int16_t * ptr, int16x8x4_t val);
+void vst4q_s16_ptr(__transfersize(32) int16_t * ptr, int16x8x4_t * val);
+#define vst4q_s16(ptr, val) vst4q_u16((uint16_t*)(ptr), val)
+
+//void vst4q_s32(__transfersize(16) int32_t * ptr, int32x4x4_t val);
+void vst4q_s32_ptr(__transfersize(16) int32_t * ptr, int32x4x4_t * val);
+#define vst4q_s32(ptr, val) vst4q_u32((uint32_t*)(ptr), val)
+
+//void vst4q_f16(__transfersize(32) __fp16 * ptr, float16x8x4_t val);// VST4.16 {d0, d2, d4, d6}, [r0]
+//NOTE(review): prototype only; no vst4q_f16 macro is defined alongside it.
+void vst4q_f16_ptr(__transfersize(32) __fp16 * ptr, float16x8x4_t * val);
+// IA32 SIMD doesn't work with 16bit floats currently
+
+//void vst4q_f32(__transfersize(16) float32_t * ptr, float32x4x4_t val)// VST4.32 {d0, d2, d4, d6}, [r0]
+//4-way float interleave: with a,b,c,d = val->val[0..3] the 16 output floats are
+//a0,b0,c0,d0, a1,b1,c1,d1, a2,b2,c2,d2, a3,b3,c3,d3, written as four consecutive 128-bit stores.
+_NEON2SSE_INLINE void vst4q_f32_ptr(__transfersize(16) float32_t * ptr, float32x4x4_t* val)
+{
+ __m128 ab_lo = _mm_unpacklo_ps(val->val[0], val->val[1]); //a0,b0, a1,b1
+ __m128 cd_lo = _mm_unpacklo_ps(val->val[2], val->val[3]); //c0,d0, c1,d1
+ __m128 ab_hi = _mm_unpackhi_ps(val->val[0], val->val[1]); //a2,b2, a3,b3
+ __m128 cd_hi = _mm_unpackhi_ps(val->val[2], val->val[3]); //c2,d2, c3,d3
+ __m128 row0 = _mm_movelh_ps(ab_lo, cd_lo); //a0,b0,c0,d0
+ __m128 row1 = _mm_movehl_ps(cd_lo, ab_lo); //a1,b1,c1,d1
+ __m128 row2 = _mm_movelh_ps(ab_hi, cd_hi); //a2,b2,c2,d2
+ __m128 row3 = _mm_movehl_ps(cd_hi, ab_hi); //a3,b3,c3,d3
+ vst1q_f32(ptr, row0);
+ vst1q_f32((ptr + 4), row1);
+ vst1q_f32((ptr + 8), row2);
+ vst1q_f32((ptr + 12), row3);
+}
+#define vst4q_f32(ptr, val) vst4q_f32_ptr(ptr, &val)
+
+//Polynomial vst4q variants are name aliases: p8 -> vst4q_u8, p16 -> vst4q_s16.
+//void vst4q_p8(__transfersize(64) poly8_t * ptr, poly8x16x4_t val);// VST4.8 {d0, d2, d4, d6}, [r0]
+void vst4q_p8_ptr(__transfersize(64) poly8_t * ptr, poly8x16x4_t * val);
+#define vst4q_p8 vst4q_u8
+
+//void vst4q_p16(__transfersize(32) poly16_t * ptr, poly16x8x4_t val);// VST4.16 {d0, d2, d4, d6}, [r0]
+void vst4q_p16_ptr(__transfersize(32) poly16_t * ptr, poly16x8x4_t * val);
+#define vst4q_p16 vst4q_s16
+
+//void vst4_u8(__transfersize(32) uint8_t * ptr, uint8x8x4_t val)// VST4.8 {d0, d1, d2, d3}, [r0]
+//4-way byte interleave of the low eight bytes of a,b,c,d = val->val[0..3]:
+//a0,b0,c0,d0, ... a7,b7,c7,d7 (32 bytes, two 128-bit stores).
+_NEON2SSE_INLINE void vst4_u8_ptr(__transfersize(32) uint8_t * ptr, uint8x8x4_t* val)
+{
+ __m128i ab = _mm_unpacklo_epi8(val->val[0],val->val[1]); //a0,b0, a1,b1, ... a7,b7
+ __m128i cd = _mm_unpacklo_epi8(val->val[2],val->val[3]); //c0,d0, c1,d1, ... c7,d7
+ __m128i first_half = _mm_unpacklo_epi16(ab,cd); //a0,b0,c0,d0 ... a3,b3,c3,d3
+ __m128i second_half = _mm_unpackhi_epi16(ab,cd); //a4,b4,c4,d4 ... a7,b7,c7,d7
+ vst1q_u8(ptr, first_half);
+ vst1q_u8((ptr + 16), second_half);
+}
+#define vst4_u8(ptr, val) vst4_u8_ptr(ptr, &val)
+
+//void vst4_u16(__transfersize(16) uint16_t * ptr, uint16x4x4_t val)// VST4.16 {d0, d1, d2, d3}, [r0]
+//4-way 16-bit interleave of the low four elements of a,b,c,d = val->val[0..3]:
+//a0,b0,c0,d0, a1,b1,c1,d1, a2,b2,c2,d2, a3,b3,c3,d3 (16 elements, two 128-bit stores).
+_NEON2SSE_INLINE void vst4_u16_ptr(__transfersize(16) uint16_t * ptr, uint16x4x4_t* val)
+{
+ __m128i ab = _mm_unpacklo_epi16(val->val[0],val->val[1]); //a0,b0, a1,b1, a2,b2, a3,b3
+ __m128i cd = _mm_unpacklo_epi16(val->val[2],val->val[3]); //c0,d0, c1,d1, c2,d2, c3,d3
+ __m128i first_half = _mm_unpacklo_epi32(ab,cd); //a0,b0,c0,d0, a1,b1,c1,d1
+ __m128i second_half = _mm_unpackhi_epi32(ab,cd); //a2,b2,c2,d2, a3,b3,c3,d3
+ vst1q_u16(ptr, first_half); //store as 128 bit structure
+ vst1q_u16((ptr + 8), second_half);
+}
+#define vst4_u16(ptr, val) vst4_u16_ptr(ptr, &val)
+
+//void vst4_u32(__transfersize(8) uint32_t * ptr, uint32x2x4_t val)// VST4.32 {d0, d1, d2, d3}, [r0]
+//4-way 32-bit interleave of the low two elements of each val->val[i]; numbering output positions 0..7,
+//the inputs hold val[0]:0,4 val[1]:1,5 val[2]:2,6 val[3]:3,7.
+_NEON2SSE_INLINE void vst4_u32_ptr(__transfersize(8) uint32_t * ptr, uint32x2x4_t* val)
+{
+ __m128i pair01 = _mm_unpacklo_epi32(val->val[0], val->val[1]); //0,1,4,5
+ __m128i pair23 = _mm_unpacklo_epi32(val->val[2], val->val[3]); //2,3,6,7
+ __m128i first_half = _mm_unpacklo_epi64(pair01,pair23); //0,1,2,3
+ __m128i second_half = _mm_unpackhi_epi64(pair01,pair23); //4,5,6,7
+ vst1q_u32(ptr, first_half); //store as 128 bit structure
+ vst1q_u32((ptr + 4), second_half);
+}
+#define vst4_u32(ptr, val) vst4_u32_ptr(ptr, &val)
+
+//void vst4_u64(__transfersize(4) uint64_t * ptr, uint64x1x4_t val)// VST1.64 {d0, d1, d2, d3}, [r0]
+//Writes the low 64 bits of val->val[0..3] to ptr[0..3].
+//Each 64-bit vector occupies only the low half of its register (the 3-element store in this file
+//packs them the same way), so neighbouring elements must be combined before a 128-bit store.
+//Storing val[0] and val[2] whole, as before, never wrote val[1] and val[3].
+_NEON2SSE_INLINE void vst4_u64_ptr(__transfersize(4) uint64_t * ptr, uint64x1x4_t* val)
+{
+ __m128i pair;
+ pair = _mm_unpacklo_epi64(val->val[0], val->val[1]); //elements 0,1
+ vst1q_u64(ptr, pair);
+ pair = _mm_unpacklo_epi64(val->val[2], val->val[3]); //elements 2,3
+ vst1q_u64((ptr + 2), pair);
+}
+#define vst4_u64(ptr, val) vst4_u64_ptr(ptr, &val)
+
+//Signed vst4 variants: each macro casts the destination pointer to the unsigned element type and
+//expands to the unsigned vst4_* macro with the same vector argument.
+//void vst4_s8(__transfersize(32) int8_t * ptr, int8x8x4_t val) //VST4.8 {d0, d1, d2, d3}, [r0]
+#define vst4_s8(ptr, val) vst4_u8((uint8_t*)ptr, val)
+
+//void vst4_s16(__transfersize(16) int16_t * ptr, int16x4x4_t val) // VST4.16 {d0, d1, d2, d3}, [r0]
+#define vst4_s16(ptr, val) vst4_u16((uint16_t*)ptr, val)
+
+//void vst4_s32(__transfersize(8) int32_t * ptr, int32x2x4_t val) // VST4.32 {d0, d1, d2, d3}, [r0]
+#define vst4_s32(ptr, val) vst4_u32((uint32_t*)ptr, val)
+
+//void vst4_s64(__transfersize(4) int64_t * ptr, int64x1x4_t val); // VST1.64 {d0, d1, d2, d3}, [r0]
+void vst4_s64_ptr(__transfersize(4) int64_t * ptr, int64x1x4_t * val);
+#define vst4_s64(ptr, val) vst4_u64((uint64_t*)ptr, val)
+
+//void vst4_f16(__transfersize(16) __fp16 * ptr, float16x4x4_t val);// VST4.16 {d0, d1, d2, d3}, [r0]
+//NOTE(review): prototype only; no vst4_f16 macro is defined alongside it.
+void vst4_f16_ptr(__transfersize(16) __fp16 * ptr, float16x4x4_t * val);
+// IA32 SIMD doesn't work with 16bit floats currently, so need to go to 32 bit and then work with two 128bit registers. See vld1q_f16 for example
+
+//void vst4_f32(__transfersize(8) float32_t * ptr, float32x2x4_t val)// VST4.32 {d0, d1, d2, d3}, [r0]
+//4-way float interleave of the low two lanes of a,b,c,d = val->val[0..3]:
+//a0,b0,c0,d0, a1,b1,c1,d1 (8 floats, two 128-bit stores).
+_NEON2SSE_INLINE void vst4_f32_ptr(__transfersize(8) float32_t * ptr, float32x2x4_t* val)
+{
+ __m128 ab = _mm_unpacklo_ps(val->val[0],val->val[1]); //a0,b0, a1,b1
+ __m128 cd = _mm_unpacklo_ps(val->val[2],val->val[3]); //c0,d0, c1,d1
+ __m128 first_half = _mm_movelh_ps (ab, cd); //a0,b0,c0,d0
+ __m128 second_half = _mm_movehl_ps (cd, ab); //a1,b1,c1,d1
+ vst1q_f32(ptr, first_half); //store as 128 bit structure
+ vst1q_f32((ptr + 4), second_half);
+}
+#define vst4_f32(ptr, val) vst4_f32_ptr(ptr, &val)
+
+//Polynomial vst4 variants are name aliases of the unsigned macros of the same element width.
+//void vst4_p8(__transfersize(32) poly8_t * ptr, poly8x8x4_t val);// VST4.8 {d0, d1, d2, d3}, [r0]
+void vst4_p8_ptr(__transfersize(32) poly8_t * ptr, poly8x8x4_t * val);
+#define vst4_p8 vst4_u8
+
+//void vst4_p16(__transfersize(16) poly16_t * ptr, poly16x4x4_t val);// VST4.16 {d0, d1, d2, d3}, [r0]
+void vst4_p16_ptr(__transfersize(16) poly16_t * ptr, poly16x4x4_t * val);
+#define vst4_p16 vst4_u16
+
+//*********** Store a lane of a vector into memory (extract given lane) for a couple of vectors *********************
+//********************************************************************************************************************
+//void vst2q_lane_u16(__transfersize(2) uint16_t * ptr, uint16x8x2_t val, __constrange(0,7) int lane)// VST2.16 {d0[0], d2[0]}, [r0]
+//Stores the selected lane of val->val[0] to ptr[0] and of val->val[1] to ptr[1].
+_NEON2SSE_INLINE void vst2q_lane_u16_ptr(__transfersize(2) uint16_t * ptr, uint16x8x2_t* val, __constrange(0,7) int lane)
+{
+ vst1q_lane_s16((&ptr[0]), val->val[0], lane); //first register -> first output element
+ vst1q_lane_s16((&ptr[1]), val->val[1], lane); //second register -> second output element
+}
+#define vst2q_lane_u16(ptr, val, lane) vst2q_lane_u16_ptr(ptr, &val, lane)
+
+//void vst2q_lane_u32(__transfersize(2) uint32_t * ptr, uint32x4x2_t val, __constrange(0,3) int lane)// VST2.32 {d0[0], d2[0]}, [r0]
+//Stores the selected lane of val->val[0] to ptr[0] and of val->val[1] to ptr[1].
+_NEON2SSE_INLINE void vst2q_lane_u32_ptr(__transfersize(2) uint32_t* ptr, uint32x4x2_t* val, __constrange(0,3) int lane)
+{
+ vst1q_lane_u32((&ptr[0]), val->val[0], lane); //first register -> first output element
+ vst1q_lane_u32((&ptr[1]), val->val[1], lane); //second register -> second output element
+}
+#define vst2q_lane_u32(ptr, val, lane) vst2q_lane_u32_ptr(ptr, &val, lane)
+
+//Signed vst2q lane stores cast the pointer and expand to the unsigned macros; the f16 form is prototype-only.
+//void vst2q_lane_s16(__transfersize(2) int16_t * ptr, int16x8x2_t val, __constrange(0,7) int lane);// VST2.16 {d0[0], d2[0]}, [r0]
+void vst2q_lane_s16_ptr(__transfersize(2) int16_t * ptr, int16x8x2_t * val, __constrange(0,7) int lane);
+#define vst2q_lane_s16(ptr, val, lane) vst2q_lane_u16((uint16_t*)ptr, val, lane)
+
+//void vst2q_lane_s32(__transfersize(2) int32_t * ptr, int32x4x2_t val, __constrange(0,3) int lane);// VST2.32 {d0[0], d2[0]}, [r0]
+void vst2q_lane_s32_ptr(__transfersize(2) int32_t * ptr, int32x4x2_t * val, __constrange(0,3) int lane);
+#define vst2q_lane_s32(ptr, val, lane) vst2q_lane_u32((uint32_t*)ptr, val, lane)
+
+//void vst2q_lane_f16(__transfersize(2) __fp16 * ptr, float16x8x2_t val, __constrange(0,7) int lane);// VST2.16 {d0[0], d2[0]}, [r0]
+void vst2q_lane_f16_ptr(__transfersize(2) __fp16 * ptr, float16x8x2_t * val, __constrange(0,7) int lane);
+//current IA SIMD doesn't support float16
+
+//void vst2q_lane_f32(__transfersize(2) float32_t * ptr, float32x4x2_t val, __constrange(0,3) int lane)// VST2.32 {d0[0], d2[0]}, [r0]
+//Stores the selected lane of val->val[0] to ptr[0] and of val->val[1] to ptr[1].
+_NEON2SSE_INLINE void vst2q_lane_f32_ptr(__transfersize(2) float32_t* ptr, float32x4x2_t* val, __constrange(0,3) int lane)
+{
+ vst1q_lane_f32((&ptr[0]), val->val[0], lane); //first register -> first output element
+ vst1q_lane_f32((&ptr[1]), val->val[1], lane); //second register -> second output element
+}
+#define vst2q_lane_f32(ptr, val, lane) vst2q_lane_f32_ptr(ptr, &val, lane)
+
+//Alias section for 2-element lane stores. No bodies are provided for the *_ptr prototypes listed here:
+// - vst2q_lane_p16 is an alias of vst2q_lane_s16;
+// - the 64-bit u16/u32/s16/s32/f32 names are aliases of the 128-bit vst2q_lane_* macros;
+// - vst2_lane_s8 casts the pointer and expands to vst2_lane_u8; p8/p16 alias vst2_lane_u8/vst2_lane_u16.
+//NOTE(review): vst2_lane_u8 is referenced but not defined in this part of the file - confirm it exists elsewhere.
+//void vst2q_lane_p16(__transfersize(2) poly16_t * ptr, poly16x8x2_t val, __constrange(0,7) int lane);// VST2.16 {d0[0], d2[0]}, [r0]
+void vst2q_lane_p16_ptr(__transfersize(2) poly16_t * ptr, poly16x8x2_t * val, __constrange(0,7) int lane);
+#define vst2q_lane_p16 vst2q_lane_s16
+
+//void vst2_lane_u16(__transfersize(2) uint16_t * ptr, uint16x4x2_t val, __constrange(0,3) int lane);// VST2.16 {d0[0], d1[0]}, [r0]
+void vst2_lane_u16_ptr(__transfersize(2) uint16_t * ptr, uint16x4x2_t * val, __constrange(0,3) int lane); // VST2.16 {d0[0], d1[0]}, [r0]
+#define vst2_lane_u16 vst2q_lane_u16
+
+//void vst2_lane_u32(__transfersize(2) uint32_t * ptr, uint32x2x2_t val, __constrange(0,1) int lane);// VST2.32 {d0[0], d1[0]}, [r0]
+void vst2_lane_u32_ptr(__transfersize(2) uint32_t * ptr, uint32x2x2_t * val, __constrange(0,1) int lane); // VST2.32 {d0[0], d1[0]}, [r0]
+#define vst2_lane_u32 vst2q_lane_u32
+
+//void vst2_lane_s8(__transfersize(2) int8_t * ptr, int8x8x2_t val, __constrange(0,7) int lane);// VST2.8 {d0[0], d1[0]}, [r0]
+void vst2_lane_s8_ptr(__transfersize(2) int8_t * ptr, int8x8x2_t * val, __constrange(0,7) int lane);
+#define vst2_lane_s8(ptr, val, lane) vst2_lane_u8((uint8_t*)ptr, val, lane)
+
+//void vst2_lane_s16(__transfersize(2) int16_t * ptr, int16x4x2_t val, __constrange(0,3) int lane);// VST2.16 {d0[0], d1[0]}, [r0]
+void vst2_lane_s16_ptr(__transfersize(2) int16_t * ptr, int16x4x2_t * val, __constrange(0,3) int lane);
+#define vst2_lane_s16 vst2q_lane_s16
+
+//void vst2_lane_s32(__transfersize(2) int32_t * ptr, int32x2x2_t val, __constrange(0,1) int lane);// VST2.32 {d0[0], d1[0]}, [r0]
+void vst2_lane_s32_ptr(__transfersize(2) int32_t * ptr, int32x2x2_t * val, __constrange(0,1) int lane);
+#define vst2_lane_s32 vst2q_lane_s32
+
+//void vst2_lane_f16(__transfersize(2) __fp16 * ptr, float16x4x2_t val, __constrange(0,3) int lane); // VST2.16 {d0[0], d1[0]}, [r0]
+//current IA SIMD doesn't support float16
+
+void vst2_lane_f32_ptr(__transfersize(2) float32_t * ptr, float32x2x2_t * val, __constrange(0,1) int lane); // VST2.32 {d0[0], d1[0]}, [r0]
+#define vst2_lane_f32 vst2q_lane_f32
+
+//void vst2_lane_p8(__transfersize(2) poly8_t * ptr, poly8x8x2_t val, __constrange(0,7) int lane);// VST2.8 {d0[0], d1[0]}, [r0]
+#define vst2_lane_p8 vst2_lane_u8
+
+//void vst2_lane_p16(__transfersize(2) poly16_t * ptr, poly16x4x2_t val, __constrange(0,3) int lane);// VST2.16 {d0[0], d1[0]}, [r0]
+#define vst2_lane_p16 vst2_lane_u16
+
+//************************* Triple lanes stores *******************************************************
+//*******************************************************************************************************
+//void vst3q_lane_u16(__transfersize(3) uint16_t * ptr, uint16x8x3_t val, __constrange(0,7) int lane)// VST3.16 {d0[0], d2[0], d4[0]}, [r0]
+//Stores the selected lane of val->val[0], val->val[1], val->val[2] to ptr[0], ptr[1], ptr[2].
+//The first two stores are written out directly with the same single-lane store the 2-element routine uses.
+_NEON2SSE_INLINE void vst3q_lane_u16_ptr(__transfersize(3) uint16_t * ptr, uint16x8x3_t* val, __constrange(0,7) int lane)
+{
+ vst1q_lane_s16(ptr, val->val[0], lane);
+ vst1q_lane_s16((ptr + 1), val->val[1], lane);
+ vst1q_lane_u16((ptr + 2), val->val[2], lane);
+}
+#define vst3q_lane_u16(ptr, val, lane) vst3q_lane_u16_ptr(ptr, &val, lane)
+
+//void vst3q_lane_u32(__transfersize(3) uint32_t * ptr, uint32x4x3_t val, __constrange(0,3) int lane)// VST3.32 {d0[0], d2[0], d4[0]}, [r0]
+//Stores the selected lane of val->val[0], val->val[1], val->val[2] to ptr[0], ptr[1], ptr[2].
+_NEON2SSE_INLINE void vst3q_lane_u32_ptr(__transfersize(3) uint32_t * ptr, uint32x4x3_t* val, __constrange(0,3) int lane)
+{
+ vst1q_lane_u32(ptr, val->val[0], lane);
+ vst1q_lane_u32((ptr + 1), val->val[1], lane);
+ vst1q_lane_u32((ptr + 2), val->val[2], lane);
+}
+#define vst3q_lane_u32(ptr, val, lane) vst3q_lane_u32_ptr(ptr, &val, lane)
+
+//Signed vst3q lane stores cast the pointer and expand to the unsigned macros; the f16 form is prototype-only.
+//void vst3q_lane_s16(__transfersize(3) int16_t * ptr, int16x8x3_t val, __constrange(0,7) int lane);// VST3.16 {d0[0], d2[0], d4[0]}, [r0]
+void vst3q_lane_s16_ptr(__transfersize(3) int16_t * ptr, int16x8x3_t * val, __constrange(0,7) int lane);
+#define vst3q_lane_s16(ptr, val, lane) vst3q_lane_u16((uint16_t *)ptr, val, lane)
+
+//void vst3q_lane_s32(__transfersize(3) int32_t * ptr, int32x4x3_t val, __constrange(0,3) int lane);// VST3.32 {d0[0], d2[0], d4[0]}, [r0]
+void vst3q_lane_s32_ptr(__transfersize(3) int32_t * ptr, int32x4x3_t * val, __constrange(0,3) int lane);
+#define vst3q_lane_s32(ptr, val, lane) vst3q_lane_u32((uint32_t *)ptr, val, lane)
+
+//void vst3q_lane_f16(__transfersize(3) __fp16 * ptr, float16x8x3_t val, __constrange(0,7) int lane);// VST3.16 {d0[0], d2[0], d4[0]}, [r0]
+void vst3q_lane_f16_ptr(__transfersize(3) __fp16 * ptr, float16x8x3_t * val, __constrange(0,7) int lane);
+//current IA SIMD doesn't support float16
+
+//void vst3q_lane_f32(__transfersize(3) float32_t * ptr, float32x4x3_t val, __constrange(0,3) int lane)// VST3.32 {d0[0], d2[0], d4[0]}, [r0]
+//Stores the selected lane of val->val[0..2] to ptr[0..2].
+_NEON2SSE_INLINE void vst3q_lane_f32_ptr(__transfersize(3) float32_t * ptr, float32x4x3_t* val, __constrange(0,3) int lane)
+{
+ int reg;
+ for (reg = 0; reg < 3; reg++) {
+  vst1q_lane_f32((ptr + reg), val->val[reg], lane); //output element reg comes from register reg
+ }
+}
+#define vst3q_lane_f32(ptr, val, lane) vst3q_lane_f32_ptr(ptr, &val, lane)
+
+//Alias section for 64-bit 3-element lane stores. No bodies are provided for the *_ptr prototypes listed here:
+// - s8/s16/s32 cast the pointer and expand to vst3_lane_u8/u16/u32;
+// - f32 is an alias of the 128-bit vst3q_lane_f32; p8 aliases vst3_lane_u8 and p16 aliases vst3_lane_s16.
+//NOTE(review): vst3_lane_u8/u16/u32 are referenced but not defined in this part of the file - confirm they exist elsewhere.
+//void vst3_lane_s8(__transfersize(3) int8_t * ptr, int8x8x3_t val, __constrange(0,7) int lane);// VST3.8 {d0[0], d1[0], d2[0]}, [r0]
+void vst3_lane_s8_ptr(__transfersize(3) int8_t * ptr, int8x8x3_t * val, __constrange(0,7) int lane);
+#define vst3_lane_s8(ptr, val, lane) vst3_lane_u8((uint8_t *)ptr, val, lane)
+
+//void vst3_lane_s16(__transfersize(3) int16_t * ptr, int16x4x3_t val, __constrange(0,3) int lane);// VST3.16 {d0[0], d1[0], d2[0]}, [r0]
+void vst3_lane_s16_ptr(__transfersize(3) int16_t * ptr, int16x4x3_t * val, __constrange(0,3) int lane);
+#define vst3_lane_s16(ptr, val, lane) vst3_lane_u16((uint16_t *)ptr, val, lane)
+
+//void vst3_lane_s32(__transfersize(3) int32_t * ptr, int32x2x3_t val, __constrange(0,1) int lane);// VST3.32 {d0[0], d1[0], d2[0]}, [r0]
+void vst3_lane_s32_ptr(__transfersize(3) int32_t * ptr, int32x2x3_t * val, __constrange(0,1) int lane);
+#define vst3_lane_s32(ptr, val, lane) vst3_lane_u32((uint32_t *)ptr, val, lane)
+
+//void vst3_lane_f16(__transfersize(3) __fp16 * ptr, float16x4x3_t val, __constrange(0,3) int lane);// VST3.16 {d0[0], d1[0], d2[0]}, [r0]
+void vst3_lane_f16_ptr(__transfersize(3) __fp16 * ptr, float16x4x3_t * val, __constrange(0,3) int lane);
+//current IA SIMD doesn't support float16
+
+//void vst3_lane_f32(__transfersize(3) float32_t * ptr, float32x2x3_t val, __constrange(0,1) int lane)// VST3.32 {d0[0], d1[0], d2[0]}, [r0]
+void vst3_lane_f32_ptr(__transfersize(3) float32_t * ptr, float32x2x3_t * val, __constrange(0,1) int lane);
+#define vst3_lane_f32 vst3q_lane_f32
+
+//void vst3_lane_p8(__transfersize(3) poly8_t * ptr, poly8x8x3_t val, __constrange(0,7) int lane);// VST3.8 {d0[0], d1[0], d2[0]}, [r0]
+void vst3_lane_p8_ptr(__transfersize(3) poly8_t * ptr, poly8x8x3_t * val, __constrange(0,7) int lane);
+#define vst3_lane_p8 vst3_lane_u8
+
+//void vst3_lane_p16(__transfersize(3) poly16_t * ptr, poly16x4x3_t val, __constrange(0,3) int lane);// VST3.16 {d0[0], d1[0], d2[0]}, [r0]
+void vst3_lane_p16_ptr(__transfersize(3) poly16_t * ptr, poly16x4x3_t * val, __constrange(0,3) int lane);
+#define vst3_lane_p16 vst3_lane_s16
+
+//******************************** Quadruple lanes stores ***********************************************
+//*******************************************************************************************************
+//void vst4q_lane_u16(__transfersize(4) uint16_t * ptr, uint16x8x4_t val, __constrange(0,7) int lane)// VST4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0]
+//Stores the selected lane of val4->val[0..3] to ptr[0..3] by treating the four registers
+//as two consecutive 2-register groups and reusing the 2-element lane store for each.
+_NEON2SSE_INLINE void vst4q_lane_u16_ptr(__transfersize(4) uint16_t * ptr, uint16x8x4_t* val4, __constrange(0,7) int lane)
+{
+ uint16x8x2_t* first_pair = (uint16x8x2_t*)&val4->val[0]; //registers 0,1
+ uint16x8x2_t* second_pair = (uint16x8x2_t*)&val4->val[2]; //registers 2,3
+ vst2q_lane_u16_ptr(ptr, first_pair, lane);
+ vst2q_lane_u16_ptr((ptr + 2), second_pair, lane);
+}
+#define vst4q_lane_u16(ptr, val, lane) vst4q_lane_u16_ptr(ptr, &val, lane)
+
+//void vst4q_lane_u32(__transfersize(4) uint32_t * ptr, uint32x4x4_t val, __constrange(0,3) int lane)// VST4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0]
+//Stores the selected lane of val4->val[0..3] to ptr[0..3] by treating the four registers
+//as two consecutive 2-register groups and reusing the 2-element lane store for each.
+_NEON2SSE_INLINE void vst4q_lane_u32_ptr(__transfersize(4) uint32_t * ptr, uint32x4x4_t* val4, __constrange(0,3) int lane)
+{
+ uint32x4x2_t* first_pair = (uint32x4x2_t*)&val4->val[0]; //registers 0,1
+ uint32x4x2_t* second_pair = (uint32x4x2_t*)&val4->val[2]; //registers 2,3
+ vst2q_lane_u32_ptr(ptr, first_pair, lane);
+ vst2q_lane_u32_ptr((ptr + 2), second_pair, lane);
+}
+#define vst4q_lane_u32(ptr, val, lane) vst4q_lane_u32_ptr(ptr, &val, lane)
+
+//Signed vst4q lane stores cast the pointer and expand to the unsigned macros; the f16 form is prototype-only.
+//void vst4q_lane_s16(__transfersize(4) int16_t * ptr, int16x8x4_t val, __constrange(0,7) int lane);// VST4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0]
+void vst4q_lane_s16_ptr(__transfersize(4) int16_t * ptr, int16x8x4_t * val, __constrange(0,7) int lane);
+#define vst4q_lane_s16(ptr,val,lane) vst4q_lane_u16((uint16_t *)ptr,val,lane)
+
+//void vst4q_lane_s32(__transfersize(4) int32_t * ptr, int32x4x4_t val, __constrange(0,3) int lane);// VST4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0]
+void vst4q_lane_s32_ptr(__transfersize(4) int32_t * ptr, int32x4x4_t * val, __constrange(0,3) int lane);
+#define vst4q_lane_s32(ptr,val,lane) vst4q_lane_u32((uint32_t *)ptr,val,lane)
+
+//void vst4q_lane_f16(__transfersize(4) __fp16 * ptr, float16x8x4_t val, __constrange(0,7) int lane);// VST4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0]
+void vst4q_lane_f16_ptr(__transfersize(4) __fp16 * ptr, float16x8x4_t * val, __constrange(0,7) int lane);
+//current IA SIMD doesn't support float16
+
+//void vst4q_lane_f32(__transfersize(4) float32_t * ptr, float32x4x4_t val, __constrange(0,3) int lane)// VST4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0]
+//Stores the selected lane of val->val[0..3] to ptr[0..3].
+_NEON2SSE_INLINE void vst4q_lane_f32_ptr(__transfersize(4) float32_t * ptr, float32x4x4_t* val, __constrange(0,3) int lane)
+{
+ int reg;
+ for (reg = 0; reg < 4; reg++) {
+  vst1q_lane_f32((ptr + reg), val->val[reg], lane); //output element reg comes from register reg
+ }
+}
+#define vst4q_lane_f32(ptr, val, lane) vst4q_lane_f32_ptr(ptr, &val, lane)
+
+//Polynomial 16-bit form is a name alias of the unsigned macro; the *_ptr prototype has no body here.
+//void vst4q_lane_p16(__transfersize(4) poly16_t * ptr, poly16x8x4_t val, __constrange(0,7) int lane);// VST4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0]
+void vst4q_lane_p16_ptr(__transfersize(4) poly16_t * ptr, poly16x8x4_t * val, __constrange(0,7) int lane);
+#define vst4q_lane_p16 vst4q_lane_u16
+
+//void vst4_lane_u8(__transfersize(4) uint8_t * ptr, uint8x8x4_t val, __constrange(0,7) int lane)// VST4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+//Stores the selected byte lane of val->val[0..3] to ptr[0..3].
+_NEON2SSE_INLINE void vst4_lane_u8_ptr(__transfersize(4) uint8_t * ptr, uint8x8x4_t* val, __constrange(0,7) int lane)
+{
+ int reg;
+ for (reg = 0; reg < 4; reg++) {
+  vst1q_lane_u8((ptr + reg), val->val[reg], lane); //output element reg comes from register reg
+ }
+}
+#define vst4_lane_u8(ptr, val, lane) vst4_lane_u8_ptr(ptr, &val, lane)
+
+//void vst4_lane_u16(__transfersize(4) uint16_t * ptr, uint16x4x4_t val, __constrange(0,3) int lane)// VST4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+//Stores the selected 16-bit lane of val->val[0..3] to ptr[0..3].
+_NEON2SSE_INLINE void vst4_lane_u16_ptr(__transfersize(4) uint16_t * ptr, uint16x4x4_t* val, __constrange(0,3) int lane)
+{
+ int reg;
+ for (reg = 0; reg < 4; reg++) {
+  vst1q_lane_u16((ptr + reg), val->val[reg], lane); //output element reg comes from register reg
+ }
+}
+#define vst4_lane_u16(ptr, val, lane) vst4_lane_u16_ptr(ptr, &val, lane)
+
+//void vst4_lane_u32(__transfersize(4) uint32_t * ptr, uint32x2x4_t val, __constrange(0,1) int lane)// VST4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+//Stores the selected 32-bit lane of val->val[0..3] to ptr[0..3].
+_NEON2SSE_INLINE void vst4_lane_u32_ptr(__transfersize(4) uint32_t * ptr, uint32x2x4_t* val, __constrange(0,1) int lane)
+{
+ int reg;
+ for (reg = 0; reg < 4; reg++) {
+  vst1q_lane_u32((ptr + reg), val->val[reg], lane); //output element reg comes from register reg
+ }
+}
+#define vst4_lane_u32(ptr, val, lane) vst4_lane_u32_ptr(ptr, &val, lane)
+
+//Alias section for 64-bit 4-element lane stores:
+// - s8/s16/s32 cast the pointer and expand to vst4_lane_u8/u16/u32;
+// - f32 is an alias of the 128-bit vst4q_lane_f32; p8/p16 alias vst4_lane_u8/vst4_lane_u16.
+//The f16, p8 and p16 *_ptr prototypes have no body in this section.
+//void vst4_lane_s8(__transfersize(4) int8_t * ptr, int8x8x4_t val, __constrange(0,7) int lane)// VST4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+#define vst4_lane_s8(ptr, val, lane) vst4_lane_u8((uint8_t*)ptr, val, lane)
+
+//void vst4_lane_s16(__transfersize(4) int16_t * ptr, int16x4x4_t val, __constrange(0,3) int lane)// VST4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+#define vst4_lane_s16(ptr, val, lane) vst4_lane_u16((uint16_t*)ptr, val, lane)
+
+//void vst4_lane_s32(__transfersize(4) int32_t * ptr, int32x2x4_t val, __constrange(0,1) int lane)// VST4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+#define vst4_lane_s32(ptr, val, lane) vst4_lane_u32((uint32_t*)ptr, val, lane)
+
+//void vst4_lane_f16(__transfersize(4) __fp16 * ptr, float16x4x4_t val, __constrange(0,3) int lane);// VST4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+void vst4_lane_f16_ptr(__transfersize(4) __fp16 * ptr, float16x4x4_t * val, __constrange(0,3) int lane);
+//current IA SIMD doesn't support float16
+
+//void vst4_lane_f32(__transfersize(4) float32_t * ptr, float32x2x4_t val, __constrange(0,1) int lane)// VST4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+#define vst4_lane_f32 vst4q_lane_f32
+
+//void vst4_lane_p8(__transfersize(4) poly8_t * ptr, poly8x8x4_t val, __constrange(0,7) int lane);// VST4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+void vst4_lane_p8_ptr(__transfersize(4) poly8_t * ptr, poly8x8x4_t * val, __constrange(0,7) int lane);
+#define vst4_lane_p8 vst4_lane_u8
+
+//void vst4_lane_p16(__transfersize(4) poly16_t * ptr, poly16x4x4_t val, __constrange(0,3) int lane);// VST4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0]
+void vst4_lane_p16_ptr(__transfersize(4) poly16_t * ptr, poly16x4x4_t * val, __constrange(0,3) int lane);
+#define vst4_lane_p16 vst4_lane_u16
+
+//**************************************************************************************************
+//************************ Extract lanes from a vector ********************************************
+//**************************************************************************************************
+//These intrinsics extract a single lane (element) from a vector.
+
+//Each name is first declared as a prototype and then redefined as a macro, so calls expand to the macro.
+//The unsigned forms map to the _MM_EXTRACT_EPI8/16/32 helpers; signed and polynomial forms alias the unsigned names.
+//NOTE(review): the signed aliases reuse the unsigned extract unchanged - whether the result is sign-extended
+//depends on the _MM_EXTRACT_* definitions, which are not visible here.
+uint8_t vgetq_lane_u8(uint8x16_t vec, __constrange(0,15) int lane); // VMOV.U8 r0, d0[0]
+#define vgetq_lane_u8 _MM_EXTRACT_EPI8
+
+uint16_t vgetq_lane_u16(uint16x8_t vec, __constrange(0,7) int lane); // VMOV.s16 r0, d0[0]
+#define vgetq_lane_u16 _MM_EXTRACT_EPI16
+
+uint32_t vgetq_lane_u32(uint32x4_t vec, __constrange(0,3) int lane); // VMOV.32 r0, d0[0]
+#define vgetq_lane_u32 _MM_EXTRACT_EPI32
+
+int8_t vgetq_lane_s8(int8x16_t vec, __constrange(0,15) int lane); // VMOV.S8 r0, d0[0]
+#define vgetq_lane_s8 vgetq_lane_u8
+
+int16_t vgetq_lane_s16(int16x8_t vec, __constrange(0,7) int lane); // VMOV.S16 r0, d0[0]
+#define vgetq_lane_s16 vgetq_lane_u16
+
+int32_t vgetq_lane_s32(int32x4_t vec, __constrange(0,3) int lane); // VMOV.32 r0, d0[0]
+#define vgetq_lane_s32 vgetq_lane_u32
+
+poly8_t vgetq_lane_p8(poly8x16_t vec, __constrange(0,15) int lane); // VMOV.U8 r0, d0[0]
+#define vgetq_lane_p8 vgetq_lane_u8
+
+poly16_t vgetq_lane_p16(poly16x8_t vec, __constrange(0,7) int lane); // VMOV.s16 r0, d0[0]
+#define vgetq_lane_p16 vgetq_lane_u16
+
+float32_t vgetq_lane_f32(float32x4_t vec, __constrange(0,3) int lane); // VMOV.32 r0, d0[0]
+//Extracts one float lane: the extract helper's result is kept in a 32-bit integer,
+//and that integer's storage is then read back as a float.
+_NEON2SSE_INLINE float32_t vgetq_lane_f32(float32x4_t vec, __constrange(0,3) int lane)
+{
+ int32_t raw_bits = _MM_EXTRACT_PS(vec,lane);
+ return *(float*)&raw_bits;
+}
+
+//64-bit lane extraction: the unsigned form maps to _MM_EXTRACT_EPI64 and the signed form expands to
+//an (int64_t) cast applied to the unsigned macro's result.
+int64_t vgetq_lane_s64(int64x2_t vec, __constrange(0,1) int lane); // VMOV r0,r0,d0
+#define vgetq_lane_s64 (int64_t) vgetq_lane_u64
+
+uint64_t vgetq_lane_u64(uint64x2_t vec, __constrange(0,1) int lane); // VMOV r0,r0,d0
+#define vgetq_lane_u64 _MM_EXTRACT_EPI64
+
+// ***************** Set lanes within a vector ********************************************
+// **************************************************************************************
+//These intrinsics set a single lane (element) within a vector.
+//same functions as vld1_lane_xx ones, but take the value to be set directly.
+
+uint8x16_t vsetq_lane_u8(uint8_t value, uint8x16_t vec, __constrange(0,15) int lane); // VMOV.8 d0[0],r0
+//Sets one lane of vec to value by handing the address of the by-value argument to the single-lane load.
+_NEON2SSE_INLINE uint8x16_t vsetq_lane_u8(uint8_t value, uint8x16_t vec, __constrange(0,15) int lane)
+{
+ return vld1q_lane_u8(&value, vec, lane); //value is this function's own copy, so its address is valid here
+}
+
+uint16x8_t vsetq_lane_u16(uint16_t value, uint16x8_t vec, __constrange(0,7) int lane); // VMOV.16 d0[0],r0
+//Sets one lane of vec to value by handing the address of the by-value argument to the single-lane load.
+_NEON2SSE_INLINE uint16x8_t vsetq_lane_u16(uint16_t value, uint16x8_t vec, __constrange(0,7) int lane)
+{
+ return vld1q_lane_u16(&value, vec, lane); //value is this function's own copy, so its address is valid here
+}
+
+uint32x4_t vsetq_lane_u32(uint32_t value, uint32x4_t vec, __constrange(0,3) int lane); // VMOV.32 d0[0],r0
+//Sets one lane of vec to value by handing the address of the by-value argument to the single-lane load.
+_NEON2SSE_INLINE uint32x4_t vsetq_lane_u32(uint32_t value, uint32x4_t vec, __constrange(0,3) int lane)
+{
+ return vld1q_lane_u32(&value, vec, lane); //value is this function's own copy, so its address is valid here
+}
+
+int8x16_t vsetq_lane_s8(int8_t value, int8x16_t vec, __constrange(0,15) int lane); // VMOV.8 d0[0],r0
+//Sets one lane of vec to value by handing the address of the by-value argument to the single-lane load.
+_NEON2SSE_INLINE int8x16_t vsetq_lane_s8(int8_t value, int8x16_t vec, __constrange(0,15) int lane)
+{
+ return vld1q_lane_s8(&value, vec, lane); //value is this function's own copy, so its address is valid here
+}
+
+int16x8_t vsetq_lane_s16(int16_t value, int16x8_t vec, __constrange(0,7) int lane); // VMOV.16 d0[0],r0
+//Sets one lane of vec to value by handing the address of the by-value argument to the single-lane load.
+_NEON2SSE_INLINE int16x8_t vsetq_lane_s16(int16_t value, int16x8_t vec, __constrange(0,7) int lane)
+{
+ return vld1q_lane_s16(&value, vec, lane); //value is this function's own copy, so its address is valid here
+}
+
+int32x4_t vsetq_lane_s32(int32_t value, int32x4_t vec, __constrange(0,3) int lane); // VMOV.32 d0[0],r0
+//Sets one lane of vec to value by handing the address of the by-value argument to the single-lane load.
+_NEON2SSE_INLINE int32x4_t vsetq_lane_s32(int32_t value, int32x4_t vec, __constrange(0,3) int lane)
+{
+ return vld1q_lane_s32(&value, vec, lane); //value is this function's own copy, so its address is valid here
+}
+
+//Polynomial lane setters are name aliases of the unsigned functions of the same element width.
+poly8x16_t vsetq_lane_p8(poly8_t value, poly8x16_t vec, __constrange(0,15) int lane); // VMOV.8 d0[0],r0
+#define vsetq_lane_p8 vsetq_lane_u8
+
+poly16x8_t vsetq_lane_p16(poly16_t value, poly16x8_t vec, __constrange(0,7) int lane); // VMOV.16 d0[0],r0
+#define vsetq_lane_p16 vsetq_lane_u16
+
+float32x4_t vsetq_lane_f32(float32_t value, float32x4_t vec, __constrange(0,3) int lane); // VMOV.32 d0[0],r0
+//Sets one float lane of vec to value and returns the updated vector.
+//Follows the same pattern as the integer setters: copy the scalar to a local and load it into the lane.
+_NEON2SSE_INLINE float32x4_t vsetq_lane_f32(float32_t value, float32x4_t vec, __constrange(0,3) int lane)
+{
+ float32_t val;
+ val = value;
+ return vld1q_lane_f32(&val, vec, lane); //the return was missing: the function fell off the end without a value
+}
+
+int64x2_t vsetq_lane_s64(int64_t value, int64x2_t vec, __constrange(0,1) int lane); // VMOV d0,r0,r0
+//Hands a pointer to a local copy of value, together with vec and lane, to vld1q_lane_s64 and returns its result.
+_NEON2SSE_INLINE int64x2_t vsetq_lane_s64(int64_t value, int64x2_t vec, __constrange(0,1) int lane)
+{
+    int64_t val; //same signedness as the parameter and as the _s64 loader it is passed to
+    val = value;
+    return vld1q_lane_s64(&val, vec, lane);
+}
+
+//Alias: the unsigned 64-bit variant reuses the signed implementation.
+uint64x2_t vsetq_lane_u64(uint64_t value, uint64x2_t vec, __constrange(0,1) int lane); // VMOV d0,r0,r0
+#define vsetq_lane_u64 vsetq_lane_s64
+
+// *******************************************************************************
+// **************** Initialize a vector from bit pattern ***************************
+// *******************************************************************************
+//These intrinsics create a vector from a literal bit pattern.
+
+//no IA32 SIMD available
+
+//********************* Set all lanes to same value ********************************
+//*********************************************************************************
+//These intrinsics set all lanes to the same value.
+
+//Each 8/16/32-bit variant below is a macro over the matching _mm_set1_* intrinsic.
+//The unsigned forms are function-like macros that cast the argument to the unsigned element type first;
+//the signed and float forms are plain name aliases; the polynomial forms alias the unsigned ones.
+uint8x16_t vdupq_n_u8(uint8_t value); // VDUP.8 q0,r0
+#define vdupq_n_u8(value) _mm_set1_epi8((uint8_t) (value))
+
+uint16x8_t vdupq_n_u16(uint16_t value); // VDUP.16 q0,r0
+#define vdupq_n_u16(value) _mm_set1_epi16((uint16_t) (value))
+
+uint32x4_t vdupq_n_u32(uint32_t value); // VDUP.32 q0,r0
+#define vdupq_n_u32(value) _mm_set1_epi32((uint32_t) (value))
+
+int8x16_t vdupq_n_s8(int8_t value); // VDUP.8 q0,r0
+#define vdupq_n_s8 _mm_set1_epi8
+
+int16x8_t vdupq_n_s16(int16_t value); // VDUP.16 q0,r0
+#define vdupq_n_s16 _mm_set1_epi16
+
+int32x4_t vdupq_n_s32(int32_t value); // VDUP.32 q0,r0
+#define vdupq_n_s32 _mm_set1_epi32
+
+poly8x16_t vdupq_n_p8(poly8_t value); // VDUP.8 q0,r0
+#define vdupq_n_p8 vdupq_n_u8
+
+poly16x8_t vdupq_n_p16(poly16_t value); // VDUP.16 q0,r0
+#define vdupq_n_p16 vdupq_n_u16
+
+float32x4_t vdupq_n_f32(float32_t value); // VDUP.32 q0,r0
+#define vdupq_n_f32 _mm_set1_ps
+
+int64x2_t vdupq_n_s64(int64_t value); // VMOV d0,r0,r0
+//Writes value into both slots of a 16-byte aligned pair and loads that pair with LOAD_SI128.
+_NEON2SSE_INLINE int64x2_t vdupq_n_s64(int64_t value)
+{
+    _NEON2SSE_ALIGN_16 int64_t both_lanes[2]; //value may be an immediate, so it goes through memory
+    both_lanes[0] = value;
+    both_lanes[1] = value;
+    return LOAD_SI128(both_lanes);
+}
+
+uint64x2_t vdupq_n_u64(uint64_t value); // VMOV d0,r0,r0
+//Writes value into both slots of a 16-byte aligned pair and loads that pair with LOAD_SI128.
+_NEON2SSE_INLINE uint64x2_t vdupq_n_u64(uint64_t value)
+{
+    _NEON2SSE_ALIGN_16 uint64_t both_lanes[2]; //value may be an immediate, so it goes through memory
+    both_lanes[0] = value;
+    both_lanes[1] = value;
+    return LOAD_SI128(both_lanes);
+}
+
+//**** Set all lanes to same value ************************
+//Same functions as above - just aliases.********************
+//Probably they reflect the fact that 128-bit functions versions use VMOV instruction **********
+
+//Every vmovq_n_<type> is an alias of the vdupq_n_ macro for the same element type
+//(the polynomial types use the unsigned macro of the same width).
+uint8x16_t vmovq_n_u8(uint8_t value); // VDUP.8 q0,r0
+#define vmovq_n_u8 vdupq_n_u8
+
+uint16x8_t vmovq_n_u16(uint16_t value); // VDUP.16 q0,r0
+#define vmovq_n_u16 vdupq_n_u16
+
+uint32x4_t vmovq_n_u32(uint32_t value); // VDUP.32 q0,r0
+#define vmovq_n_u32 vdupq_n_u32
+
+int8x16_t vmovq_n_s8(int8_t value); // VDUP.8 q0,r0
+#define vmovq_n_s8 vdupq_n_s8
+
+int16x8_t vmovq_n_s16(int16_t value); // VDUP.16 q0,r0
+#define vmovq_n_s16 vdupq_n_s16
+
+int32x4_t vmovq_n_s32(int32_t value); // VDUP.32 q0,r0
+#define vmovq_n_s32 vdupq_n_s32
+
+poly8x16_t vmovq_n_p8(poly8_t value); // VDUP.8 q0,r0
+#define vmovq_n_p8 vdupq_n_u8
+
+poly16x8_t vmovq_n_p16(poly16_t value); // VDUP.16 q0,r0
+#define vmovq_n_p16 vdupq_n_u16
+
+float32x4_t vmovq_n_f32(float32_t value); // VDUP.32 q0,r0
+#define vmovq_n_f32 vdupq_n_f32
+
+int64x2_t vmovq_n_s64(int64_t value); // VMOV d0,r0,r0
+#define vmovq_n_s64 vdupq_n_s64
+
+uint64x2_t vmovq_n_u64(uint64_t value); // VMOV d0,r0,r0
+#define vmovq_n_u64 vdupq_n_u64
+
+//**************Set all lanes to the value of one lane of a vector *************
+//****************************************************************************
+//here shuffle is better solution than lane extraction followed by set1 function
+
+// ********************************************************************
+// ******************** Combining vectors *****************************
+// ********************************************************************
+//These intrinsics join two 64 bit vectors into a single 128bit vector.
+
+//current IA SIMD doesn't support float16
+
+//**********************************************************************
+//************************* Splitting vectors **************************
+//**********************************************************************
+//**************** Get high part ******************************************
+//These intrinsics split a 128 bit vector into 2 component 64 bit vectors
+
+// IA32 SIMD doesn't work with 16bit floats currently
+
+//********************** Get low part **********************
+//**********************************************************
+
+// IA32 SIMD doesn't work with 16bit floats currently
+
+//**************************************************************************
+//************************ Converting vectors **********************************
+//**************************************************************************
+//************* Convert from float ***************************************
+// vcvtq_s32_f32 below truncates on its own; any conversion in this section that still uses
+// _mm_cvtps_epi32 needs _MM_SET_ROUNDING_MODE ( x) set accordingly
+
+int32x4_t vcvtq_s32_f32(float32x4_t a); // VCVT.S32.F32 q0, q0
+//Converts four floats to signed 32-bit integers the way NEON VCVT does: rounding toward zero,
+//saturating positive overflow to 0x7fffffff and mapping NaN to 0.
+_NEON2SSE_INLINE int32x4_t vcvtq_s32_f32(float32x4_t a) // VCVT.S32.F32 q0, q0
+{
+    __m128 overflow, ordered;
+    __m128i res;
+    overflow = _mm_cmpge_ps(a, _mm_set1_ps(2147483648.0f)); //all ones where a >= 2^31
+    ordered = _mm_cmpord_ps(a, a); //all ones where a is not NaN
+    res = _mm_cvttps_epi32(a); //truncating; yields 0x80000000 for out-of-range lanes and NaN
+    res = _mm_xor_si128(res, *(__m128i*)&overflow); //0x80000000 ^ 0xffffffff = 0x7fffffff for positive overflow
+    return _mm_and_si128(res, *(__m128i*)&ordered); //NaN lanes become 0; negative overflow stays at 0x80000000
+}
+
+uint32x4_t vcvtq_u32_f32(float32x4_t a); // VCVT.U32.F32 q0, q0
+//Converts four floats to unsigned 32-bit integers: rounds toward zero, negative and NaN inputs give 0,
+//inputs >= 2^32 saturate to 0xffffffff.
+//The earlier version compared against the integer pattern 0x7fffffff reinterpreted as a float. That pattern is a NaN,
+//so the compare was never true and lanes >= 2^31 came back as 0x80000000.
+_NEON2SSE_INLINE uint32x4_t vcvtq_u32_f32(float32x4_t a) // VCVT.U32.F32 q0, q0
+{ //No single instruction SSE solution but we could implement it as following:
+    __m128i resi, offset_i;
+    __m128 zero, two31, two32, positive, a_pos, needs_offset, saturate, reduced;
+    zero = _mm_setzero_ps();
+    two31 = _mm_set1_ps(2147483648.0f); //2^31
+    two32 = _mm_set1_ps(4294967296.0f); //2^32
+    positive = _mm_cmpgt_ps(a, zero); //false for negative lanes and NaN
+    a_pos = _mm_and_ps(a, positive); //those lanes become +0.0
+    needs_offset = _mm_cmpge_ps(a_pos, two31); //lane does not fit the signed conversion
+    saturate = _mm_cmpge_ps(a_pos, two32); //lane does not fit 32 bits at all
+    reduced = _mm_sub_ps(a_pos, _mm_and_ps(needs_offset, two31)); //exact: floats >= 2^31 are integers
+    resi = _mm_cvttps_epi32(reduced);
+    offset_i = _mm_slli_epi32(*(__m128i*)&needs_offset, 31); //0x80000000 where 2^31 was subtracted
+    resi = _mm_add_epi32(resi, offset_i); //add 2^31 back in the integer domain
+    return _mm_or_si128(resi, *(__m128i*)&saturate); //saturated lanes become all ones
+}
+
+// ***** Convert to the fixed point with the number of fraction bits specified by b ***********
+//*************************************************************************************************
+//Intel SIMD doesn't support fixed point
+
+//Prototypes only: no definition for these two follows in this part of the file.
+int32x4_t vcvtq_n_s32_f32(float32x4_t a, __constrange(1,32) int b); // VCVT.S32.F32 q0, q0, #32
+uint32x4_t vcvtq_n_u32_f32(float32x4_t a, __constrange(1,32) int b); // VCVT.U32.F32 q0, q0, #32
+
+//***************** Convert to float *************************
+//*************************************************************
+
+//Signed 32-bit integer to float conversion maps directly onto _mm_cvtepi32_ps.
+float32x4_t vcvtq_f32_s32(int32x4_t a); // VCVT.F32.S32 q0, q0
+#define vcvtq_f32_s32(a) _mm_cvtepi32_ps(a)
+
+float32x4_t vcvtq_f32_u32(uint32x4_t a); // VCVT.F32.U32 q0, q0
+//Converts unsigned 32-bit lanes to float by converting the upper and lower 16-bit halves separately
+//(each fits the signed conversion exactly) and summing upper * 2^16 + lower.
+_NEON2SSE_INLINE float32x4_t vcvtq_f32_u32(uint32x4_t a) // VCVT.F32.U32 q0, q0
+{ //solution may be not optimal
+    __m128i upper16, lower16;
+    __m128 scale, upper_f, lower_f;
+    upper16 = _mm_srli_epi32(a, 16);
+    lower16 = _mm_and_si128(a, _mm_set1_epi32(0xffff)); //keep bits 0..15 of every lane
+    scale = _mm_set1_ps((float)0x10000); //2^16
+    // both partial conversions are exact, so no double rounding happens
+    upper_f = _mm_mul_ps(_mm_cvtepi32_ps(upper16), scale);
+    lower_f = _mm_cvtepi32_ps(lower16);
+    // the only rounding is in this addition, according to the current rounding mode
+    return _mm_add_ps(upper_f, lower_f);
+}
+
+//**************Convert between floats ***********************
+//************************************************************
+
+//Intel SIMD doesn't support 16bits floats currently
+
+//Intel SIMD doesn't support 16bits floats currently, the only solution is to store 16bit floats and load as 32 bits
+
+//************Vector narrow integer conversion (truncation) ******************
+//****************************************************************************
+
+//**************** Vector long move ***********************
+//***********************************************************
+
+//*************Vector saturating narrow integer*****************
+//**************************************************************
+
+//************* Vector saturating narrow integer signed->unsigned **************
+//*****************************************************************************
+
+// ********************************************************
+// **************** Table look up **************************
+// ********************************************************
+//VTBL (Vector Table Lookup) uses byte indexes in a control vector to look up byte values
+//in a table and generate a new vector. Indexes out of range return 0.
+//for Intel SIMD we need to set the MSB to 1 for zero return
+
+//Special trick to avoid the "__declspec(align('8')) won't be aligned" error
+
+//Special trick to avoid the "__declspec(align('16')) won't be aligned" error
+
+//****************** Extended table look up intrinsics ***************************
+//**********************************************************************************
+//VTBX (Vector Table Extension) works in the same way as VTBL do,
+// except that indexes out of range leave the destination element unchanged.
+
+//Special trick to avoid the "__declspec(align('8')) won't be aligned" error
+
+//*************************************************************************************************
+// *************************** Operations with a scalar value *********************************
+//*************************************************************************************************
+
+//******* Vector multiply accumulate by scalar *************************************************
+//**********************************************************************************************
+
+//***************** Vector widening multiply accumulate by scalar **********************
+//***************************************************************************************
+
+// ******** Vector widening saturating doubling multiply accumulate by scalar *******************************
+// ************************************************************************************************
+
+// ****** Vector multiply subtract by scalar *****************
+// *************************************************************
+
+// **** Vector widening multiply subtract by scalar ****
+// ****************************************************
+
+//********* Vector widening saturating doubling multiply subtract by scalar **************************
+//******************************************************************************************************
+
+//********** Vector multiply with scalar *****************************
+
+int16x8_t vmulq_n_s16(int16x8_t a, int16_t b); // VMUL.I16 q0,q0,d0[0]
+//Duplicates the scalar b into every 16-bit lane with vdupq_n_s16 and multiplies with vmulq_s16.
+_NEON2SSE_INLINE int16x8_t vmulq_n_s16(int16x8_t a, int16_t b) // VMUL.I16 q0,q0,d0[0]
+{
+    return vmulq_s16(a, vdupq_n_s16(b));
+}
+
+int32x4_t vmulq_n_s32(int32x4_t a, int32_t b); // VMUL.I32 q0,q0,d0[0]
+//Duplicates the scalar b into every 32-bit lane with vdupq_n_s32 and multiplies with vmulq_s32.
+_NEON2SSE_INLINE int32x4_t vmulq_n_s32(int32x4_t a, int32_t b) // VMUL.I32 q0,q0,d0[0]
+{
+    return vmulq_s32(a, vdupq_n_s32(b));
+}
+
+float32x4_t vmulq_n_f32(float32x4_t a, float32_t b); // VMUL.F32 q0,q0,d0[0]
+//Duplicates the scalar b into every float lane with vdupq_n_f32 and multiplies with vmulq_f32.
+_NEON2SSE_INLINE float32x4_t vmulq_n_f32(float32x4_t a, float32_t b) // VMUL.F32 q0,q0,d0[0]
+{
+    return vmulq_f32(a, vdupq_n_f32(b));
+}
+
+uint16x8_t vmulq_n_u16(uint16x8_t a, uint16_t b); // VMUL.I16 q0,q0,d0[0]
+//Duplicates the scalar b into every 16-bit lane and multiplies; the signed helpers vdupq_n_s16 / vmulq_s16 are reused.
+_NEON2SSE_INLINE uint16x8_t vmulq_n_u16(uint16x8_t a, uint16_t b) // VMUL.I16 q0,q0,d0[0]
+{
+    return vmulq_s16(a, vdupq_n_s16(b));
+}
+
+uint32x4_t vmulq_n_u32(uint32x4_t a, uint32_t b); // VMUL.I32 q0,q0,d0[0]
+//Duplicates the scalar b into every 32-bit lane with vdupq_n_u32 and multiplies with vmulq_u32.
+_NEON2SSE_INLINE uint32x4_t vmulq_n_u32(uint32x4_t a, uint32_t b) // VMUL.I32 q0,q0,d0[0]
+{
+    return vmulq_u32(a, vdupq_n_u32(b));
+}
+
+//**** Vector long multiply with scalar ************
+
+//**** Vector long multiply by scalar ****
+
+//********* Vector saturating doubling long multiply with scalar *******************
+
+//************* Vector saturating doubling long multiply by scalar ***********************************************
+
+// *****Vector saturating doubling multiply high with scalar *****
+
+int16x8_t vqdmulhq_n_s16(int16x8_t vec1, int16_t val2); // VQDMULH.S16 q0,q0,d0[0]
+//Duplicates val2 into every 16-bit lane and forwards to the vector-by-vector vqdmulhq_s16.
+_NEON2SSE_INLINE int16x8_t vqdmulhq_n_s16(int16x8_t vec1, int16_t val2) // VQDMULH.S16 q0,q0,d0[0]
+{ //solution may be not optimal
+    return vqdmulhq_s16(vec1, vdupq_n_s16(val2));
+}
+
+int32x4_t vqdmulhq_n_s32(int32x4_t vec1, int32_t val2); // VQDMULH.S32 q0,q0,d0[0]
+//Duplicates val2 into every 32-bit lane and forwards to the vector-by-vector vqdmulhq_s32.
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x4_t vqdmulhq_n_s32(int32x4_t vec1, int32_t val2), _NEON2SSE_REASON_SLOW_UNEFFECTIVE)
+{
+    return vqdmulhq_s32(vec1, vdupq_n_s32(val2));
+}
+
+//***** Vector saturating doubling multiply high by scalar ****************
+
+//******** Vector saturating rounding doubling multiply high with scalar ***
+
+#if defined(USE_SSSE3)
+int16x8_t vqrdmulhq_n_s16(int16x8_t vec1, int16_t val2); // VQRDMULH.S16 q0,q0,d0[0]
+//Duplicates val2 into every 16-bit lane and forwards to the vector-by-vector vqrdmulhq_s16.
+_NEON2SSE_INLINE int16x8_t vqrdmulhq_n_s16(int16x8_t vec1, int16_t val2) // VQRDMULH.S16 q0,q0,d0[0]
+{ //solution may be not optimal
+    return vqrdmulhq_s16(vec1, vdupq_n_s16(val2));
+}
+#endif
+
+#if defined(USE_SSSE3)
+int32x4_t vqrdmulhq_n_s32(int32x4_t vec1, int32_t val2); // VQRDMULH.S32 q0,q0,d0[0]
+//Duplicates val2 into every 32-bit lane and forwards to the vector-by-vector vqrdmulhq_s32.
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(int32x4_t vqrdmulhq_n_s32(int32x4_t vec1, int32_t val2), _NEON2SSE_REASON_SLOW_UNEFFECTIVE)
+{
+    return vqrdmulhq_s32(vec1, vdupq_n_s32(val2));
+}
+#endif
+
+//********* Vector rounding saturating doubling multiply high by scalar ****
+
+//**************Vector multiply accumulate with scalar *******************
+
+int16x8_t vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c); // VMLA.I16 q0, q0, d0[0]
+//Duplicates the scalar c into every 16-bit lane and forwards a, b and that vector to vmlaq_s16.
+_NEON2SSE_INLINE int16x8_t vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c) // VMLA.I16 q0, q0, d0[0]
+{
+    int16x8_t c_all = vdupq_n_s16(c);
+    return vmlaq_s16(a, b, c_all);
+}
+
+int32x4_t vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c); // VMLA.I32 q0, q0, d0[0]
+//Duplicates the scalar c into every 32-bit lane and forwards a, b and that vector to vmlaq_s32.
+_NEON2SSE_INLINE int32x4_t vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c) // VMLA.I32 q0, q0, d0[0]
+{
+    int32x4_t c_all = vdupq_n_s32(c);
+    return vmlaq_s32(a, b, c_all);
+}
+
+//The unsigned variants are aliases of the signed implementations of the same lane width.
+uint16x8_t vmlaq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c); // VMLA.I16 q0, q0, d0[0]
+#define vmlaq_n_u16 vmlaq_n_s16
+
+uint32x4_t vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c); // VMLA.I32 q0, q0, d0[0]
+#define vmlaq_n_u32 vmlaq_n_s32
+
+float32x4_t vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c); // VMLA.F32 q0, q0, d0[0]
+//Duplicates the scalar c into every float lane and forwards a, b and that vector to vmlaq_f32.
+_NEON2SSE_INLINE float32x4_t vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) // VMLA.F32 q0, q0, d0[0]
+{
+    float32x4_t c_all = vdupq_n_f32(c);
+    return vmlaq_f32(a, b, c_all);
+}
+
+//************Vector widening multiply accumulate with scalar****************************
+
+//************ Vector widening saturating doubling multiply accumulate with scalar **************
+
+//******** Vector multiply subtract with scalar **************
+
+int16x8_t vmlsq_n_s16(int16x8_t a, int16x8_t b, int16_t c); // VMLS.I16 q0, q0, d0[0]
+//Duplicates the scalar c into every 16-bit lane and forwards a, b and that vector to vmlsq_s16.
+_NEON2SSE_INLINE int16x8_t vmlsq_n_s16(int16x8_t a, int16x8_t b, int16_t c) // VMLS.I16 q0, q0, d0[0]
+{
+    int16x8_t c_all = vdupq_n_s16(c);
+    return vmlsq_s16(a, b, c_all);
+}
+
+int32x4_t vmlsq_n_s32(int32x4_t a, int32x4_t b, int32_t c); // VMLS.I32 q0, q0, d0[0]
+//Duplicates the scalar c into every 32-bit lane and forwards a, b and that vector to vmlsq_s32.
+_NEON2SSE_INLINE int32x4_t vmlsq_n_s32(int32x4_t a, int32x4_t b, int32_t c) // VMLS.I32 q0, q0, d0[0]
+{
+    int32x4_t c_all = vdupq_n_s32(c);
+    return vmlsq_s32(a, b, c_all);
+}
+
+uint16x8_t vmlsq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c); // VMLS.I16 q0, q0, d0[0]
+//Duplicates the scalar c into every 16-bit lane and forwards a, b and that vector to the 16-bit multiply-subtract.
+//The scalar has to be duplicated per 16-bit lane and the 16-bit operation used: the 32-bit helpers
+//would treat each pair of 16-bit lanes as one 32-bit element and give wrong results.
+_NEON2SSE_INLINE uint16x8_t vmlsq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) // VMLS.I16 q0, q0, d0[0]
+{
+    uint16x8_t vc;
+    vc = vdupq_n_u16(c);
+    return vmlsq_s16(a, b, vc); //signed helper reused, as vmlaq_n_u16 does via its alias
+}
+
+uint32x4_t vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c); // VMLS.I32 q0, q0, d0[0]
+//Duplicates the scalar c into every 32-bit lane and forwards a, b and that vector to vmlsq_u32.
+_NEON2SSE_INLINE uint32x4_t vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) // VMLS.I32 q0, q0, d0[0]
+{
+    uint32x4_t c_all = vdupq_n_u32(c);
+    return vmlsq_u32(a, b, c_all);
+}
+
+float32x4_t vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c); // VMLS.F32 q0, q0, d0[0]
+//Duplicates the scalar c into every float lane and forwards a, b and that vector to vmlsq_f32.
+_NEON2SSE_INLINE float32x4_t vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c)
+{
+    float32x4_t c_all = vdupq_n_f32(c);
+    return vmlsq_f32(a, b, c_all);
+}
+
+//**** Vector widening multiply subtract with scalar ******
+
+//***** Vector widening saturating doubling multiply subtract with scalar *********
+//**********************************************************************************
+
+//******************* Vector extract ***********************************************
+//*************************************************************************************
+//VEXT (Vector Extract) extracts elements from the bottom end of the second operand
+//vector and the top end of the first, concatenates them, and places the result in the destination vector
+//c elements from the bottom end of the second operand and (8-c) from the top end of the first
+
+#if defined(USE_SSSE3)
+//same result tested
+
+#endif
+
+#if defined(USE_SSSE3)
+//The vextq_* macros forward to _MM_ALIGNR_EPI8 with the vector operands swapped to (b,a)
+//and the element offset c scaled to a byte count.
+//c is parenthesized so that an expression argument (e.g. n + 1) is scaled as a whole.
+int8x16_t vextq_s8(int8x16_t a, int8x16_t b, __constrange(0,15) int c); // VEXT.8 q0,q0,q0,#0
+#define vextq_s8(a,b,c) _MM_ALIGNR_EPI8 (b,a,(c))
+
+uint8x16_t vextq_u8(uint8x16_t a, uint8x16_t b, __constrange(0,15) int c); // VEXT.8 q0,q0,q0,#0
+#define vextq_u8(a,b,c) _MM_ALIGNR_EPI8 (b,a,(c))
+
+poly8x16_t vextq_p8(poly8x16_t a, poly8x16_t b, __constrange(0,15) int c); // VEXT.8 q0,q0,q0,#0
+#define vextq_p8 vextq_s8
+
+int16x8_t vextq_s16(int16x8_t a, int16x8_t b, __constrange(0,7) int c); // VEXT.16 q0,q0,q0,#0
+#define vextq_s16(a,b,c) _MM_ALIGNR_EPI8 (b,a,(c) * 2)
+
+uint16x8_t vextq_u16(uint16x8_t a, uint16x8_t b, __constrange(0,7) int c); // VEXT.16 q0,q0,q0,#0
+#define vextq_u16(a,b,c) _MM_ALIGNR_EPI8 (b,a,(c) * 2)
+
+poly16x8_t vextq_p16(poly16x8_t a, poly16x8_t b, __constrange(0,7) int c); // VEXT.16 q0,q0,q0,#0
+#define vextq_p16 vextq_s16
+#endif
+
+#if defined(USE_SSSE3)
+//32- and 64-bit element variants: the element offset c is scaled by 4 or 8 to get the byte count
+//passed to _MM_ALIGNR_EPI8. c is parenthesized so that an expression argument is scaled as a whole.
+int32x4_t vextq_s32(int32x4_t a, int32x4_t b, __constrange(0,3) int c); // VEXT.32 q0,q0,q0,#0
+#define vextq_s32(a,b,c) _MM_ALIGNR_EPI8 (b,a,(c) * 4)
+
+uint32x4_t vextq_u32(uint32x4_t a, uint32x4_t b, __constrange(0,3) int c); // VEXT.32 q0,q0,q0,#0
+#define vextq_u32(a,b,c) _MM_ALIGNR_EPI8 (b,a,(c) * 4)
+
+int64x2_t vextq_s64(int64x2_t a, int64x2_t b, __constrange(0,1) int c); // VEXT.64 q0,q0,q0,#0
+#define vextq_s64(a,b,c) _MM_ALIGNR_EPI8(b,a,(c) * 8)
+
+uint64x2_t vextq_u64(uint64x2_t a, uint64x2_t b, __constrange(0,1) int c); // VEXT.64 q0,q0,q0,#0
+#define vextq_u64(a,b,c) _MM_ALIGNR_EPI8(b,a,(c) * 8)
+#endif
+
+//************ Reverse vector elements (swap endianness)*****************
+//*************************************************************************
+//VREVn.m reverses the order of the m-bit lanes within a set that is n bits wide.
+
+#if defined(USE_SSSE3)
+int8x16_t vrev64q_s8(int8x16_t vec); // VREV64.8 q0,q0
+//Reverses the byte order inside each 64-bit half of vec with a single byte shuffle.
+_NEON2SSE_INLINE int8x16_t vrev64q_s8(int8x16_t vec) // VREV64.8 q0,q0
+{
+    //output byte i takes input byte byte_order[i]
+    _NEON2SSE_ALIGN_16 int8_t byte_order[16] = {7,6,5,4,3,2,1,0, 15,14,13,12,11,10,9, 8};
+    __m128i shuffle_ctrl = *(__m128i*) byte_order;
+    return _mm_shuffle_epi8 (vec, shuffle_ctrl);
+}
+#endif
+
+#if defined(USE_SSSE3)
+int16x8_t vrev64q_s16(int16x8_t vec); // VREV64.16 q0,q0
+//Reverses the order of the 16-bit lanes inside each 64-bit half of vec.
+_NEON2SSE_INLINE int16x8_t vrev64q_s16(int16x8_t vec) // VREV64.16 q0,q0
+{ //no _mm_shuffle_epi16, _mm_shuffle_epi8 to be used with the corresponding mask
+    //byte pairs are kept together so that each 16-bit lane moves as a unit
+    _NEON2SSE_ALIGN_16 int8_t byte_order[16] = {6,7, 4,5,2,3,0,1,14,15,12,13,10,11,8,9};
+    __m128i shuffle_ctrl = *(__m128i*)byte_order;
+    return _mm_shuffle_epi8 (vec, shuffle_ctrl);
+}
+#endif
+
+int32x4_t vrev64q_s32(int32x4_t vec); // VREV64.32 q0,q0
+//Swaps the two 32-bit lanes inside each 64-bit half of vec.
+_NEON2SSE_INLINE int32x4_t vrev64q_s32(int32x4_t vec) // VREV64.32 q0,q0
+{
+    //result lanes 0..3 are taken from source lanes 1,0,3,2
+    return _mm_shuffle_epi32 (vec, _MM_SHUFFLE(2, 3, 0, 1));
+}
+
+//Remaining VREV64 variants: unsigned and polynomial types alias the implementation of the same lane width.
+//The 8- and 16-bit aliases sit under USE_SSSE3 like the functions they point to.
+#if defined(USE_SSSE3)
+uint8x16_t vrev64q_u8(uint8x16_t vec); // VREV64.8 q0,q0
+#define vrev64q_u8 vrev64q_s8
+
+uint16x8_t vrev64q_u16(uint16x8_t vec); // VREV64.16 q0,q0
+#define vrev64q_u16 vrev64q_s16
+#endif
+
+uint32x4_t vrev64q_u32(uint32x4_t vec); // VREV64.32 q0,q0
+#define vrev64q_u32 vrev64q_s32
+
+#if defined(USE_SSSE3)
+poly8x16_t vrev64q_p8(poly8x16_t vec); // VREV64.8 q0,q0
+#define vrev64q_p8 vrev64q_u8
+
+poly16x8_t vrev64q_p16(poly16x8_t vec); // VREV64.16 q0,q0
+#define vrev64q_p16 vrev64q_s16
+#endif
+
+//float variant: _mm_shuffle_ps with the same register as both sources, lanes taken in the order 1,0,3,2
+float32x4_t vrev64q_f32(float32x4_t vec); // VREV64.32 q0,q0
+#define vrev64q_f32(vec) _mm_shuffle_ps (vec, vec, _MM_SHUFFLE(2,3, 0,1))
+
+//******************** 32 bit shuffles **********************
+//************************************************************
+
+#if defined(USE_SSSE3)
+int8x16_t vrev32q_s8(int8x16_t vec); // VREV32.8 q0,q0
+//Reverses the byte order inside each 32-bit group of vec with a single byte shuffle.
+_NEON2SSE_INLINE int8x16_t vrev32q_s8(int8x16_t vec) // VREV32.8 q0,q0
+{
+    //output byte i takes input byte byte_order[i]
+    _NEON2SSE_ALIGN_16 int8_t byte_order[16] = {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12};
+    __m128i shuffle_ctrl = *(__m128i*) byte_order;
+    return _mm_shuffle_epi8 (vec, shuffle_ctrl);
+}
+#endif
+
+#if defined(USE_SSSE3)
+int16x8_t vrev32q_s16(int16x8_t vec); // VREV32.16 q0,q0
+//Swaps the two 16-bit lanes inside each 32-bit group of vec.
+_NEON2SSE_INLINE int16x8_t vrev32q_s16(int16x8_t vec) // VREV32.16 q0,q0
+{
+    //byte pairs are kept together so that each 16-bit lane moves as a unit
+    _NEON2SSE_ALIGN_16 int8_t byte_order[16] = {2,3,0,1, 6,7, 4,5, 10,11, 8,9, 14,15,12,13};
+    __m128i shuffle_ctrl = *(__m128i*) byte_order;
+    return _mm_shuffle_epi8 (vec, shuffle_ctrl);
+}
+#endif
+
+//Unsigned and polynomial VREV32 variants alias the signed implementation of the same lane width.
+#if defined(USE_SSSE3)
+uint8x16_t vrev32q_u8(uint8x16_t vec); // VREV32.8 q0,q0
+#define vrev32q_u8 vrev32q_s8
+
+uint16x8_t vrev32q_u16(uint16x8_t vec); // VREV32.16 q0,q0
+#define vrev32q_u16 vrev32q_s16
+
+poly8x16_t vrev32q_p8(poly8x16_t vec); // VREV32.8 q0,q0
+#define vrev32q_p8 vrev32q_u8
+#endif
+
+//************* 16 bit shuffles **********************
+//******************************************************
+
+#if defined(USE_SSSE3)
+int8x16_t vrev16q_s8(int8x16_t vec); // VREV16.8 q0,q0
+//Swaps the two bytes inside each 16-bit group of vec with a single byte shuffle.
+_NEON2SSE_INLINE int8x16_t vrev16q_s8(int8x16_t vec) // VREV16.8 q0,q0
+{
+    //output byte i takes input byte byte_order[i]
+    _NEON2SSE_ALIGN_16 int8_t byte_order[16] = {1,0, 3,2, 5,4, 7,6, 9,8, 11, 10, 13, 12, 15, 14};
+    __m128i shuffle_ctrl = *(__m128i*) byte_order;
+    return _mm_shuffle_epi8 (vec, shuffle_ctrl);
+}
+#endif
+
+//Unsigned and polynomial VREV16 variants alias the signed 8-bit implementation.
+#if defined(USE_SSSE3)
+uint8x16_t vrev16q_u8(uint8x16_t vec); // VREV16.8 q0,q0
+#define vrev16q_u8 vrev16q_s8
+
+poly8x16_t vrev16q_p8(poly8x16_t vec); // VREV16.8 q0,q0
+#define vrev16q_p8 vrev16q_u8
+#endif
+
+//*********************************************************************
+//**************** Other single operand arithmetic *******************
+//*********************************************************************
+
+//*********** Absolute: Vd[i] = |Va[i]| **********************************
+//************************************************************************
+
+//Integer absolute value maps one-to-one onto the _mm_abs_epi* intrinsics.
+//NOTE(review): the vqabsq_* functions further down use the same _mm_abs_epi* intrinsics inside
+//#if defined(USE_SSSE3); these aliases are not guarded - confirm whether that is intended.
+int8x16_t vabsq_s8(int8x16_t a); // VABS.S8 q0,q0
+#define vabsq_s8 _mm_abs_epi8
+
+int16x8_t vabsq_s16(int16x8_t a); // VABS.S16 q0,q0
+#define vabsq_s16 _mm_abs_epi16
+
+int32x4_t vabsq_s32(int32x4_t a); // VABS.S32 q0,q0
+#define vabsq_s32 _mm_abs_epi32
+
+float32x4_t vabsq_f32(float32x4_t a); // VABS.F32 q0,q0
+//Clears bit 31 of every float lane by ANDing with 0x7fffffff.
+_NEON2SSE_INLINE float32x4_t vabsq_f32(float32x4_t a) // VABS.F32 q0,q0
+{
+    _NEON2SSE_ALIGN_16 int32_t abs_mask[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
+    __m128 keep_bits = *(__m128*)abs_mask; //integer pattern reinterpreted as a float vector
+    return _mm_and_ps (a, keep_bits);
+}
+
+//****** Saturating absolute: Vd[i] = sat(|Va[i]|) *********************
+//**********************************************************************
+//For signed-integer data types, the absolute value of the most negative value is not representable by the data type, saturation takes place
+
+#if defined(USE_SSSE3)
+int8x16_t vqabsq_s8(int8x16_t a); // VQABS.S8 q0,q0
+//Saturating absolute value: lanes whose _mm_abs_epi8 result is still 0x80 are turned into 0x7f.
+_NEON2SSE_INLINE int8x16_t vqabsq_s8(int8x16_t a) // VQABS.S8 q0,q0
+{
+    __m128i magnitude, overflowed;
+    magnitude = _mm_abs_epi8 (a);
+    overflowed = _mm_cmpeq_epi8 (magnitude, _mm_set1_epi8 (0x80)); //0xff where the result is still -128
+    return _mm_xor_si128 (magnitude, overflowed); //0x80 ^ 0xff = 0x7f, other lanes unchanged
+}
+#endif
+
+#if defined(USE_SSSE3)
+int16x8_t vqabsq_s16(int16x8_t a); // VQABS.S16 q0,q0
+//Saturating absolute value: lanes whose _mm_abs_epi16 result is still 0x8000 are turned into 0x7fff.
+_NEON2SSE_INLINE int16x8_t vqabsq_s16(int16x8_t a) // VQABS.S16 q0,q0
+{
+    __m128i magnitude, overflowed;
+    magnitude = _mm_abs_epi16 (a);
+    overflowed = _mm_cmpeq_epi16 (magnitude, _mm_set1_epi16 (0x8000)); //0xffff where the result is still -32768
+    return _mm_xor_si128 (magnitude, overflowed); //0x8000 ^ 0xffff = 0x7fff, other lanes unchanged
+}
+#endif
+
+#if defined(USE_SSSE3)
+int32x4_t vqabsq_s32(int32x4_t a); // VQABS.S32 q0,q0
+//Saturating absolute value: lanes whose _mm_abs_epi32 result is still 0x80000000 are turned into 0x7fffffff.
+_NEON2SSE_INLINE int32x4_t vqabsq_s32(int32x4_t a) // VQABS.S32 q0,q0
+{
+    __m128i magnitude, overflowed;
+    magnitude = _mm_abs_epi32 (a);
+    overflowed = _mm_cmpeq_epi32 (magnitude, _mm_set1_epi32 (0x80000000)); //all ones where the result is still the most negative value
+    return _mm_xor_si128 (magnitude, overflowed); //0x80000000 ^ 0xffffffff = 0x7fffffff
+}
+#endif
+
+//*************** Negate: Vd[i] = - Va[i] *************************************
+//*****************************************************************************
+//several Negate implementations possible for SIMD.
+//e.g. the _mm_sign functions (a, negative numbers vector), but the following one gives good performance:
+
+int8x16_t vnegq_s8(int8x16_t a); // VNEG q0,q0
+//Negates every 8-bit lane by subtracting it from zero (wrapping, no saturation).
+_NEON2SSE_INLINE int8x16_t vnegq_s8(int8x16_t a) // VNEG q0,q0
+{
+    return _mm_sub_epi8 (_mm_setzero_si128 (), a);
+} //or _mm_sign_epi8 (a, negative numbers vector)
+
+int16x8_t vnegq_s16(int16x8_t a); // VNEG q0,q0
+//Negates every 16-bit lane by subtracting it from zero (wrapping, no saturation).
+_NEON2SSE_INLINE int16x8_t vnegq_s16(int16x8_t a) // VNEG q0,q0
+{
+    return _mm_sub_epi16 (_mm_setzero_si128 (), a);
+} //or _mm_sign_epi16 (a, negative numbers vector)
+
+int32x4_t vnegq_s32(int32x4_t a); // VNEG q0,q0
+//Negates every 32-bit lane by subtracting it from zero (wrapping, no saturation).
+_NEON2SSE_INLINE int32x4_t vnegq_s32(int32x4_t a) // VNEG q0,q0
+{
+    return _mm_sub_epi32 (_mm_setzero_si128 (), a);
+} //or _mm_sign_epi32 (a, negative numbers vector)
+
+float32x4_t vnegq_f32(float32x4_t a); // VNEG q0,q0
+//Negates every float lane by flipping bit 31 (XOR with 0x80000000).
+_NEON2SSE_INLINE float32x4_t vnegq_f32(float32x4_t a) // VNEG q0,q0
+{
+    //unsigned element type: 0x80000000 does not fit int32_t, so an int32_t brace initializer is a
+    //narrowing conversion (rejected by C++11 compilers)
+    _NEON2SSE_ALIGN_16 uint32_t c80000000[4] = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
+    return _mm_xor_ps (a, *(__m128*) c80000000);
+}
+
+//************** Saturating Negate: sat(Vd[i] = - Va[i]) **************************
+//***************************************************************************************
+//For signed-integer data types, the negation of the most negative value can't be produced without saturation, while with saturation it is max positive
+
+int8x16_t vqnegq_s8(int8x16_t a); // VQNEG q0,q0
+//Saturating negate of every 8-bit lane: 0 - a computed with the saturating subtract.
+_NEON2SSE_INLINE int8x16_t vqnegq_s8(int8x16_t a) // VQNEG q0,q0
+{
+    return _mm_subs_epi8 (_mm_setzero_si128 (), a); //saturating subtraction
+}
+
+int16x8_t vqnegq_s16(int16x8_t a); // VQNEG q0,q0
+//Saturating negate of every 16-bit lane: 0 - a computed with the saturating subtract.
+_NEON2SSE_INLINE int16x8_t vqnegq_s16(int16x8_t a) // VQNEG q0,q0
+{
+    return _mm_subs_epi16 (_mm_setzero_si128 (), a); //saturating subtraction
+}
+
+int32x4_t vqnegq_s32(int32x4_t a); // VQNEG q0,q0
+//Saturating negate of every 32-bit lane: a wrapping 0 - a, then lanes whose input was 0x80000000
+//are flipped to 0x7fffffff.
+_NEON2SSE_INLINE int32x4_t vqnegq_s32(int32x4_t a) // VQNEG q0,q0
+{ //solution may be not optimal compared with a serial
+    __m128i negated, was_min;
+    was_min = _mm_cmpeq_epi32 (a, _mm_set1_epi32 (0x80000000)); //all ones where a is the most negative value
+    negated = _mm_sub_epi32 (_mm_setzero_si128 (), a); //wrapping subtraction
+    return _mm_xor_si128 (negated, was_min); //0x80000000 ^ 0xffffffff = 0x7fffffff
+}
+
+//****************** Count leading zeros ********************************
+//**************************************************************************
+//no corresponding vector intrinsics in IA32, need to implement it. While the implementation is effective for 8 bits, it may be not for 16 and 32 bits
+
+#if defined(USE_SSSE3)
+int8x16_t vclzq_s8(int8x16_t a); // VCLZ.I8 q0,q0
+//Counts leading zeros of every byte using two 4-bit table lookups done with _mm_shuffle_epi8:
+//one for the high nibble and one for the low nibble. The low-nibble count is added only when the
+//high nibble is entirely zero (its lookup returned 4).
+_NEON2SSE_INLINE int8x16_t vclzq_s8(int8x16_t a)
+{
+ //mask_CLZ[n] = number of leading zeros of the 4-bit value n (4 when n == 0)
+ _NEON2SSE_ALIGN_16 int8_t mask_CLZ[16] = { /* 0 */ 4,/* 1 */ 3,/* 2 */ 2,/* 3 */ 2,
+ /* 4 */ 1,/* 5 */ 1,/* 6 */ 1,/* 7 */ 1,
+ /* 8 */ 0,/* 9 */ 0,/* a */ 0,/* b */ 0,
+ /* c */ 0,/* d */ 0,/* e */ 0,/* f */ 0};
+ __m128i maskLOW, c4, lowclz, mask, hiclz;
+ maskLOW = _mm_set1_epi8(0x0f); //low 4 bits, don't need masking low to avoid zero if MSB is set - it happens automatically
+ c4 = _mm_set1_epi8(4);
+ lowclz = _mm_shuffle_epi8( *(__m128i*)mask_CLZ, a); //uses low 4 bits anyway
+ mask = _mm_srli_epi16(a, 4); //get high 4 bits as low bits
+ mask = _mm_and_si128(mask, maskLOW); //low 4 bits, need masking to avoid zero if MSB is set
+ hiclz = _mm_shuffle_epi8( *(__m128i*) mask_CLZ, mask); //uses low 4 bits anyway
+ mask = _mm_cmpeq_epi8(hiclz, c4); // shows the need to add lowclz zeros
+ lowclz = _mm_and_si128(lowclz,mask); //keep the low-nibble count only where the high nibble is all zero
+ return _mm_add_epi8(lowclz, hiclz);
+}
+#endif
+
+#if defined(USE_SSSE3)
+int16x8_t vclzq_s16(int16x8_t a); // VCLZ.I16 q0,q0
+//Counts leading zeros of every 16-bit lane from the per-byte counts returned by vclzq_s8:
+//the high-byte count is used, and the low-byte count is added when the high byte is entirely zero (count 8).
+_NEON2SSE_INLINE int16x8_t vclzq_s16(int16x8_t a)
+{
+ __m128i c7, res8x16, res8x16_swap;
+ _NEON2SSE_ALIGN_16 int8_t mask8_sab[16] = { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
+ _NEON2SSE_ALIGN_16 uint16_t mask8bit[8] = {0x00ff, 0x00ff, 0x00ff, 0x00ff,0x00ff, 0x00ff, 0x00ff, 0x00ff};
+ c7 = _mm_srli_epi16(*(__m128i*)mask8bit, 5); //7
+ res8x16 = vclzq_s8(a);
+ res8x16_swap = _mm_shuffle_epi8 (res8x16, *(__m128i*) mask8_sab); //horizontal pairs swap
+ res8x16 = _mm_and_si128(res8x16, *(__m128i*)mask8bit); //lowclz
+ res8x16_swap = _mm_and_si128(res8x16_swap, *(__m128i*)mask8bit); //hiclz
+ c7 = _mm_cmpgt_epi16(res8x16_swap, c7); // shows the need to add lowclz zeros
+ res8x16 = _mm_and_si128(res8x16, c7); //lowclz
+ return _mm_add_epi16(res8x16_swap, res8x16);
+}
+#endif
+
+int32x4_t vclzq_s32(int32x4_t a); // VCLZ.I32 q0,q0
+//Counts leading zeros of every 32-bit lane in two stages:
+// 1) the shift/OR cascade sets every bit below the highest set bit;
+// 2) the set bits of that value are counted and the count is subtracted from 32.
+_NEON2SSE_INLINE int32x4_t vclzq_s32(int32x4_t a)
+{
+ __m128i c55555555, c33333333, c0f0f0f0f, c3f, c32, tmp, tmp1, res;
+ c55555555 = _mm_set1_epi32(0x55555555);
+ c33333333 = _mm_set1_epi32(0x33333333);
+ c0f0f0f0f = _mm_set1_epi32(0x0f0f0f0f);
+ c3f = _mm_set1_epi32(0x3f);
+ c32 = _mm_set1_epi32(32);
+ //stage 1: propagate the highest set bit into all lower positions
+ tmp = _mm_srli_epi32(a, 1);
+ res = _mm_or_si128(tmp, a); //atmp[i] |= (atmp[i] >> 1);
+ tmp = _mm_srli_epi32(res, 2);
+ res = _mm_or_si128(tmp, res); //atmp[i] |= (atmp[i] >> 2);
+ tmp = _mm_srli_epi32(res, 4);
+ res = _mm_or_si128(tmp, res); //atmp[i] |= (atmp[i] >> 4);
+ tmp = _mm_srli_epi32(res, 8);
+ res = _mm_or_si128(tmp, res); //atmp[i] |= (atmp[i] >> 8);
+ tmp = _mm_srli_epi32(res, 16);
+ res = _mm_or_si128(tmp, res); //atmp[i] |= (atmp[i] >> 16);
+
+ //stage 2: count the set bits, first as 2-bit partial sums
+ tmp = _mm_srli_epi32(res, 1);
+ tmp = _mm_and_si128(tmp, c55555555);
+ res = _mm_sub_epi32(res, tmp); //atmp[i] -= ((atmp[i] >> 1) & 0x55555555);
+
+ //4-bit partial sums
+ tmp = _mm_srli_epi32(res, 2);
+ tmp = _mm_and_si128(tmp, c33333333);
+ tmp1 = _mm_and_si128(res, c33333333);
+ res = _mm_add_epi32(tmp, tmp1); //atmp[i] = (((atmp[i] >> 2) & 0x33333333) + (atmp[i] & 0x33333333));
+
+ //per-byte sums
+ tmp = _mm_srli_epi32(res, 4);
+ tmp = _mm_add_epi32(tmp, res);
+ res = _mm_and_si128(tmp, c0f0f0f0f); //atmp[i] = (((atmp[i] >> 4) + atmp[i]) & 0x0f0f0f0f);
+
+ //fold the four byte sums into the lowest byte
+ tmp = _mm_srli_epi32(res, 8);
+ res = _mm_add_epi32(tmp, res); //atmp[i] += (atmp[i] >> 8);
+
+ tmp = _mm_srli_epi32(res, 16);
+ res = _mm_add_epi32(tmp, res); //atmp[i] += (atmp[i] >> 16);
+
+ res = _mm_and_si128(res, c3f); //atmp[i] = atmp[i] & 0x0000003f;
+
+ return _mm_sub_epi32(c32, res); //res[i] = 32 - atmp[i];
+}
+
+//Unsigned VCLZ variants alias the signed implementations of the same lane width;
+//the 8- and 16-bit aliases sit under USE_SSSE3 like the functions they point to.
+#if defined(USE_SSSE3)
+uint8x16_t vclzq_u8(uint8x16_t a); // VCLZ.I8 q0,q0
+#define vclzq_u8 vclzq_s8
+
+uint16x8_t vclzq_u16(uint16x8_t a); // VCLZ.I16 q0,q0
+#define vclzq_u16 vclzq_s16
+#endif
+
+uint32x4_t vclzq_u32(uint32x4_t a); // VCLZ.I32 q0,q0
+#define vclzq_u32 vclzq_s32
+
+//************** Count leading sign bits **************************
+//********************************************************************
+//VCLS (Vector Count Leading Sign bits) counts the number of consecutive bits following
+// the topmost bit, that are the same as the topmost bit, in each element in a vector
+//No corresponding vector intrinsics in IA32, need to implement it.
+//While the implementation is effective for 8 bits, it may be not for 16 and 32 bits
+
+#if defined(USE_SSSE3)
+int8x16_t vclsq_s8(int8x16_t a); // VCLS.S8 q0,q0
+//Counts leading sign bits of every byte: negative lanes are bitwise inverted, then the result is
+//vclzq_s8 of that value minus 1 (the topmost bit itself is not counted).
+_NEON2SSE_INLINE int8x16_t vclsq_s8(int8x16_t a)
+{
+    __m128i is_negative, folded, zeros;
+    is_negative = _mm_cmpgt_epi8(_mm_setzero_si128(), a); //0xff if negative input and 0 if positive
+    folded = _mm_xor_si128(a, is_negative); //~a for negative lanes, a otherwise
+    zeros = vclzq_s8(folded);
+    return _mm_sub_epi8(zeros, _mm_set1_epi8(1));
+}
+#endif
+
+#if defined(USE_SSSE3)
+int16x8_t vclsq_s16(int16x8_t a); // VCLS.S16 q0,q0
+_NEON2SSE_INLINE int16x8_t vclsq_s16(int16x8_t a)
+{
+ //Negative lanes are bitwise inverted so that every lane starts with a zero bit; the number of leading
+ //sign bits is then the number of leading zeros minus one (the topmost bit itself is not counted)
+ __m128i all_ones, sign_bit, one, is_neg, folded, lead_zeros;
+ all_ones = _mm_cmpeq_epi16(a,a); //0xffff in every lane
+ sign_bit = _mm_slli_epi16(all_ones, 15); //0x8000
+ one = _mm_srli_epi16(all_ones,15); //0x1
+ is_neg = _mm_cmpeq_epi16(_mm_and_si128(a, sign_bit), sign_bit); //0xffff for negative lanes, 0 otherwise
+ folded = _mm_xor_si128(a, is_neg); //~a for negative lanes, a unchanged otherwise
+ lead_zeros = vclzq_s16(folded);
+ return _mm_sub_epi16(lead_zeros, one);
+}
+#endif
+
+int32x4_t vclsq_s32(int32x4_t a); // VCLS.S32 q0,q0
+_NEON2SSE_INLINE int32x4_t vclsq_s32(int32x4_t a)
+{
+ //Negative lanes are bitwise inverted so that every lane starts with a zero bit; the number of leading
+ //sign bits is then the number of leading zeros minus one (the topmost bit itself is not counted)
+ __m128i all_ones, sign_bit, one, is_neg, folded, lead_zeros;
+ all_ones = _mm_cmpeq_epi32(a,a); //0xffffffff in every lane
+ sign_bit = _mm_slli_epi32(all_ones, 31); //0x80000000
+ one = _mm_srli_epi32(all_ones,31); //0x1
+ is_neg = _mm_cmpeq_epi32(_mm_and_si128(a, sign_bit), sign_bit); //0xffffffff for negative lanes, 0 otherwise
+ folded = _mm_xor_si128(a, is_neg); //~a for negative lanes, a unchanged otherwise
+ lead_zeros = vclzq_s32(folded);
+ return _mm_sub_epi32(lead_zeros, one);
+}
+
+//************************* Count number of set bits ********************************
+//*************************************************************************************
+//No corresponding SIMD solution. One option is to extract the elements, convert each of them to 32 bits and then use the SSE4.2 _mm_popcnt_u32 (unsigned int v) for each element
+//another option is to do the following algorithm:
+
+#if defined(USE_SSSE3)
+uint8x16_t vcntq_u8(uint8x16_t a); // VCNT.8 q0,q0
+_NEON2SSE_INLINE uint8x16_t vcntq_u8(uint8x16_t a)
+{
+ //per-byte population count: the bit count of each nibble is looked up in a 16-entry table and both counts are added
+ _NEON2SSE_ALIGN_16 int8_t nibble_bits[16] = { 0, 1, 1, 2, 1, 2, 2, 3, //number of set bits in 0x0..0x7
+ 1, 2, 2, 3, 2, 3, 3, 4}; //number of set bits in 0x8..0xf
+ __m128i nibble_mask, lo_idx, hi_idx, lo_cnt, hi_cnt;
+ nibble_mask = _mm_set1_epi8(0x0f);
+ //indexes are masked down to 4 bits because _mm_shuffle_epi8 returns zero for an index with the MSB set
+ lo_idx = _mm_and_si128(a, nibble_mask);
+ hi_idx = _mm_and_si128(_mm_srli_epi16(a, 4), nibble_mask); //high nibble moved into the low 4 bits
+ lo_cnt = _mm_shuffle_epi8( *(__m128i*)nibble_bits, lo_idx);
+ hi_cnt = _mm_shuffle_epi8( *(__m128i*)nibble_bits, hi_idx);
+ return _mm_add_epi8(lo_cnt, hi_cnt);
+}
+#endif
+
+#if defined(USE_SSSE3)
+int8x16_t vcntq_s8(int8x16_t a); // VCNT.8 q0,q0
+#define vcntq_s8 vcntq_u8 //alias: the unsigned implementation is reused as is
+
+poly8x16_t vcntq_p8(poly8x16_t a); // VCNT.8 q0,q0
+#define vcntq_p8 vcntq_u8 //alias: the unsigned implementation is reused as is
+#endif
+
+//**************************************************************************************
+//*********************** Logical operations ****************************************
+//**************************************************************************************
+//************************** Bitwise not ***********************************
+//several Bitwise not implementations possible for SIMD. Eg "xor" with all ones, but the following one gives good performance
+
+int8x16_t vmvnq_s8(int8x16_t a); // VMVN q0,q0
+_NEON2SSE_INLINE int8x16_t vmvnq_s8(int8x16_t a) // VMVN q0,q0
+{
+ //bitwise NOT computed as (~a) & all_ones
+ __m128i all_ones = _mm_cmpeq_epi8 (a,a); //each lane equals itself, which gives 0xff everywhere
+ return _mm_andnot_si128 (a, all_ones);
+}
+
+int16x8_t vmvnq_s16(int16x8_t a); // VMVN q0,q0
+_NEON2SSE_INLINE int16x8_t vmvnq_s16(int16x8_t a) // VMVN q0,q0
+{
+ //bitwise NOT computed as (~a) & all_ones
+ __m128i all_ones = _mm_cmpeq_epi16 (a,a); //each lane equals itself, which gives 0xffff everywhere
+ return _mm_andnot_si128 (a, all_ones);
+}
+
+int32x4_t vmvnq_s32(int32x4_t a); // VMVN q0,q0
+_NEON2SSE_INLINE int32x4_t vmvnq_s32(int32x4_t a) // VMVN q0,q0
+{
+ //bitwise NOT computed as (~a) & all_ones
+ __m128i all_ones = _mm_cmpeq_epi32 (a,a); //each lane equals itself, which gives 0xffffffff everywhere
+ return _mm_andnot_si128 (a, all_ones);
+}
+
+uint8x16_t vmvnq_u8(uint8x16_t a); // VMVN q0,q0
+#define vmvnq_u8 vmvnq_s8 //the unsigned and polynomial names below are aliases of the signed implementations
+
+uint16x8_t vmvnq_u16(uint16x8_t a); // VMVN q0,q0
+#define vmvnq_u16 vmvnq_s16
+
+uint32x4_t vmvnq_u32(uint32x4_t a); // VMVN q0,q0
+#define vmvnq_u32 vmvnq_s32
+
+poly8x16_t vmvnq_p8(poly8x16_t a); // VMVN q0,q0
+#define vmvnq_p8 vmvnq_u8 //resolves to vmvnq_s8 through the vmvnq_u8 alias
+
+//****************** Bitwise and ***********************
+//******************************************************
+
+int8x16_t vandq_s8(int8x16_t a, int8x16_t b); // VAND q0,q0,q0
+#define vandq_s8 _mm_and_si128 //every variant below maps to the same 128-bit AND: the operation works bit by bit, whatever the lane width
+
+int16x8_t vandq_s16(int16x8_t a, int16x8_t b); // VAND q0,q0,q0
+#define vandq_s16 _mm_and_si128
+
+int32x4_t vandq_s32(int32x4_t a, int32x4_t b); // VAND q0,q0,q0
+#define vandq_s32 _mm_and_si128
+
+int64x2_t vandq_s64(int64x2_t a, int64x2_t b); // VAND q0,q0,q0
+#define vandq_s64 _mm_and_si128
+
+uint8x16_t vandq_u8(uint8x16_t a, uint8x16_t b); // VAND q0,q0,q0
+#define vandq_u8 _mm_and_si128
+
+uint16x8_t vandq_u16(uint16x8_t a, uint16x8_t b); // VAND q0,q0,q0
+#define vandq_u16 _mm_and_si128
+
+uint32x4_t vandq_u32(uint32x4_t a, uint32x4_t b); // VAND q0,q0,q0
+#define vandq_u32 _mm_and_si128
+
+uint64x2_t vandq_u64(uint64x2_t a, uint64x2_t b); // VAND q0,q0,q0
+#define vandq_u64 _mm_and_si128
+
+//******************** Bitwise or *********************************
+//******************************************************************
+
+int8x16_t vorrq_s8(int8x16_t a, int8x16_t b); // VORR q0,q0,q0
+#define vorrq_s8 _mm_or_si128 //every variant below maps to the same 128-bit OR: the operation works bit by bit, whatever the lane width
+
+int16x8_t vorrq_s16(int16x8_t a, int16x8_t b); // VORR q0,q0,q0
+#define vorrq_s16 _mm_or_si128
+
+int32x4_t vorrq_s32(int32x4_t a, int32x4_t b); // VORR q0,q0,q0
+#define vorrq_s32 _mm_or_si128
+
+int64x2_t vorrq_s64(int64x2_t a, int64x2_t b); // VORR q0,q0,q0
+#define vorrq_s64 _mm_or_si128
+
+uint8x16_t vorrq_u8(uint8x16_t a, uint8x16_t b); // VORR q0,q0,q0
+#define vorrq_u8 _mm_or_si128
+
+uint16x8_t vorrq_u16(uint16x8_t a, uint16x8_t b); // VORR q0,q0,q0
+#define vorrq_u16 _mm_or_si128
+
+uint32x4_t vorrq_u32(uint32x4_t a, uint32x4_t b); // VORR q0,q0,q0
+#define vorrq_u32 _mm_or_si128
+
+uint64x2_t vorrq_u64(uint64x2_t a, uint64x2_t b); // VORR q0,q0,q0
+#define vorrq_u64 _mm_or_si128
+
+//************* Bitwise exclusive or (EOR or XOR) ******************
+//*******************************************************************
+
+int8x16_t veorq_s8(int8x16_t a, int8x16_t b); // VEOR q0,q0,q0
+#define veorq_s8 _mm_xor_si128 //every variant below maps to the same 128-bit XOR: the operation works bit by bit, whatever the lane width
+
+int16x8_t veorq_s16(int16x8_t a, int16x8_t b); // VEOR q0,q0,q0
+#define veorq_s16 _mm_xor_si128
+
+int32x4_t veorq_s32(int32x4_t a, int32x4_t b); // VEOR q0,q0,q0
+#define veorq_s32 _mm_xor_si128
+
+int64x2_t veorq_s64(int64x2_t a, int64x2_t b); // VEOR q0,q0,q0
+#define veorq_s64 _mm_xor_si128
+
+uint8x16_t veorq_u8(uint8x16_t a, uint8x16_t b); // VEOR q0,q0,q0
+#define veorq_u8 _mm_xor_si128
+
+uint16x8_t veorq_u16(uint16x8_t a, uint16x8_t b); // VEOR q0,q0,q0
+#define veorq_u16 _mm_xor_si128
+
+uint32x4_t veorq_u32(uint32x4_t a, uint32x4_t b); // VEOR q0,q0,q0
+#define veorq_u32 _mm_xor_si128
+
+uint64x2_t veorq_u64(uint64x2_t a, uint64x2_t b); // VEOR q0,q0,q0
+#define veorq_u64 _mm_xor_si128
+
+//********************** Bit Clear **********************************
+//*******************************************************************
+//Logical AND complement (AND negation or AND NOT)
+
+//notice the arguments "swap" in the macros below: vbicq(a,b) expands to _mm_andnot_si128 (b,a), i.e. (~b) & a
+
+int8x16_t vbicq_s8(int8x16_t a, int8x16_t b); // VBIC q0,q0,q0
+#define vbicq_s8(a,b) _mm_andnot_si128 (b,a) //notice arguments "swap": _mm_andnot_si128(x,y) computes (~x) & y, so this yields a & (~b)
+
+int16x8_t vbicq_s16(int16x8_t a, int16x8_t b); // VBIC q0,q0,q0
+#define vbicq_s16(a,b) _mm_andnot_si128 (b,a) //notice arguments "swap"
+
+int32x4_t vbicq_s32(int32x4_t a, int32x4_t b); // VBIC q0,q0,q0
+#define vbicq_s32(a,b) _mm_andnot_si128 (b,a) //notice arguments "swap"
+
+int64x2_t vbicq_s64(int64x2_t a, int64x2_t b); // VBIC q0,q0,q0
+#define vbicq_s64(a,b) _mm_andnot_si128 (b,a) //notice arguments "swap"
+
+uint8x16_t vbicq_u8(uint8x16_t a, uint8x16_t b); // VBIC q0,q0,q0
+#define vbicq_u8(a,b) _mm_andnot_si128 (b,a) //notice arguments "swap"
+
+uint16x8_t vbicq_u16(uint16x8_t a, uint16x8_t b); // VBIC q0,q0,q0
+#define vbicq_u16(a,b) _mm_andnot_si128 (b,a) //notice arguments "swap"
+
+uint32x4_t vbicq_u32(uint32x4_t a, uint32x4_t b); // VBIC q0,q0,q0
+#define vbicq_u32(a,b) _mm_andnot_si128 (b,a) //notice arguments "swap"
+
+uint64x2_t vbicq_u64(uint64x2_t a, uint64x2_t b); // VBIC q0,q0,q0
+#define vbicq_u64(a,b) _mm_andnot_si128 (b,a) //notice arguments "swap"
+
+//**************** Bitwise OR complement ********************************
+//**************************************** ********************************
+//no exact IA 32 match, need to implement it as following
+
+int8x16_t vornq_s8(int8x16_t a, int8x16_t b); // VORN q0,q0,q0
+_NEON2SSE_INLINE int8x16_t vornq_s8(int8x16_t a, int8x16_t b) // VORN q0,q0,q0
+{
+ //a | (~b)
+ return _mm_or_si128 (a, vmvnq_s8(b));
+}
+
+int16x8_t vornq_s16(int16x8_t a, int16x8_t b); // VORN q0,q0,q0
+_NEON2SSE_INLINE int16x8_t vornq_s16(int16x8_t a, int16x8_t b) // VORN q0,q0,q0
+{
+ //a | (~b)
+ return _mm_or_si128 (a, vmvnq_s16(b));
+}
+
+int32x4_t vornq_s32(int32x4_t a, int32x4_t b); // VORN q0,q0,q0
+_NEON2SSE_INLINE int32x4_t vornq_s32(int32x4_t a, int32x4_t b) // VORN q0,q0,q0
+{
+ //a | (~b)
+ return _mm_or_si128 (a, vmvnq_s32(b));
+}
+
+int64x2_t vornq_s64(int64x2_t a, int64x2_t b); // VORN q0,q0,q0
+_NEON2SSE_INLINE int64x2_t vornq_s64(int64x2_t a, int64x2_t b)
+{
+ //a | (~b); the bitwise not of b is built in place as (~b) & all_ones
+ __m128i all_ones = _mm_cmpeq_epi8 (a, a); //0xff in every byte
+ __m128i not_b = _mm_andnot_si128 (b, all_ones);
+ return _mm_or_si128 (a, not_b);
+}
+
+uint8x16_t vornq_u8(uint8x16_t a, uint8x16_t b); // VORN q0,q0,q0
+_NEON2SSE_INLINE uint8x16_t vornq_u8(uint8x16_t a, uint8x16_t b) // VORN q0,q0,q0
+{
+ //a | (~b)
+ return _mm_or_si128 (a, vmvnq_u8(b));
+}
+
+uint16x8_t vornq_u16(uint16x8_t a, uint16x8_t b); // VORN q0,q0,q0
+_NEON2SSE_INLINE uint16x8_t vornq_u16(uint16x8_t a, uint16x8_t b) // VORN q0,q0,q0
+{
+ //a | (~b); uses the unsigned "not" name like vornq_u8 and vornq_u32 do (vmvnq_u16 is an alias of vmvnq_s16)
+ __m128i b1;
+ b1 = vmvnq_u16( b); //bitwise not for b
+ return _mm_or_si128 (a, b1);
+}
+
+uint32x4_t vornq_u32(uint32x4_t a, uint32x4_t b); // VORN q0,q0,q0
+_NEON2SSE_INLINE uint32x4_t vornq_u32(uint32x4_t a, uint32x4_t b) // VORN q0,q0,q0
+{
+ //a | (~b)
+ return _mm_or_si128 (a, vmvnq_u32(b));
+}
+uint64x2_t vornq_u64(uint64x2_t a, uint64x2_t b); // VORN q0,q0,q0
+#define vornq_u64 vornq_s64 //alias: the signed 64-bit implementation is reused as is
+
+//********************* Bitwise Select *****************************
+//******************************************************************
+//Note: this intrinsic can compile to any of VBSL/VBIF/VBIT depending on register allocation.
+
+//VBSL (Bitwise Select) selects each bit for the destination from the first operand if the
+//corresponding bit of the destination is 1, or from the second operand if the corresponding bit of the destination is 0.
+
+//VBIF (Bitwise Insert if False) inserts each bit from the first operand into the destination
+//if the corresponding bit of the second operand is 0, otherwise leaves the destination bit unchanged
+
+//VBIT (Bitwise Insert if True) inserts each bit from the first operand into the destination
+//if the corresponding bit of the second operand is 1, otherwise leaves the destination bit unchanged.
+
+//Only VBSL is implemented for SIMD
+
+int8x16_t vbslq_s8(uint8x16_t a, int8x16_t b, int8x16_t c); // VBSL q0,q0,q0
+_NEON2SSE_INLINE int8x16_t vbslq_s8(uint8x16_t a, int8x16_t b, int8x16_t c) // VBSL q0,q0,q0
+{
+ //bitwise select: take the bit from b where the mask a has 1 and from c where it has 0
+ __m128i from_b = _mm_and_si128 (a, b);
+ __m128i from_c = _mm_andnot_si128 (a, c);
+ return _mm_or_si128 (from_b, from_c);
+}
+
+int16x8_t vbslq_s16(uint16x8_t a, int16x8_t b, int16x8_t c); // VBSL q0,q0,q0
+#define vbslq_s16 vbslq_s8 //every integer variant below reuses vbslq_s8: the select works bit by bit, whatever the lane width
+
+int32x4_t vbslq_s32(uint32x4_t a, int32x4_t b, int32x4_t c); // VBSL q0,q0,q0
+#define vbslq_s32 vbslq_s8
+
+int64x2_t vbslq_s64(uint64x2_t a, int64x2_t b, int64x2_t c); // VBSL q0,q0,q0
+#define vbslq_s64 vbslq_s8
+
+uint8x16_t vbslq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c); // VBSL q0,q0,q0
+#define vbslq_u8 vbslq_s8
+
+uint16x8_t vbslq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c); // VBSL q0,q0,q0
+#define vbslq_u16 vbslq_s8
+
+uint32x4_t vbslq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c); // VBSL q0,q0,q0
+#define vbslq_u32 vbslq_s8
+
+uint64x2_t vbslq_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c); // VBSL q0,q0,q0
+#define vbslq_u64 vbslq_s8
+
+float32x4_t vbslq_f32(uint32x4_t a, float32x4_t b, float32x4_t c); // VBSL q0,q0,q0
+_NEON2SSE_INLINE float32x4_t vbslq_f32(uint32x4_t a, float32x4_t b, float32x4_t c) // VBSL q0,q0,q0
+{
+ //bitwise select for float vectors: (a & b) | (~a & c)
+ //the integer mask is reinterpreted with _mm_castsi128_ps instead of dereferencing a casted pointer to the parameter
+ __m128 mask, sel1, sel2;
+ mask = _mm_castsi128_ps (a); //same bits, seen as 4 floats
+ sel1 = _mm_and_ps (mask, b); //bits of b where the mask bit is 1
+ sel2 = _mm_andnot_ps (mask, c); //bits of c where the mask bit is 0
+ return _mm_or_ps (sel1, sel2);
+}
+
+poly8x16_t vbslq_p8(uint8x16_t a, poly8x16_t b, poly8x16_t c); // VBSL q0,q0,q0
+#define vbslq_p8 vbslq_u8 //resolves to vbslq_s8 through the vbslq_u8 alias
+
+poly16x8_t vbslq_p16(uint16x8_t a, poly16x8_t b, poly16x8_t c); // VBSL q0,q0,q0
+#define vbslq_p16 vbslq_s8 //direct alias of vbslq_s8
+
+//************************************************************************************
+//**************** Transposition operations ****************************************
+//************************************************************************************
+//***************** Vector Transpose ************************************************
+//************************************************************************************
+//VTRN (Vector Transpose) treats the elements of its operand vectors as elements of 2 x 2 matrices, and transposes the matrices.
+// making the result look as (a0, b0, a2, b2, a4, b4,....) (a1, b1, a3, b3, a5, b5,.....)
+
+#if defined(USE_SSSE3)
+//int8x16x2_t vtrnq_s8(int8x16_t a, int8x16_t b); // VTRN.8 q0,q0
+_NEON2SSE_INLINE int8x16x2_t vtrnq_s8(int8x16_t a, int8x16_t b) // VTRN.8 q0,q0
+{
+ //step 1: in each input move the even-indexed bytes to the low half and the odd-indexed bytes to the high half
+ //step 2: interleave the matching halves of a and b
+ _NEON2SSE_ALIGN_16 int8_t evens_then_odds[16] = { 0, 2, 4, 6, 8, 10, 12, 14, 1, 3,5, 7, 9, 11, 13, 15};
+ int8x16x2_t transposed;
+ __m128i a_split, b_split;
+ a_split = _mm_shuffle_epi8 (a, *(__m128i*)evens_then_odds); //a0, a2, ... a14, a1, a3, ... a15
+ b_split = _mm_shuffle_epi8 (b, *(__m128i*)evens_then_odds); //b0, b2, ... b14, b1, b3, ... b15
+
+ transposed.val[0] = _mm_unpacklo_epi8(a_split, b_split); //a0,b0, a2,b2, a4,b4, ... a14,b14
+ transposed.val[1] = _mm_unpackhi_epi8(a_split, b_split); //a1,b1, a3,b3, a5,b5, ... a15,b15
+ return transposed;
+}
+#endif
+
+#if defined(USE_SSSE3)
+int16x8x2_t vtrnq_s16(int16x8_t a, int16x8_t b); // VTRN.16 q0,q0
+_NEON2SSE_INLINE int16x8x2_t vtrnq_s16(int16x8_t a, int16x8_t b) // VTRN.16 q0,q0
+{
+ //step 1: in each input move the even-indexed 16-bit lanes to the low half and the odd-indexed ones to the high half
+ //step 2: interleave the matching halves of a and b
+ _NEON2SSE_ALIGN_16 int8_t evens_then_odds[16] = { 0,1, 4,5, 8,9, 12,13, 2,3, 6,7, 10,11, 14,15}; //byte indexes, 2 per lane
+ int16x8x2_t transposed;
+ __m128i a_split, b_split;
+ a_split = _mm_shuffle_epi8 (a, *(__m128i*)evens_then_odds); //a0, a2, a4, a6, a1, a3, a5, a7
+ b_split = _mm_shuffle_epi8 (b, *(__m128i*)evens_then_odds); //b0, b2, b4, b6, b1, b3, b5, b7
+ transposed.val[0] = _mm_unpacklo_epi16(a_split, b_split); //a0,b0, a2,b2, a4,b4, a6,b6
+ transposed.val[1] = _mm_unpackhi_epi16(a_split, b_split); //a1,b1, a3,b3, a5,b5, a7,b7
+ return transposed;
+}
+#endif
+
+int32x4x2_t vtrnq_s32(int32x4_t a, int32x4_t b); // VTRN.32 q0,q0
+_NEON2SSE_INLINE int32x4x2_t vtrnq_s32(int32x4_t a, int32x4_t b) // VTRN.32 q0,q0
+{ //may be not optimal solution compared with serial
+ //the shuffle control is spelled with _MM_SHUFFLE(3,1,2,0) (== 216), the same way vtrnq_f32 does it
+ int32x4x2_t v32x4;
+ __m128i a_sh, b_sh;
+ a_sh = _mm_shuffle_epi32 (a, _MM_SHUFFLE(3,1, 2, 0)); //a0, a2, a1, a3
+ b_sh = _mm_shuffle_epi32 (b, _MM_SHUFFLE(3,1, 2, 0)); //b0, b2, b1, b3
+
+ v32x4.val[0] = _mm_unpacklo_epi32(a_sh, b_sh); //a0, b0, a2, b2
+ v32x4.val[1] = _mm_unpackhi_epi32(a_sh, b_sh); //a1, b1, a3, b3
+ return v32x4;
+}
+
+#if defined(USE_SSSE3) //the 8- and 16-bit aliases are only compiled when USE_SSSE3 is defined
+uint8x16x2_t vtrnq_u8(uint8x16_t a, uint8x16_t b); // VTRN.8 q0,q0
+#define vtrnq_u8 vtrnq_s8 //alias of the signed implementation
+
+uint16x8x2_t vtrnq_u16(uint16x8_t a, uint16x8_t b); // VTRN.16 q0,q0
+#define vtrnq_u16 vtrnq_s16 //alias of the signed implementation
+#endif
+
+uint32x4x2_t vtrnq_u32(uint32x4_t a, uint32x4_t b); // VTRN.32 q0,q0
+#define vtrnq_u32 vtrnq_s32 //alias of the signed implementation; not guarded by USE_SSSE3
+
+float32x4x2_t vtrnq_f32(float32x4_t a, float32x4_t b); // VTRN.32 q0,q0
+_NEON2SSE_INLINE float32x4x2_t vtrnq_f32(float32x4_t a, float32x4_t b) // VTRN.32 q0,q0
+{ //may be not optimal solution compared with serial
+ //step 1: reorder each input to even lanes first, odd lanes second; step 2: interleave the matching halves
+ float32x4x2_t transposed;
+ __m128 a_split, b_split;
+ a_split = _mm_shuffle_ps (a, a, _MM_SHUFFLE(3,1, 2, 0)); //a0, a2, a1, a3, need to check endianness
+ b_split = _mm_shuffle_ps (b, b, _MM_SHUFFLE(3,1, 2, 0)); //b0, b2, b1, b3, need to check endianness
+
+ transposed.val[0] = _mm_unpacklo_ps(a_split, b_split); //a0, b0, a2, b2
+ transposed.val[1] = _mm_unpackhi_ps(a_split, b_split); //a1, b1, a3, b3
+ return transposed;
+}
+
+#if defined(USE_SSSE3) //both polynomial aliases are only compiled when USE_SSSE3 is defined
+poly8x16x2_t vtrnq_p8(poly8x16_t a, poly8x16_t b); // VTRN.8 q0,q0
+#define vtrnq_p8 vtrnq_s8 //alias of the signed implementation
+
+poly16x8x2_t vtrnq_p16(poly16x8_t a, poly16x8_t b); // VTRN.16 q0,q0
+#define vtrnq_p16 vtrnq_s16 //alias of the signed implementation
+#endif
+
+//***************** Interleave elements ***************************
+//*****************************************************************
+//output has (a0,b0,a1,b1, a2,b2,.....)
+
+int8x16x2_t vzipq_s8(int8x16_t a, int8x16_t b); // VZIP.8 q0,q0
+_NEON2SSE_INLINE int8x16x2_t vzipq_s8(int8x16_t a, int8x16_t b) // VZIP.8 q0,q0
+{
+ int8x16x2_t zipped;
+ zipped.val[0] = _mm_unpacklo_epi8(a, b); //interleaved low halves: a0,b0, a1,b1, ... a7,b7
+ zipped.val[1] = _mm_unpackhi_epi8(a, b); //interleaved high halves: a8,b8, a9,b9, ... a15,b15
+ return zipped;
+}
+
+int16x8x2_t vzipq_s16(int16x8_t a, int16x8_t b); // VZIP.16 q0,q0
+_NEON2SSE_INLINE int16x8x2_t vzipq_s16(int16x8_t a, int16x8_t b) // VZIP.16 q0,q0
+{
+ int16x8x2_t zipped;
+ zipped.val[0] = _mm_unpacklo_epi16(a, b); //interleaved low halves: a0,b0, a1,b1, a2,b2, a3,b3
+ zipped.val[1] = _mm_unpackhi_epi16(a, b); //interleaved high halves: a4,b4, a5,b5, a6,b6, a7,b7
+ return zipped;
+}
+
+int32x4x2_t vzipq_s32(int32x4_t a, int32x4_t b); // VZIP.32 q0,q0
+_NEON2SSE_INLINE int32x4x2_t vzipq_s32(int32x4_t a, int32x4_t b) // VZIP.32 q0,q0
+{
+ int32x4x2_t zipped;
+ zipped.val[0] = _mm_unpacklo_epi32(a, b); //interleaved low halves: a0,b0, a1,b1
+ zipped.val[1] = _mm_unpackhi_epi32(a, b); //interleaved high halves: a2,b2, a3,b3
+ return zipped;
+}
+
+uint8x16x2_t vzipq_u8(uint8x16_t a, uint8x16_t b); // VZIP.8 q0,q0
+#define vzipq_u8 vzipq_s8 //the unsigned names below are aliases of the signed implementations
+
+uint16x8x2_t vzipq_u16(uint16x8_t a, uint16x8_t b); // VZIP.16 q0,q0
+#define vzipq_u16 vzipq_s16
+
+uint32x4x2_t vzipq_u32(uint32x4_t a, uint32x4_t b); // VZIP.32 q0,q0
+#define vzipq_u32 vzipq_s32
+
+float32x4x2_t vzipq_f32(float32x4_t a, float32x4_t b); // VZIP.32 q0,q0
+_NEON2SSE_INLINE float32x4x2_t vzipq_f32(float32x4_t a, float32x4_t b) // VZIP.32 q0,q0
+{
+ float32x4x2_t zipped;
+ zipped.val[0] = _mm_unpacklo_ps ( a, b); //interleaved low halves: a0,b0, a1,b1
+ zipped.val[1] = _mm_unpackhi_ps ( a, b); //interleaved high halves: a2,b2, a3,b3
+ return zipped;
+}
+
+poly8x16x2_t vzipq_p8(poly8x16_t a, poly8x16_t b); // VZIP.8 q0,q0
+#define vzipq_p8 vzipq_u8 //resolves to vzipq_s8 through the vzipq_u8 alias
+
+poly16x8x2_t vzipq_p16(poly16x8_t a, poly16x8_t b); // VZIP.16 q0,q0
+#define vzipq_p16 vzipq_u16 //resolves to vzipq_s16 through the vzipq_u16 alias
+
+//*********************** De-Interleave elements *************************
+//*************************************************************************
+//As the result of these functions first val contains (a0,a2,a4,....,b0,b2, b4,...) and the second val (a1,a3,a5,....b1,b3,b5...)
+//no such functions in IA32 SIMD, shuffle is required
+
+#if defined(USE_SSSE3)
+int8x16x2_t vuzpq_s8(int8x16_t a, int8x16_t b); // VUZP.8 q0,q0
+_NEON2SSE_INLINE int8x16x2_t vuzpq_s8(int8x16_t a, int8x16_t b) // VUZP.8 q0,q0
+{
+ //step 1: in each input move the even-indexed bytes to the low 64 bits and the odd-indexed bytes to the high 64 bits
+ //step 2: glue the low (high) 64 bits of a to the low (high) 64 bits of b
+ _NEON2SSE_ALIGN_16 int8_t evens_then_odds[16] = { 0, 2, 4, 6, 8, 10, 12, 14, 1, 3,5, 7, 9, 11, 13, 15};
+ int8x16x2_t unzipped;
+ __m128i a_split, b_split;
+ a_split = _mm_shuffle_epi8 (a, *(__m128i*)evens_then_odds); //a0, a2, ... a14, a1, a3, ... a15
+ b_split = _mm_shuffle_epi8 (b, *(__m128i*)evens_then_odds); //b0, b2, ... b14, b1, b3, ... b15
+ unzipped.val[0] = _mm_unpacklo_epi64(a_split, b_split); //a0, a2, ... a14, b0, b2, ... b14
+ unzipped.val[1] = _mm_unpackhi_epi64(a_split, b_split); //a1, a3, ... a15, b1, b3, ... b15
+ return unzipped;
+}
+#endif
+
+#if defined(USE_SSSE3)
+int16x8x2_t vuzpq_s16(int16x8_t a, int16x8_t b); // VUZP.16 q0,q0
+_NEON2SSE_INLINE int16x8x2_t vuzpq_s16(int16x8_t a, int16x8_t b) // VUZP.16 q0,q0
+{
+ //step 1: in each input move the even-indexed 16-bit lanes to the low 64 bits and the odd-indexed ones to the high 64 bits
+ //step 2: glue the low (high) 64 bits of a to the low (high) 64 bits of b
+ _NEON2SSE_ALIGN_16 int8_t evens_then_odds[16] = { 0,1, 4,5, 8,9, 12,13, 2,3, 6,7, 10,11, 14,15}; //byte indexes, 2 per lane
+ int16x8x2_t unzipped;
+ __m128i a_split, b_split;
+ a_split = _mm_shuffle_epi8 (a, *(__m128i*)evens_then_odds); //a0, a2, a4, a6, a1, a3, a5, a7
+ b_split = _mm_shuffle_epi8 (b, *(__m128i*)evens_then_odds); //b0, b2, b4, b6, b1, b3, b5, b7
+ unzipped.val[0] = _mm_unpacklo_epi64(a_split, b_split); //a0, a2, a4, a6, b0, b2, b4, b6
+ unzipped.val[1] = _mm_unpackhi_epi64(a_split, b_split); //a1, a3, a5, a7, b1, b3, b5, b7
+ return unzipped;
+}
+#endif
+
+int32x4x2_t vuzpq_s32(int32x4_t a, int32x4_t b); // VUZP.32 q0,q0
+_NEON2SSE_INLINE int32x4x2_t vuzpq_s32(int32x4_t a, int32x4_t b) // VUZP.32 q0,q0
+{ //may be not optimal solution compared with serial
+ //the shuffle control is spelled with _MM_SHUFFLE(3,1,2,0) (== 216) instead of a bare number
+ int32x4x2_t v32x4;
+ __m128i a_sh, b_sh;
+ a_sh = _mm_shuffle_epi32 (a, _MM_SHUFFLE(3,1, 2, 0)); //a0, a2, a1, a3
+ b_sh = _mm_shuffle_epi32 (b, _MM_SHUFFLE(3,1, 2, 0)); //b0, b2, b1, b3
+
+ v32x4.val[0] = _mm_unpacklo_epi64(a_sh, b_sh); //a0, a2, b0, b2
+ v32x4.val[1] = _mm_unpackhi_epi64(a_sh, b_sh); //a1, a3, b1, b3
+ return v32x4;
+}
+
+#if defined(USE_SSSE3) //the 8- and 16-bit aliases are only compiled when USE_SSSE3 is defined
+uint8x16x2_t vuzpq_u8(uint8x16_t a, uint8x16_t b); // VUZP.8 q0,q0
+#define vuzpq_u8 vuzpq_s8 //alias of the signed implementation
+
+uint16x8x2_t vuzpq_u16(uint16x8_t a, uint16x8_t b); // VUZP.16 q0,q0
+#define vuzpq_u16 vuzpq_s16 //alias of the signed implementation
+#endif
+
+uint32x4x2_t vuzpq_u32(uint32x4_t a, uint32x4_t b); // VUZP.32 q0,q0
+#define vuzpq_u32 vuzpq_s32 //alias of the signed implementation; not guarded by USE_SSSE3
+
+float32x4x2_t vuzpq_f32(float32x4_t a, float32x4_t b); // VUZP.32 q0,q0
+_NEON2SSE_INLINE float32x4x2_t vuzpq_f32(float32x4_t a, float32x4_t b) // VUZP.32 q0,q0
+{
+ //a single two-source shuffle per output: the two low result lanes are picked from a, the two high ones from b
+ float32x4x2_t unzipped;
+ unzipped.val[0] = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2,0, 2, 0)); //a0, a2, b0, b2, need to check endianness however
+ unzipped.val[1] = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3,1, 3, 1)); //a1, a3, b1, b3, need to check endianness however
+ return unzipped;
+}
+
+#if defined(USE_SSSE3) //both polynomial aliases are only compiled when USE_SSSE3 is defined
+poly8x16x2_t vuzpq_p8(poly8x16_t a, poly8x16_t b); // VUZP.8 q0,q0
+#define vuzpq_p8 vuzpq_u8 //resolves to vuzpq_s8 through the vuzpq_u8 alias
+
+poly16x8x2_t vuzpq_p16(poly16x8_t a, poly16x8_t b); // VUZP.16 q0,q0
+#define vuzpq_p16 vuzpq_u16 //resolves to vuzpq_s16 through the vuzpq_u16 alias
+#endif
+
+//##############################################################################################
+//*********************** Reinterpret cast intrinsics.******************************************
+//##############################################################################################
+// Not a part of the official NEON instruction set but available in the gcc compiler *********************
+
+poly8x16_t vreinterpretq_p8_u32 (uint32x4_t t);
+#define vreinterpretq_p8_u32 //expands to nothing, so vreinterpretq_p8_u32(t) becomes just (t); same for the other empty macros of this group
+
+poly8x16_t vreinterpretq_p8_u16 (uint16x8_t t);
+#define vreinterpretq_p8_u16
+
+poly8x16_t vreinterpretq_p8_u8 (uint8x16_t t);
+#define vreinterpretq_p8_u8
+
+poly8x16_t vreinterpretq_p8_s32 (int32x4_t t);
+#define vreinterpretq_p8_s32
+
+poly8x16_t vreinterpretq_p8_s16 (int16x8_t t);
+#define vreinterpretq_p8_s16
+
+poly8x16_t vreinterpretq_p8_s8 (int8x16_t t);
+#define vreinterpretq_p8_s8
+
+poly8x16_t vreinterpretq_p8_u64 (uint64x2_t t);
+#define vreinterpretq_p8_u64
+
+poly8x16_t vreinterpretq_p8_s64 (int64x2_t t);
+#define vreinterpretq_p8_s64
+
+poly8x16_t vreinterpretq_p8_f32 (float32x4_t t);
+#define vreinterpretq_p8_f32(t) _M128i(t) //the float source is the only one that goes through a helper (_M128i, defined outside this section)
+
+poly8x16_t vreinterpretq_p8_p16 (poly16x8_t t);
+#define vreinterpretq_p8_p16
+
+poly16x8_t vreinterpretq_p16_u32 (uint32x4_t t);
+#define vreinterpretq_p16_u32 //expands to nothing, so vreinterpretq_p16_u32(t) becomes just (t); same for the other empty macros of this group
+
+poly16x8_t vreinterpretq_p16_u16 (uint16x8_t t);
+#define vreinterpretq_p16_u16
+
+poly16x8_t vreinterpretq_p16_s32 (int32x4_t t);
+#define vreinterpretq_p16_s32
+
+poly16x8_t vreinterpretq_p16_s16 (int16x8_t t);
+#define vreinterpretq_p16_s16
+
+poly16x8_t vreinterpretq_p16_s8 (int8x16_t t);
+#define vreinterpretq_p16_s8
+
+poly16x8_t vreinterpretq_p16_u64 (uint64x2_t t);
+#define vreinterpretq_p16_u64
+
+poly16x8_t vreinterpretq_p16_s64 (int64x2_t t);
+#define vreinterpretq_p16_s64
+
+poly16x8_t vreinterpretq_p16_f32 (float32x4_t t);
+#define vreinterpretq_p16_f32(t) _M128i(t) //the float source is the only one that goes through a helper (_M128i, defined outside this section)
+
+poly16x8_t vreinterpretq_p16_p8 (poly8x16_t t);
+#define vreinterpretq_p16_p8 vreinterpretq_s16_p8 //forwards to vreinterpretq_s16_p8, an empty macro defined further below
+
+//**** Integer to float ******
+
+float32x4_t vreinterpretq_f32_u32 (uint32x4_t t);
+#define vreinterpretq_f32_u32(t) _mm_castsi128_ps(t) //value cast instead of *(__m128*)&(t): also accepts non-lvalue arguments and is safe inside larger expressions
+
+float32x4_t vreinterpretq_f32_u16 (uint16x8_t t);
+#define vreinterpretq_f32_u16 vreinterpretq_f32_u32 //all the other integer sources below reuse the u32 macro
+
+float32x4_t vreinterpretq_f32_u8 (uint8x16_t t);
+#define vreinterpretq_f32_u8 vreinterpretq_f32_u32
+
+float32x4_t vreinterpretq_f32_s32 (int32x4_t t);
+#define vreinterpretq_f32_s32 vreinterpretq_f32_u32
+
+float32x4_t vreinterpretq_f32_s16 (int16x8_t t);
+#define vreinterpretq_f32_s16 vreinterpretq_f32_u32
+
+float32x4_t vreinterpretq_f32_s8 (int8x16_t t);
+#define vreinterpretq_f32_s8 vreinterpretq_f32_u32
+
+float32x4_t vreinterpretq_f32_u64 (uint64x2_t t);
+#define vreinterpretq_f32_u64 vreinterpretq_f32_u32
+
+float32x4_t vreinterpretq_f32_s64 (int64x2_t t);
+#define vreinterpretq_f32_s64 vreinterpretq_f32_u32
+
+float32x4_t vreinterpretq_f32_p16 (poly16x8_t t);
+#define vreinterpretq_f32_p16 vreinterpretq_f32_u32
+
+float32x4_t vreinterpretq_f32_p8 (poly8x16_t t);
+#define vreinterpretq_f32_p8 vreinterpretq_f32_u32
+
+//*** Integer type conversions ******************
+//no conversion necessary for the following functions because it is same data type
+
+int64x2_t vreinterpretq_s64_u32 (uint32x4_t t);
+#define vreinterpretq_s64_u32 //expands to nothing, so vreinterpretq_s64_u32(t) becomes just (t); same for the other empty macros of this group
+
+int64x2_t vreinterpretq_s64_s16 (int16x8_t t); //parameter type now matches the _s16 suffix
+#define vreinterpretq_s64_s16
+
+int64x2_t vreinterpretq_s64_u8 (uint8x16_t t);
+#define vreinterpretq_s64_u8
+
+int64x2_t vreinterpretq_s64_s32 (int32x4_t t);
+#define vreinterpretq_s64_s32
+
+int64x2_t vreinterpretq_s64_u16 (uint16x8_t t); //parameter type now matches the _u16 suffix
+#define vreinterpretq_s64_u16
+
+int64x2_t vreinterpretq_s64_s8 (int8x16_t t);
+#define vreinterpretq_s64_s8
+
+int64x2_t vreinterpretq_s64_u64 (uint64x2_t t);
+#define vreinterpretq_s64_u64
+
+int64x2_t vreinterpretq_s64_f32 (float32x4_t t);
+#define vreinterpretq_s64_f32(t) _M128i(t)
+
+int64x2_t vreinterpretq_s64_p16 (poly16x8_t t);
+#define vreinterpretq_s64_p16
+
+int64x2_t vreinterpretq_s64_p8 (poly8x16_t t);
+#define vreinterpretq_s64_p8
+
+uint64x2_t vreinterpretq_u64_u32 (uint32x4_t t);
+#define vreinterpretq_u64_u32 //expands to nothing, so vreinterpretq_u64_u32(t) becomes just (t); same for the other empty macros of this group
+
+uint64x2_t vreinterpretq_u64_u16 (uint16x8_t t);
+#define vreinterpretq_u64_u16
+
+uint64x2_t vreinterpretq_u64_u8 (uint8x16_t t);
+#define vreinterpretq_u64_u8
+
+uint64x2_t vreinterpretq_u64_s32 (int32x4_t t);
+#define vreinterpretq_u64_s32
+
+uint64x2_t vreinterpretq_u64_s16 (int16x8_t t);
+#define vreinterpretq_u64_s16
+
+uint64x2_t vreinterpretq_u64_s8 (int8x16_t t);
+#define vreinterpretq_u64_s8
+
+uint64x2_t vreinterpretq_u64_s64 (int64x2_t t);
+#define vreinterpretq_u64_s64
+
+uint64x2_t vreinterpretq_u64_f32 (float32x4_t t);
+#define vreinterpretq_u64_f32(t) _M128i(t) //the float source is the only one that goes through a helper (_M128i, defined outside this section)
+
+uint64x2_t vreinterpretq_u64_p16 (poly16x8_t t);
+#define vreinterpretq_u64_p16
+
+uint64x2_t vreinterpretq_u64_p8 (poly8x16_t t);
+#define vreinterpretq_u64_p8
+
+int8x16_t vreinterpretq_s8_u32 (uint32x4_t t);
+#define vreinterpretq_s8_u32 //expands to nothing, so vreinterpretq_s8_u32(t) becomes just (t); same for the other empty macros of this group
+
+int8x16_t vreinterpretq_s8_u16 (uint16x8_t t);
+#define vreinterpretq_s8_u16
+
+int8x16_t vreinterpretq_s8_u8 (uint8x16_t t);
+#define vreinterpretq_s8_u8
+
+int8x16_t vreinterpretq_s8_s32 (int32x4_t t);
+#define vreinterpretq_s8_s32
+
+int8x16_t vreinterpretq_s8_s16 (int16x8_t t);
+#define vreinterpretq_s8_s16
+
+int8x16_t vreinterpretq_s8_u64 (uint64x2_t t);
+#define vreinterpretq_s8_u64
+
+int8x16_t vreinterpretq_s8_s64 (int64x2_t t);
+#define vreinterpretq_s8_s64
+
+int8x16_t vreinterpretq_s8_f32 (float32x4_t t);
+#define vreinterpretq_s8_f32(t) _M128i(t) //the float source is the only one that goes through a helper (_M128i, defined outside this section)
+
+int8x16_t vreinterpretq_s8_p16 (poly16x8_t t);
+#define vreinterpretq_s8_p16
+
+int8x16_t vreinterpretq_s8_p8 (poly8x16_t t);
+#define vreinterpretq_s8_p8
+
+int16x8_t vreinterpretq_s16_u32 (uint32x4_t t);
+#define vreinterpretq_s16_u32 //expands to nothing, so vreinterpretq_s16_u32(t) becomes just (t); same for the other empty macros of this group
+
+int16x8_t vreinterpretq_s16_u16 (uint16x8_t t);
+#define vreinterpretq_s16_u16
+
+int16x8_t vreinterpretq_s16_u8 (uint8x16_t t);
+#define vreinterpretq_s16_u8
+
+int16x8_t vreinterpretq_s16_s32 (int32x4_t t);
+#define vreinterpretq_s16_s32
+
+int16x8_t vreinterpretq_s16_s8 (int8x16_t t);
+#define vreinterpretq_s16_s8
+
+int16x8_t vreinterpretq_s16_u64 (uint64x2_t t);
+#define vreinterpretq_s16_u64
+
+int16x8_t vreinterpretq_s16_s64 (int64x2_t t);
+#define vreinterpretq_s16_s64
+
+int16x8_t vreinterpretq_s16_f32 (float32x4_t t);
+#define vreinterpretq_s16_f32(t) _M128i(t) //the float source is the only one that goes through a helper (_M128i, defined outside this section)
+
+int16x8_t vreinterpretq_s16_p16 (poly16x8_t t);
+#define vreinterpretq_s16_p16
+
+int16x8_t vreinterpretq_s16_p8 (poly8x16_t t);
+#define vreinterpretq_s16_p8 //also the target of the vreinterpretq_p16_p8 alias defined earlier
+
+int32x4_t vreinterpretq_s32_u32 (uint32x4_t t);
+#define vreinterpretq_s32_u32 //expands to nothing, so vreinterpretq_s32_u32(t) becomes just (t); same for the other empty macros of this group
+
+int32x4_t vreinterpretq_s32_u16 (uint16x8_t t);
+#define vreinterpretq_s32_u16
+
+int32x4_t vreinterpretq_s32_u8 (uint8x16_t t);
+#define vreinterpretq_s32_u8
+
+int32x4_t vreinterpretq_s32_s16 (int16x8_t t);
+#define vreinterpretq_s32_s16
+
+int32x4_t vreinterpretq_s32_s8 (int8x16_t t);
+#define vreinterpretq_s32_s8
+
+int32x4_t vreinterpretq_s32_u64 (uint64x2_t t);
+#define vreinterpretq_s32_u64
+
+int32x4_t vreinterpretq_s32_s64 (int64x2_t t);
+#define vreinterpretq_s32_s64
+
+int32x4_t vreinterpretq_s32_f32 (float32x4_t t);
+#define vreinterpretq_s32_f32(t) _mm_castps_si128(t) //(*(__m128i*)&(t)); calls the cast intrinsic directly, while the sibling *_f32 macros use _M128i
+
+int32x4_t vreinterpretq_s32_p16 (poly16x8_t t);
+#define vreinterpretq_s32_p16
+
+int32x4_t vreinterpretq_s32_p8 (poly8x16_t t);
+#define vreinterpretq_s32_p8
+
+uint8x16_t vreinterpretq_u8_u32 (uint32x4_t t);
+#define vreinterpretq_u8_u32 //expands to nothing, so vreinterpretq_u8_u32(t) becomes just (t); same for the other empty macros of this group
+
+uint8x16_t vreinterpretq_u8_u16 (uint16x8_t t);
+#define vreinterpretq_u8_u16
+
+uint8x16_t vreinterpretq_u8_s32 (int32x4_t t);
+#define vreinterpretq_u8_s32
+
+uint8x16_t vreinterpretq_u8_s16 (int16x8_t t);
+#define vreinterpretq_u8_s16
+
+uint8x16_t vreinterpretq_u8_s8 (int8x16_t t);
+#define vreinterpretq_u8_s8
+
+uint8x16_t vreinterpretq_u8_u64 (uint64x2_t t);
+#define vreinterpretq_u8_u64
+
+uint8x16_t vreinterpretq_u8_s64 (int64x2_t t);
+#define vreinterpretq_u8_s64
+
+uint8x16_t vreinterpretq_u8_f32 (float32x4_t t);
+#define vreinterpretq_u8_f32(t) _M128i(t) //the float source is the only one that goes through a helper (_M128i, defined outside this section)
+
+uint8x16_t vreinterpretq_u8_p16 (poly16x8_t t);
+#define vreinterpretq_u8_p16
+
+uint8x16_t vreinterpretq_u8_p8 (poly8x16_t t);
+#define vreinterpretq_u8_p8
+
+uint16x8_t vreinterpretq_u16_u32 (uint32x4_t t);
+#define vreinterpretq_u16_u32 //expands to nothing, so vreinterpretq_u16_u32(t) becomes just (t); same for the other empty macros of this group
+
+uint16x8_t vreinterpretq_u16_u8 (uint8x16_t t);
+#define vreinterpretq_u16_u8
+
+uint16x8_t vreinterpretq_u16_s32 (int32x4_t t);
+#define vreinterpretq_u16_s32
+
+uint16x8_t vreinterpretq_u16_s16 (int16x8_t t);
+#define vreinterpretq_u16_s16
+
+uint16x8_t vreinterpretq_u16_s8 (int8x16_t t);
+#define vreinterpretq_u16_s8
+
+uint16x8_t vreinterpretq_u16_u64 (uint64x2_t t);
+#define vreinterpretq_u16_u64
+
+uint16x8_t vreinterpretq_u16_s64 (int64x2_t t);
+#define vreinterpretq_u16_s64
+
+uint16x8_t vreinterpretq_u16_f32 (float32x4_t t);
+#define vreinterpretq_u16_f32(t) _M128i(t) //the float source is the only one that goes through a helper (_M128i, defined outside this section)
+
+uint16x8_t vreinterpretq_u16_p16 (poly16x8_t t);
+#define vreinterpretq_u16_p16
+
+uint16x8_t vreinterpretq_u16_p8 (poly8x16_t t);
+#define vreinterpretq_u16_p8
+
+uint32x4_t vreinterpretq_u32_u16 (uint16x8_t t);
+#define vreinterpretq_u32_u16 //expands to nothing, so vreinterpretq_u32_u16(t) becomes just (t); same for the other empty macros of this group
+
+uint32x4_t vreinterpretq_u32_u8 (uint8x16_t t);
+#define vreinterpretq_u32_u8
+
+uint32x4_t vreinterpretq_u32_s32 (int32x4_t t);
+#define vreinterpretq_u32_s32
+
+uint32x4_t vreinterpretq_u32_s16 (int16x8_t t);
+#define vreinterpretq_u32_s16
+
+uint32x4_t vreinterpretq_u32_s8 (int8x16_t t);
+#define vreinterpretq_u32_s8
+
+uint32x4_t vreinterpretq_u32_u64 (uint64x2_t t);
+#define vreinterpretq_u32_u64
+
+uint32x4_t vreinterpretq_u32_s64 (int64x2_t t);
+#define vreinterpretq_u32_s64
+
+uint32x4_t vreinterpretq_u32_f32 (float32x4_t t);
+#define vreinterpretq_u32_f32(t) _M128i(t) //the float source is the only one that goes through a helper (_M128i, defined outside this section)
+
+uint32x4_t vreinterpretq_u32_p16 (poly16x8_t t);
+#define vreinterpretq_u32_p16
+
+uint32x4_t vreinterpretq_u32_p8 (poly8x16_t t);
+#define vreinterpretq_u32_p8
+
+#endif /* NEON2SSE_H */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/avx2intrin.h b/lib/gcc/x86_64-linux-android/4.9/include/avx2intrin.h
new file mode 100644
index 0000000..d04c972
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/avx2intrin.h
@@ -0,0 +1,1889 @@
+/* Copyright (C) 2011-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+# error "Never use <avx2intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX2INTRIN_H_INCLUDED
+#define _AVX2INTRIN_H_INCLUDED
+
+#ifndef __AVX2__
+#pragma GCC push_options
+#pragma GCC target("avx2")
+#define __DISABLE_AVX2__
+#endif /* __AVX2__ */
+
+/* Sum absolute 8-bit integer difference of adjacent groups of 4
+ byte integers in the first 2 operands. Starting offsets within
+ operands are determined by the 3rd mask operand. */
+/* Two spellings are provided.  With __OPTIMIZE__ defined this is an
+   always-inline function; otherwise it is a macro, so that (int)(M) is
+   written directly in the builtin call at the point of use.  Both forms
+   pass the operands to __builtin_ia32_mpsadbw256 as __v32qi.  */
+#ifdef __OPTIMIZE__
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mpsadbw_epu8 (__m256i __X, __m256i __Y, const int __M)
+{
+ return (__m256i) __builtin_ia32_mpsadbw256 ((__v32qi)__X,
+ (__v32qi)__Y, __M);
+}
+#else
+#define _mm256_mpsadbw_epu8(X, Y, M) \
+ ((__m256i) __builtin_ia32_mpsadbw256 ((__v32qi)(__m256i)(X), \
+ (__v32qi)(__m256i)(Y), (int)(M)))
+#endif
+
+/* Thin wrapper: __builtin_ia32_pabsb256 applied to __A viewed as
+   __v32qi; the result is returned as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_abs_epi8 (__m256i __A)
+{
+  __v32qi __src = (__v32qi) __A;
+
+  return (__m256i) __builtin_ia32_pabsb256 (__src);
+}
+
+/* Thin wrapper: __builtin_ia32_pabsw256 applied to __A viewed as
+   __v16hi; the result is returned as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_abs_epi16 (__m256i __A)
+{
+  __v16hi __src = (__v16hi) __A;
+
+  return (__m256i) __builtin_ia32_pabsw256 (__src);
+}
+
+/* Thin wrapper: __builtin_ia32_pabsd256 applied to __A viewed as
+   __v8si; the result is returned as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_abs_epi32 (__m256i __A)
+{
+  __v8si __src = (__v8si) __A;
+
+  return (__m256i) __builtin_ia32_pabsd256 (__src);
+}
+
+/* Calls __builtin_ia32_packssdw256 with __A and __B viewed as __v8si and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_packs_epi32 (__m256i __A, __m256i __B)
+{
+  __v8si __lhs = (__v8si) __A;
+  __v8si __rhs = (__v8si) __B;
+
+  return (__m256i) __builtin_ia32_packssdw256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_packsswb256 with __A and __B viewed as __v16hi
+   and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_packs_epi16 (__m256i __A, __m256i __B)
+{
+  __v16hi __lhs = (__v16hi) __A;
+  __v16hi __rhs = (__v16hi) __B;
+
+  return (__m256i) __builtin_ia32_packsswb256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_packusdw256 with __A and __B viewed as __v8si and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_packus_epi32 (__m256i __A, __m256i __B)
+{
+  __v8si __lhs = (__v8si) __A;
+  __v8si __rhs = (__v8si) __B;
+
+  return (__m256i) __builtin_ia32_packusdw256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_packuswb256 with __A and __B viewed as __v16hi
+   and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_packus_epi16 (__m256i __A, __m256i __B)
+{
+  __v16hi __lhs = (__v16hi) __A;
+  __v16hi __rhs = (__v16hi) __B;
+
+  return (__m256i) __builtin_ia32_packuswb256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_paddb256 with __A and __B viewed as __v32qi and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_add_epi8 (__m256i __A, __m256i __B)
+{
+  __v32qi __lhs = (__v32qi) __A;
+  __v32qi __rhs = (__v32qi) __B;
+
+  return (__m256i) __builtin_ia32_paddb256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_paddw256 with __A and __B viewed as __v16hi and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_add_epi16 (__m256i __A, __m256i __B)
+{
+  __v16hi __lhs = (__v16hi) __A;
+  __v16hi __rhs = (__v16hi) __B;
+
+  return (__m256i) __builtin_ia32_paddw256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_paddd256 with __A and __B viewed as __v8si and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_add_epi32 (__m256i __A, __m256i __B)
+{
+  __v8si __lhs = (__v8si) __A;
+  __v8si __rhs = (__v8si) __B;
+
+  return (__m256i) __builtin_ia32_paddd256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_paddq256 with __A and __B viewed as __v4di and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_add_epi64 (__m256i __A, __m256i __B)
+{
+  __v4di __lhs = (__v4di) __A;
+  __v4di __rhs = (__v4di) __B;
+
+  return (__m256i) __builtin_ia32_paddq256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_paddsb256 with __A and __B viewed as __v32qi and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_adds_epi8 (__m256i __A, __m256i __B)
+{
+  __v32qi __lhs = (__v32qi) __A;
+  __v32qi __rhs = (__v32qi) __B;
+
+  return (__m256i) __builtin_ia32_paddsb256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_paddsw256 with __A and __B viewed as __v16hi and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_adds_epi16 (__m256i __A, __m256i __B)
+{
+  __v16hi __lhs = (__v16hi) __A;
+  __v16hi __rhs = (__v16hi) __B;
+
+  return (__m256i) __builtin_ia32_paddsw256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_paddusb256 with __A and __B viewed as __v32qi and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_adds_epu8 (__m256i __A, __m256i __B)
+{
+  __v32qi __lhs = (__v32qi) __A;
+  __v32qi __rhs = (__v32qi) __B;
+
+  return (__m256i) __builtin_ia32_paddusb256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_paddusw256 with __A and __B viewed as __v16hi and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_adds_epu16 (__m256i __A, __m256i __B)
+{
+  __v16hi __lhs = (__v16hi) __A;
+  __v16hi __rhs = (__v16hi) __B;
+
+  return (__m256i) __builtin_ia32_paddusw256 (__lhs, __rhs);
+}
+
+/* Both spellings call __builtin_ia32_palignr256 with the two vectors
+   viewed as __v4di and with the third argument multiplied by 8 before it
+   reaches the builtin.  With __OPTIMIZE__ defined an always-inline
+   function is used; otherwise the macro below takes its place.  */
+#ifdef __OPTIMIZE__
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_alignr_epi8 (__m256i __A, __m256i __B, const int __N)
+{
+ return (__m256i) __builtin_ia32_palignr256 ((__v4di)__A,
+ (__v4di)__B,
+ __N * 8);
+}
+#else
+/* In that case (__N*8) will be in vreg, and insn will not be matched. */
+/* Use define instead */
+#define _mm256_alignr_epi8(A, B, N) \
+ ((__m256i) __builtin_ia32_palignr256 ((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), \
+ (int)(N) * 8))
+#endif
+
+/* Calls __builtin_ia32_andsi256 with __A and __B viewed as __v4di and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_and_si256 (__m256i __A, __m256i __B)
+{
+  __v4di __lhs = (__v4di) __A;
+  __v4di __rhs = (__v4di) __B;
+
+  return (__m256i) __builtin_ia32_andsi256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_andnotsi256 with __A and __B (in that order)
+   viewed as __v4di and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_andnot_si256 (__m256i __A, __m256i __B)
+{
+  __v4di __lhs = (__v4di) __A;
+  __v4di __rhs = (__v4di) __B;
+
+  return (__m256i) __builtin_ia32_andnotsi256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pavgb256 with __A and __B viewed as __v32qi and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_avg_epu8 (__m256i __A, __m256i __B)
+{
+  __v32qi __lhs = (__v32qi) __A;
+  __v32qi __rhs = (__v32qi) __B;
+
+  return (__m256i) __builtin_ia32_pavgb256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pavgw256 with __A and __B viewed as __v16hi and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_avg_epu16 (__m256i __A, __m256i __B)
+{
+  __v16hi __lhs = (__v16hi) __A;
+  __v16hi __rhs = (__v16hi) __B;
+
+  return (__m256i) __builtin_ia32_pavgw256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pblendvb256 with __X, __Y and __M (in that order),
+   each viewed as __v32qi, and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_blendv_epi8 (__m256i __X, __m256i __Y, __m256i __M)
+{
+  __v32qi __lhs = (__v32qi) __X;
+  __v32qi __rhs = (__v32qi) __Y;
+  __v32qi __mask = (__v32qi) __M;
+
+  return (__m256i) __builtin_ia32_pblendvb256 (__lhs, __rhs, __mask);
+}
+
+/* Both spellings call __builtin_ia32_pblendw256 with the two vectors
+   viewed as __v16hi and the third argument passed through as an int.
+   With __OPTIMIZE__ defined an always-inline function is used; otherwise
+   a macro, so that (int)(M) is written directly in the builtin call.  */
+#ifdef __OPTIMIZE__
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_blend_epi16 (__m256i __X, __m256i __Y, const int __M)
+{
+ return (__m256i) __builtin_ia32_pblendw256 ((__v16hi)__X,
+ (__v16hi)__Y,
+ __M);
+}
+#else
+#define _mm256_blend_epi16(X, Y, M) \
+ ((__m256i) __builtin_ia32_pblendw256 ((__v16hi)(__m256i)(X), \
+ (__v16hi)(__m256i)(Y), (int)(M)))
+#endif
+
+/* Calls __builtin_ia32_pcmpeqb256 with __A and __B viewed as __v32qi and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpeq_epi8 (__m256i __A, __m256i __B)
+{
+  __v32qi __lhs = (__v32qi) __A;
+  __v32qi __rhs = (__v32qi) __B;
+
+  return (__m256i) __builtin_ia32_pcmpeqb256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pcmpeqw256 with __A and __B viewed as __v16hi and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpeq_epi16 (__m256i __A, __m256i __B)
+{
+  __v16hi __lhs = (__v16hi) __A;
+  __v16hi __rhs = (__v16hi) __B;
+
+  return (__m256i) __builtin_ia32_pcmpeqw256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pcmpeqd256 with __A and __B viewed as __v8si and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpeq_epi32 (__m256i __A, __m256i __B)
+{
+  __v8si __lhs = (__v8si) __A;
+  __v8si __rhs = (__v8si) __B;
+
+  return (__m256i) __builtin_ia32_pcmpeqd256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pcmpeqq256 with __A and __B viewed as __v4di and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpeq_epi64 (__m256i __A, __m256i __B)
+{
+  __v4di __lhs = (__v4di) __A;
+  __v4di __rhs = (__v4di) __B;
+
+  return (__m256i) __builtin_ia32_pcmpeqq256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pcmpgtb256 with __A and __B (in that order)
+   viewed as __v32qi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpgt_epi8 (__m256i __A, __m256i __B)
+{
+  __v32qi __lhs = (__v32qi) __A;
+  __v32qi __rhs = (__v32qi) __B;
+
+  return (__m256i) __builtin_ia32_pcmpgtb256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pcmpgtw256 with __A and __B (in that order)
+   viewed as __v16hi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpgt_epi16 (__m256i __A, __m256i __B)
+{
+  __v16hi __lhs = (__v16hi) __A;
+  __v16hi __rhs = (__v16hi) __B;
+
+  return (__m256i) __builtin_ia32_pcmpgtw256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pcmpgtd256 with __A and __B (in that order)
+   viewed as __v8si and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpgt_epi32 (__m256i __A, __m256i __B)
+{
+  __v8si __lhs = (__v8si) __A;
+  __v8si __rhs = (__v8si) __B;
+
+  return (__m256i) __builtin_ia32_pcmpgtd256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pcmpgtq256 with __A and __B (in that order)
+   viewed as __v4di and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpgt_epi64 (__m256i __A, __m256i __B)
+{
+  __v4di __lhs = (__v4di) __A;
+  __v4di __rhs = (__v4di) __B;
+
+  return (__m256i) __builtin_ia32_pcmpgtq256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_phaddw256 with __X and __Y viewed as __v16hi and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_hadd_epi16 (__m256i __X, __m256i __Y)
+{
+  __v16hi __lhs = (__v16hi) __X;
+  __v16hi __rhs = (__v16hi) __Y;
+
+  return (__m256i) __builtin_ia32_phaddw256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_phaddd256 with __X and __Y viewed as __v8si and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_hadd_epi32 (__m256i __X, __m256i __Y)
+{
+  __v8si __lhs = (__v8si) __X;
+  __v8si __rhs = (__v8si) __Y;
+
+  return (__m256i) __builtin_ia32_phaddd256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_phaddsw256 with __X and __Y viewed as __v16hi and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_hadds_epi16 (__m256i __X, __m256i __Y)
+{
+  __v16hi __lhs = (__v16hi) __X;
+  __v16hi __rhs = (__v16hi) __Y;
+
+  return (__m256i) __builtin_ia32_phaddsw256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_phsubw256 with __X and __Y (in that order) viewed
+   as __v16hi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_hsub_epi16 (__m256i __X, __m256i __Y)
+{
+  __v16hi __lhs = (__v16hi) __X;
+  __v16hi __rhs = (__v16hi) __Y;
+
+  return (__m256i) __builtin_ia32_phsubw256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_phsubd256 with __X and __Y (in that order) viewed
+   as __v8si and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_hsub_epi32 (__m256i __X, __m256i __Y)
+{
+  __v8si __lhs = (__v8si) __X;
+  __v8si __rhs = (__v8si) __Y;
+
+  return (__m256i) __builtin_ia32_phsubd256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_phsubsw256 with __X and __Y (in that order)
+   viewed as __v16hi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_hsubs_epi16 (__m256i __X, __m256i __Y)
+{
+  __v16hi __lhs = (__v16hi) __X;
+  __v16hi __rhs = (__v16hi) __Y;
+
+  return (__m256i) __builtin_ia32_phsubsw256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pmaddubsw256 with __X and __Y (in that order)
+   viewed as __v32qi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maddubs_epi16 (__m256i __X, __m256i __Y)
+{
+  __v32qi __lhs = (__v32qi) __X;
+  __v32qi __rhs = (__v32qi) __Y;
+
+  return (__m256i) __builtin_ia32_pmaddubsw256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pmaddwd256 with __A and __B viewed as __v16hi and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_madd_epi16 (__m256i __A, __m256i __B)
+{
+  __v16hi __lhs = (__v16hi) __A;
+  __v16hi __rhs = (__v16hi) __B;
+
+  return (__m256i) __builtin_ia32_pmaddwd256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pmaxsb256 with __A and __B viewed as __v32qi and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_max_epi8 (__m256i __A, __m256i __B)
+{
+  __v32qi __lhs = (__v32qi) __A;
+  __v32qi __rhs = (__v32qi) __B;
+
+  return (__m256i) __builtin_ia32_pmaxsb256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pmaxsw256 with __A and __B viewed as __v16hi and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_max_epi16 (__m256i __A, __m256i __B)
+{
+  __v16hi __lhs = (__v16hi) __A;
+  __v16hi __rhs = (__v16hi) __B;
+
+  return (__m256i) __builtin_ia32_pmaxsw256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pmaxsd256 with __A and __B viewed as __v8si and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_max_epi32 (__m256i __A, __m256i __B)
+{
+  __v8si __lhs = (__v8si) __A;
+  __v8si __rhs = (__v8si) __B;
+
+  return (__m256i) __builtin_ia32_pmaxsd256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pmaxub256 with __A and __B viewed as __v32qi and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_max_epu8 (__m256i __A, __m256i __B)
+{
+  __v32qi __lhs = (__v32qi) __A;
+  __v32qi __rhs = (__v32qi) __B;
+
+  return (__m256i) __builtin_ia32_pmaxub256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pmaxuw256 with __A and __B viewed as __v16hi and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_max_epu16 (__m256i __A, __m256i __B)
+{
+  __v16hi __lhs = (__v16hi) __A;
+  __v16hi __rhs = (__v16hi) __B;
+
+  return (__m256i) __builtin_ia32_pmaxuw256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pmaxud256 with __A and __B viewed as __v8si and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_max_epu32 (__m256i __A, __m256i __B)
+{
+  __v8si __lhs = (__v8si) __A;
+  __v8si __rhs = (__v8si) __B;
+
+  return (__m256i) __builtin_ia32_pmaxud256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pminsb256 with __A and __B viewed as __v32qi and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_min_epi8 (__m256i __A, __m256i __B)
+{
+  __v32qi __lhs = (__v32qi) __A;
+  __v32qi __rhs = (__v32qi) __B;
+
+  return (__m256i) __builtin_ia32_pminsb256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pminsw256 with __A and __B viewed as __v16hi and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_min_epi16 (__m256i __A, __m256i __B)
+{
+  __v16hi __lhs = (__v16hi) __A;
+  __v16hi __rhs = (__v16hi) __B;
+
+  return (__m256i) __builtin_ia32_pminsw256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pminsd256 with __A and __B viewed as __v8si and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_min_epi32 (__m256i __A, __m256i __B)
+{
+  __v8si __lhs = (__v8si) __A;
+  __v8si __rhs = (__v8si) __B;
+
+  return (__m256i) __builtin_ia32_pminsd256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pminub256 with __A and __B viewed as __v32qi and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_min_epu8 (__m256i __A, __m256i __B)
+{
+  __v32qi __lhs = (__v32qi) __A;
+  __v32qi __rhs = (__v32qi) __B;
+
+  return (__m256i) __builtin_ia32_pminub256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pminuw256 with __A and __B viewed as __v16hi and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_min_epu16 (__m256i __A, __m256i __B)
+{
+  __v16hi __lhs = (__v16hi) __A;
+  __v16hi __rhs = (__v16hi) __B;
+
+  return (__m256i) __builtin_ia32_pminuw256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pminud256 with __A and __B viewed as __v8si and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_min_epu32 (__m256i __A, __m256i __B)
+{
+  __v8si __lhs = (__v8si) __A;
+  __v8si __rhs = (__v8si) __B;
+
+  return (__m256i) __builtin_ia32_pminud256 (__lhs, __rhs);
+}
+
+/* Returns, uncast, the int produced by __builtin_ia32_pmovmskb256 for
+   __A viewed as __v32qi.  */
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movemask_epi8 (__m256i __A)
+{
+  __v32qi __src = (__v32qi) __A;
+
+  return __builtin_ia32_pmovmskb256 (__src);
+}
+
+/* Calls __builtin_ia32_pmovsxbw256 with the __m128i argument viewed as
+   __v16qi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi8_epi16 (__m128i __X)
+{
+  __v16qi __src = (__v16qi) __X;
+
+  return (__m256i) __builtin_ia32_pmovsxbw256 (__src);
+}
+
+/* Calls __builtin_ia32_pmovsxbd256 with the __m128i argument viewed as
+   __v16qi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi8_epi32 (__m128i __X)
+{
+  __v16qi __src = (__v16qi) __X;
+
+  return (__m256i) __builtin_ia32_pmovsxbd256 (__src);
+}
+
+/* Calls __builtin_ia32_pmovsxbq256 with the __m128i argument viewed as
+   __v16qi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi8_epi64 (__m128i __X)
+{
+  __v16qi __src = (__v16qi) __X;
+
+  return (__m256i) __builtin_ia32_pmovsxbq256 (__src);
+}
+
+/* Calls __builtin_ia32_pmovsxwd256 with the __m128i argument viewed as
+   __v8hi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi16_epi32 (__m128i __X)
+{
+  __v8hi __src = (__v8hi) __X;
+
+  return (__m256i) __builtin_ia32_pmovsxwd256 (__src);
+}
+
+/* Calls __builtin_ia32_pmovsxwq256 with the __m128i argument viewed as
+   __v8hi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi16_epi64 (__m128i __X)
+{
+  __v8hi __src = (__v8hi) __X;
+
+  return (__m256i) __builtin_ia32_pmovsxwq256 (__src);
+}
+
+/* Calls __builtin_ia32_pmovsxdq256 with the __m128i argument viewed as
+   __v4si and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi32_epi64 (__m128i __X)
+{
+  __v4si __src = (__v4si) __X;
+
+  return (__m256i) __builtin_ia32_pmovsxdq256 (__src);
+}
+
+/* Calls __builtin_ia32_pmovzxbw256 with the __m128i argument viewed as
+   __v16qi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepu8_epi16 (__m128i __X)
+{
+  __v16qi __src = (__v16qi) __X;
+
+  return (__m256i) __builtin_ia32_pmovzxbw256 (__src);
+}
+
+/* Calls __builtin_ia32_pmovzxbd256 with the __m128i argument viewed as
+   __v16qi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepu8_epi32 (__m128i __X)
+{
+  __v16qi __src = (__v16qi) __X;
+
+  return (__m256i) __builtin_ia32_pmovzxbd256 (__src);
+}
+
+/* Calls __builtin_ia32_pmovzxbq256 with the __m128i argument viewed as
+   __v16qi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepu8_epi64 (__m128i __X)
+{
+  __v16qi __src = (__v16qi) __X;
+
+  return (__m256i) __builtin_ia32_pmovzxbq256 (__src);
+}
+
+/* Calls __builtin_ia32_pmovzxwd256 with the __m128i argument viewed as
+   __v8hi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepu16_epi32 (__m128i __X)
+{
+  __v8hi __src = (__v8hi) __X;
+
+  return (__m256i) __builtin_ia32_pmovzxwd256 (__src);
+}
+
+/* Calls __builtin_ia32_pmovzxwq256 with the __m128i argument viewed as
+   __v8hi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepu16_epi64 (__m128i __X)
+{
+  __v8hi __src = (__v8hi) __X;
+
+  return (__m256i) __builtin_ia32_pmovzxwq256 (__src);
+}
+
+/* Calls __builtin_ia32_pmovzxdq256 with the __m128i argument viewed as
+   __v4si and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepu32_epi64 (__m128i __X)
+{
+  __v4si __src = (__v4si) __X;
+
+  return (__m256i) __builtin_ia32_pmovzxdq256 (__src);
+}
+
+/* Calls __builtin_ia32_pmuldq256 with __X and __Y viewed as __v8si and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mul_epi32 (__m256i __X, __m256i __Y)
+{
+  __v8si __lhs = (__v8si) __X;
+  __v8si __rhs = (__v8si) __Y;
+
+  return (__m256i) __builtin_ia32_pmuldq256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pmulhrsw256 with __X and __Y viewed as __v16hi
+   and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mulhrs_epi16 (__m256i __X, __m256i __Y)
+{
+  __v16hi __lhs = (__v16hi) __X;
+  __v16hi __rhs = (__v16hi) __Y;
+
+  return (__m256i) __builtin_ia32_pmulhrsw256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pmulhuw256 with __A and __B viewed as __v16hi and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mulhi_epu16 (__m256i __A, __m256i __B)
+{
+  __v16hi __lhs = (__v16hi) __A;
+  __v16hi __rhs = (__v16hi) __B;
+
+  return (__m256i) __builtin_ia32_pmulhuw256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pmulhw256 with __A and __B viewed as __v16hi and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mulhi_epi16 (__m256i __A, __m256i __B)
+{
+  __v16hi __lhs = (__v16hi) __A;
+  __v16hi __rhs = (__v16hi) __B;
+
+  return (__m256i) __builtin_ia32_pmulhw256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pmullw256 with __A and __B viewed as __v16hi and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mullo_epi16 (__m256i __A, __m256i __B)
+{
+  __v16hi __lhs = (__v16hi) __A;
+  __v16hi __rhs = (__v16hi) __B;
+
+  return (__m256i) __builtin_ia32_pmullw256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pmulld256 with __A and __B viewed as __v8si and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mullo_epi32 (__m256i __A, __m256i __B)
+{
+  __v8si __lhs = (__v8si) __A;
+  __v8si __rhs = (__v8si) __B;
+
+  return (__m256i) __builtin_ia32_pmulld256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pmuludq256 with __A and __B viewed as __v8si and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mul_epu32 (__m256i __A, __m256i __B)
+{
+  __v8si __lhs = (__v8si) __A;
+  __v8si __rhs = (__v8si) __B;
+
+  return (__m256i) __builtin_ia32_pmuludq256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_por256 with __A and __B viewed as __v4di and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_or_si256 (__m256i __A, __m256i __B)
+{
+  __v4di __lhs = (__v4di) __A;
+  __v4di __rhs = (__v4di) __B;
+
+  return (__m256i) __builtin_ia32_por256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_psadbw256 with __A and __B viewed as __v32qi and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sad_epu8 (__m256i __A, __m256i __B)
+{
+  __v32qi __lhs = (__v32qi) __A;
+  __v32qi __rhs = (__v32qi) __B;
+
+  return (__m256i) __builtin_ia32_psadbw256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pshufb256 with __X and __Y (in that order) viewed
+   as __v32qi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_shuffle_epi8 (__m256i __X, __m256i __Y)
+{
+  __v32qi __lhs = (__v32qi) __X;
+  __v32qi __rhs = (__v32qi) __Y;
+
+  return (__m256i) __builtin_ia32_pshufb256 (__lhs, __rhs);
+}
+
+/* Three shuffles that take an int selector as their second argument:
+   _mm256_shuffle_epi32 (pshufd256, vector viewed as __v8si),
+   _mm256_shufflehi_epi16 (pshufhw256, __v16hi) and
+   _mm256_shufflelo_epi16 (pshuflw256, __v16hi).
+   With __OPTIMIZE__ defined they are always-inline functions; otherwise
+   they are macros, so that (int)(N) is written directly in the builtin
+   call at the point of use.  */
+#ifdef __OPTIMIZE__
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_shuffle_epi32 (__m256i __A, const int __mask)
+{
+ return (__m256i)__builtin_ia32_pshufd256 ((__v8si)__A, __mask);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_shufflehi_epi16 (__m256i __A, const int __mask)
+{
+ return (__m256i)__builtin_ia32_pshufhw256 ((__v16hi)__A, __mask);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_shufflelo_epi16 (__m256i __A, const int __mask)
+{
+ return (__m256i)__builtin_ia32_pshuflw256 ((__v16hi)__A, __mask);
+}
+#else
+/* Macro spellings of the same three operations.  */
+#define _mm256_shuffle_epi32(A, N) \
+ ((__m256i)__builtin_ia32_pshufd256 ((__v8si)(__m256i)(A), (int)(N)))
+#define _mm256_shufflehi_epi16(A, N) \
+ ((__m256i)__builtin_ia32_pshufhw256 ((__v16hi)(__m256i)(A), (int)(N)))
+#define _mm256_shufflelo_epi16(A, N) \
+ ((__m256i)__builtin_ia32_pshuflw256 ((__v16hi)(__m256i)(A), (int)(N)))
+#endif
+
+/* Calls __builtin_ia32_psignb256 with __X and __Y (in that order) viewed
+   as __v32qi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sign_epi8 (__m256i __X, __m256i __Y)
+{
+  __v32qi __lhs = (__v32qi) __X;
+  __v32qi __rhs = (__v32qi) __Y;
+
+  return (__m256i) __builtin_ia32_psignb256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_psignw256 with __X and __Y (in that order) viewed
+   as __v16hi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sign_epi16 (__m256i __X, __m256i __Y)
+{
+  __v16hi __lhs = (__v16hi) __X;
+  __v16hi __rhs = (__v16hi) __Y;
+
+  return (__m256i) __builtin_ia32_psignw256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_psignd256 with __X and __Y (in that order) viewed
+   as __v8si and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sign_epi32 (__m256i __X, __m256i __Y)
+{
+  __v8si __lhs = (__v8si) __X;
+  __v8si __rhs = (__v8si) __Y;
+
+  return (__m256i) __builtin_ia32_psignd256 (__lhs, __rhs);
+}
+
+/* Both spellings call __builtin_ia32_pslldqi256 with the vector passed
+   as __m256i (no element-type cast) and the second argument multiplied
+   by 8 before it reaches the builtin.  With __OPTIMIZE__ defined an
+   always-inline function is used; otherwise a macro, so that
+   (int)(N) * 8 is written directly in the builtin call.  */
+#ifdef __OPTIMIZE__
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_slli_si256 (__m256i __A, const int __N)
+{
+ return (__m256i)__builtin_ia32_pslldqi256 (__A, __N * 8);
+}
+#else
+#define _mm256_slli_si256(A, N) \
+ ((__m256i)__builtin_ia32_pslldqi256 ((__m256i)(A), (int)(N) * 8))
+#endif
+
+/* Calls __builtin_ia32_psllwi256 with __A viewed as __v16hi and the int
+   __B passed through unchanged; the result is returned as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_slli_epi16 (__m256i __A, int __B)
+{
+  __v16hi __src = (__v16hi) __A;
+
+  return (__m256i) __builtin_ia32_psllwi256 (__src, __B);
+}
+
+/* Calls __builtin_ia32_psllw256 with __A viewed as __v16hi and the
+   __m128i argument __B viewed as __v8hi; returns the result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sll_epi16 (__m256i __A, __m128i __B)
+{
+  __v16hi __src = (__v16hi) __A;
+  __v8hi __count = (__v8hi) __B;
+
+  return (__m256i) __builtin_ia32_psllw256 (__src, __count);
+}
+
+/* Calls __builtin_ia32_pslldi256 with __A viewed as __v8si and the int
+   __B passed through unchanged; the result is returned as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_slli_epi32 (__m256i __A, int __B)
+{
+  __v8si __src = (__v8si) __A;
+
+  return (__m256i) __builtin_ia32_pslldi256 (__src, __B);
+}
+
+/* Calls __builtin_ia32_pslld256 with __A viewed as __v8si and the
+   __m128i argument __B viewed as __v4si; returns the result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sll_epi32 (__m256i __A, __m128i __B)
+{
+  __v8si __src = (__v8si) __A;
+  __v4si __count = (__v4si) __B;
+
+  return (__m256i) __builtin_ia32_pslld256 (__src, __count);
+}
+
+/* Calls __builtin_ia32_psllqi256 with __A viewed as __v4di and the int
+   __B passed through unchanged; the result is returned as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_slli_epi64 (__m256i __A, int __B)
+{
+  __v4di __src = (__v4di) __A;
+
+  return (__m256i) __builtin_ia32_psllqi256 (__src, __B);
+}
+
+/* Calls __builtin_ia32_psllq256 with __A viewed as __v4di and the
+   __m128i argument __B viewed as __v2di; returns the result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sll_epi64 (__m256i __A, __m128i __B)
+{
+  __v4di __src = (__v4di) __A;
+  __v2di __count = (__v2di) __B;
+
+  return (__m256i) __builtin_ia32_psllq256 (__src, __count);
+}
+
+/* Calls __builtin_ia32_psrawi256 with __A viewed as __v16hi and the int
+   __B passed through unchanged; the result is returned as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srai_epi16 (__m256i __A, int __B)
+{
+  __v16hi __src = (__v16hi) __A;
+
+  return (__m256i) __builtin_ia32_psrawi256 (__src, __B);
+}
+
+/* Calls __builtin_ia32_psraw256 with __A viewed as __v16hi and the
+   __m128i argument __B viewed as __v8hi; returns the result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sra_epi16 (__m256i __A, __m128i __B)
+{
+  __v16hi __src = (__v16hi) __A;
+  __v8hi __count = (__v8hi) __B;
+
+  return (__m256i) __builtin_ia32_psraw256 (__src, __count);
+}
+
+/* Calls __builtin_ia32_psradi256 with __A viewed as __v8si and the int
+   __B passed through unchanged; the result is returned as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srai_epi32 (__m256i __A, int __B)
+{
+  __v8si __src = (__v8si) __A;
+
+  return (__m256i) __builtin_ia32_psradi256 (__src, __B);
+}
+
+/* Calls __builtin_ia32_psrad256 with __A viewed as __v8si and the
+   __m128i argument __B viewed as __v4si; returns the result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sra_epi32 (__m256i __A, __m128i __B)
+{
+  __v8si __src = (__v8si) __A;
+  __v4si __count = (__v4si) __B;
+
+  return (__m256i) __builtin_ia32_psrad256 (__src, __count);
+}
+
+/* Both spellings call __builtin_ia32_psrldqi256 with the vector passed
+   as __m256i (no element-type cast) and the second argument multiplied
+   by 8 before it reaches the builtin.  With __OPTIMIZE__ defined an
+   always-inline function is used; otherwise a macro, so that
+   (int)(N) * 8 is written directly in the builtin call.  */
+#ifdef __OPTIMIZE__
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srli_si256 (__m256i __A, const int __N)
+{
+ return (__m256i)__builtin_ia32_psrldqi256 (__A, __N * 8);
+}
+#else
+#define _mm256_srli_si256(A, N) \
+ ((__m256i)__builtin_ia32_psrldqi256 ((__m256i)(A), (int)(N) * 8))
+#endif
+
+/* Calls __builtin_ia32_psrlwi256 with __A viewed as __v16hi and the int
+   __B passed through unchanged; the result is returned as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srli_epi16 (__m256i __A, int __B)
+{
+  __v16hi __src = (__v16hi) __A;
+
+  return (__m256i) __builtin_ia32_psrlwi256 (__src, __B);
+}
+
+/* Calls __builtin_ia32_psrlw256 with __A viewed as __v16hi and the
+   __m128i argument __B viewed as __v8hi; returns the result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srl_epi16 (__m256i __A, __m128i __B)
+{
+  __v16hi __src = (__v16hi) __A;
+  __v8hi __count = (__v8hi) __B;
+
+  return (__m256i) __builtin_ia32_psrlw256 (__src, __count);
+}
+
+/* Calls __builtin_ia32_psrldi256 with __A viewed as __v8si and the int
+   __B passed through unchanged; the result is returned as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srli_epi32 (__m256i __A, int __B)
+{
+  __v8si __src = (__v8si) __A;
+
+  return (__m256i) __builtin_ia32_psrldi256 (__src, __B);
+}
+
+/* Calls __builtin_ia32_psrld256 with __A viewed as __v8si and the
+   __m128i argument __B viewed as __v4si; returns the result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srl_epi32 (__m256i __A, __m128i __B)
+{
+  __v8si __src = (__v8si) __A;
+  __v4si __count = (__v4si) __B;
+
+  return (__m256i) __builtin_ia32_psrld256 (__src, __count);
+}
+
+/* Calls __builtin_ia32_psrlqi256 with __A viewed as __v4di and the int
+   __B passed through unchanged; the result is returned as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srli_epi64 (__m256i __A, int __B)
+{
+  __v4di __src = (__v4di) __A;
+
+  return (__m256i) __builtin_ia32_psrlqi256 (__src, __B);
+}
+
+/* Calls __builtin_ia32_psrlq256 with __A viewed as __v4di and the
+   __m128i argument __B viewed as __v2di; returns the result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srl_epi64 (__m256i __A, __m128i __B)
+{
+  __v4di __src = (__v4di) __A;
+  __v2di __count = (__v2di) __B;
+
+  return (__m256i) __builtin_ia32_psrlq256 (__src, __count);
+}
+
+/* Calls __builtin_ia32_psubb256 with __A and __B (in that order) viewed
+   as __v32qi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sub_epi8 (__m256i __A, __m256i __B)
+{
+  __v32qi __lhs = (__v32qi) __A;
+  __v32qi __rhs = (__v32qi) __B;
+
+  return (__m256i) __builtin_ia32_psubb256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_psubw256 with __A and __B (in that order) viewed
+   as __v16hi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sub_epi16 (__m256i __A, __m256i __B)
+{
+  __v16hi __lhs = (__v16hi) __A;
+  __v16hi __rhs = (__v16hi) __B;
+
+  return (__m256i) __builtin_ia32_psubw256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_psubd256 with __A and __B (in that order) viewed
+   as __v8si and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sub_epi32 (__m256i __A, __m256i __B)
+{
+  __v8si __lhs = (__v8si) __A;
+  __v8si __rhs = (__v8si) __B;
+
+  return (__m256i) __builtin_ia32_psubd256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_psubq256 with __A and __B (in that order) viewed
+   as __v4di and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sub_epi64 (__m256i __A, __m256i __B)
+{
+  __v4di __lhs = (__v4di) __A;
+  __v4di __rhs = (__v4di) __B;
+
+  return (__m256i) __builtin_ia32_psubq256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_psubsb256 with __A and __B (in that order) viewed
+   as __v32qi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_subs_epi8 (__m256i __A, __m256i __B)
+{
+  __v32qi __lhs = (__v32qi) __A;
+  __v32qi __rhs = (__v32qi) __B;
+
+  return (__m256i) __builtin_ia32_psubsb256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_psubsw256 with __A and __B (in that order) viewed
+   as __v16hi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_subs_epi16 (__m256i __A, __m256i __B)
+{
+  __v16hi __lhs = (__v16hi) __A;
+  __v16hi __rhs = (__v16hi) __B;
+
+  return (__m256i) __builtin_ia32_psubsw256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_psubusb256 with __A and __B (in that order)
+   viewed as __v32qi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_subs_epu8 (__m256i __A, __m256i __B)
+{
+  __v32qi __lhs = (__v32qi) __A;
+  __v32qi __rhs = (__v32qi) __B;
+
+  return (__m256i) __builtin_ia32_psubusb256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_psubusw256 with __A and __B (in that order)
+   viewed as __v16hi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_subs_epu16 (__m256i __A, __m256i __B)
+{
+  __v16hi __lhs = (__v16hi) __A;
+  __v16hi __rhs = (__v16hi) __B;
+
+  return (__m256i) __builtin_ia32_psubusw256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_punpckhbw256 with __A and __B (in that order)
+   viewed as __v32qi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpackhi_epi8 (__m256i __A, __m256i __B)
+{
+  __v32qi __lhs = (__v32qi) __A;
+  __v32qi __rhs = (__v32qi) __B;
+
+  return (__m256i) __builtin_ia32_punpckhbw256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_punpckhwd256 with __A and __B (in that order)
+   viewed as __v16hi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpackhi_epi16 (__m256i __A, __m256i __B)
+{
+  __v16hi __lhs = (__v16hi) __A;
+  __v16hi __rhs = (__v16hi) __B;
+
+  return (__m256i) __builtin_ia32_punpckhwd256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_punpckhdq256 with __A and __B (in that order)
+   viewed as __v8si and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpackhi_epi32 (__m256i __A, __m256i __B)
+{
+  __v8si __lhs = (__v8si) __A;
+  __v8si __rhs = (__v8si) __B;
+
+  return (__m256i) __builtin_ia32_punpckhdq256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_punpckhqdq256 with __A and __B (in that order)
+   viewed as __v4di and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpackhi_epi64 (__m256i __A, __m256i __B)
+{
+  __v4di __lhs = (__v4di) __A;
+  __v4di __rhs = (__v4di) __B;
+
+  return (__m256i) __builtin_ia32_punpckhqdq256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_punpcklbw256 with __A and __B (in that order)
+   viewed as __v32qi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpacklo_epi8 (__m256i __A, __m256i __B)
+{
+  __v32qi __lhs = (__v32qi) __A;
+  __v32qi __rhs = (__v32qi) __B;
+
+  return (__m256i) __builtin_ia32_punpcklbw256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_punpcklwd256 with __A and __B (in that order)
+   viewed as __v16hi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpacklo_epi16 (__m256i __A, __m256i __B)
+{
+  __v16hi __lhs = (__v16hi) __A;
+  __v16hi __rhs = (__v16hi) __B;
+
+  return (__m256i) __builtin_ia32_punpcklwd256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_punpckldq256 with __A and __B (in that order)
+   viewed as __v8si and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpacklo_epi32 (__m256i __A, __m256i __B)
+{
+  __v8si __lhs = (__v8si) __A;
+  __v8si __rhs = (__v8si) __B;
+
+  return (__m256i) __builtin_ia32_punpckldq256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_punpcklqdq256 with __A and __B (in that order)
+   viewed as __v4di and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpacklo_epi64 (__m256i __A, __m256i __B)
+{
+  __v4di __lhs = (__v4di) __A;
+  __v4di __rhs = (__v4di) __B;
+
+  return (__m256i) __builtin_ia32_punpcklqdq256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_pxor256 with __A and __B viewed as __v4di and
+   returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_xor_si256 (__m256i __A, __m256i __B)
+{
+  __v4di __lhs = (__v4di) __A;
+  __v4di __rhs = (__v4di) __B;
+
+  return (__m256i) __builtin_ia32_pxor256 (__lhs, __rhs);
+}
+
+/* Calls __builtin_ia32_movntdqa256 on the address __X and returns its
+   result as __m256i.  The pointer is converted to a plain __v4di *, so
+   the const qualifier of the parameter is dropped for the call.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_stream_load_si256 (__m256i const *__X)
+{
+  __v4di *__addr = (__v4di *) __X;
+
+  return (__m256i) __builtin_ia32_movntdqa256 (__addr);
+}
+
+/* Calls __builtin_ia32_vbroadcastss_ps with __X viewed as __v4sf and
+   returns its result as __m128.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_broadcastss_ps (__m128 __X)
+{
+  __v4sf __src = (__v4sf) __X;
+
+  return (__m128) __builtin_ia32_vbroadcastss_ps (__src);
+}
+
+/* Calls __builtin_ia32_vbroadcastss_ps256 with the __m128 argument
+   viewed as __v4sf and returns its result as __m256.  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcastss_ps (__m128 __X)
+{
+  __v4sf __src = (__v4sf) __X;
+
+  return (__m256) __builtin_ia32_vbroadcastss_ps256 (__src);
+}
+
+/* Calls __builtin_ia32_vbroadcastsd_pd256 with the __m128d argument
+   viewed as __v2df and returns its result as __m256d.  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcastsd_pd (__m128d __X)
+{
+  __v2df __src = (__v2df) __X;
+
+  return (__m256d) __builtin_ia32_vbroadcastsd_pd256 (__src);
+}
+
+/* Calls __builtin_ia32_vbroadcastsi256 with the __m128i argument viewed
+   as __v2di and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcastsi128_si256 (__m128i __X)
+{
+  __v2di __src = (__v2di) __X;
+
+  return (__m256i) __builtin_ia32_vbroadcastsi256 (__src);
+}
+
+/* Both spellings call __builtin_ia32_pblendd128 with the two __m128i
+   vectors viewed as __v4si and the third argument passed as an int.
+   With __OPTIMIZE__ defined an always-inline function is used; otherwise
+   a macro, so that (int)(M) is written directly in the builtin call.  */
+#ifdef __OPTIMIZE__
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_blend_epi32 (__m128i __X, __m128i __Y, const int __M)
+{
+ return (__m128i) __builtin_ia32_pblendd128 ((__v4si)__X,
+ (__v4si)__Y,
+ __M);
+}
+#else
+#define _mm_blend_epi32(X, Y, M) \
+ ((__m128i) __builtin_ia32_pblendd128 ((__v4si)(__m128i)(X), \
+ (__v4si)(__m128i)(Y), (int)(M)))
+#endif
+
+/* Both spellings call __builtin_ia32_pblendd256 with the two __m256i
+   vectors viewed as __v8si and the third argument passed as an int.
+   With __OPTIMIZE__ defined an always-inline function is used; otherwise
+   a macro, so that (int)(M) is written directly in the builtin call.  */
+#ifdef __OPTIMIZE__
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_blend_epi32 (__m256i __X, __m256i __Y, const int __M)
+{
+ return (__m256i) __builtin_ia32_pblendd256 ((__v8si)__X,
+ (__v8si)__Y,
+ __M);
+}
+#else
+#define _mm256_blend_epi32(X, Y, M) \
+ ((__m256i) __builtin_ia32_pblendd256 ((__v8si)(__m256i)(X), \
+ (__v8si)(__m256i)(Y), (int)(M)))
+#endif
+
+/* Calls __builtin_ia32_pbroadcastb256 with the __m128i argument viewed
+   as __v16qi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcastb_epi8 (__m128i __X)
+{
+  __v16qi __src = (__v16qi) __X;
+
+  return (__m256i) __builtin_ia32_pbroadcastb256 (__src);
+}
+
+/* Calls __builtin_ia32_pbroadcastw256 with the __m128i argument viewed
+   as __v8hi and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcastw_epi16 (__m128i __X)
+{
+  __v8hi __src = (__v8hi) __X;
+
+  return (__m256i) __builtin_ia32_pbroadcastw256 (__src);
+}
+
+/* Calls __builtin_ia32_pbroadcastd256 with the __m128i argument viewed
+   as __v4si and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcastd_epi32 (__m128i __X)
+{
+  __v4si __src = (__v4si) __X;
+
+  return (__m256i) __builtin_ia32_pbroadcastd256 (__src);
+}
+
+/* Calls __builtin_ia32_pbroadcastq256 with the __m128i argument viewed
+   as __v2di and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcastq_epi64 (__m128i __X)
+{
+  __v2di __src = (__v2di) __X;
+
+  return (__m256i) __builtin_ia32_pbroadcastq256 (__src);
+}
+
+/* Calls __builtin_ia32_pbroadcastb128 with __X viewed as __v16qi and
+   returns its result as __m128i.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_broadcastb_epi8 (__m128i __X)
+{
+  __v16qi __src = (__v16qi) __X;
+
+  return (__m128i) __builtin_ia32_pbroadcastb128 (__src);
+}
+
+/* Calls __builtin_ia32_pbroadcastw128 with __X viewed as __v8hi and
+   returns its result as __m128i.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_broadcastw_epi16 (__m128i __X)
+{
+  __v8hi __src = (__v8hi) __X;
+
+  return (__m128i) __builtin_ia32_pbroadcastw128 (__src);
+}
+
+/* Calls __builtin_ia32_pbroadcastd128 with __X viewed as __v4si and
+   returns its result as __m128i.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_broadcastd_epi32 (__m128i __X)
+{
+  __v4si __src = (__v4si) __X;
+
+  return (__m128i) __builtin_ia32_pbroadcastd128 (__src);
+}
+
+/* Calls __builtin_ia32_pbroadcastq128 with __X viewed as __v2di and
+   returns its result as __m128i.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_broadcastq_epi64 (__m128i __X)
+{
+  __v2di __src = (__v2di) __X;
+
+  return (__m128i) __builtin_ia32_pbroadcastq128 (__src);
+}
+
+/* Calls __builtin_ia32_permvarsi256 with __X and __Y (in that order)
+   viewed as __v8si and returns its result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutevar8x32_epi32 (__m256i __X, __m256i __Y)
+{
+  __v8si __lhs = (__v8si) __X;
+  __v8si __rhs = (__v8si) __Y;
+
+  return (__m256i) __builtin_ia32_permvarsi256 (__lhs, __rhs);
+}
+
+/* Both spellings call __builtin_ia32_permdf256 with the __m256d vector
+   viewed as __v4df and the second argument passed as an int.  With
+   __OPTIMIZE__ defined an always-inline function is used; otherwise a
+   macro, so that (int)(M) is written directly in the builtin call.  */
+#ifdef __OPTIMIZE__
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute4x64_pd (__m256d __X, const int __M)
+{
+ return (__m256d) __builtin_ia32_permdf256 ((__v4df)__X, __M);
+}
+#else
+#define _mm256_permute4x64_pd(X, M) \
+ ((__m256d) __builtin_ia32_permdf256 ((__v4df)(__m256d)(X), (int)(M)))
+#endif
+
+/* Calls __builtin_ia32_permvarsf256 with __X viewed as __v8sf and __Y
+   viewed as __v8si, and returns its result as __m256.  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutevar8x32_ps (__m256 __X, __m256i __Y)
+{
+  __v8sf __lhs = (__v8sf) __X;
+  __v8si __rhs = (__v8si) __Y;
+
+  return (__m256) __builtin_ia32_permvarsf256 (__lhs, __rhs);
+}
+
+/* _mm256_permute4x64_epi64 forwards X (as __v4di) and M to
+   __builtin_ia32_permdi256.  It is an always-inline function when
+   __OPTIMIZE__ is defined and a macro otherwise (presumably so that M
+   reaches the builtin as a literal constant -- confirm).  */
+#ifdef __OPTIMIZE__
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute4x64_epi64 (__m256i __X, const int __M)
+{
+  __v4di __src = (__v4di) __X;
+  return (__m256i) __builtin_ia32_permdi256 (__src, __M);
+}
+#else
+#define _mm256_permute4x64_epi64(X, M) \
+  ((__m256i) __builtin_ia32_permdi256 ((__v4di)(__m256i)(X), \
+                                       (int)(M)))
+#endif
+
+
+/* _mm256_permute2x128_si256 forwards X and Y (both as __v4di) and M to
+   __builtin_ia32_permti256.  It is an always-inline function when
+   __OPTIMIZE__ is defined and a macro otherwise (presumably so that M
+   reaches the builtin as a literal constant -- confirm).  */
+#ifdef __OPTIMIZE__
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute2x128_si256 (__m256i __X, __m256i __Y, const int __M)
+{
+  __v4di __first = (__v4di) __X;
+  __v4di __second = (__v4di) __Y;
+  return (__m256i) __builtin_ia32_permti256 (__first, __second, __M);
+}
+#else
+#define _mm256_permute2x128_si256(X, Y, M) \
+  ((__m256i) __builtin_ia32_permti256 ((__v4di)(__m256i)(X), \
+                                       (__v4di)(__m256i)(Y), \
+                                       (int)(M)))
+#endif
+
+/* _mm256_extracti128_si256 forwards X (as __v4di) and M to
+   __builtin_ia32_extract128i256 and yields an __m128i.  It is an
+   always-inline function when __OPTIMIZE__ is defined and a macro
+   otherwise (presumably so that M reaches the builtin as a literal
+   constant -- confirm).  */
+#ifdef __OPTIMIZE__
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extracti128_si256 (__m256i __X, const int __M)
+{
+  __v4di __src = (__v4di) __X;
+  return (__m128i) __builtin_ia32_extract128i256 (__src, __M);
+}
+#else
+#define _mm256_extracti128_si256(X, M) \
+  ((__m128i) __builtin_ia32_extract128i256 ((__v4di)(__m256i)(X), \
+                                            (int)(M)))
+#endif
+
+/* _mm256_inserti128_si256 forwards X (as __v4di), Y (as __v2di) and M to
+   __builtin_ia32_insert128i256 and yields an __m256i.  It is an
+   always-inline function when __OPTIMIZE__ is defined and a macro
+   otherwise (presumably so that M reaches the builtin as a literal
+   constant -- confirm).  */
+#ifdef __OPTIMIZE__
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_inserti128_si256 (__m256i __X, __m128i __Y, const int __M)
+{
+  __v4di __wide = (__v4di) __X;
+  __v2di __narrow = (__v2di) __Y;
+  return (__m256i) __builtin_ia32_insert128i256 (__wide, __narrow, __M);
+}
+#else
+#define _mm256_inserti128_si256(X, Y, M) \
+  ((__m256i) __builtin_ia32_insert128i256 ((__v4di)(__m256i)(X), \
+                                           (__v2di)(__m128i)(Y), \
+                                           (int)(M)))
+#endif
+
+/* Calls __builtin_ia32_maskloadd256 with __X viewed as a pointer to
+   const __v8si and __M viewed as __v8si; the result is returned as
+   __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskload_epi32 (int const *__X, __m256i __M)
+{
+  const __v8si *__addr = (const __v8si *) __X;
+  return (__m256i) __builtin_ia32_maskloadd256 (__addr, (__v8si) __M);
+}
+
+/* Calls __builtin_ia32_maskloadq256 with __X viewed as a pointer to
+   const __v4di and __M viewed as __v4di; the result is returned as
+   __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskload_epi64 (long long const *__X, __m256i __M)
+{
+  const __v4di *__addr = (const __v4di *) __X;
+  return (__m256i) __builtin_ia32_maskloadq256 (__addr, (__v4di) __M);
+}
+
+/* Calls __builtin_ia32_maskloadd with __X viewed as a pointer to const
+   __v4si and __M viewed as __v4si; the result is returned as __m128i.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskload_epi32 (int const *__X, __m128i __M)
+{
+  const __v4si *__addr = (const __v4si *) __X;
+  return (__m128i) __builtin_ia32_maskloadd (__addr, (__v4si) __M);
+}
+
+/* Calls __builtin_ia32_maskloadq with __X viewed as a pointer to const
+   __v2di and __M viewed as __v2di; the result is returned as __m128i.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskload_epi64 (long long const *__X, __m128i __M)
+{
+  const __v2di *__addr = (const __v2di *) __X;
+  return (__m128i) __builtin_ia32_maskloadq (__addr, (__v2di) __M);
+}
+
+/* Calls __builtin_ia32_maskstored256 with __X viewed as a pointer to
+   __v8si and with __M and __Y viewed as __v8si.  Returns nothing.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskstore_epi32 (int *__X, __m256i __M, __m256i __Y)
+{
+  __v8si *__addr = (__v8si *) __X;
+  __builtin_ia32_maskstored256 (__addr, (__v8si) __M, (__v8si) __Y);
+}
+
+/* Calls __builtin_ia32_maskstoreq256 with __X viewed as a pointer to
+   __v4di and with __M and __Y viewed as __v4di.  Returns nothing.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskstore_epi64 (long long *__X, __m256i __M, __m256i __Y)
+{
+  __v4di *__addr = (__v4di *) __X;
+  __builtin_ia32_maskstoreq256 (__addr, (__v4di) __M, (__v4di) __Y);
+}
+
+/* Calls __builtin_ia32_maskstored with __X viewed as a pointer to __v4si
+   and with __M and __Y viewed as __v4si.  Returns nothing.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskstore_epi32 (int *__X, __m128i __M, __m128i __Y)
+{
+  __v4si *__addr = (__v4si *) __X;
+  __builtin_ia32_maskstored (__addr, (__v4si) __M, (__v4si) __Y);
+}
+
+/* Calls __builtin_ia32_maskstoreq with __X viewed as a pointer to __v2di
+   and with __M and __Y viewed as __v2di.  Returns nothing.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskstore_epi64 (long long *__X, __m128i __M, __m128i __Y)
+{
+  __v2di *__addr = (__v2di *) __X;
+  __builtin_ia32_maskstoreq (__addr, (__v2di) __M, (__v2di) __Y);
+}
+
+/* Passes __X and __Y, both viewed as __v8si, to __builtin_ia32_psllv8si
+   and returns the result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sllv_epi32 (__m256i __X, __m256i __Y)
+{
+  __v8si __lhs = (__v8si) __X;
+  __v8si __rhs = (__v8si) __Y;
+  return (__m256i) __builtin_ia32_psllv8si (__lhs, __rhs);
+}
+
+/* Passes __X and __Y, both viewed as __v4si, to __builtin_ia32_psllv4si
+   and returns the result as __m128i.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sllv_epi32 (__m128i __X, __m128i __Y)
+{
+  __v4si __lhs = (__v4si) __X;
+  __v4si __rhs = (__v4si) __Y;
+  return (__m128i) __builtin_ia32_psllv4si (__lhs, __rhs);
+}
+
+/* Passes __X and __Y, both viewed as __v4di, to __builtin_ia32_psllv4di
+   and returns the result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sllv_epi64 (__m256i __X, __m256i __Y)
+{
+  __v4di __lhs = (__v4di) __X;
+  __v4di __rhs = (__v4di) __Y;
+  return (__m256i) __builtin_ia32_psllv4di (__lhs, __rhs);
+}
+
+/* Passes __X and __Y, both viewed as __v2di, to __builtin_ia32_psllv2di
+   and returns the result as __m128i.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sllv_epi64 (__m128i __X, __m128i __Y)
+{
+  __v2di __lhs = (__v2di) __X;
+  __v2di __rhs = (__v2di) __Y;
+  return (__m128i) __builtin_ia32_psllv2di (__lhs, __rhs);
+}
+
+/* Passes __X and __Y, both viewed as __v8si, to __builtin_ia32_psrav8si
+   and returns the result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srav_epi32 (__m256i __X, __m256i __Y)
+{
+  __v8si __lhs = (__v8si) __X;
+  __v8si __rhs = (__v8si) __Y;
+  return (__m256i) __builtin_ia32_psrav8si (__lhs, __rhs);
+}
+
+/* Passes __X and __Y, both viewed as __v4si, to __builtin_ia32_psrav4si
+   and returns the result as __m128i.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srav_epi32 (__m128i __X, __m128i __Y)
+{
+  __v4si __lhs = (__v4si) __X;
+  __v4si __rhs = (__v4si) __Y;
+  return (__m128i) __builtin_ia32_psrav4si (__lhs, __rhs);
+}
+
+/* Passes __X and __Y, both viewed as __v8si, to __builtin_ia32_psrlv8si
+   and returns the result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srlv_epi32 (__m256i __X, __m256i __Y)
+{
+  __v8si __lhs = (__v8si) __X;
+  __v8si __rhs = (__v8si) __Y;
+  return (__m256i) __builtin_ia32_psrlv8si (__lhs, __rhs);
+}
+
+/* Passes __X and __Y, both viewed as __v4si, to __builtin_ia32_psrlv4si
+   and returns the result as __m128i.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srlv_epi32 (__m128i __X, __m128i __Y)
+{
+  __v4si __lhs = (__v4si) __X;
+  __v4si __rhs = (__v4si) __Y;
+  return (__m128i) __builtin_ia32_psrlv4si (__lhs, __rhs);
+}
+
+/* Passes __X and __Y, both viewed as __v4di, to __builtin_ia32_psrlv4di
+   and returns the result as __m256i.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srlv_epi64 (__m256i __X, __m256i __Y)
+{
+  __v4di __lhs = (__v4di) __X;
+  __v4di __rhs = (__v4di) __Y;
+  return (__m256i) __builtin_ia32_psrlv4di (__lhs, __rhs);
+}
+
+/* Passes __X and __Y, both viewed as __v2di, to __builtin_ia32_psrlv2di
+   and returns the result as __m128i.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srlv_epi64 (__m128i __X, __m128i __Y)
+{
+  __v2di __lhs = (__v2di) __X;
+  __v2di __rhs = (__v2di) __Y;
+  return (__m128i) __builtin_ia32_psrlv2di (__lhs, __rhs);
+}
+
+#ifdef __OPTIMIZE__
+/* Unmasked gather through __builtin_ia32_gathersiv2df: the source operand
+   is _mm_undefined_pd () and the mask is the result of comparing a zero
+   vector with itself for equality.  All parameter and local names are
+   __-prefixed (reserved) so that user macros named `base', `index',
+   `scale', `mask' or `zero' cannot break this header.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i32gather_pd (double const *__base, __m128i __index, const int __scale)
+{
+  __v2df __zero = _mm_setzero_pd ();
+  __v2df __mask = _mm_cmpeq_pd (__zero, __zero);
+
+  return (__m128d) __builtin_ia32_gathersiv2df
+    (_mm_undefined_pd (), __base, (__v4si) __index, __mask, __scale);
+}
+
+/* Masked gather: forwards __src, __base, __index (as __v4si), __mask and
+   __scale to __builtin_ia32_gathersiv2df.  Parameter names are
+   __-prefixed (reserved) so that user macros named `src', `base',
+   `index', `mask' or `scale' cannot break this header.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i32gather_pd (__m128d __src, double const *__base, __m128i __index,
+                       __m128d __mask, const int __scale)
+{
+  return (__m128d) __builtin_ia32_gathersiv2df
+    ((__v2df) __src, __base, (__v4si) __index, (__v2df) __mask, __scale);
+}
+
+/* Unmasked gather through __builtin_ia32_gathersiv4df: the source operand
+   is _mm256_undefined_pd () and the mask is the result of comparing a
+   zero vector with itself using _CMP_EQ_OQ.  All parameter and local
+   names are __-prefixed (reserved) so that user macros named `base',
+   `index', `scale', `mask' or `zero' cannot break this header.  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i32gather_pd (double const *__base, __m128i __index, const int __scale)
+{
+  __v4df __zero = _mm256_setzero_pd ();
+  __v4df __mask = _mm256_cmp_pd (__zero, __zero, _CMP_EQ_OQ);
+
+  return (__m256d) __builtin_ia32_gathersiv4df
+    (_mm256_undefined_pd (), __base, (__v4si) __index, __mask, __scale);
+}
+
+/* Masked gather: forwards __src, __base, __index (as __v4si), __mask and
+   __scale to __builtin_ia32_gathersiv4df.  Parameter names are
+   __-prefixed (reserved) so that user macros named `src', `base',
+   `index', `mask' or `scale' cannot break this header.  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i32gather_pd (__m256d __src, double const *__base,
+                          __m128i __index, __m256d __mask, const int __scale)
+{
+  return (__m256d) __builtin_ia32_gathersiv4df
+    ((__v4df) __src, __base, (__v4si) __index, (__v4df) __mask, __scale);
+}
+
+/* Unmasked gather through __builtin_ia32_gatherdiv2df: the source operand
+   is a zero vector and the mask is that vector compared with itself for
+   equality.  All parameter and local names are __-prefixed (reserved) so
+   that user macros named `base', `index', `scale', `src' or `mask'
+   cannot break this header.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i64gather_pd (double const *__base, __m128i __index, const int __scale)
+{
+  __v2df __src = _mm_setzero_pd ();
+  __v2df __mask = _mm_cmpeq_pd (__src, __src);
+
+  return (__m128d) __builtin_ia32_gatherdiv2df
+    (__src, __base, (__v2di) __index, __mask, __scale);
+}
+
+/* Masked gather: forwards __src, __base, __index (as __v2di), __mask and
+   __scale to __builtin_ia32_gatherdiv2df.  Parameter names are
+   __-prefixed (reserved) so that user macros named `src', `base',
+   `index', `mask' or `scale' cannot break this header.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i64gather_pd (__m128d __src, double const *__base, __m128i __index,
+                       __m128d __mask, const int __scale)
+{
+  return (__m128d) __builtin_ia32_gatherdiv2df
+    ((__v2df) __src, __base, (__v2di) __index, (__v2df) __mask, __scale);
+}
+
+/* Unmasked gather through __builtin_ia32_gatherdiv4df: the source operand
+   is a zero vector and the mask is that vector compared with itself
+   using _CMP_EQ_OQ.  All parameter and local names are __-prefixed
+   (reserved) so that user macros named `base', `index', `scale', `src'
+   or `mask' cannot break this header.  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i64gather_pd (double const *__base, __m256i __index, const int __scale)
+{
+  __v4df __src = _mm256_setzero_pd ();
+  __v4df __mask = _mm256_cmp_pd (__src, __src, _CMP_EQ_OQ);
+
+  return (__m256d) __builtin_ia32_gatherdiv4df
+    (__src, __base, (__v4di) __index, __mask, __scale);
+}
+
+/* Masked gather: forwards __src, __base, __index (as __v4di), __mask and
+   __scale to __builtin_ia32_gatherdiv4df.  Parameter names are
+   __-prefixed (reserved) so that user macros named `src', `base',
+   `index', `mask' or `scale' cannot break this header.  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i64gather_pd (__m256d __src, double const *__base,
+                          __m256i __index, __m256d __mask, const int __scale)
+{
+  return (__m256d) __builtin_ia32_gatherdiv4df
+    ((__v4df) __src, __base, (__v4di) __index, (__v4df) __mask, __scale);
+}
+
+/* Unmasked gather through __builtin_ia32_gathersiv4sf: the source operand
+   is a zero vector and the mask is that vector compared with itself for
+   equality.  All parameter and local names are __-prefixed (reserved) so
+   that user macros named `base', `index', `scale', `src' or `mask'
+   cannot break this header.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i32gather_ps (float const *__base, __m128i __index, const int __scale)
+{
+  __v4sf __src = _mm_setzero_ps ();
+  __v4sf __mask = _mm_cmpeq_ps (__src, __src);
+
+  return (__m128) __builtin_ia32_gathersiv4sf
+    (__src, __base, (__v4si) __index, __mask, __scale);
+}
+
+/* Masked gather: forwards __src, __base, __index (as __v4si), __mask and
+   __scale to __builtin_ia32_gathersiv4sf.  Parameter names are
+   __-prefixed (reserved) so that user macros named `src', `base',
+   `index', `mask' or `scale' cannot break this header.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i32gather_ps (__m128 __src, float const *__base, __m128i __index,
+                       __m128 __mask, const int __scale)
+{
+  return (__m128) __builtin_ia32_gathersiv4sf
+    ((__v4sf) __src, __base, (__v4si) __index, (__v4sf) __mask, __scale);
+}
+
+/* Unmasked gather through __builtin_ia32_gathersiv8sf: the source operand
+   is a zero vector and the mask is that vector compared with itself
+   using _CMP_EQ_OQ.  All parameter and local names are __-prefixed
+   (reserved) so that user macros named `base', `index', `scale', `src'
+   or `mask' cannot break this header.  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i32gather_ps (float const *__base, __m256i __index, const int __scale)
+{
+  __v8sf __src = _mm256_setzero_ps ();
+  __v8sf __mask = _mm256_cmp_ps (__src, __src, _CMP_EQ_OQ);
+
+  return (__m256) __builtin_ia32_gathersiv8sf
+    (__src, __base, (__v8si) __index, __mask, __scale);
+}
+
+/* Masked gather: forwards __src, __base, __index (as __v8si), __mask and
+   __scale to __builtin_ia32_gathersiv8sf.  Parameter names are
+   __-prefixed (reserved) so that user macros named `src', `base',
+   `index', `mask' or `scale' cannot break this header.  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i32gather_ps (__m256 __src, float const *__base,
+                          __m256i __index, __m256 __mask, const int __scale)
+{
+  return (__m256) __builtin_ia32_gathersiv8sf
+    ((__v8sf) __src, __base, (__v8si) __index, (__v8sf) __mask, __scale);
+}
+
+/* Unmasked gather through __builtin_ia32_gatherdiv4sf: the source operand
+   is a zero vector and the mask is that vector compared with itself for
+   equality.  All parameter and local names are __-prefixed (reserved) so
+   that user macros named `base', `index', `scale', `src' or `mask'
+   cannot break this header.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i64gather_ps (float const *__base, __m128i __index, const int __scale)
+{
+  __v4sf __src = _mm_setzero_ps ();
+  __v4sf __mask = _mm_cmpeq_ps (__src, __src);
+
+  return (__m128) __builtin_ia32_gatherdiv4sf
+    (__src, __base, (__v2di) __index, __mask, __scale);
+}
+
+/* Masked gather: forwards __src, __base, __index (as __v2di), __mask and
+   __scale to __builtin_ia32_gatherdiv4sf.  Parameter names are
+   __-prefixed (reserved) so that user macros named `src', `base',
+   `index', `mask' or `scale' cannot break this header.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i64gather_ps (__m128 __src, float const *__base, __m128i __index,
+                       __m128 __mask, const int __scale)
+{
+  return (__m128) __builtin_ia32_gatherdiv4sf
+    ((__v4sf) __src, __base, (__v2di) __index, (__v4sf) __mask, __scale);
+}
+
+/* Unmasked gather through __builtin_ia32_gatherdiv4sf256; takes a 256-bit
+   index vector and returns an __m128.  The source operand is a zero
+   vector and the mask is that vector compared with itself for equality.
+   All parameter and local names are __-prefixed (reserved) so that user
+   macros named `base', `index', `scale', `src' or `mask' cannot break
+   this header.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i64gather_ps (float const *__base, __m256i __index, const int __scale)
+{
+  __v4sf __src = _mm_setzero_ps ();
+  __v4sf __mask = _mm_cmpeq_ps (__src, __src);
+
+  return (__m128) __builtin_ia32_gatherdiv4sf256
+    (__src, __base, (__v4di) __index, __mask, __scale);
+}
+
+/* Masked gather: forwards __src, __base, __index (as __v4di), __mask and
+   __scale to __builtin_ia32_gatherdiv4sf256 and returns an __m128.
+   Parameter names are __-prefixed (reserved) so that user macros named
+   `src', `base', `index', `mask' or `scale' cannot break this header.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i64gather_ps (__m128 __src, float const *__base,
+                          __m256i __index, __m128 __mask, const int __scale)
+{
+  return (__m128) __builtin_ia32_gatherdiv4sf256
+    ((__v4sf) __src, __base, (__v4di) __index, (__v4sf) __mask, __scale);
+}
+
+/* Unmasked gather through __builtin_ia32_gathersiv2di with an all-zero
+   source vector and an all-ones mask vector.  All parameter and local
+   names are __-prefixed (reserved) so that user macros named `base',
+   `index', `scale', `src' or `mask' cannot break this header.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i32gather_epi64 (long long int const *__base,
+                     __m128i __index, const int __scale)
+{
+  __v2di __src = __extension__ (__v2di){ 0, 0 };
+  __v2di __mask = __extension__ (__v2di){ ~0, ~0 };
+
+  return (__m128i) __builtin_ia32_gathersiv2di
+    (__src, __base, (__v4si) __index, __mask, __scale);
+}
+
+/* Masked gather: forwards __src, __base, __index (as __v4si), __mask and
+   __scale to __builtin_ia32_gathersiv2di.  Parameter names are
+   __-prefixed (reserved) so that user macros named `src', `base',
+   `index', `mask' or `scale' cannot break this header.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i32gather_epi64 (__m128i __src, long long int const *__base,
+                          __m128i __index, __m128i __mask, const int __scale)
+{
+  return (__m128i) __builtin_ia32_gathersiv2di
+    ((__v2di) __src, __base, (__v4si) __index, (__v2di) __mask, __scale);
+}
+
+/* Unmasked gather through __builtin_ia32_gathersiv4di with an all-zero
+   source vector and an all-ones mask vector.  All parameter and local
+   names are __-prefixed (reserved) so that user macros named `base',
+   `index', `scale', `src' or `mask' cannot break this header.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i32gather_epi64 (long long int const *__base,
+                        __m128i __index, const int __scale)
+{
+  __v4di __src = __extension__ (__v4di){ 0, 0, 0, 0 };
+  __v4di __mask = __extension__ (__v4di){ ~0, ~0, ~0, ~0 };
+
+  return (__m256i) __builtin_ia32_gathersiv4di
+    (__src, __base, (__v4si) __index, __mask, __scale);
+}
+
+/* Masked gather: forwards __src, __base, __index (as __v4si), __mask and
+   __scale to __builtin_ia32_gathersiv4di.  Parameter names are
+   __-prefixed (reserved) so that user macros named `src', `base',
+   `index', `mask' or `scale' cannot break this header.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i32gather_epi64 (__m256i __src, long long int const *__base,
+                             __m128i __index, __m256i __mask,
+                             const int __scale)
+{
+  return (__m256i) __builtin_ia32_gathersiv4di
+    ((__v4di) __src, __base, (__v4si) __index, (__v4di) __mask, __scale);
+}
+
+/* Unmasked gather through __builtin_ia32_gatherdiv2di with an all-zero
+   source vector and an all-ones mask vector.  All parameter and local
+   names are __-prefixed (reserved) so that user macros named `base',
+   `index', `scale', `src' or `mask' cannot break this header.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i64gather_epi64 (long long int const *__base,
+                     __m128i __index, const int __scale)
+{
+  __v2di __src = __extension__ (__v2di){ 0, 0 };
+  __v2di __mask = __extension__ (__v2di){ ~0, ~0 };
+
+  return (__m128i) __builtin_ia32_gatherdiv2di
+    (__src, __base, (__v2di) __index, __mask, __scale);
+}
+
+/* Masked gather: forwards __src, __base, __index (as __v2di), __mask and
+   __scale to __builtin_ia32_gatherdiv2di.  Parameter names are
+   __-prefixed (reserved) so that user macros named `src', `base',
+   `index', `mask' or `scale' cannot break this header.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i64gather_epi64 (__m128i __src, long long int const *__base,
+                          __m128i __index, __m128i __mask, const int __scale)
+{
+  return (__m128i) __builtin_ia32_gatherdiv2di
+    ((__v2di) __src, __base, (__v2di) __index, (__v2di) __mask, __scale);
+}
+
+/* Unmasked gather through __builtin_ia32_gatherdiv4di with an all-zero
+   source vector and an all-ones mask vector.  All parameter and local
+   names are __-prefixed (reserved) so that user macros named `base',
+   `index', `scale', `src' or `mask' cannot break this header.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i64gather_epi64 (long long int const *__base,
+                        __m256i __index, const int __scale)
+{
+  __v4di __src = __extension__ (__v4di){ 0, 0, 0, 0 };
+  __v4di __mask = __extension__ (__v4di){ ~0, ~0, ~0, ~0 };
+
+  return (__m256i) __builtin_ia32_gatherdiv4di
+    (__src, __base, (__v4di) __index, __mask, __scale);
+}
+
+/* Masked gather: forwards __src, __base, __index (as __v4di), __mask and
+   __scale to __builtin_ia32_gatherdiv4di.  Parameter names are
+   __-prefixed (reserved) so that user macros named `src', `base',
+   `index', `mask' or `scale' cannot break this header.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i64gather_epi64 (__m256i __src, long long int const *__base,
+                             __m256i __index, __m256i __mask,
+                             const int __scale)
+{
+  return (__m256i) __builtin_ia32_gatherdiv4di
+    ((__v4di) __src, __base, (__v4di) __index, (__v4di) __mask, __scale);
+}
+
+/* Unmasked gather through __builtin_ia32_gathersiv4si with an all-zero
+   source vector and an all-ones mask vector.  All parameter and local
+   names are __-prefixed (reserved) so that user macros named `base',
+   `index', `scale', `src' or `mask' cannot break this header.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i32gather_epi32 (int const *__base, __m128i __index, const int __scale)
+{
+  __v4si __src = __extension__ (__v4si){ 0, 0, 0, 0 };
+  __v4si __mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
+
+  return (__m128i) __builtin_ia32_gathersiv4si
+    (__src, __base, (__v4si) __index, __mask, __scale);
+}
+
+/* Masked gather: forwards __src, __base, __index, and __mask (vectors as
+   __v4si) plus __scale to __builtin_ia32_gathersiv4si.  Parameter names
+   are __-prefixed (reserved) so that user macros named `src', `base',
+   `index', `mask' or `scale' cannot break this header.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i32gather_epi32 (__m128i __src, int const *__base, __m128i __index,
+                          __m128i __mask, const int __scale)
+{
+  return (__m128i) __builtin_ia32_gathersiv4si
+    ((__v4si) __src, __base, (__v4si) __index, (__v4si) __mask, __scale);
+}
+
+/* Unmasked gather through __builtin_ia32_gathersiv8si with an all-zero
+   source vector and an all-ones mask vector.  All parameter and local
+   names are __-prefixed (reserved) so that user macros named `base',
+   `index', `scale', `src' or `mask' cannot break this header.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i32gather_epi32 (int const *__base, __m256i __index,
+                        const int __scale)
+{
+  __v8si __src = __extension__ (__v8si){ 0, 0, 0, 0, 0, 0, 0, 0 };
+  __v8si __mask = __extension__ (__v8si){ ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
+
+  return (__m256i) __builtin_ia32_gathersiv8si
+    (__src, __base, (__v8si) __index, __mask, __scale);
+}
+
+/* Masked gather: forwards __src, __base, __index, and __mask (vectors as
+   __v8si) plus __scale to __builtin_ia32_gathersiv8si.  Parameter names
+   are __-prefixed (reserved) so that user macros named `src', `base',
+   `index', `mask' or `scale' cannot break this header.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i32gather_epi32 (__m256i __src, int const *__base,
+                             __m256i __index, __m256i __mask,
+                             const int __scale)
+{
+  return (__m256i) __builtin_ia32_gathersiv8si
+    ((__v8si) __src, __base, (__v8si) __index, (__v8si) __mask, __scale);
+}
+
+/* Unmasked gather through __builtin_ia32_gatherdiv4si with an all-zero
+   source vector and an all-ones mask vector.  All parameter and local
+   names are __-prefixed (reserved) so that user macros named `base',
+   `index', `scale', `src' or `mask' cannot break this header.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i64gather_epi32 (int const *__base, __m128i __index, const int __scale)
+{
+  __v4si __src = __extension__ (__v4si){ 0, 0, 0, 0 };
+  __v4si __mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
+
+  return (__m128i) __builtin_ia32_gatherdiv4si
+    (__src, __base, (__v2di) __index, __mask, __scale);
+}
+
+/* Masked gather: forwards __src, __base, __index (as __v2di), __mask and
+   __scale to __builtin_ia32_gatherdiv4si.  Parameter names are
+   __-prefixed (reserved) so that user macros named `src', `base',
+   `index', `mask' or `scale' cannot break this header.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i64gather_epi32 (__m128i __src, int const *__base, __m128i __index,
+                          __m128i __mask, const int __scale)
+{
+  return (__m128i) __builtin_ia32_gatherdiv4si
+    ((__v4si) __src, __base, (__v2di) __index, (__v4si) __mask, __scale);
+}
+
+/* Unmasked gather through __builtin_ia32_gatherdiv4si256; takes a 256-bit
+   index vector and returns an __m128i.  The source vector is all zeros
+   and the mask vector all ones.  All parameter and local names are
+   __-prefixed (reserved) so that user macros named `base', `index',
+   `scale', `src' or `mask' cannot break this header.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i64gather_epi32 (int const *__base, __m256i __index,
+                        const int __scale)
+{
+  __v4si __src = __extension__ (__v4si){ 0, 0, 0, 0 };
+  __v4si __mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
+
+  return (__m128i) __builtin_ia32_gatherdiv4si256
+    (__src, __base, (__v4di) __index, __mask, __scale);
+}
+
+/* Masked gather: forwards __src, __base, __index (as __v4di), __mask and
+   __scale to __builtin_ia32_gatherdiv4si256 and returns an __m128i.
+   Parameter names are __-prefixed (reserved) so that user macros named
+   `src', `base', `index', `mask' or `scale' cannot break this header.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i64gather_epi32 (__m128i __src, int const *__base,
+                             __m256i __index, __m128i __mask,
+                             const int __scale)
+{
+  return (__m128i) __builtin_ia32_gatherdiv4si256
+    ((__v4si) __src, __base, (__v4di) __index, (__v4si) __mask, __scale);
+}
+#else /* __OPTIMIZE__ */
+/* Macro form of _mm_i32gather_pd, used when __OPTIMIZE__ is not defined.
+   Every argument is parenthesized before it is cast so that compound
+   expressions (e.g. `p + 1' or `c ? a : b') are converted as a whole, and
+   the expansion itself is parenthesized.  The mask is -1.0 in every
+   element (presumably only its sign bit matters to the gather --
+   confirm against the ISA reference).  */
+#define _mm_i32gather_pd(BASE, INDEX, SCALE) \
+  ((__m128d) __builtin_ia32_gathersiv2df \
+    ((__v2df) _mm_setzero_pd (), \
+     (double const *) (BASE), \
+     (__v4si)(__m128i) (INDEX), \
+     (__v2df) _mm_set1_pd ((double)(long long int) -1), \
+     (int) (SCALE)))
+
+/* Macro form of _mm_mask_i32gather_pd, used when __OPTIMIZE__ is not
+   defined.  Every argument is parenthesized before it is cast so that
+   compound expressions are converted as a whole, and the expansion
+   itself is parenthesized.  */
+#define _mm_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE) \
+  ((__m128d) __builtin_ia32_gathersiv2df \
+    ((__v2df)(__m128d) (SRC), \
+     (double const *) (BASE), \
+     (__v4si)(__m128i) (INDEX), \
+     (__v2df)(__m128d) (MASK), \
+     (int) (SCALE)))
+
+/* Macro form of _mm256_i32gather_pd, used when __OPTIMIZE__ is not
+   defined.  Every argument is parenthesized before it is cast so that
+   compound expressions are converted as a whole, and the expansion
+   itself is parenthesized.  The mask is -1.0 in every element
+   (presumably only its sign bit matters to the gather -- confirm).  */
+#define _mm256_i32gather_pd(BASE, INDEX, SCALE) \
+  ((__m256d) __builtin_ia32_gathersiv4df \
+    ((__v4df) _mm256_setzero_pd (), \
+     (double const *) (BASE), \
+     (__v4si)(__m128i) (INDEX), \
+     (__v4df) _mm256_set1_pd ((double)(long long int) -1), \
+     (int) (SCALE)))
+
+/* Macro form of _mm256_mask_i32gather_pd, used when __OPTIMIZE__ is not
+   defined.  Every argument is parenthesized before it is cast so that
+   compound expressions are converted as a whole, and the expansion
+   itself is parenthesized.  */
+#define _mm256_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE) \
+  ((__m256d) __builtin_ia32_gathersiv4df \
+    ((__v4df)(__m256d) (SRC), \
+     (double const *) (BASE), \
+     (__v4si)(__m128i) (INDEX), \
+     (__v4df)(__m256d) (MASK), \
+     (int) (SCALE)))
+
+/* Macro form of _mm_i64gather_pd, used when __OPTIMIZE__ is not defined.
+   Every argument is parenthesized before it is cast so that compound
+   expressions are converted as a whole, and the expansion itself is
+   parenthesized.  The mask is -1.0 in every element (presumably only
+   its sign bit matters to the gather -- confirm).  */
+#define _mm_i64gather_pd(BASE, INDEX, SCALE) \
+  ((__m128d) __builtin_ia32_gatherdiv2df \
+    ((__v2df) _mm_setzero_pd (), \
+     (double const *) (BASE), \
+     (__v2di)(__m128i) (INDEX), \
+     (__v2df) _mm_set1_pd ((double)(long long int) -1), \
+     (int) (SCALE)))
+
+/* Macro form of _mm_mask_i64gather_pd, used when __OPTIMIZE__ is not
+   defined.  Every argument is parenthesized before it is cast so that
+   compound expressions are converted as a whole, and the expansion
+   itself is parenthesized.  */
+#define _mm_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE) \
+  ((__m128d) __builtin_ia32_gatherdiv2df \
+    ((__v2df)(__m128d) (SRC), \
+     (double const *) (BASE), \
+     (__v2di)(__m128i) (INDEX), \
+     (__v2df)(__m128d) (MASK), \
+     (int) (SCALE)))
+
+/* Macro form of _mm256_i64gather_pd, used when __OPTIMIZE__ is not
+   defined.  Every argument is parenthesized before it is cast so that
+   compound expressions are converted as a whole, and the expansion
+   itself is parenthesized.  The mask is -1.0 in every element
+   (presumably only its sign bit matters to the gather -- confirm).  */
+#define _mm256_i64gather_pd(BASE, INDEX, SCALE) \
+  ((__m256d) __builtin_ia32_gatherdiv4df \
+    ((__v4df) _mm256_setzero_pd (), \
+     (double const *) (BASE), \
+     (__v4di)(__m256i) (INDEX), \
+     (__v4df) _mm256_set1_pd ((double)(long long int) -1), \
+     (int) (SCALE)))
+
+/* Macro form of _mm256_mask_i64gather_pd, used when __OPTIMIZE__ is not
+   defined.  Every argument is parenthesized before it is cast so that
+   compound expressions are converted as a whole, and the expansion
+   itself is parenthesized.  */
+#define _mm256_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE) \
+  ((__m256d) __builtin_ia32_gatherdiv4df \
+    ((__v4df)(__m256d) (SRC), \
+     (double const *) (BASE), \
+     (__v4di)(__m256i) (INDEX), \
+     (__v4df)(__m256d) (MASK), \
+     (int) (SCALE)))
+
+/* Macro form of _mm_i32gather_ps, used when __OPTIMIZE__ is not defined.
+   Every argument is parenthesized before it is cast so that compound
+   expressions are converted as a whole, and the expansion itself is
+   parenthesized.  The mask is -1.0f in every element (presumably only
+   its sign bit matters to the gather -- confirm) and is now explicitly
+   cast to __v4sf like the sibling macros do.  */
+#define _mm_i32gather_ps(BASE, INDEX, SCALE) \
+  ((__m128) __builtin_ia32_gathersiv4sf \
+    ((__v4sf) _mm_setzero_ps (), \
+     (float const *) (BASE), \
+     (__v4si)(__m128i) (INDEX), \
+     (__v4sf) _mm_set1_ps ((float)(int) -1), \
+     (int) (SCALE)))
+
+/* Macro form of _mm_mask_i32gather_ps, used when __OPTIMIZE__ is not
+   defined.  SRC and MASK are single-precision vectors, so they are cast
+   through __m128 (not __m128d).  Every argument is parenthesized before
+   it is cast so that compound expressions are converted as a whole, and
+   the expansion itself is parenthesized.  */
+#define _mm_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
+  ((__m128) __builtin_ia32_gathersiv4sf \
+    ((__v4sf)(__m128) (SRC), \
+     (float const *) (BASE), \
+     (__v4si)(__m128i) (INDEX), \
+     (__v4sf)(__m128) (MASK), \
+     (int) (SCALE)))
+
+/* Macro form of _mm256_i32gather_ps, used when __OPTIMIZE__ is not
+   defined.  Every argument is parenthesized before it is cast so that
+   compound expressions are converted as a whole, and the expansion
+   itself is parenthesized.  The mask is -1.0f in every element
+   (presumably only its sign bit matters to the gather -- confirm).  */
+#define _mm256_i32gather_ps(BASE, INDEX, SCALE) \
+  ((__m256) __builtin_ia32_gathersiv8sf \
+    ((__v8sf) _mm256_setzero_ps (), \
+     (float const *) (BASE), \
+     (__v8si)(__m256i) (INDEX), \
+     (__v8sf) _mm256_set1_ps ((float)(int) -1), \
+     (int) (SCALE)))
+
+/* Macro form of _mm256_mask_i32gather_ps, used when __OPTIMIZE__ is not
+   defined.  MASK is a single-precision vector, so it is cast through
+   __m256 (not __m256d).  Every argument is parenthesized before it is
+   cast so that compound expressions are converted as a whole, and the
+   expansion itself is parenthesized.  */
+#define _mm256_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
+  ((__m256) __builtin_ia32_gathersiv8sf \
+    ((__v8sf)(__m256) (SRC), \
+     (float const *) (BASE), \
+     (__v8si)(__m256i) (INDEX), \
+     (__v8sf)(__m256) (MASK), \
+     (int) (SCALE)))
+
+/* Macro form of _mm_i64gather_ps, used when __OPTIMIZE__ is not defined.
+   The zero source operand is built with _mm_setzero_ps (the result is a
+   single-precision vector; _mm_setzero_pd was a type slip).  Every
+   argument is parenthesized before it is cast so that compound
+   expressions are converted as a whole, and the expansion itself is
+   parenthesized.  The mask is -1.0f in every element (presumably only
+   its sign bit matters to the gather -- confirm).  */
+#define _mm_i64gather_ps(BASE, INDEX, SCALE) \
+  ((__m128) __builtin_ia32_gatherdiv4sf \
+    ((__v4sf) _mm_setzero_ps (), \
+     (float const *) (BASE), \
+     (__v2di)(__m128i) (INDEX), \
+     (__v4sf) _mm_set1_ps ((float)(int) -1), \
+     (int) (SCALE)))
+
+/* Macro form of _mm_mask_i64gather_ps, used when __OPTIMIZE__ is not
+   defined.  MASK is a single-precision vector, so it is cast through
+   __m128 (not __m128d).  Every argument is parenthesized before it is
+   cast so that compound expressions are converted as a whole, and the
+   expansion itself is parenthesized.  */
+#define _mm_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
+  ((__m128) __builtin_ia32_gatherdiv4sf \
+    ((__v4sf)(__m128) (SRC), \
+     (float const *) (BASE), \
+     (__v2di)(__m128i) (INDEX), \
+     (__v4sf)(__m128) (MASK), \
+     (int) (SCALE)))
+
+/* Macro form of _mm256_i64gather_ps, used when __OPTIMIZE__ is not
+   defined.  Every argument is parenthesized before it is cast so that
+   compound expressions are converted as a whole, and the expansion
+   itself is parenthesized.  The mask is -1.0f in every element
+   (presumably only its sign bit matters to the gather -- confirm).  */
+#define _mm256_i64gather_ps(BASE, INDEX, SCALE) \
+  ((__m128) __builtin_ia32_gatherdiv4sf256 \
+    ((__v4sf) _mm_setzero_ps (), \
+     (float const *) (BASE), \
+     (__v4di)(__m256i) (INDEX), \
+     (__v4sf) _mm_set1_ps ((float)(int) -1), \
+     (int) (SCALE)))
+
+/* Macro form of _mm256_mask_i64gather_ps, used when __OPTIMIZE__ is not
+   defined.  Every argument is parenthesized before it is cast so that
+   compound expressions are converted as a whole, and the expansion
+   itself is parenthesized.  */
+#define _mm256_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
+  ((__m128) __builtin_ia32_gatherdiv4sf256 \
+    ((__v4sf)(__m128) (SRC), \
+     (float const *) (BASE), \
+     (__v4di)(__m256i) (INDEX), \
+     (__v4sf)(__m128) (MASK), \
+     (int) (SCALE)))
+
+/* Macro form of _mm_i32gather_epi64, used when __OPTIMIZE__ is not
+   defined; zero source operand, all-ones mask.  Every argument is
+   parenthesized before it is cast so that compound expressions are
+   converted as a whole, and the expansion itself is parenthesized.  */
+#define _mm_i32gather_epi64(BASE, INDEX, SCALE) \
+  ((__m128i) __builtin_ia32_gathersiv2di \
+    ((__v2di) _mm_setzero_si128 (), \
+     (long long const *) (BASE), \
+     (__v4si)(__m128i) (INDEX), \
+     (__v2di) _mm_set1_epi64x (-1), \
+     (int) (SCALE)))
+
+/* Macro form of _mm_mask_i32gather_epi64, used when __OPTIMIZE__ is not
+   defined.  Every argument is parenthesized before it is cast so that
+   compound expressions are converted as a whole, and the expansion
+   itself is parenthesized.  */
+#define _mm_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
+  ((__m128i) __builtin_ia32_gathersiv2di \
+    ((__v2di)(__m128i) (SRC), \
+     (long long const *) (BASE), \
+     (__v4si)(__m128i) (INDEX), \
+     (__v2di)(__m128i) (MASK), \
+     (int) (SCALE)))
+
+/* Macro form of _mm256_i32gather_epi64, used when __OPTIMIZE__ is not
+   defined; zero source operand, all-ones mask.  Every argument is
+   parenthesized before it is cast so that compound expressions are
+   converted as a whole, and the expansion itself is parenthesized.  */
+#define _mm256_i32gather_epi64(BASE, INDEX, SCALE) \
+  ((__m256i) __builtin_ia32_gathersiv4di \
+    ((__v4di) _mm256_setzero_si256 (), \
+     (long long const *) (BASE), \
+     (__v4si)(__m128i) (INDEX), \
+     (__v4di) _mm256_set1_epi64x (-1), \
+     (int) (SCALE)))
+
+/* Macro form of _mm256_mask_i32gather_epi64, used when __OPTIMIZE__ is
+   not defined.  Every argument is parenthesized before it is cast so
+   that compound expressions are converted as a whole, and the expansion
+   itself is parenthesized.  */
+#define _mm256_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
+  ((__m256i) __builtin_ia32_gathersiv4di \
+    ((__v4di)(__m256i) (SRC), \
+     (long long const *) (BASE), \
+     (__v4si)(__m128i) (INDEX), \
+     (__v4di)(__m256i) (MASK), \
+     (int) (SCALE)))
+
+/* Macro form of _mm_i64gather_epi64, used when __OPTIMIZE__ is not
+   defined; zero source operand, all-ones mask.  Every argument is
+   parenthesized before it is cast so that compound expressions are
+   converted as a whole, and the expansion itself is parenthesized.  */
+#define _mm_i64gather_epi64(BASE, INDEX, SCALE) \
+  ((__m128i) __builtin_ia32_gatherdiv2di \
+    ((__v2di) _mm_setzero_si128 (), \
+     (long long const *) (BASE), \
+     (__v2di)(__m128i) (INDEX), \
+     (__v2di) _mm_set1_epi64x (-1), \
+     (int) (SCALE)))
+
+/* Macro form of _mm_mask_i64gather_epi64, used when __OPTIMIZE__ is not
+   defined.  Every argument is parenthesized before it is cast so that
+   compound expressions are converted as a whole, and the expansion
+   itself is parenthesized.  */
+#define _mm_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
+  ((__m128i) __builtin_ia32_gatherdiv2di \
+    ((__v2di)(__m128i) (SRC), \
+     (long long const *) (BASE), \
+     (__v2di)(__m128i) (INDEX), \
+     (__v2di)(__m128i) (MASK), \
+     (int) (SCALE)))
+
+/* Macro form of _mm256_i64gather_epi64, used when __OPTIMIZE__ is not
+   defined; zero source operand, all-ones mask.  Every argument is
+   parenthesized before it is cast so that compound expressions are
+   converted as a whole, and the expansion itself is parenthesized.  */
+#define _mm256_i64gather_epi64(BASE, INDEX, SCALE) \
+  ((__m256i) __builtin_ia32_gatherdiv4di \
+    ((__v4di) _mm256_setzero_si256 (), \
+     (long long const *) (BASE), \
+     (__v4di)(__m256i) (INDEX), \
+     (__v4di) _mm256_set1_epi64x (-1), \
+     (int) (SCALE)))
+
+/* Macro form of _mm256_mask_i64gather_epi64, used when __OPTIMIZE__ is
+   not defined.  Every argument is parenthesized before it is cast so
+   that compound expressions are converted as a whole, and the expansion
+   itself is parenthesized.  */
+#define _mm256_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
+  ((__m256i) __builtin_ia32_gatherdiv4di \
+    ((__v4di)(__m256i) (SRC), \
+     (long long const *) (BASE), \
+     (__v4di)(__m256i) (INDEX), \
+     (__v4di)(__m256i) (MASK), \
+     (int) (SCALE)))
+
+/* Macro form of _mm_i32gather_epi32, used when __OPTIMIZE__ is not
+   defined; zero source operand, all-ones mask.  Every argument is
+   parenthesized before it is cast so that compound expressions are
+   converted as a whole, and the expansion itself is parenthesized.  */
+#define _mm_i32gather_epi32(BASE, INDEX, SCALE) \
+  ((__m128i) __builtin_ia32_gathersiv4si \
+    ((__v4si) _mm_setzero_si128 (), \
+     (int const *) (BASE), \
+     (__v4si)(__m128i) (INDEX), \
+     (__v4si) _mm_set1_epi32 (-1), \
+     (int) (SCALE)))
+
+/* Macro form of _mm_mask_i32gather_epi32, used when __OPTIMIZE__ is not
+   defined.  Every argument is parenthesized before it is cast so that
+   compound expressions are converted as a whole, and the expansion
+   itself is parenthesized.  */
+#define _mm_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
+  ((__m128i) __builtin_ia32_gathersiv4si \
+    ((__v4si)(__m128i) (SRC), \
+     (int const *) (BASE), \
+     (__v4si)(__m128i) (INDEX), \
+     (__v4si)(__m128i) (MASK), \
+     (int) (SCALE)))
+
+/* Macro form of _mm256_i32gather_epi32, used when __OPTIMIZE__ is not
+   defined; zero source operand, all-ones mask.  Every argument is
+   parenthesized before it is cast so that compound expressions are
+   converted as a whole, and the expansion itself is parenthesized.  */
+#define _mm256_i32gather_epi32(BASE, INDEX, SCALE) \
+  ((__m256i) __builtin_ia32_gathersiv8si \
+    ((__v8si) _mm256_setzero_si256 (), \
+     (int const *) (BASE), \
+     (__v8si)(__m256i) (INDEX), \
+     (__v8si) _mm256_set1_epi32 (-1), \
+     (int) (SCALE)))
+
+/* Macro form of _mm256_mask_i32gather_epi32, used when __OPTIMIZE__ is
+   not defined.  Every argument is parenthesized before it is cast so
+   that compound expressions are converted as a whole, and the expansion
+   itself is parenthesized.  */
+#define _mm256_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
+  ((__m256i) __builtin_ia32_gathersiv8si \
+    ((__v8si)(__m256i) (SRC), \
+     (int const *) (BASE), \
+     (__v8si)(__m256i) (INDEX), \
+     (__v8si)(__m256i) (MASK), \
+     (int) (SCALE)))
+
+/* Macro form of _mm_i64gather_epi32, used when __OPTIMIZE__ is not
+   defined; zero source operand, all-ones mask.  Every argument is
+   parenthesized before it is cast so that compound expressions are
+   converted as a whole, and the expansion itself is parenthesized.  */
+#define _mm_i64gather_epi32(BASE, INDEX, SCALE) \
+  ((__m128i) __builtin_ia32_gatherdiv4si \
+    ((__v4si) _mm_setzero_si128 (), \
+     (int const *) (BASE), \
+     (__v2di)(__m128i) (INDEX), \
+     (__v4si) _mm_set1_epi32 (-1), \
+     (int) (SCALE)))
+
+/* Macro form of _mm_mask_i64gather_epi32, used when __OPTIMIZE__ is not
+   defined.  Every argument is parenthesized before it is cast so that
+   compound expressions are converted as a whole, and the expansion
+   itself is parenthesized.  */
+#define _mm_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
+  ((__m128i) __builtin_ia32_gatherdiv4si \
+    ((__v4si)(__m128i) (SRC), \
+     (int const *) (BASE), \
+     (__v2di)(__m128i) (INDEX), \
+     (__v4si)(__m128i) (MASK), \
+     (int) (SCALE)))
+
+/* Macro form of _mm256_i64gather_epi32, used when __OPTIMIZE__ is not
+   defined; zero source operand, all-ones mask.  Every argument is
+   parenthesized before it is cast so that compound expressions are
+   converted as a whole, and the expansion itself is parenthesized.  */
+#define _mm256_i64gather_epi32(BASE, INDEX, SCALE) \
+  ((__m128i) __builtin_ia32_gatherdiv4si256 \
+    ((__v4si) _mm_setzero_si128 (), \
+     (int const *) (BASE), \
+     (__v4di)(__m256i) (INDEX), \
+     (__v4si) _mm_set1_epi32 (-1), \
+     (int) (SCALE)))
+
+/* Macro form of _mm256_mask_i64gather_epi32, used when __OPTIMIZE__ is
+   not defined.  Every argument is parenthesized before it is cast so
+   that compound expressions are converted as a whole, and the expansion
+   itself is parenthesized.  */
+#define _mm256_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
+  ((__m128i) __builtin_ia32_gatherdiv4si256 \
+    ((__v4si)(__m128i) (SRC), \
+     (int const *) (BASE), \
+     (__v4di)(__m256i) (INDEX), \
+     (__v4si)(__m128i) (MASK), \
+     (int) (SCALE)))
+#endif /* __OPTIMIZE__ */
+
+#ifdef __DISABLE_AVX2__
+#undef __DISABLE_AVX2__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX2__ */
+
+#endif /* _AVX2INTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/avx512cdintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/avx512cdintrin.h
new file mode 100644
index 0000000..a4939f7
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/avx512cdintrin.h
@@ -0,0 +1,184 @@
+/* Copyright (C) 2013-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512cdintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512CDINTRIN_H_INCLUDED
+#define _AVX512CDINTRIN_H_INCLUDED
+
+#ifndef __AVX512CD__
+#pragma GCC push_options
+#pragma GCC target("avx512cd")
+#define __DISABLE_AVX512CD__
+#endif /* __AVX512CD__ */
+
+/* Internal data types for implementing the intrinsics: 64-byte vectors
+ of long long (__v8di) and of int (__v16si). */
+typedef long long __v8di __attribute__ ((__vector_size__ (64)));
+typedef int __v16si __attribute__ ((__vector_size__ (64)));
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
+typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
+
+/* Integer types used for the mask arguments of the intrinsics below. */
+typedef unsigned char __mmask8;
+typedef unsigned short __mmask16;
+
+/* Unmasked form: calls __builtin_ia32_vpconflictsi_512_mask on __A with a
+   zero vector as second operand and an all-ones __mmask16.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_conflict_epi32 (__m512i __A)
+{
+  __v16si __zero = (__v16si) _mm512_setzero_si512 ();
+  __mmask16 __all = (__mmask16) -1;
+
+  return (__m512i)
+    __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A, __zero, __all);
+}
+
+/* Masked form: calls __builtin_ia32_vpconflictsi_512_mask on __A with __W
+   as second operand and __U as the mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_conflict_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
+{
+  __v16si __src = (__v16si) __A;
+  __v16si __other = (__v16si) __W;
+
+  return (__m512i)
+    __builtin_ia32_vpconflictsi_512_mask (__src, __other, (__mmask16) __U);
+}
+
+/* "maskz" form: calls __builtin_ia32_vpconflictsi_512_mask on __A with a
+   zero vector as second operand and __U as the mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A)
+{
+  __v16si __zero = (__v16si) _mm512_setzero_si512 ();
+
+  return (__m512i)
+    __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A, __zero,
+                                          (__mmask16) __U);
+}
+
+/* Unmasked form: calls __builtin_ia32_vpconflictdi_512_mask on __A with a
+   zero vector as second operand and an all-ones __mmask8.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_conflict_epi64 (__m512i __A)
+{
+  __v8di __zero = (__v8di) _mm512_setzero_si512 ();
+  __mmask8 __all = (__mmask8) -1;
+
+  return (__m512i)
+    __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A, __zero, __all);
+}
+
+/* Masked form: calls __builtin_ia32_vpconflictdi_512_mask on __A with __W
+   as second operand and __U as the mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_conflict_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
+{
+  __v8di __src = (__v8di) __A;
+  __v8di __other = (__v8di) __W;
+
+  return (__m512i)
+    __builtin_ia32_vpconflictdi_512_mask (__src, __other, (__mmask8) __U);
+}
+
+/* "maskz" form: calls __builtin_ia32_vpconflictdi_512_mask on __A with a
+   zero vector as second operand and __U as the mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_conflict_epi64 (__mmask8 __U, __m512i __A)
+{
+  __v8di __zero = (__v8di) _mm512_setzero_si512 ();
+
+  return (__m512i)
+    __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A, __zero,
+                                          (__mmask8) __U);
+}
+
+/* Unmasked form: calls __builtin_ia32_vplzcntq_512_mask on __A with a
+   zero vector as second operand and an all-ones __mmask8.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_lzcnt_epi64 (__m512i __A)
+{
+  __v8di __zero = (__v8di) _mm512_setzero_si512 ();
+  __mmask8 __all = (__mmask8) -1;
+
+  return (__m512i)
+    __builtin_ia32_vplzcntq_512_mask ((__v8di) __A, __zero, __all);
+}
+
+/* Masked form: calls __builtin_ia32_vplzcntq_512_mask on __A with __W as
+   second operand and __U as the mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_lzcnt_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
+{
+  __v8di __src = (__v8di) __A;
+  __v8di __other = (__v8di) __W;
+
+  return (__m512i)
+    __builtin_ia32_vplzcntq_512_mask (__src, __other, (__mmask8) __U);
+}
+
+/* "maskz" form: calls __builtin_ia32_vplzcntq_512_mask on __A with a zero
+   vector as second operand and __U as the mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_lzcnt_epi64 (__mmask8 __U, __m512i __A)
+{
+  __v8di __zero = (__v8di) _mm512_setzero_si512 ();
+
+  return (__m512i)
+    __builtin_ia32_vplzcntq_512_mask ((__v8di) __A, __zero, (__mmask8) __U);
+}
+
+/* Unmasked form: calls __builtin_ia32_vplzcntd_512_mask on __A with a
+   zero vector as second operand and an all-ones __mmask16.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_lzcnt_epi32 (__m512i __A)
+{
+  __v16si __zero = (__v16si) _mm512_setzero_si512 ();
+  __mmask16 __all = (__mmask16) -1;
+
+  return (__m512i)
+    __builtin_ia32_vplzcntd_512_mask ((__v16si) __A, __zero, __all);
+}
+
+/* Masked form: calls __builtin_ia32_vplzcntd_512_mask on __A with __W as
+   second operand and __U as the mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_lzcnt_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
+{
+  __v16si __src = (__v16si) __A;
+  __v16si __other = (__v16si) __W;
+
+  return (__m512i)
+    __builtin_ia32_vplzcntd_512_mask (__src, __other, (__mmask16) __U);
+}
+
+/* "maskz" form: calls __builtin_ia32_vplzcntd_512_mask on __A with a zero
+   vector as second operand and __U as the mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_lzcnt_epi32 (__mmask16 __U, __m512i __A)
+{
+  __v16si __zero = (__v16si) _mm512_setzero_si512 ();
+
+  return (__m512i)
+    __builtin_ia32_vplzcntd_512_mask ((__v16si) __A, __zero,
+                                      (__mmask16) __U);
+}
+
+/* Passes the 8-bit mask __A to __builtin_ia32_broadcastmb512 and returns
+   the builtin's result as __m512i.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcastmb_epi64 (__mmask8 __A)
+{
+  __m512i __res = (__m512i) __builtin_ia32_broadcastmb512 (__A);
+  return __res;
+}
+
+/* Passes the 16-bit mask __A to __builtin_ia32_broadcastmw512 and returns
+   the builtin's result as __m512i.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcastmw_epi32 (__mmask16 __A)
+{
+  __m512i __res = (__m512i) __builtin_ia32_broadcastmw512 (__A);
+  return __res;
+}
+
+#ifdef __DISABLE_AVX512CD__
+#undef __DISABLE_AVX512CD__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512CD__ */
+
+#endif /* _AVX512CDINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/avx512erintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/avx512erintrin.h
new file mode 100644
index 0000000..f6870a5
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/avx512erintrin.h
@@ -0,0 +1,394 @@
+/* Copyright (C) 2013-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512ERINTRIN_H_INCLUDED
+#define _AVX512ERINTRIN_H_INCLUDED
+
+#ifndef __AVX512ER__
+#pragma GCC push_options
+#pragma GCC target("avx512er")
+#define __DISABLE_AVX512ER__
+#endif /* __AVX512ER__ */
+
+/* Internal data types for implementing the intrinsics. */
+typedef double __v8df __attribute__ ((__vector_size__ (64)));
+typedef float __v16sf __attribute__ ((__vector_size__ (64)));
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
+typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
+
+typedef unsigned char __mmask8;
+typedef unsigned short __mmask16;
+
+#ifdef __OPTIMIZE__
+/* Unmasked form: forwards __A and the rounding argument __R to
+   __builtin_ia32_exp2pd_mask with every mask bit set (-1).  An explicitly
+   undefined vector is passed as the second operand so that no
+   uninitialized local is read; the -O0 macro form passes zeros there,
+   so its value is not expected to matter under the full mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_exp2a23_round_pd (__m512d __A, int __R)
+{
+  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
+                                               (__v8df) _mm512_undefined_pd (),
+                                               (__mmask8) -1, __R);
+}
+
+/* Forwards __A, __W, mask __U and rounding argument __R, in that order,
+   to __builtin_ia32_exp2pd_mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_exp2a23_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
+{
+  const __v8df __src = (__v8df) __A;
+  const __v8df __fill = (__v8df) __W;
+  return (__m512d) __builtin_ia32_exp2pd_mask (__src, __fill,
+                                               (__mmask8) __U, __R);
+}
+
+/* Forwards __A, an all-zero vector, mask __U and rounding argument __R
+   to __builtin_ia32_exp2pd_mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_exp2a23_round_pd (__mmask8 __U, __m512d __A, int __R)
+{
+  const __v8df __src = (__v8df) __A;
+  const __v8df __fill = (__v8df) _mm512_setzero_pd ();
+  return (__m512d) __builtin_ia32_exp2pd_mask (__src, __fill,
+                                               (__mmask8) __U, __R);
+}
+
+/* Unmasked form: forwards __A and the rounding argument __R to
+   __builtin_ia32_exp2ps_mask with every mask bit set (-1).  An explicitly
+   undefined vector is passed as the second operand so that no
+   uninitialized local is read; the -O0 macro form passes zeros there,
+   so its value is not expected to matter under the full mask.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_exp2a23_round_ps (__m512 __A, int __R)
+{
+  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
+                                              (__v16sf) _mm512_undefined_ps (),
+                                              (__mmask16) -1, __R);
+}
+
+/* Forwards __A, __W, mask __U and rounding argument __R, in that order,
+   to __builtin_ia32_exp2ps_mask.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_exp2a23_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
+{
+  const __v16sf __src = (__v16sf) __A;
+  const __v16sf __fill = (__v16sf) __W;
+  return (__m512) __builtin_ia32_exp2ps_mask (__src, __fill,
+                                              (__mmask16) __U, __R);
+}
+
+/* Forwards __A, an all-zero vector, mask __U and rounding argument __R
+   to __builtin_ia32_exp2ps_mask.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_exp2a23_round_ps (__mmask16 __U, __m512 __A, int __R)
+{
+  const __v16sf __src = (__v16sf) __A;
+  const __v16sf __fill = (__v16sf) _mm512_setzero_ps ();
+  return (__m512) __builtin_ia32_exp2ps_mask (__src, __fill,
+                                              (__mmask16) __U, __R);
+}
+
+/* Unmasked form: forwards __A and the rounding argument __R to
+   __builtin_ia32_rcp28pd_mask with every mask bit set (-1).  An explicitly
+   undefined vector is passed as the second operand so that no
+   uninitialized local is read; the -O0 macro form passes zeros there,
+   so its value is not expected to matter under the full mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rcp28_round_pd (__m512d __A, int __R)
+{
+  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
+                                                (__v8df) _mm512_undefined_pd (),
+                                                (__mmask8) -1, __R);
+}
+
+/* Forwards __A, __W, mask __U and rounding argument __R, in that order,
+   to __builtin_ia32_rcp28pd_mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rcp28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
+{
+  const __v8df __src = (__v8df) __A;
+  const __v8df __fill = (__v8df) __W;
+  return (__m512d) __builtin_ia32_rcp28pd_mask (__src, __fill,
+                                                (__mmask8) __U, __R);
+}
+
+/* Forwards __A, an all-zero vector, mask __U and rounding argument __R
+   to __builtin_ia32_rcp28pd_mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rcp28_round_pd (__mmask8 __U, __m512d __A, int __R)
+{
+  const __v8df __src = (__v8df) __A;
+  const __v8df __fill = (__v8df) _mm512_setzero_pd ();
+  return (__m512d) __builtin_ia32_rcp28pd_mask (__src, __fill,
+                                                (__mmask8) __U, __R);
+}
+
+/* Unmasked form: forwards __A and the rounding argument __R to
+   __builtin_ia32_rcp28ps_mask with every mask bit set (-1).  An explicitly
+   undefined vector is passed as the second operand so that no
+   uninitialized local is read; the -O0 macro form passes zeros there,
+   so its value is not expected to matter under the full mask.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rcp28_round_ps (__m512 __A, int __R)
+{
+  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
+                                               (__v16sf) _mm512_undefined_ps (),
+                                               (__mmask16) -1, __R);
+}
+
+/* Forwards __A, __W, mask __U and rounding argument __R, in that order,
+   to __builtin_ia32_rcp28ps_mask.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rcp28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
+{
+  const __v16sf __src = (__v16sf) __A;
+  const __v16sf __fill = (__v16sf) __W;
+  return (__m512) __builtin_ia32_rcp28ps_mask (__src, __fill,
+                                               (__mmask16) __U, __R);
+}
+
+/* Forwards __A, an all-zero vector, mask __U and rounding argument __R
+   to __builtin_ia32_rcp28ps_mask.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rcp28_round_ps (__mmask16 __U, __m512 __A, int __R)
+{
+  const __v16sf __src = (__v16sf) __A;
+  const __v16sf __fill = (__v16sf) _mm512_setzero_ps ();
+  return (__m512) __builtin_ia32_rcp28ps_mask (__src, __fill,
+                                               (__mmask16) __U, __R);
+}
+
+/* Calls __builtin_ia32_rcp28sd_round with the operands swapped relative
+   to the parameter list: __B is the first operand, __A the second, then
+   the rounding argument __R.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
+{
+  const __v2df __first = (__v2df) __B;
+  const __v2df __second = (__v2df) __A;
+  return (__m128d) __builtin_ia32_rcp28sd_round (__first, __second, __R);
+}
+
+/* Calls __builtin_ia32_rcp28ss_round with the operands swapped relative
+   to the parameter list: __B is the first operand, __A the second, then
+   the rounding argument __R.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
+{
+  const __v4sf __first = (__v4sf) __B;
+  const __v4sf __second = (__v4sf) __A;
+  return (__m128) __builtin_ia32_rcp28ss_round (__first, __second, __R);
+}
+
+/* Unmasked form: forwards __A and the rounding argument __R to
+   __builtin_ia32_rsqrt28pd_mask with every mask bit set (-1).  An
+   explicitly undefined vector is passed as the second operand so that no
+   uninitialized local is read; the -O0 macro form passes zeros there,
+   so its value is not expected to matter under the full mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rsqrt28_round_pd (__m512d __A, int __R)
+{
+  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
+                                                  (__v8df) _mm512_undefined_pd (),
+                                                  (__mmask8) -1, __R);
+}
+
+/* Forwards __A, __W, mask __U and rounding argument __R, in that order,
+   to __builtin_ia32_rsqrt28pd_mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rsqrt28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
+{
+  const __v8df __src = (__v8df) __A;
+  const __v8df __fill = (__v8df) __W;
+  return (__m512d) __builtin_ia32_rsqrt28pd_mask (__src, __fill,
+                                                  (__mmask8) __U, __R);
+}
+
+/* Forwards __A, an all-zero vector, mask __U and rounding argument __R
+   to __builtin_ia32_rsqrt28pd_mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rsqrt28_round_pd (__mmask8 __U, __m512d __A, int __R)
+{
+  const __v8df __src = (__v8df) __A;
+  const __v8df __fill = (__v8df) _mm512_setzero_pd ();
+  return (__m512d) __builtin_ia32_rsqrt28pd_mask (__src, __fill,
+                                                  (__mmask8) __U, __R);
+}
+
+/* Unmasked form: forwards __A and the rounding argument __R to
+   __builtin_ia32_rsqrt28ps_mask with every mask bit set (-1).  An
+   explicitly undefined vector is passed as the second operand so that no
+   uninitialized local is read; the -O0 macro form passes zeros there,
+   so its value is not expected to matter under the full mask.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rsqrt28_round_ps (__m512 __A, int __R)
+{
+  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
+                                                 (__v16sf) _mm512_undefined_ps (),
+                                                 (__mmask16) -1, __R);
+}
+
+/* Forwards __A, __W, mask __U and rounding argument __R, in that order,
+   to __builtin_ia32_rsqrt28ps_mask.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rsqrt28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
+{
+  const __v16sf __src = (__v16sf) __A;
+  const __v16sf __fill = (__v16sf) __W;
+  return (__m512) __builtin_ia32_rsqrt28ps_mask (__src, __fill,
+                                                 (__mmask16) __U, __R);
+}
+
+/* Forwards __A, an all-zero vector, mask __U and rounding argument __R
+   to __builtin_ia32_rsqrt28ps_mask.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R)
+{
+  const __v16sf __src = (__v16sf) __A;
+  const __v16sf __fill = (__v16sf) _mm512_setzero_ps ();
+  return (__m512) __builtin_ia32_rsqrt28ps_mask (__src, __fill,
+                                                 (__mmask16) __U, __R);
+}
+
+/* Calls __builtin_ia32_rsqrt28sd_round with the operands swapped relative
+   to the parameter list: __B is the first operand, __A the second, then
+   the rounding argument __R.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
+{
+  const __v2df __first = (__v2df) __B;
+  const __v2df __second = (__v2df) __A;
+  return (__m128d) __builtin_ia32_rsqrt28sd_round (__first, __second, __R);
+}
+
+/* Calls __builtin_ia32_rsqrt28ss_round with the operands swapped relative
+   to the parameter list: __B is the first operand, __A the second, then
+   the rounding argument __R.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
+{
+  const __v4sf __first = (__v4sf) __B;
+  const __v4sf __second = (__v4sf) __A;
+  return (__m128) __builtin_ia32_rsqrt28ss_round (__first, __second, __R);
+}
+
+#else
+/* Macro forms of the 512-bit *_round_* intrinsics, used when __OPTIMIZE__
+   is not defined.  Each expands directly to the corresponding *_mask
+   builtin; C is the rounding argument.  The unmasked and maskz forms pass
+   an all-zero vector as the second operand (with mask -1 and U
+   respectively); the mask forms pass W.  */
+
+/* exp2a23 family.  */
+#define _mm512_exp2a23_round_pd(A, C) \
+ __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
+
+#define _mm512_mask_exp2a23_round_pd(W, U, A, C) \
+ __builtin_ia32_exp2pd_mask(A, W, U, C)
+
+#define _mm512_maskz_exp2a23_round_pd(U, A, C) \
+ __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
+
+#define _mm512_exp2a23_round_ps(A, C) \
+ __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
+
+#define _mm512_mask_exp2a23_round_ps(W, U, A, C) \
+ __builtin_ia32_exp2ps_mask(A, W, U, C)
+
+#define _mm512_maskz_exp2a23_round_ps(U, A, C) \
+ __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
+
+/* rcp28 family.  */
+#define _mm512_rcp28_round_pd(A, C) \
+ __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
+
+#define _mm512_mask_rcp28_round_pd(W, U, A, C) \
+ __builtin_ia32_rcp28pd_mask(A, W, U, C)
+
+#define _mm512_maskz_rcp28_round_pd(U, A, C) \
+ __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
+
+#define _mm512_rcp28_round_ps(A, C) \
+ __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
+
+#define _mm512_mask_rcp28_round_ps(W, U, A, C) \
+ __builtin_ia32_rcp28ps_mask(A, W, U, C)
+
+#define _mm512_maskz_rcp28_round_ps(U, A, C) \
+ __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
+
+/* rsqrt28 family.  */
+#define _mm512_rsqrt28_round_pd(A, C) \
+ __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
+
+#define _mm512_mask_rsqrt28_round_pd(W, U, A, C) \
+ __builtin_ia32_rsqrt28pd_mask(A, W, U, C)
+
+#define _mm512_maskz_rsqrt28_round_pd(U, A, C) \
+ __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
+
+#define _mm512_rsqrt28_round_ps(A, C) \
+ __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
+
+#define _mm512_mask_rsqrt28_round_ps(W, U, A, C) \
+ __builtin_ia32_rsqrt28ps_mask(A, W, U, C)
+
+#define _mm512_maskz_rsqrt28_round_ps(U, A, C) \
+ __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
+
+/* Macro form used when __OPTIMIZE__ is not defined.  The builtin takes
+   (B, A, R), the same operand order as the inline definition of this
+   intrinsic and as _mm_rcp28_sd.  */
+#define _mm_rcp28_round_sd(A, B, R) \
+ __builtin_ia32_rcp28sd_round(B, A, R)
+
+/* Macro form used when __OPTIMIZE__ is not defined.  The builtin takes
+   (B, A, R), the same operand order as the inline definition of this
+   intrinsic and as _mm_rcp28_ss.  */
+#define _mm_rcp28_round_ss(A, B, R) \
+ __builtin_ia32_rcp28ss_round(B, A, R)
+
+/* Macro form used when __OPTIMIZE__ is not defined.  The builtin takes
+   (B, A, R), the same operand order as the inline definition of this
+   intrinsic and as _mm_rsqrt28_sd.  */
+#define _mm_rsqrt28_round_sd(A, B, R) \
+ __builtin_ia32_rsqrt28sd_round(B, A, R)
+
+/* Macro form used when __OPTIMIZE__ is not defined.  The builtin takes
+   (B, A, R), the same operand order as the inline definition of this
+   intrinsic and as _mm_rsqrt28_ss.  */
+#define _mm_rsqrt28_round_ss(A, B, R) \
+ __builtin_ia32_rsqrt28ss_round(B, A, R)
+
+#endif
+
+/* Convenience forms without a rounding parameter: each forwards its
+   arguments unchanged to the matching *_round_* intrinsic and supplies
+   _MM_FROUND_CUR_DIRECTION as the rounding argument.  */
+
+/* exp2a23 family.  */
+#define _mm512_exp2a23_pd(A) \
+ _mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_exp2a23_pd(W, U, A) \
+ _mm512_mask_exp2a23_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_exp2a23_pd(U, A) \
+ _mm512_maskz_exp2a23_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_exp2a23_ps(A) \
+ _mm512_exp2a23_round_ps(A, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_exp2a23_ps(W, U, A) \
+ _mm512_mask_exp2a23_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_exp2a23_ps(U, A) \
+ _mm512_maskz_exp2a23_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
+
+/* rcp28 family.  */
+#define _mm512_rcp28_pd(A) \
+ _mm512_rcp28_round_pd(A, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_rcp28_pd(W, U, A) \
+ _mm512_mask_rcp28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_rcp28_pd(U, A) \
+ _mm512_maskz_rcp28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_rcp28_ps(A) \
+ _mm512_rcp28_round_ps(A, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_rcp28_ps(W, U, A) \
+ _mm512_mask_rcp28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_rcp28_ps(U, A) \
+ _mm512_maskz_rcp28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
+
+/* rsqrt28 family.  */
+#define _mm512_rsqrt28_pd(A) \
+ _mm512_rsqrt28_round_pd(A, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_rsqrt28_pd(W, U, A) \
+ _mm512_mask_rsqrt28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_rsqrt28_pd(U, A) \
+ _mm512_maskz_rsqrt28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_rsqrt28_ps(A) \
+ _mm512_rsqrt28_round_ps(A, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_rsqrt28_ps(W, U, A) \
+ _mm512_mask_rsqrt28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_rsqrt28_ps(U, A) \
+ _mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
+
+/* Scalar forms without a rounding parameter.  These expand straight to
+   the *_round builtins (not to the _mm_*_round_* intrinsics), passing the
+   operands as (B, A) and _MM_FROUND_CUR_DIRECTION as the rounding
+   argument.  */
+#define _mm_rcp28_sd(A, B) \
+ __builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_rcp28_ss(A, B) \
+ __builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_rsqrt28_sd(A, B) \
+ __builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_rsqrt28_ss(A, B) \
+ __builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
+
+#ifdef __DISABLE_AVX512ER__
+#undef __DISABLE_AVX512ER__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512ER__ */
+
+#endif /* _AVX512ERINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/avx512fintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/avx512fintrin.h
new file mode 100644
index 0000000..314895a
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/avx512fintrin.h
@@ -0,0 +1,12915 @@
+/* Copyright (C) 2013-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512FINTRIN_H_INCLUDED
+#define _AVX512FINTRIN_H_INCLUDED
+
+#ifndef __AVX512F__
+#pragma GCC push_options
+#pragma GCC target("avx512f")
+#define __DISABLE_AVX512F__
+#endif /* __AVX512F__ */
+
+/* Internal data types for implementing the intrinsics. */
+typedef double __v8df __attribute__ ((__vector_size__ (64)));
+typedef float __v16sf __attribute__ ((__vector_size__ (64)));
+typedef long long __v8di __attribute__ ((__vector_size__ (64)));
+typedef int __v16si __attribute__ ((__vector_size__ (64)));
+typedef short __v32hi __attribute__ ((__vector_size__ (64)));
+typedef char __v64qi __attribute__ ((__vector_size__ (64)));
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
+typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
+typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
+
+typedef unsigned char __mmask8;
+typedef unsigned short __mmask16;
+
+/* Create the vector [A B C D E F G H]: the arguments are listed in
+   reverse in the initializer, so __H is its first element.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set_epi64 (long long __A, long long __B, long long __C,
+                  long long __D, long long __E, long long __F,
+                  long long __G, long long __H)
+{
+  const __v8di __vec = { __H, __G, __F, __E, __D, __C, __B, __A };
+  return (__m512i) __vec;
+}
+
+/* Create the vector [A B C D E F G H I J K L M N O P]: the arguments are
+   listed in reverse in the initializer, so __P is its first element.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set_epi32 (int __A, int __B, int __C, int __D,
+                  int __E, int __F, int __G, int __H,
+                  int __I, int __J, int __K, int __L,
+                  int __M, int __N, int __O, int __P)
+{
+  const __v16si __vec = { __P, __O, __N, __M, __L, __K, __J, __I,
+                          __H, __G, __F, __E, __D, __C, __B, __A };
+  return (__m512i) __vec;
+}
+
+/* Create the vector [A B C D E F G H]: the arguments are listed in
+   reverse in the initializer, so __H is its first element.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set_pd (double __A, double __B, double __C, double __D,
+               double __E, double __F, double __G, double __H)
+{
+  const __m512d __vec = { __H, __G, __F, __E, __D, __C, __B, __A };
+  return __vec;
+}
+
+/* Create the vector [A B C D E F G H I J K L M N O P]: the arguments are
+   listed in reverse in the initializer, so __P is its first element.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set_ps (float __A, float __B, float __C, float __D,
+               float __E, float __F, float __G, float __H,
+               float __I, float __J, float __K, float __L,
+               float __M, float __N, float __O, float __P)
+{
+  const __m512 __vec = { __P, __O, __N, __M, __L, __K, __J, __I,
+                         __H, __G, __F, __E, __D, __C, __B, __A };
+  return __vec;
+}
+
+/* Reversed-order setters: each forwards to the matching _mm512_set_*
+   intrinsic with its argument list in the opposite order (e0 becomes the
+   last argument).  */
+#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
+ _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0)
+
+#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \
+ e8,e9,e10,e11,e12,e13,e14,e15) \
+ _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
+
+#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
+ _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0)
+
+#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
+ _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
+
+/* Returns a __m512 that is never assigned a meaningful value.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_undefined_ps (void)
+{
+ /* __Y is initialized from itself, so its contents are indeterminate.
+    NOTE(review): presumably spelled this way so GCC does not warn about
+    an uninitialized read -- confirm against the -Winit-self docs.  */
+ __m512 __Y = __Y;
+ return __Y;
+}
+
+/* Returns a __m512d that is never assigned a meaningful value.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_undefined_pd (void)
+{
+ /* __Y is initialized from itself, so its contents are indeterminate.
+    NOTE(review): presumably spelled this way so GCC does not warn about
+    an uninitialized read -- confirm against the -Winit-self docs.  */
+ __m512d __Y = __Y;
+ return __Y;
+}
+
+/* Returns a __m512i that is never assigned a meaningful value.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_undefined_si512 (void)
+{
+ /* __Y is initialized from itself, so its contents are indeterminate.
+    NOTE(review): presumably spelled this way so GCC does not warn about
+    an uninitialized read -- confirm against the -Winit-self docs.  */
+ __m512i __Y = __Y;
+ return __Y;
+}
+
+/* Returns a vector whose 64 char elements are all __A.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set1_epi8 (char __A)
+{
+  const __v64qi __vec =
+    { __A, __A, __A, __A, __A, __A, __A, __A,
+      __A, __A, __A, __A, __A, __A, __A, __A,
+      __A, __A, __A, __A, __A, __A, __A, __A,
+      __A, __A, __A, __A, __A, __A, __A, __A,
+      __A, __A, __A, __A, __A, __A, __A, __A,
+      __A, __A, __A, __A, __A, __A, __A, __A,
+      __A, __A, __A, __A, __A, __A, __A, __A,
+      __A, __A, __A, __A, __A, __A, __A, __A };
+  return (__m512i) __vec;
+}
+
+/* Returns a vector whose 32 short elements are all __A.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set1_epi16 (short __A)
+{
+  const __v32hi __vec =
+    { __A, __A, __A, __A, __A, __A, __A, __A,
+      __A, __A, __A, __A, __A, __A, __A, __A,
+      __A, __A, __A, __A, __A, __A, __A, __A,
+      __A, __A, __A, __A, __A, __A, __A, __A };
+  return (__m512i) __vec;
+}
+
+/* Places __A in the first element of a two-double vector and hands it to
+   __builtin_ia32_broadcastsd512 with an undefined second operand and
+   every mask bit set (-1).  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set1_pd (double __A)
+{
+  const __v2df __lo = { __A, };
+  const __v8df __fill = (__v8df) _mm512_undefined_pd ();
+  return (__m512d) __builtin_ia32_broadcastsd512 (__lo, __fill,
+                                                  (__mmask8) -1);
+}
+
+/* Places __A in the first element of a four-float vector and hands it to
+   __builtin_ia32_broadcastss512 with an undefined second operand and
+   every mask bit set (-1).  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set1_ps (float __A)
+{
+  const __v4sf __lo = { __A, };
+  const __v16sf __fill = (__v16sf) _mm512_undefined_ps ();
+  return (__m512) __builtin_ia32_broadcastss512 (__lo, __fill,
+                                                 (__mmask16) -1);
+}
+
+/* Create the vector [A B C D A B C D A B C D A B C D]: the initializer
+   repeats the group { __D, __C, __B, __A } four times.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
+{
+  const __v16si __vec = { __D, __C, __B, __A, __D, __C, __B, __A,
+                          __D, __C, __B, __A, __D, __C, __B, __A };
+  return (__m512i) __vec;
+}
+
+/* Create the vector [A B C D A B C D]: the initializer repeats the group
+   { __D, __C, __B, __A } twice.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set4_epi64 (long long __A, long long __B, long long __C,
+                   long long __D)
+{
+  const __v8di __vec = { __D, __C, __B, __A, __D, __C, __B, __A };
+  return (__m512i) __vec;
+}
+
+/* Create the vector [A B C D A B C D]: the initializer repeats the group
+   { __D, __C, __B, __A } twice.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set4_pd (double __A, double __B, double __C, double __D)
+{
+  const __m512d __vec = { __D, __C, __B, __A, __D, __C, __B, __A };
+  return __vec;
+}
+
+/* Create the vector [A B C D A B C D A B C D A B C D]: the initializer
+   repeats the group { __D, __C, __B, __A } four times.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set4_ps (float __A, float __B, float __C, float __D)
+{
+  const __m512 __vec = { __D, __C, __B, __A, __D, __C, __B, __A,
+                         __D, __C, __B, __A, __D, __C, __B, __A };
+  return __vec;
+}
+
+/* Reversed-order four-value setters: each forwards to the matching
+   _mm512_set4_* intrinsic with its four arguments in the opposite
+   order.  */
+#define _mm512_setr4_epi64(e0,e1,e2,e3) \
+ _mm512_set4_epi64(e3,e2,e1,e0)
+
+#define _mm512_setr4_epi32(e0,e1,e2,e3) \
+ _mm512_set4_epi32(e3,e2,e1,e0)
+
+#define _mm512_setr4_pd(e0,e1,e2,e3) \
+ _mm512_set4_pd(e3,e2,e1,e0)
+
+#define _mm512_setr4_ps(e0,e1,e2,e3) \
+ _mm512_set4_ps(e3,e2,e1,e0)
+
+/* Returns a __m512 with all sixteen elements equal to 0.0.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_setzero_ps (void)
+{
+  const __m512 __zero = { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+                          0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
+  return __zero;
+}
+
+/* Returns a __m512d with all eight elements equal to 0.0.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_setzero_pd (void)
+{
+  const __m512d __zero = { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
+  return __zero;
+}
+
+/* Returns an all-zero __m512i, built from eight zero 64-bit elements.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_setzero_epi32 (void)
+{
+  const __v8di __zero = { 0, 0, 0, 0, 0, 0, 0, 0 };
+  return (__m512i) __zero;
+}
+
+/* Returns an all-zero __m512i, built from eight zero 64-bit elements.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_setzero_si512 (void)
+{
+  const __v8di __zero = { 0, 0, 0, 0, 0, 0, 0, 0 };
+  return (__m512i) __zero;
+}
+
+/* Forwards __A, __W and mask __U, in that order, to
+   __builtin_ia32_movapd512_mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+  const __v8df __src = (__v8df) __A;
+  const __v8df __fill = (__v8df) __W;
+  return (__m512d) __builtin_ia32_movapd512_mask (__src, __fill,
+                                                  (__mmask8) __U);
+}
+
+/* Forwards __A, an all-zero vector and mask __U to
+   __builtin_ia32_movapd512_mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
+{
+  const __v8df __src = (__v8df) __A;
+  const __v8df __fill = (__v8df) _mm512_setzero_pd ();
+  return (__m512d) __builtin_ia32_movapd512_mask (__src, __fill,
+                                                  (__mmask8) __U);
+}
+
+/* Forwards __A, __W and mask __U, in that order, to
+   __builtin_ia32_movaps512_mask.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+  const __v16sf __src = (__v16sf) __A;
+  const __v16sf __fill = (__v16sf) __W;
+  return (__m512) __builtin_ia32_movaps512_mask (__src, __fill,
+                                                 (__mmask16) __U);
+}
+
+/* Forwards __A, an all-zero vector and mask __U to
+   __builtin_ia32_movaps512_mask.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
+{
+  const __v16sf __src = (__v16sf) __A;
+  const __v16sf __fill = (__v16sf) _mm512_setzero_ps ();
+  return (__m512) __builtin_ia32_movaps512_mask (__src, __fill,
+                                                 (__mmask16) __U);
+}
+
+/* Reads one __m512d through __P by plain dereference.
+   NOTE(review): presumably __P must be aligned for __m512d -- confirm.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_load_pd (void const *__P)
+{
+  __m512d *__src = (__m512d *) __P;
+  return *__src;
+}
+
+/* Forwards the pointer __P, __W and mask __U, in that order, to
+   __builtin_ia32_loadapd512_mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
+{
+  const __v8df *__src = (const __v8df *) __P;
+  const __v8df __fill = (__v8df) __W;
+  return (__m512d) __builtin_ia32_loadapd512_mask (__src, __fill,
+                                                   (__mmask8) __U);
+}
+
+/* Forwards the pointer __P, an all-zero vector and mask __U to
+   __builtin_ia32_loadapd512_mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_load_pd (__mmask8 __U, void const *__P)
+{
+  const __v8df *__src = (const __v8df *) __P;
+  const __v8df __fill = (__v8df) _mm512_setzero_pd ();
+  return (__m512d) __builtin_ia32_loadapd512_mask (__src, __fill,
+                                                   (__mmask8) __U);
+}
+
+/* Writes __A through __P by plain assignment.
+   NOTE(review): presumably __P must be aligned for __m512d -- confirm.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_store_pd (void *__P, __m512d __A)
+{
+  __m512d *__dst = (__m512d *) __P;
+  *__dst = __A;
+}
+
+/* Forwards the pointer __P, __A and mask __U, in that order, to
+   __builtin_ia32_storeapd512_mask.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
+{
+  __v8df *__dst = (__v8df *) __P;
+  const __v8df __src = (__v8df) __A;
+  __builtin_ia32_storeapd512_mask (__dst, __src, (__mmask8) __U);
+}
+
+/* Reads one __m512 through __P by plain dereference.
+   NOTE(review): presumably __P must be aligned for __m512 -- confirm.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_load_ps (void const *__P)
+{
+  __m512 *__src = (__m512 *) __P;
+  return *__src;
+}
+
+/* Forwards the pointer __P, __W and mask __U, in that order, to
+   __builtin_ia32_loadaps512_mask.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
+{
+  const __v16sf *__src = (const __v16sf *) __P;
+  const __v16sf __fill = (__v16sf) __W;
+  return (__m512) __builtin_ia32_loadaps512_mask (__src, __fill,
+                                                  (__mmask16) __U);
+}
+
+/* Forwards the pointer __P, an all-zero vector and mask __U to
+   __builtin_ia32_loadaps512_mask.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_load_ps (__mmask16 __U, void const *__P)
+{
+  const __v16sf *__src = (const __v16sf *) __P;
+  const __v16sf __fill = (__v16sf) _mm512_setzero_ps ();
+  return (__m512) __builtin_ia32_loadaps512_mask (__src, __fill,
+                                                  (__mmask16) __U);
+}
+
+/* Writes __A through __P by plain assignment.
+   NOTE(review): presumably __P must be aligned for __m512 -- confirm.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_store_ps (void *__P, __m512 __A)
+{
+  __m512 *__dst = (__m512 *) __P;
+  *__dst = __A;
+}
+
+/* Forwards the pointer __P, __A and mask __U, in that order, to
+   __builtin_ia32_storeaps512_mask.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
+{
+  __v16sf *__dst = (__v16sf *) __P;
+  const __v16sf __src = (__v16sf) __A;
+  __builtin_ia32_storeaps512_mask (__dst, __src, (__mmask16) __U);
+}
+
+/* Forwards __A, __W and mask __U, in that order, to
+   __builtin_ia32_movdqa64_512_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
+{
+  const __v8di __src = (__v8di) __A;
+  const __v8di __fill = (__v8di) __W;
+  return (__m512i) __builtin_ia32_movdqa64_512_mask (__src, __fill,
+                                                     (__mmask8) __U);
+}
+
+/* Forwards __A, an all-zero vector and mask __U to
+   __builtin_ia32_movdqa64_512_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
+{
+  const __v8di __src = (__v8di) __A;
+  const __v8di __fill = (__v8di) _mm512_setzero_si512 ();
+  return (__m512i) __builtin_ia32_movdqa64_512_mask (__src, __fill,
+                                                     (__mmask8) __U);
+}
+
+/* Reads one __m512i through __P by plain dereference.
+   NOTE(review): presumably __P must be aligned for __m512i -- confirm.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_load_epi64 (void const *__P)
+{
+  __m512i *__src = (__m512i *) __P;
+  return *__src;
+}
+
+/* Forwards the pointer __P, __W and mask __U, in that order, to
+   __builtin_ia32_movdqa64load512_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
+{
+  const __v8di *__src = (const __v8di *) __P;
+  const __v8di __fill = (__v8di) __W;
+  return (__m512i) __builtin_ia32_movdqa64load512_mask (__src, __fill,
+                                                        (__mmask8) __U);
+}
+
+/* Forwards the pointer __P, an all-zero vector and mask __U to
+   __builtin_ia32_movdqa64load512_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
+{
+  const __v8di *__src = (const __v8di *) __P;
+  const __v8di __fill = (__v8di) _mm512_setzero_si512 ();
+  return (__m512i) __builtin_ia32_movdqa64load512_mask (__src, __fill,
+                                                        (__mmask8) __U);
+}
+
+/* Writes __A through __P by plain assignment.
+   NOTE(review): presumably __P must be aligned for __m512i -- confirm.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_store_epi64 (void *__P, __m512i __A)
+{
+  __m512i *__dst = (__m512i *) __P;
+  *__dst = __A;
+}
+
+/* Forwards the pointer __P, __A and mask __U, in that order, to
+   __builtin_ia32_movdqa64store512_mask.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
+{
+  __v8di *__dst = (__v8di *) __P;
+  const __v8di __src = (__v8di) __A;
+  __builtin_ia32_movdqa64store512_mask (__dst, __src, (__mmask8) __U);
+}
+
+/* Forwards __A, __W and mask __U, in that order, to
+   __builtin_ia32_movdqa32_512_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
+{
+  const __v16si __src = (__v16si) __A;
+  const __v16si __fill = (__v16si) __W;
+  return (__m512i) __builtin_ia32_movdqa32_512_mask (__src, __fill,
+                                                     (__mmask16) __U);
+}
+
+/* Forwards __A, an all-zero vector and mask __U to
+   __builtin_ia32_movdqa32_512_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
+{
+  const __v16si __src = (__v16si) __A;
+  const __v16si __fill = (__v16si) _mm512_setzero_si512 ();
+  return (__m512i) __builtin_ia32_movdqa32_512_mask (__src, __fill,
+                                                     (__mmask16) __U);
+}
+
+/* Reads one __m512i through __P by plain dereference.
+   NOTE(review): presumably __P must be aligned for __m512i -- confirm.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_load_si512 (void const *__P)
+{
+  __m512i *__src = (__m512i *) __P;
+  return *__src;
+}
+
+/* Reads one __m512i through __P by plain dereference.
+   NOTE(review): presumably __P must be aligned for __m512i -- confirm.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_load_epi32 (void const *__P)
+{
+  __m512i *__src = (__m512i *) __P;
+  return *__src;
+}
+
+/* Forwards the pointer __P, __W and mask __U, in that order, to
+   __builtin_ia32_movdqa32load512_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
+{
+  const __v16si *__src = (const __v16si *) __P;
+  const __v16si __fill = (__v16si) __W;
+  return (__m512i) __builtin_ia32_movdqa32load512_mask (__src, __fill,
+                                                        (__mmask16) __U);
+}
+
+/* Forwards the pointer __P, an all-zero vector and mask __U to
+   __builtin_ia32_movdqa32load512_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
+{
+  const __v16si *__src = (const __v16si *) __P;
+  const __v16si __fill = (__v16si) _mm512_setzero_si512 ();
+  return (__m512i) __builtin_ia32_movdqa32load512_mask (__src, __fill,
+                                                        (__mmask16) __U);
+}
+
+/* Writes __A through __P by plain assignment.
+   NOTE(review): presumably __P must be aligned for __m512i -- confirm.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_store_si512 (void *__P, __m512i __A)
+{
+  __m512i *__dst = (__m512i *) __P;
+  *__dst = __A;
+}
+
+/* Writes __A through __P by plain assignment.
+   NOTE(review): presumably __P must be aligned for __m512i -- confirm.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_store_epi32 (void *__P, __m512i __A)
+{
+  __m512i *__dst = (__m512i *) __P;
+  *__dst = __A;
+}
+
+/* Forwards the pointer __P, __A and mask __U, in that order, to
+   __builtin_ia32_movdqa32store512_mask.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
+{
+  __v16si *__dst = (__v16si *) __P;
+  const __v16si __src = (__v16si) __A;
+  __builtin_ia32_movdqa32store512_mask (__dst, __src, (__mmask16) __U);
+}
+
+/* Forwards __A and __B to __builtin_ia32_pmulld512_mask with an undefined
+   third operand and every mask bit set (-1).  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mullo_epi32 (__m512i __A, __m512i __B)
+{
+  const __v16si __lhs = (__v16si) __A;
+  const __v16si __rhs = (__v16si) __B;
+  const __v16si __fill = (__v16si) _mm512_undefined_si512 ();
+  return (__m512i) __builtin_ia32_pmulld512_mask (__lhs, __rhs, __fill,
+                                                  (__mmask16) -1);
+}
+
+/* Forwards __A, __B, an all-zero vector and mask __M to
+   __builtin_ia32_pmulld512_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
+{
+  const __v16si __lhs = (__v16si) __A;
+  const __v16si __rhs = (__v16si) __B;
+  const __v16si __fill = (__v16si) _mm512_setzero_si512 ();
+  return (__m512i) __builtin_ia32_pmulld512_mask (__lhs, __rhs, __fill, __M);
+}
+
+/* Forwards __A, __B, __W and mask __M, in that order, to
+   __builtin_ia32_pmulld512_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
+{
+  const __v16si __lhs = (__v16si) __A;
+  const __v16si __rhs = (__v16si) __B;
+  const __v16si __fill = (__v16si) __W;
+  return (__m512i) __builtin_ia32_pmulld512_mask (__lhs, __rhs, __fill, __M);
+}
+
+/* Forwards __X and __Y to __builtin_ia32_psllv16si_mask with an undefined
+   third operand and every mask bit set (-1).  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sllv_epi32 (__m512i __X, __m512i __Y)
+{
+  const __v16si __lhs = (__v16si) __X;
+  const __v16si __rhs = (__v16si) __Y;
+  const __v16si __fill = (__v16si) _mm512_undefined_si512 ();
+  return (__m512i) __builtin_ia32_psllv16si_mask (__lhs, __rhs, __fill,
+                                                  (__mmask16) -1);
+}
+
+/* Forwards __X, __Y, __W and mask __U, in that order, to
+   __builtin_ia32_psllv16si_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
+{
+  const __v16si __lhs = (__v16si) __X;
+  const __v16si __rhs = (__v16si) __Y;
+  const __v16si __fill = (__v16si) __W;
+  return (__m512i) __builtin_ia32_psllv16si_mask (__lhs, __rhs, __fill,
+                                                  (__mmask16) __U);
+}
+
+/* Forwards __X, __Y, an all-zero vector and mask __U to
+   __builtin_ia32_psllv16si_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
+{
+  const __v16si __lhs = (__v16si) __X;
+  const __v16si __rhs = (__v16si) __Y;
+  const __v16si __fill = (__v16si) _mm512_setzero_si512 ();
+  return (__m512i) __builtin_ia32_psllv16si_mask (__lhs, __rhs, __fill,
+                                                  (__mmask16) __U);
+}
+
+/* Forwards __X and __Y to __builtin_ia32_psrav16si_mask with an undefined
+   third operand and every mask bit set (-1).  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srav_epi32 (__m512i __X, __m512i __Y)
+{
+  const __v16si __lhs = (__v16si) __X;
+  const __v16si __rhs = (__v16si) __Y;
+  const __v16si __fill = (__v16si) _mm512_undefined_si512 ();
+  return (__m512i) __builtin_ia32_psrav16si_mask (__lhs, __rhs, __fill,
+                                                  (__mmask16) -1);
+}
+
+/* Forwards __X, __Y, __W and mask __U, in that order, to
+   __builtin_ia32_psrav16si_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
+{
+  const __v16si __lhs = (__v16si) __X;
+  const __v16si __rhs = (__v16si) __Y;
+  const __v16si __fill = (__v16si) __W;
+  return (__m512i) __builtin_ia32_psrav16si_mask (__lhs, __rhs, __fill,
+                                                  (__mmask16) __U);
+}
+
+/* Forwards __X, __Y, an all-zero vector and mask __U to
+   __builtin_ia32_psrav16si_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
+{
+  const __v16si __lhs = (__v16si) __X;
+  const __v16si __rhs = (__v16si) __Y;
+  const __v16si __fill = (__v16si) _mm512_setzero_si512 ();
+  return (__m512i) __builtin_ia32_psrav16si_mask (__lhs, __rhs, __fill,
+                                                  (__mmask16) __U);
+}
+
+/* Forwards __X and __Y to __builtin_ia32_psrlv16si_mask with an undefined
+   third operand and every mask bit set (-1).  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srlv_epi32 (__m512i __X, __m512i __Y)
+{
+  const __v16si __lhs = (__v16si) __X;
+  const __v16si __rhs = (__v16si) __Y;
+  const __v16si __fill = (__v16si) _mm512_undefined_si512 ();
+  return (__m512i) __builtin_ia32_psrlv16si_mask (__lhs, __rhs, __fill,
+                                                  (__mmask16) -1);
+}
+
+/* Forwards __X, __Y, __W and mask __U, in that order, to
+   __builtin_ia32_psrlv16si_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
+{
+  const __v16si __lhs = (__v16si) __X;
+  const __v16si __rhs = (__v16si) __Y;
+  const __v16si __fill = (__v16si) __W;
+  return (__m512i) __builtin_ia32_psrlv16si_mask (__lhs, __rhs, __fill,
+                                                  (__mmask16) __U);
+}
+
+/* Forwards __X, __Y, an all-zero vector and mask __U to
+   __builtin_ia32_psrlv16si_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
+{
+  const __v16si __lhs = (__v16si) __X;
+  const __v16si __rhs = (__v16si) __Y;
+  const __v16si __fill = (__v16si) _mm512_setzero_si512 ();
+  return (__m512i) __builtin_ia32_psrlv16si_mask (__lhs, __rhs, __fill,
+                                                  (__mmask16) __U);
+}
+
+/* Forwards __A and __B to __builtin_ia32_paddq512_mask with an undefined
+   third operand and every mask bit set (-1).  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_add_epi64 (__m512i __A, __m512i __B)
+{
+  const __v8di __lhs = (__v8di) __A;
+  const __v8di __rhs = (__v8di) __B;
+  const __v8di __fill = (__v8di) _mm512_undefined_si512 ();
+  return (__m512i) __builtin_ia32_paddq512_mask (__lhs, __rhs, __fill,
+                                                 (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sub_epi64 (__m512i __A, __m512i __B)
+{
+  /* Unmasked form of the 64-bit subtract builtin: all 8 mask bits are set
+     and the source operand is left undefined.  The undefined value is
+     taken from the integer helper (_mm512_undefined_si512), as in the other
+     integer intrinsics of this header, rather than from the
+     double-precision one.  */
+  return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
+                                                 (__v8di) __B,
+                                                 (__v8di)
+                                                 _mm512_undefined_si512 (),
+                                                 (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
+{
+  /* Masked form: __W is the source operand, __U the 8-bit lane mask.  */
+  __v8di __src = (__v8di) __W;
+
+  return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
+                                                 (__v8di) __B,
+                                                 __src, (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+  /* Zero-masked form: the source operand is an all-zero vector.  */
+  __v8di __src = (__v8di) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
+                                                 (__v8di) __B,
+                                                 __src, (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sllv_epi64 (__m512i __X, __m512i __Y)
+{
+  /* Unmasked form of the per-lane 64-bit shift builtin: all 8 mask bits
+     are set and the source operand is left undefined.  The undefined value
+     is taken from the integer helper (_mm512_undefined_si512), as in the
+     other integer intrinsics of this header, rather than from the
+     double-precision one.  */
+  return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
+                                                 (__v8di) __Y,
+                                                 (__v8di)
+                                                 _mm512_undefined_si512 (),
+                                                 (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
+{
+  /* Masked form: __W is the source operand, __U the 8-bit lane mask.  */
+  __v8di __src = (__v8di) __W;
+
+  return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
+                                                 (__v8di) __Y,
+                                                 __src, (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
+{
+  /* Zero-masked form: the source operand is an all-zero vector.  */
+  __v8di __src = (__v8di) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
+                                                 (__v8di) __Y,
+                                                 __src, (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srav_epi64 (__m512i __X, __m512i __Y)
+{
+  /* Unmasked form: all 8 mask bits set, source operand left undefined.  */
+  const __mmask8 __k = (__mmask8) -1;
+  __v8di __src = (__v8di) _mm512_undefined_si512 ();
+
+  return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
+                                                 (__v8di) __Y,
+                                                 __src, __k);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
+{
+  /* Masked form: __W is the source operand, __U the 8-bit lane mask.  */
+  __v8di __src = (__v8di) __W;
+
+  return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
+                                                 (__v8di) __Y,
+                                                 __src, (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
+{
+  /* Zero-masked form: the source operand is an all-zero vector.  */
+  __v8di __src = (__v8di) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
+                                                 (__v8di) __Y,
+                                                 __src, (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
+{
+  /* Unmasked form: all 8 mask bits set, source operand left undefined.  */
+  const __mmask8 __k = (__mmask8) -1;
+  __v8di __src = (__v8di) _mm512_undefined_si512 ();
+
+  return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
+                                                 (__v8di) __Y,
+                                                 __src, __k);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
+{
+  /* Masked form: __W is the source operand, __U the 8-bit lane mask.  */
+  __v8di __src = (__v8di) __W;
+
+  return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
+                                                 (__v8di) __Y,
+                                                 __src, (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
+{
+  /* Zero-masked form: the source operand is an all-zero vector.  */
+  __v8di __src = (__v8di) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
+                                                 (__v8di) __Y,
+                                                 __src, (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_add_epi32 (__m512i __A, __m512i __B)
+{
+  /* Unmasked form: all 16 mask bits set, source operand left undefined.  */
+  const __mmask16 __k = (__mmask16) -1;
+  __v16si __src = (__v16si) _mm512_undefined_si512 ();
+
+  return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
+                                                 (__v16si) __B,
+                                                 __src, __k);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
+{
+  /* Masked form: __W is the source operand, __U the 16-bit lane mask.  */
+  __v16si __src = (__v16si) __W;
+
+  return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
+                                                 (__v16si) __B,
+                                                 __src, (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+  /* Zero-masked form: the source operand is an all-zero vector.  */
+  __v16si __src = (__v16si) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
+                                                 (__v16si) __B,
+                                                 __src, (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mul_epi32 (__m512i __X, __m512i __Y)
+{
+  /* Inputs are passed as 16 x 32-bit lanes; the source operand and the
+     result are 8 x 64-bit lanes, hence the 8-bit all-ones mask.  */
+  const __mmask8 __k = (__mmask8) -1;
+  __v8di __src = (__v8di) _mm512_undefined_si512 ();
+
+  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
+                                                  (__v16si) __Y,
+                                                  __src, __k);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
+{
+  /* Masked form: __W is the 64-bit-lane source operand, __M the mask.  */
+  __v8di __src = (__v8di) __W;
+
+  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
+                                                  (__v16si) __Y,
+                                                  __src, __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
+{
+  /* Zero-masked form: the source operand is an all-zero vector.  */
+  __v8di __src = (__v8di) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
+                                                  (__v16si) __Y,
+                                                  __src, __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sub_epi32 (__m512i __A, __m512i __B)
+{
+  /* Unmasked form: all 16 mask bits set, source operand left undefined.  */
+  const __mmask16 __k = (__mmask16) -1;
+  __v16si __src = (__v16si) _mm512_undefined_si512 ();
+
+  return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
+                                                 (__v16si) __B,
+                                                 __src, __k);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
+{
+  /* Masked form: __W is the source operand, __U the 16-bit lane mask.  */
+  __v16si __src = (__v16si) __W;
+
+  return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
+                                                 (__v16si) __B,
+                                                 __src, (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+  /* Zero-masked form: the source operand is an all-zero vector.  */
+  __v16si __src = (__v16si) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
+                                                 (__v16si) __B,
+                                                 __src, (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mul_epu32 (__m512i __X, __m512i __Y)
+{
+  /* Inputs are passed as 16 x 32-bit lanes; the source operand and the
+     result are 8 x 64-bit lanes, hence the 8-bit all-ones mask.  */
+  const __mmask8 __k = (__mmask8) -1;
+  __v8di __src = (__v8di) _mm512_undefined_si512 ();
+
+  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
+                                                   (__v16si) __Y,
+                                                   __src, __k);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
+{
+  /* Masked form: __W is the 64-bit-lane source operand, __M the mask.  */
+  __v8di __src = (__v8di) __W;
+
+  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
+                                                   (__v16si) __Y,
+                                                   __src, __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
+{
+  /* Zero-masked form: the source operand is an all-zero vector.  */
+  __v8di __src = (__v8di) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
+                                                   (__v16si) __Y,
+                                                   __src, __M);
+}
+
+#ifdef __OPTIMIZE__
+/* Inline definitions, used when optimizing; __B is handed to the builtin
+   unchanged.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_slli_epi64 (__m512i __A, unsigned int __B)
+{
+  /* Unmasked form: all 8 mask bits set, source operand left undefined.  */
+  const __mmask8 __k = (__mmask8) -1;
+  __v8di __src = (__v8di) _mm512_undefined_si512 ();
+
+  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
+                                                  __src, __k);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
+                        unsigned int __B)
+{
+  /* Masked form: __W is the source operand, __U the 8-bit lane mask.  */
+  __v8di __src = (__v8di) __W;
+
+  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
+                                                  __src, (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
+{
+  /* Zero-masked form: the source operand is an all-zero vector.  */
+  __v8di __src = (__v8di) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
+                                                  __src, (__mmask8) __U);
+}
+#else
+/* Macro definitions, used when not optimizing; the count C is cast to
+   int.  */
+#define _mm512_slli_epi64(X, C) \
+  ((__m512i) __builtin_ia32_psllqi512_mask ( \
+    (__v8di)(__m512i)(X), (int)(C), \
+    (__v8di)(__m512i)_mm512_undefined_si512 (), \
+    (__mmask8)-1))
+
+#define _mm512_mask_slli_epi64(W, U, X, C) \
+  ((__m512i) __builtin_ia32_psllqi512_mask ( \
+    (__v8di)(__m512i)(X), (int)(C), \
+    (__v8di)(__m512i)(W), \
+    (__mmask8)(U)))
+
+#define _mm512_maskz_slli_epi64(U, X, C) \
+  ((__m512i) __builtin_ia32_psllqi512_mask ( \
+    (__v8di)(__m512i)(X), (int)(C), \
+    (__v8di)(__m512i)_mm512_setzero_si512 (), \
+    (__mmask8)(U)))
+#endif
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sll_epi64 (__m512i __A, __m128i __B)
+{
+  /* The count arrives as a 128-bit vector, passed as 2 x 64-bit lanes.
+     Unmasked form: all 8 mask bits set, source operand left undefined.  */
+  const __mmask8 __k = (__mmask8) -1;
+  __v8di __src = (__v8di) _mm512_undefined_si512 ();
+
+  return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
+                                                 (__v2di) __B,
+                                                 __src, __k);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
+{
+  /* Masked form: __W is the source operand, __U the 8-bit lane mask.  */
+  __v8di __src = (__v8di) __W;
+
+  return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
+                                                 (__v2di) __B,
+                                                 __src, (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
+{
+  /* Zero-masked form: the source operand is an all-zero vector.  */
+  __v8di __src = (__v8di) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
+                                                 (__v2di) __B,
+                                                 __src, (__mmask8) __U);
+}
+
+#ifdef __OPTIMIZE__
+/* Inline definitions, used when optimizing; __B is handed to the builtin
+   unchanged.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srli_epi64 (__m512i __A, unsigned int __B)
+{
+  /* Unmasked form: all 8 mask bits set, source operand left undefined.  */
+  const __mmask8 __k = (__mmask8) -1;
+  __v8di __src = (__v8di) _mm512_undefined_si512 ();
+
+  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
+                                                  __src, __k);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
+                        __m512i __A, unsigned int __B)
+{
+  /* Masked form: __W is the source operand, __U the 8-bit lane mask.  */
+  __v8di __src = (__v8di) __W;
+
+  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
+                                                  __src, (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
+{
+  /* Zero-masked form: the source operand is an all-zero vector.  */
+  __v8di __src = (__v8di) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
+                                                  __src, (__mmask8) __U);
+}
+#else
+/* Macro definitions, used when not optimizing; the count C is cast to
+   int.  */
+#define _mm512_srli_epi64(X, C) \
+  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
+    (__v8di)(__m512i)(X), (int)(C), \
+    (__v8di)(__m512i)_mm512_undefined_si512 (), \
+    (__mmask8)-1))
+
+#define _mm512_mask_srli_epi64(W, U, X, C) \
+  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
+    (__v8di)(__m512i)(X), (int)(C), \
+    (__v8di)(__m512i)(W), \
+    (__mmask8)(U)))
+
+#define _mm512_maskz_srli_epi64(U, X, C) \
+  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
+    (__v8di)(__m512i)(X), (int)(C), \
+    (__v8di)(__m512i)_mm512_setzero_si512 (), \
+    (__mmask8)(U)))
+#endif
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srl_epi64 (__m512i __A, __m128i __B)
+{
+  /* The count arrives as a 128-bit vector, passed as 2 x 64-bit lanes.
+     Unmasked form: all 8 mask bits set, source operand left undefined.  */
+  const __mmask8 __k = (__mmask8) -1;
+  __v8di __src = (__v8di) _mm512_undefined_si512 ();
+
+  return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
+                                                 (__v2di) __B,
+                                                 __src, __k);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
+{
+  /* Masked form: __W is the source operand, __U the 8-bit lane mask.  */
+  __v8di __src = (__v8di) __W;
+
+  return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
+                                                 (__v2di) __B,
+                                                 __src, (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
+{
+  /* Zero-masked form: the source operand is an all-zero vector.  */
+  __v8di __src = (__v8di) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
+                                                 (__v2di) __B,
+                                                 __src, (__mmask8) __U);
+}
+
+#ifdef __OPTIMIZE__
+/* Inline definitions, used when optimizing; __B is handed to the builtin
+   unchanged.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srai_epi64 (__m512i __A, unsigned int __B)
+{
+  /* Unmasked form: all 8 mask bits set, source operand left undefined.  */
+  const __mmask8 __k = (__mmask8) -1;
+  __v8di __src = (__v8di) _mm512_undefined_si512 ();
+
+  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
+                                                  __src, __k);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
+                        unsigned int __B)
+{
+  /* Masked form: __W is the source operand, __U the 8-bit lane mask.  */
+  __v8di __src = (__v8di) __W;
+
+  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
+                                                  __src, (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
+{
+  /* Zero-masked form: the source operand is an all-zero vector.  */
+  __v8di __src = (__v8di) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
+                                                  __src, (__mmask8) __U);
+}
+#else
+/* Macro definitions, used when not optimizing; the count C is cast to
+   int.  */
+#define _mm512_srai_epi64(X, C) \
+  ((__m512i) __builtin_ia32_psraqi512_mask ( \
+    (__v8di)(__m512i)(X), (int)(C), \
+    (__v8di)(__m512i)_mm512_undefined_si512 (), \
+    (__mmask8)-1))
+
+#define _mm512_mask_srai_epi64(W, U, X, C) \
+  ((__m512i) __builtin_ia32_psraqi512_mask ( \
+    (__v8di)(__m512i)(X), (int)(C), \
+    (__v8di)(__m512i)(W), \
+    (__mmask8)(U)))
+
+#define _mm512_maskz_srai_epi64(U, X, C) \
+  ((__m512i) __builtin_ia32_psraqi512_mask ( \
+    (__v8di)(__m512i)(X), (int)(C), \
+    (__v8di)(__m512i)_mm512_setzero_si512 (), \
+    (__mmask8)(U)))
+#endif
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sra_epi64 (__m512i __A, __m128i __B)
+{
+  /* The count arrives as a 128-bit vector, passed as 2 x 64-bit lanes.
+     Unmasked form: all 8 mask bits set, source operand left undefined.  */
+  const __mmask8 __k = (__mmask8) -1;
+  __v8di __src = (__v8di) _mm512_undefined_si512 ();
+
+  return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
+                                                 (__v2di) __B,
+                                                 __src, __k);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
+{
+  /* Masked form: __W is the source operand, __U the 8-bit lane mask.  */
+  __v8di __src = (__v8di) __W;
+
+  return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
+                                                 (__v2di) __B,
+                                                 __src, (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
+{
+  /* Zero-masked form: the source operand is an all-zero vector.  */
+  __v8di __src = (__v8di) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
+                                                 (__v2di) __B,
+                                                 __src, (__mmask8) __U);
+}
+
+#ifdef __OPTIMIZE__
+/* Inline definitions, used when optimizing; __B is handed to the builtin
+   unchanged.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_slli_epi32 (__m512i __A, unsigned int __B)
+{
+  /* Unmasked form: all 16 mask bits set, source operand left undefined.  */
+  const __mmask16 __k = (__mmask16) -1;
+  __v16si __src = (__v16si) _mm512_undefined_si512 ();
+
+  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
+                                                  __src, __k);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
+                        unsigned int __B)
+{
+  /* Masked form: __W is the source operand, __U the 16-bit lane mask.  */
+  __v16si __src = (__v16si) __W;
+
+  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
+                                                  __src, (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
+{
+  /* Zero-masked form: the source operand is an all-zero vector.  */
+  __v16si __src = (__v16si) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
+                                                  __src, (__mmask16) __U);
+}
+#else
+/* Macro definitions, used when not optimizing; the count C is cast to
+   int.  */
+#define _mm512_slli_epi32(X, C) \
+  ((__m512i) __builtin_ia32_pslldi512_mask ( \
+    (__v16si)(__m512i)(X), (int)(C), \
+    (__v16si)(__m512i)_mm512_undefined_si512 (), \
+    (__mmask16)-1))
+
+#define _mm512_mask_slli_epi32(W, U, X, C) \
+  ((__m512i) __builtin_ia32_pslldi512_mask ( \
+    (__v16si)(__m512i)(X), (int)(C), \
+    (__v16si)(__m512i)(W), \
+    (__mmask16)(U)))
+
+#define _mm512_maskz_slli_epi32(U, X, C) \
+  ((__m512i) __builtin_ia32_pslldi512_mask ( \
+    (__v16si)(__m512i)(X), (int)(C), \
+    (__v16si)(__m512i)_mm512_setzero_si512 (), \
+    (__mmask16)(U)))
+#endif
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sll_epi32 (__m512i __A, __m128i __B)
+{
+  /* The count arrives as a 128-bit vector, passed as 4 x 32-bit lanes.
+     Unmasked form: all 16 mask bits set, source operand left undefined.  */
+  const __mmask16 __k = (__mmask16) -1;
+  __v16si __src = (__v16si) _mm512_undefined_si512 ();
+
+  return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
+                                                 (__v4si) __B,
+                                                 __src, __k);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
+{
+  /* Masked form: __W is the source operand, __U the 16-bit lane mask.  */
+  __v16si __src = (__v16si) __W;
+
+  return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
+                                                 (__v4si) __B,
+                                                 __src, (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
+{
+  /* Zero-masked form: the source operand is an all-zero vector.  */
+  __v16si __src = (__v16si) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
+                                                 (__v4si) __B,
+                                                 __src, (__mmask16) __U);
+}
+
+#ifdef __OPTIMIZE__
+/* Inline definitions, used when optimizing; __B is handed to the builtin
+   unchanged.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srli_epi32 (__m512i __A, unsigned int __B)
+{
+  /* Unmasked form: all 16 mask bits set, source operand left undefined.  */
+  const __mmask16 __k = (__mmask16) -1;
+  __v16si __src = (__v16si) _mm512_undefined_si512 ();
+
+  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
+                                                  __src, __k);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
+                        __m512i __A, unsigned int __B)
+{
+  /* Masked form: __W is the source operand, __U the 16-bit lane mask.  */
+  __v16si __src = (__v16si) __W;
+
+  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
+                                                  __src, (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
+{
+  /* Zero-masked form: the source operand is an all-zero vector.  */
+  __v16si __src = (__v16si) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
+                                                  __src, (__mmask16) __U);
+}
+#else
+/* Macro definitions, used when not optimizing; the count C is cast to
+   int.  */
+#define _mm512_srli_epi32(X, C) \
+  ((__m512i) __builtin_ia32_psrldi512_mask ( \
+    (__v16si)(__m512i)(X), (int)(C), \
+    (__v16si)(__m512i)_mm512_undefined_si512 (), \
+    (__mmask16)-1))
+
+#define _mm512_mask_srli_epi32(W, U, X, C) \
+  ((__m512i) __builtin_ia32_psrldi512_mask ( \
+    (__v16si)(__m512i)(X), (int)(C), \
+    (__v16si)(__m512i)(W), \
+    (__mmask16)(U)))
+
+#define _mm512_maskz_srli_epi32(U, X, C) \
+  ((__m512i) __builtin_ia32_psrldi512_mask ( \
+    (__v16si)(__m512i)(X), (int)(C), \
+    (__v16si)(__m512i)_mm512_setzero_si512 (), \
+    (__mmask16)(U)))
+#endif
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srl_epi32 (__m512i __A, __m128i __B)
+{
+  /* The count arrives as a 128-bit vector, passed as 4 x 32-bit lanes.
+     Unmasked form: all 16 mask bits set, source operand left undefined.  */
+  const __mmask16 __k = (__mmask16) -1;
+  __v16si __src = (__v16si) _mm512_undefined_si512 ();
+
+  return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
+                                                 (__v4si) __B,
+                                                 __src, __k);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
+{
+  /* Masked form: __W is the source operand, __U the 16-bit lane mask.  */
+  __v16si __src = (__v16si) __W;
+
+  return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
+                                                 (__v4si) __B,
+                                                 __src, (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
+{
+  /* Zero-masked form: the source operand is an all-zero vector.  */
+  __v16si __src = (__v16si) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
+                                                 (__v4si) __B,
+                                                 __src, (__mmask16) __U);
+}
+
+#ifdef __OPTIMIZE__
+/* Inline definitions, used when optimizing; __B is handed to the builtin
+   unchanged.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srai_epi32 (__m512i __A, unsigned int __B)
+{
+  /* Unmasked form: all 16 mask bits set, source operand left undefined.  */
+  const __mmask16 __k = (__mmask16) -1;
+  __v16si __src = (__v16si) _mm512_undefined_si512 ();
+
+  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
+                                                  __src, __k);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
+                        unsigned int __B)
+{
+  /* Masked form: __W is the source operand, __U the 16-bit lane mask.  */
+  __v16si __src = (__v16si) __W;
+
+  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
+                                                  __src, (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
+{
+  /* Zero-masked form: the source operand is an all-zero vector.  */
+  __v16si __src = (__v16si) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
+                                                  __src, (__mmask16) __U);
+}
+#else
+/* Macro definitions, used when not optimizing; the count C is cast to
+   int.  */
+#define _mm512_srai_epi32(X, C) \
+  ((__m512i) __builtin_ia32_psradi512_mask ( \
+    (__v16si)(__m512i)(X), (int)(C), \
+    (__v16si)(__m512i)_mm512_undefined_si512 (), \
+    (__mmask16)-1))
+
+#define _mm512_mask_srai_epi32(W, U, X, C) \
+  ((__m512i) __builtin_ia32_psradi512_mask ( \
+    (__v16si)(__m512i)(X), (int)(C), \
+    (__v16si)(__m512i)(W), \
+    (__mmask16)(U)))
+
+#define _mm512_maskz_srai_epi32(U, X, C) \
+  ((__m512i) __builtin_ia32_psradi512_mask ( \
+    (__v16si)(__m512i)(X), (int)(C), \
+    (__v16si)(__m512i)_mm512_setzero_si512 (), \
+    (__mmask16)(U)))
+#endif
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sra_epi32 (__m512i __A, __m128i __B)
+{
+  /* The count arrives as a 128-bit vector, passed as 4 x 32-bit lanes.
+     Unmasked form: all 16 mask bits set, source operand left undefined.  */
+  const __mmask16 __k = (__mmask16) -1;
+  __v16si __src = (__v16si) _mm512_undefined_si512 ();
+
+  return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
+                                                 (__v4si) __B,
+                                                 __src, __k);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
+{
+  /* Masked form: __W is the source operand, __U the 16-bit lane mask.  */
+  __v16si __src = (__v16si) __W;
+
+  return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
+                                                 (__v4si) __B,
+                                                 __src, (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
+{
+  /* Zero-masked form: the source operand is an all-zero vector.  */
+  __v16si __src = (__v16si) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
+                                                 (__v4si) __B,
+                                                 __src, (__mmask16) __U);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+  /* Operands go to the builtin in declaration order; __R is forwarded
+     unchanged as the final argument.  */
+  __v2df __lhs = (__v2df) __A;
+  __v2df __rhs = (__v2df) __B;
+
+  return (__m128d) __builtin_ia32_addsd_round (__lhs, __rhs, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+  /* Single-precision counterpart of _mm_add_round_sd.  */
+  __v4sf __lhs = (__v4sf) __A;
+  __v4sf __rhs = (__v4sf) __B;
+
+  return (__m128) __builtin_ia32_addss_round (__lhs, __rhs, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+  /* Operands go to the builtin in declaration order; __R is forwarded
+     unchanged as the final argument.  */
+  __v2df __lhs = (__v2df) __A;
+  __v2df __rhs = (__v2df) __B;
+
+  return (__m128d) __builtin_ia32_subsd_round (__lhs, __rhs, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+  /* Single-precision counterpart of _mm_sub_round_sd.  */
+  __v4sf __lhs = (__v4sf) __A;
+  __v4sf __rhs = (__v4sf) __B;
+
+  return (__m128) __builtin_ia32_subss_round (__lhs, __rhs, __R);
+}
+
+#else
+/* Macro forms of the scalar add/sub rounding intrinsics, used when
+   __OPTIMIZE__ is not defined.  Each expansion is wrapped in parentheses so
+   that it behaves as a single primary expression: without them a postfix
+   operator written after the macro call would bind to the builtin call
+   before the cast is applied.  Arguments are parenthesized as well.  */
+#define _mm_add_round_sd(A, B, C) \
+  ((__m128d) __builtin_ia32_addsd_round ((A), (B), (C)))
+
+#define _mm_add_round_ss(A, B, C) \
+  ((__m128) __builtin_ia32_addss_round ((A), (B), (C)))
+
+#define _mm_sub_round_sd(A, B, C) \
+  ((__m128d) __builtin_ia32_subsd_round ((A), (B), (C)))
+
+#define _mm_sub_round_ss(A, B, C) \
+  ((__m128) __builtin_ia32_subss_round ((A), (B), (C)))
+#endif
+
+#ifdef __OPTIMIZE__
+/* Inline definitions of the ternary-logic intrinsics, used when optimizing.
+   The immediate parameter is spelled __imm (a reserved identifier) so that
+   a user macro named `imm' defined before this header is included cannot
+   break these definitions.  The immediate is forwarded unchanged as the
+   fourth builtin argument.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
+                           const int __imm)
+{
+  /* Unmasked form: all 8 mask bits set.  */
+  return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
+                                                     (__v8di) __B,
+                                                     (__v8di) __C, __imm,
+                                                     (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
+                                __m512i __C, const int __imm)
+{
+  /* Masked form: same builtin as the unmasked form, with lane mask __U.  */
+  return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
+                                                     (__v8di) __B,
+                                                     (__v8di) __C, __imm,
+                                                     (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
+                                 __m512i __C, const int __imm)
+{
+  /* Zero-masked form: uses the dedicated _maskz builtin.  */
+  return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
+                                                      (__v8di) __B,
+                                                      (__v8di) __C,
+                                                      __imm, (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
+                           const int __imm)
+{
+  /* Unmasked form: all 16 mask bits set.  */
+  return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
+                                                     (__v16si) __B,
+                                                     (__v16si) __C,
+                                                     __imm, (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
+                                __m512i __C, const int __imm)
+{
+  /* Masked form: same builtin as the unmasked form, with lane mask __U.  */
+  return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
+                                                     (__v16si) __B,
+                                                     (__v16si) __C,
+                                                     __imm, (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
+                                 __m512i __C, const int __imm)
+{
+  /* Zero-masked form: uses the dedicated _maskz builtin.  */
+  return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
+                                                      (__v16si) __B,
+                                                      (__v16si) __C,
+                                                      __imm, (__mmask16) __U);
+}
+#else
+/* Macro definitions, used when not optimizing.  */
+#define _mm512_ternarylogic_epi64(A, B, C, I) \
+  ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
+    (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)-1))
+#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) \
+  ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
+    (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
+#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) \
+  ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)(__m512i)(A), \
+    (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
+#define _mm512_ternarylogic_epi32(A, B, C, I) \
+  ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
+    (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
+    (__mmask16)-1))
+#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \
+  ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
+    (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
+    (__mmask16)(U)))
+#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) \
+  ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)(__m512i)(A), \
+    (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
+    (__mmask16)(U)))
+#endif
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rcp14_pd (__m512d __A)
+{
+  /* Unmasked form: all 8 mask bits set, source operand left undefined.  */
+  const __mmask8 __k = (__mmask8) -1;
+  __v8df __src = (__v8df) _mm512_undefined_pd ();
+
+  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, __src, __k);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+  /* Masked form: __W is the source operand, __U the 8-bit lane mask.  */
+  __v8df __src = (__v8df) __W;
+
+  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, __src,
+                                                   (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
+{
+  /* Zero-masked form: the source operand is an all-zero vector.  */
+  __v8df __src = (__v8df) _mm512_setzero_pd ();
+
+  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, __src,
+                                                   (__mmask8) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rcp14_ps (__m512 __A)
+{
+  /* Unmasked form: all 16 mask bits set, source operand left undefined.  */
+  const __mmask16 __k = (__mmask16) -1;
+  __v16sf __src = (__v16sf) _mm512_undefined_ps ();
+
+  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, __src, __k);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+  /* Masked form: __W is the source operand, __U the 16-bit lane mask.  */
+  __v16sf __src = (__v16sf) __W;
+
+  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, __src,
+                                                  (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
+{
+  /* Zero-masked form: the source operand is an all-zero vector.  */
+  __v16sf __src = (__v16sf) _mm512_setzero_ps ();
+
+  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, __src,
+                                                  (__mmask16) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp14_sd (__m128d __A, __m128d __B)
+{
+  /* Note the operand order: the builtin receives __B first, then __A.  */
+  __v2df __first = (__v2df) __B;
+  __v2df __second = (__v2df) __A;
+
+  return (__m128d) __builtin_ia32_rcp14sd (__first, __second);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp14_ss (__m128 __A, __m128 __B)
+{
+  /* Note the operand order: the builtin receives __B first, then __A.  */
+  __v4sf __first = (__v4sf) __B;
+  __v4sf __second = (__v4sf) __A;
+
+  return (__m128) __builtin_ia32_rcp14ss (__first, __second);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rsqrt14_pd (__m512d __A)
+{
+  /* Unmasked form: all 8 mask bits set, source operand left undefined.  */
+  const __mmask8 __k = (__mmask8) -1;
+  __v8df __src = (__v8df) _mm512_undefined_pd ();
+
+  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, __src,
+                                                     __k);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+  /* Masked form: __W is the source operand, __U the 8-bit lane mask.  */
+  __v8df __src = (__v8df) __W;
+
+  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, __src,
+                                                     (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
+{
+  /* Zero-masked form: the source operand is an all-zero vector.  */
+  __v8df __src = (__v8df) _mm512_setzero_pd ();
+
+  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, __src,
+                                                     (__mmask8) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rsqrt14_ps (__m512 __A)
+{
+  /* Unmasked form: all 16 mask bits set, source operand left undefined.  */
+  const __mmask16 __k = (__mmask16) -1;
+  __v16sf __src = (__v16sf) _mm512_undefined_ps ();
+
+  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, __src,
+                                                    __k);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+  /* Masked form: __W is the source operand, __U the 16-bit lane mask.  */
+  __v16sf __src = (__v16sf) __W;
+
+  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, __src,
+                                                    (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
+{
+  /* Zero-masked form: the source operand is an all-zero vector.  */
+  __v16sf __src = (__v16sf) _mm512_setzero_ps ();
+
+  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, __src,
+                                                    (__mmask16) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt14_sd (__m128d __A, __m128d __B)
+{
+  /* Note the operand order: the builtin receives __B first, then __A.  */
+  __v2df __first = (__v2df) __B;
+  __v2df __second = (__v2df) __A;
+
+  return (__m128d) __builtin_ia32_rsqrt14sd (__first, __second);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt14_ss (__m128 __A, __m128 __B)
+{
+  /* Note the operand order: the builtin receives __B first, then __A.  */
+  __v4sf __first = (__v4sf) __B;
+  __v4sf __second = (__v4sf) __A;
+
+  return (__m128) __builtin_ia32_rsqrt14ss (__first, __second);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sqrt_round_pd (__m512d __A, const int __R)
+{
+ return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
+ (__v8df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
+{
+ return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sqrt_round_ps (__m512 __A, const int __R)
+{
+ return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
+{
+ return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
+ (__v16sf) __W,
+ (__mmask16) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
+{
+  /* Second operand is the result of _mm512_setzero_ps (); __U is the mask.  */
+  __v16sf __src = (__v16sf) __A;
+  __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+  return (__m512) __builtin_ia32_sqrtps512_mask (__src, __zero,
+    (__mmask16) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+  /* The builtin receives the vector operands swapped: __B first, then __A.  */
+  __v2df __first = (__v2df) __B;
+  __v2df __second = (__v2df) __A;
+  return (__m128d) __builtin_ia32_sqrtsd_round (__first, __second, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+  /* The builtin receives the vector operands swapped: __B first, then __A.  */
+  __v4sf __first = (__v4sf) __B;
+  __v4sf __second = (__v4sf) __A;
+  return (__m128) __builtin_ia32_sqrtss_round (__first, __second, __R);
+}
+#else
+/* Macro forms used when __OPTIMIZE__ is not defined.  Each expansion is
+   wrapped in parentheses so it acts as a single primary expression (a
+   postfix operator after the macro applies to the cast result, not to the
+   builtin call).  */
+#define _mm512_sqrt_round_pd(A, C) \
+ ((__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, C))
+
+#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
+ ((__m512d)__builtin_ia32_sqrtpd512_mask(A, W, U, C))
+
+#define _mm512_maskz_sqrt_round_pd(U, A, C) \
+ ((__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), U, C))
+
+#define _mm512_sqrt_round_ps(A, C) \
+ ((__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_undefined_ps(), -1, C))
+
+#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
+ ((__m512)__builtin_ia32_sqrtps512_mask(A, W, U, C))
+
+#define _mm512_maskz_sqrt_round_ps(U, A, C) \
+ ((__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), U, C))
+
+/* The inline definitions of these two intrinsics (used when __OPTIMIZE__ is
+   defined) hand the vector operands to the builtin as (__B, __A).  The macro
+   forms must use the same order, otherwise the result depends on the
+   optimization level.  Expansions are parenthesized to act as one
+   expression.  */
+#define _mm_sqrt_round_sd(A, B, C) \
+ ((__m128d)__builtin_ia32_sqrtsd_round(B, A, C))
+
+#define _mm_sqrt_round_ss(A, B, C) \
+ ((__m128)__builtin_ia32_sqrtss_round(B, A, C))
+#endif
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi8_epi32 (__m128i __A)
+{
+  /* Unmasked: all-ones mask, second operand from _mm512_undefined_si512 ().  */
+  __v16si __undef = (__v16si) _mm512_undefined_si512 ();
+  return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A, __undef,
+    (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
+{
+  /* __W is the builtin's second operand and __U its mask.  */
+  __v16qi __src = (__v16qi) __A;
+  __v16si __keep = (__v16si) __W;
+  return (__m512i) __builtin_ia32_pmovsxbd512_mask (__src, __keep,
+    (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
+{
+  /* Second operand is the result of _mm512_setzero_si512 (); __U is the mask.  */
+  __v16si __zero = (__v16si) _mm512_setzero_si512 ();
+  return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A, __zero,
+    (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi8_epi64 (__m128i __A)
+{
+  /* Unmasked: all-ones mask, second operand from _mm512_undefined_si512 ().  */
+  __v8di __undef = (__v8di) _mm512_undefined_si512 ();
+  return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A, __undef,
+    (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
+{
+  /* __W is the builtin's second operand and __U its mask.  */
+  __v16qi __src = (__v16qi) __A;
+  __v8di __keep = (__v8di) __W;
+  return (__m512i) __builtin_ia32_pmovsxbq512_mask (__src, __keep,
+    (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
+{
+  /* Second operand is the result of _mm512_setzero_si512 (); __U is the mask.  */
+  __v8di __zero = (__v8di) _mm512_setzero_si512 ();
+  return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A, __zero,
+    (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi16_epi32 (__m256i __A)
+{
+  /* Unmasked: all-ones mask, second operand from _mm512_undefined_si512 ().  */
+  __v16si __undef = (__v16si) _mm512_undefined_si512 ();
+  return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A, __undef,
+    (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
+{
+  /* __W is the builtin's second operand and __U its mask.  */
+  __v16hi __src = (__v16hi) __A;
+  __v16si __keep = (__v16si) __W;
+  return (__m512i) __builtin_ia32_pmovsxwd512_mask (__src, __keep,
+    (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
+{
+  /* Second operand is the result of _mm512_setzero_si512 (); __U is the mask.  */
+  __v16si __zero = (__v16si) _mm512_setzero_si512 ();
+  return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A, __zero,
+    (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi16_epi64 (__m128i __A)
+{
+  /* Unmasked: all-ones mask, second operand from _mm512_undefined_si512 ().  */
+  __v8di __undef = (__v8di) _mm512_undefined_si512 ();
+  return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A, __undef,
+    (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
+{
+  /* __W is the builtin's second operand and __U its mask.  */
+  __v8hi __src = (__v8hi) __A;
+  __v8di __keep = (__v8di) __W;
+  return (__m512i) __builtin_ia32_pmovsxwq512_mask (__src, __keep,
+    (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
+{
+  /* Second operand is the result of _mm512_setzero_si512 (); __U is the mask.  */
+  __v8di __zero = (__v8di) _mm512_setzero_si512 ();
+  return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A, __zero,
+    (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi32_epi64 (__m256i __X)
+{
+  /* Unmasked: all-ones mask, second operand from _mm512_undefined_si512 ().  */
+  __v8di __undef = (__v8di) _mm512_undefined_si512 ();
+  return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X, __undef,
+    (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
+{
+  /* __W is the builtin's second operand and __U its mask.  */
+  __v8si __src = (__v8si) __X;
+  __v8di __keep = (__v8di) __W;
+  return (__m512i) __builtin_ia32_pmovsxdq512_mask (__src, __keep,
+    (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
+{
+  /* Second operand is the result of _mm512_setzero_si512 (); __U is the mask.  */
+  __v8di __zero = (__v8di) _mm512_setzero_si512 ();
+  return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X, __zero,
+    (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepu8_epi32 (__m128i __A)
+{
+  /* Unmasked: all-ones mask, second operand from _mm512_undefined_si512 ().  */
+  __v16si __undef = (__v16si) _mm512_undefined_si512 ();
+  return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A, __undef,
+    (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
+{
+  /* __W is the builtin's second operand and __U its mask.  */
+  __v16qi __src = (__v16qi) __A;
+  __v16si __keep = (__v16si) __W;
+  return (__m512i) __builtin_ia32_pmovzxbd512_mask (__src, __keep,
+    (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
+{
+  /* Second operand is the result of _mm512_setzero_si512 (); __U is the mask.  */
+  __v16si __zero = (__v16si) _mm512_setzero_si512 ();
+  return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A, __zero,
+    (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepu8_epi64 (__m128i __A)
+{
+  /* Unmasked: all-ones mask, second operand from _mm512_undefined_si512 ().  */
+  __v8di __undef = (__v8di) _mm512_undefined_si512 ();
+  return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A, __undef,
+    (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
+{
+  /* __W is the builtin's second operand and __U its mask.  */
+  __v16qi __src = (__v16qi) __A;
+  __v8di __keep = (__v8di) __W;
+  return (__m512i) __builtin_ia32_pmovzxbq512_mask (__src, __keep,
+    (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
+{
+  /* Second operand is the result of _mm512_setzero_si512 (); __U is the mask.  */
+  __v8di __zero = (__v8di) _mm512_setzero_si512 ();
+  return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A, __zero,
+    (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepu16_epi32 (__m256i __A)
+{
+  /* Unmasked: all-ones mask, second operand from _mm512_undefined_si512 ().  */
+  __v16si __undef = (__v16si) _mm512_undefined_si512 ();
+  return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A, __undef,
+    (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
+{
+  /* __W is the builtin's second operand and __U its mask.  */
+  __v16hi __src = (__v16hi) __A;
+  __v16si __keep = (__v16si) __W;
+  return (__m512i) __builtin_ia32_pmovzxwd512_mask (__src, __keep,
+    (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
+{
+  /* Second operand is the result of _mm512_setzero_si512 (); __U is the mask.  */
+  __v16si __zero = (__v16si) _mm512_setzero_si512 ();
+  return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A, __zero,
+    (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepu16_epi64 (__m128i __A)
+{
+  /* Unmasked: all-ones mask, second operand from _mm512_undefined_si512 ().  */
+  __v8di __undef = (__v8di) _mm512_undefined_si512 ();
+  return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A, __undef,
+    (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
+{
+  /* __W is the builtin's second operand and __U its mask.  */
+  __v8hi __src = (__v8hi) __A;
+  __v8di __keep = (__v8di) __W;
+  return (__m512i) __builtin_ia32_pmovzxwq512_mask (__src, __keep,
+    (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
+{
+  /* Second operand is the result of _mm512_setzero_si512 (); __U is the mask.  */
+  __v8di __zero = (__v8di) _mm512_setzero_si512 ();
+  return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A, __zero,
+    (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepu32_epi64 (__m256i __X)
+{
+  /* Unmasked: all-ones mask, second operand from _mm512_undefined_si512 ().  */
+  __v8di __undef = (__v8di) _mm512_undefined_si512 ();
+  return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X, __undef,
+    (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
+{
+  /* __W is the builtin's second operand and __U its mask.  */
+  __v8si __src = (__v8si) __X;
+  __v8di __keep = (__v8di) __W;
+  return (__m512i) __builtin_ia32_pmovzxdq512_mask (__src, __keep,
+    (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
+{
+  /* Second operand is the result of _mm512_setzero_si512 (); __U is the mask.  */
+  __v8di __zero = (__v8di) _mm512_setzero_si512 ();
+  return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X, __zero,
+    (__mmask8) __U);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
+{
+  /* Unmasked: all-ones mask, third operand from _mm512_undefined_pd ();
+     __R is handed to the builtin unchanged.  */
+  __v8df __undef = (__v8df) _mm512_undefined_pd ();
+  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, (__v8df) __B,
+    __undef, (__mmask8) -1, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B,
+  const int __R)
+{
+  /* __W is the builtin's third operand and __U its mask.  */
+  __v8df __lhs = (__v8df) __A;
+  __v8df __rhs = (__v8df) __B;
+  return (__m512d) __builtin_ia32_addpd512_mask (__lhs, __rhs, (__v8df) __W,
+    (__mmask8) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B, const int __R)
+{
+  /* Third operand is the result of _mm512_setzero_pd (); __U is the mask.  */
+  __v8df __zero = (__v8df) _mm512_setzero_pd ();
+  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, (__v8df) __B,
+    __zero, (__mmask8) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
+{
+  /* Unmasked: all-ones mask, third operand from _mm512_undefined_ps ();
+     __R is handed to the builtin unchanged.  */
+  __v16sf __undef = (__v16sf) _mm512_undefined_ps ();
+  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, (__v16sf) __B,
+    __undef, (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B,
+  const int __R)
+{
+  /* __W is the builtin's third operand and __U its mask.  */
+  __v16sf __lhs = (__v16sf) __A;
+  __v16sf __rhs = (__v16sf) __B;
+  return (__m512) __builtin_ia32_addps512_mask (__lhs, __rhs, (__v16sf) __W,
+    (__mmask16) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
+{
+  /* Third operand is the result of _mm512_setzero_ps (); __U is the mask.  */
+  __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, (__v16sf) __B,
+    __zero, (__mmask16) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
+{
+  /* Unmasked: all-ones mask, third operand from _mm512_undefined_pd ();
+     __R is handed to the builtin unchanged.  */
+  __v8df __undef = (__v8df) _mm512_undefined_pd ();
+  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, (__v8df) __B,
+    __undef, (__mmask8) -1, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B,
+  const int __R)
+{
+  /* __W is the builtin's third operand and __U its mask.  */
+  __v8df __lhs = (__v8df) __A;
+  __v8df __rhs = (__v8df) __B;
+  return (__m512d) __builtin_ia32_subpd512_mask (__lhs, __rhs, (__v8df) __W,
+    (__mmask8) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B, const int __R)
+{
+  /* Third operand is the result of _mm512_setzero_pd (); __U is the mask.  */
+  __v8df __zero = (__v8df) _mm512_setzero_pd ();
+  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, (__v8df) __B,
+    __zero, (__mmask8) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
+{
+  /* Unmasked: all-ones mask, third operand from _mm512_undefined_ps ();
+     __R is handed to the builtin unchanged.  */
+  __v16sf __undef = (__v16sf) _mm512_undefined_ps ();
+  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, (__v16sf) __B,
+    __undef, (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B,
+  const int __R)
+{
+  /* __W is the builtin's third operand and __U its mask.  */
+  __v16sf __lhs = (__v16sf) __A;
+  __v16sf __rhs = (__v16sf) __B;
+  return (__m512) __builtin_ia32_subps512_mask (__lhs, __rhs, (__v16sf) __W,
+    (__mmask16) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
+{
+  /* Third operand is the result of _mm512_setzero_ps (); __U is the mask.  */
+  __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, (__v16sf) __B,
+    __zero, (__mmask16) __U, __R);
+}
+#else
+/* Macro forms used when __OPTIMIZE__ is not defined.  Each expansion is
+   wrapped in parentheses so it acts as a single primary expression (a
+   postfix operator after the macro applies to the cast result, not to the
+   builtin call).  */
+#define _mm512_add_round_pd(A, B, C) \
+ ((__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))
+
+#define _mm512_mask_add_round_pd(W, U, A, B, C) \
+ ((__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C))
+
+#define _mm512_maskz_add_round_pd(U, A, B, C) \
+ ((__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
+
+#define _mm512_add_round_ps(A, B, C) \
+ ((__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))
+
+#define _mm512_mask_add_round_ps(W, U, A, B, C) \
+ ((__m512)__builtin_ia32_addps512_mask(A, B, W, U, C))
+
+#define _mm512_maskz_add_round_ps(U, A, B, C) \
+ ((__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
+
+#define _mm512_sub_round_pd(A, B, C) \
+ ((__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))
+
+#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
+ ((__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C))
+
+#define _mm512_maskz_sub_round_pd(U, A, B, C) \
+ ((__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
+
+#define _mm512_sub_round_ps(A, B, C) \
+ ((__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))
+
+#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
+ ((__m512)__builtin_ia32_subps512_mask(A, B, W, U, C))
+
+#define _mm512_maskz_sub_round_ps(U, A, B, C) \
+ ((__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
+#endif
+
+#ifdef __OPTIMIZE__
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
+{
+  /* Unmasked: all-ones mask, third operand from _mm512_undefined_pd ();
+     __R is handed to the builtin unchanged.  */
+  __v8df __undef = (__v8df) _mm512_undefined_pd ();
+  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, (__v8df) __B,
+    __undef, (__mmask8) -1, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B,
+  const int __R)
+{
+  /* __W is the builtin's third operand and __U its mask.  */
+  __v8df __lhs = (__v8df) __A;
+  __v8df __rhs = (__v8df) __B;
+  return (__m512d) __builtin_ia32_mulpd512_mask (__lhs, __rhs, (__v8df) __W,
+    (__mmask8) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B, const int __R)
+{
+  /* Third operand is the result of _mm512_setzero_pd (); __U is the mask.  */
+  __v8df __zero = (__v8df) _mm512_setzero_pd ();
+  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, (__v8df) __B,
+    __zero, (__mmask8) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
+{
+  /* Unmasked: all-ones mask, third operand from _mm512_undefined_ps ();
+     __R is handed to the builtin unchanged.  */
+  __v16sf __undef = (__v16sf) _mm512_undefined_ps ();
+  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, (__v16sf) __B,
+    __undef, (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B,
+  const int __R)
+{
+  /* __W is the builtin's third operand and __U its mask.  */
+  __v16sf __lhs = (__v16sf) __A;
+  __v16sf __rhs = (__v16sf) __B;
+  return (__m512) __builtin_ia32_mulps512_mask (__lhs, __rhs, (__v16sf) __W,
+    (__mmask16) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
+{
+  /* Third operand is the result of _mm512_setzero_ps (); __U is the mask.  */
+  __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, (__v16sf) __B,
+    __zero, (__mmask16) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
+{
+  /* Unmasked: all-ones mask, third operand from _mm512_undefined_pd ();
+     __M and __V reach the builtin in that order.  */
+  __v8df __undef = (__v8df) _mm512_undefined_pd ();
+  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, (__v8df) __V,
+    __undef, (__mmask8) -1, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V,
+  const int __R)
+{
+  /* __W is the builtin's third operand and __U its mask.  */
+  __v8df __lhs = (__v8df) __M;
+  __v8df __rhs = (__v8df) __V;
+  return (__m512d) __builtin_ia32_divpd512_mask (__lhs, __rhs, (__v8df) __W,
+    (__mmask8) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V, const int __R)
+{
+  /* Third operand is the result of _mm512_setzero_pd (); __U is the mask.  */
+  __v8df __zero = (__v8df) _mm512_setzero_pd ();
+  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, (__v8df) __V,
+    __zero, (__mmask8) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
+{
+  /* Unmasked: all-ones mask, third operand from _mm512_undefined_ps ();
+     __R is handed to the builtin unchanged.  */
+  __v16sf __undef = (__v16sf) _mm512_undefined_ps ();
+  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, (__v16sf) __B,
+    __undef, (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B,
+  const int __R)
+{
+  /* __W is the builtin's third operand and __U its mask.  */
+  __v16sf __lhs = (__v16sf) __A;
+  __v16sf __rhs = (__v16sf) __B;
+  return (__m512) __builtin_ia32_divps512_mask (__lhs, __rhs, (__v16sf) __W,
+    (__mmask16) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
+{
+  /* Third operand is the result of _mm512_setzero_ps (); __U is the mask.  */
+  __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, (__v16sf) __B,
+    __zero, (__mmask16) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+  /* Operands reach the builtin in declaration order: __A, __B, __R.  */
+  __v2df __lhs = (__v2df) __A;
+  __v2df __rhs = (__v2df) __B;
+  return (__m128d) __builtin_ia32_mulsd_round (__lhs, __rhs, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+  /* Operands reach the builtin in declaration order: __A, __B, __R.  */
+  __v4sf __lhs = (__v4sf) __A;
+  __v4sf __rhs = (__v4sf) __B;
+  return (__m128) __builtin_ia32_mulss_round (__lhs, __rhs, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+  /* Operands reach the builtin in declaration order: __A, __B, __R.  */
+  __v2df __lhs = (__v2df) __A;
+  __v2df __rhs = (__v2df) __B;
+  return (__m128d) __builtin_ia32_divsd_round (__lhs, __rhs, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+  /* Operands reach the builtin in declaration order: __A, __B, __R.  */
+  __v4sf __lhs = (__v4sf) __A;
+  __v4sf __rhs = (__v4sf) __B;
+  return (__m128) __builtin_ia32_divss_round (__lhs, __rhs, __R);
+}
+
+#else
+/* Macro forms used when __OPTIMIZE__ is not defined.  Each expansion is
+   wrapped in parentheses so it acts as a single primary expression (a
+   postfix operator after the macro applies to the cast result, not to the
+   builtin call).  */
+#define _mm512_mul_round_pd(A, B, C) \
+ ((__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))
+
+#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
+ ((__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C))
+
+#define _mm512_maskz_mul_round_pd(U, A, B, C) \
+ ((__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
+
+#define _mm512_mul_round_ps(A, B, C) \
+ ((__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))
+
+#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
+ ((__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C))
+
+#define _mm512_maskz_mul_round_ps(U, A, B, C) \
+ ((__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
+
+#define _mm512_div_round_pd(A, B, C) \
+ ((__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))
+
+#define _mm512_mask_div_round_pd(W, U, A, B, C) \
+ ((__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C))
+
+#define _mm512_maskz_div_round_pd(U, A, B, C) \
+ ((__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
+
+#define _mm512_div_round_ps(A, B, C) \
+ ((__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))
+
+#define _mm512_mask_div_round_ps(W, U, A, B, C) \
+ ((__m512)__builtin_ia32_divps512_mask(A, B, W, U, C))
+
+#define _mm512_maskz_div_round_ps(U, A, B, C) \
+ ((__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
+
+#define _mm_mul_round_sd(A, B, C) \
+ ((__m128d)__builtin_ia32_mulsd_round(A, B, C))
+
+#define _mm_mul_round_ss(A, B, C) \
+ ((__m128)__builtin_ia32_mulss_round(A, B, C))
+
+#define _mm_div_round_sd(A, B, C) \
+ ((__m128d)__builtin_ia32_divsd_round(A, B, C))
+
+#define _mm_div_round_ss(A, B, C) \
+ ((__m128)__builtin_ia32_divss_round(A, B, C))
+#endif
+
+#ifdef __OPTIMIZE__
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
+{
+  /* Unmasked: all-ones mask, third operand from _mm512_undefined_pd ();
+     __R is handed to the builtin unchanged.  */
+  __v8df __undef = (__v8df) _mm512_undefined_pd ();
+  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, (__v8df) __B,
+    __undef, (__mmask8) -1, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B,
+  const int __R)
+{
+  /* __W is the builtin's third operand and __U its mask.  */
+  __v8df __lhs = (__v8df) __A;
+  __v8df __rhs = (__v8df) __B;
+  return (__m512d) __builtin_ia32_maxpd512_mask (__lhs, __rhs, (__v8df) __W,
+    (__mmask8) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B, const int __R)
+{
+  /* Third operand is the result of _mm512_setzero_pd (); __U is the mask.  */
+  __v8df __zero = (__v8df) _mm512_setzero_pd ();
+  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, (__v8df) __B,
+    __zero, (__mmask8) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
+{
+  /* Unmasked: all-ones mask, third operand from _mm512_undefined_ps ();
+     __R is handed to the builtin unchanged.  */
+  __v16sf __undef = (__v16sf) _mm512_undefined_ps ();
+  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, (__v16sf) __B,
+    __undef, (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B,
+  const int __R)
+{
+  /* __W is the builtin's third operand and __U its mask.  */
+  __v16sf __lhs = (__v16sf) __A;
+  __v16sf __rhs = (__v16sf) __B;
+  return (__m512) __builtin_ia32_maxps512_mask (__lhs, __rhs, (__v16sf) __W,
+    (__mmask16) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
+{
+  /* Third operand is the result of _mm512_setzero_ps (); __U is the mask.  */
+  __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, (__v16sf) __B,
+    __zero, (__mmask16) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
+{
+  /* Unmasked: all-ones mask, third operand from _mm512_undefined_pd ();
+     __R is handed to the builtin unchanged.  */
+  __v8df __undef = (__v8df) _mm512_undefined_pd ();
+  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, (__v8df) __B,
+    __undef, (__mmask8) -1, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B,
+  const int __R)
+{
+  /* __W is the builtin's third operand and __U its mask.  */
+  __v8df __lhs = (__v8df) __A;
+  __v8df __rhs = (__v8df) __B;
+  return (__m512d) __builtin_ia32_minpd512_mask (__lhs, __rhs, (__v8df) __W,
+    (__mmask8) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B, const int __R)
+{
+  /* Third operand is the result of _mm512_setzero_pd (); __U is the mask.  */
+  __v8df __zero = (__v8df) _mm512_setzero_pd ();
+  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, (__v8df) __B,
+    __zero, (__mmask8) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
+{
+  /* Unmasked: all-ones mask, third operand from _mm512_undefined_ps ();
+     __R is handed to the builtin unchanged.  */
+  __v16sf __undef = (__v16sf) _mm512_undefined_ps ();
+  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, (__v16sf) __B,
+    __undef, (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B,
+  const int __R)
+{
+  /* __W is the builtin's third operand and __U its mask.  */
+  __v16sf __lhs = (__v16sf) __A;
+  __v16sf __rhs = (__v16sf) __B;
+  return (__m512) __builtin_ia32_minps512_mask (__lhs, __rhs, (__v16sf) __W,
+    (__mmask16) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
+{
+  /* Third operand is the result of _mm512_setzero_ps (); __U is the mask.  */
+  __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, (__v16sf) __B,
+    __zero, (__mmask16) __U, __R);
+}
+#else
+/* Macro forms used when __OPTIMIZE__ is not defined.  Each expansion is
+   wrapped in parentheses so it acts as a single primary expression (a
+   postfix operator after the macro applies to the cast result, not to the
+   builtin call).  */
+#define _mm512_max_round_pd(A, B, R) \
+ ((__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R))
+
+#define _mm512_mask_max_round_pd(W, U, A, B, R) \
+ ((__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R))
+
+#define _mm512_maskz_max_round_pd(U, A, B, R) \
+ ((__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R))
+
+/* Single-precision form: the third operand comes from _mm512_undefined_ps (),
+   the same helper the inline definition of this intrinsic uses, rather than
+   the double-precision _mm512_undefined_pd ().  The expansion is
+   parenthesized so it acts as one expression.  */
+#define _mm512_max_round_ps(A, B, R) \
+ ((__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R))
+
+/* Macro forms used when __OPTIMIZE__ is not defined.  Each expansion is
+   wrapped in parentheses so it acts as a single primary expression (a
+   postfix operator after the macro applies to the cast result, not to the
+   builtin call).  */
+#define _mm512_mask_max_round_ps(W, U, A, B, R) \
+ ((__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R))
+
+#define _mm512_maskz_max_round_ps(U, A, B, R) \
+ ((__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R))
+
+#define _mm512_min_round_pd(A, B, R) \
+ ((__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R))
+
+#define _mm512_mask_min_round_pd(W, U, A, B, R) \
+ ((__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R))
+
+#define _mm512_maskz_min_round_pd(U, A, B, R) \
+ ((__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R))
+
+#define _mm512_min_round_ps(A, B, R) \
+ ((__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R))
+
+#define _mm512_mask_min_round_ps(W, U, A, B, R) \
+ ((__m512)__builtin_ia32_minps512_mask(A, B, W, U, R))
+
+#define _mm512_maskz_min_round_ps(U, A, B, R) \
+ ((__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R))
+#endif
+
+#ifdef __OPTIMIZE__
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
+{
+  /* Unmasked: all-ones mask, third operand from _mm512_undefined_pd ();
+     __R is handed to the builtin unchanged.  */
+  __v8df __undef = (__v8df) _mm512_undefined_pd ();
+  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, (__v8df) __B,
+    __undef, (__mmask8) -1, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
+  __m512d __B, const int __R)
+{
+  /* __W is the builtin's third operand and __U its mask.  */
+  __v8df __lhs = (__v8df) __A;
+  __v8df __rhs = (__v8df) __B;
+  return (__m512d) __builtin_ia32_scalefpd512_mask (__lhs, __rhs, (__v8df) __W,
+    (__mmask8) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
+  const int __R)
+{
+  /* Third operand is the result of _mm512_setzero_pd (); __U is the mask.  */
+  __v8df __zero = (__v8df) _mm512_setzero_pd ();
+  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, (__v8df) __B,
+    __zero, (__mmask8) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
+{
+  /* Unmasked: all-ones mask, third operand from _mm512_undefined_ps ();
+     __R is handed to the builtin unchanged.  */
+  __v16sf __undef = (__v16sf) _mm512_undefined_ps ();
+  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, (__v16sf) __B,
+    __undef, (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
+  __m512 __B, const int __R)
+{
+  /* __W is the builtin's third operand and __U its mask.  */
+  __v16sf __lhs = (__v16sf) __A;
+  __v16sf __rhs = (__v16sf) __B;
+  return (__m512) __builtin_ia32_scalefps512_mask (__lhs, __rhs, (__v16sf) __W,
+    (__mmask16) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+  const int __R)
+{
+  /* Third operand is the result of _mm512_setzero_ps (); __U is the mask.  */
+  __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, (__v16sf) __B,
+    __zero, (__mmask16) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+  /* Operands reach the builtin in declaration order: __A, __B, __R.  */
+  __v2df __lhs = (__v2df) __A;
+  __v2df __rhs = (__v2df) __B;
+  return (__m128d) __builtin_ia32_scalefsd_round (__lhs, __rhs, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+  /* Operands reach the builtin in declaration order: __A, __B, __R.  */
+  __v4sf __lhs = (__v4sf) __A;
+  __v4sf __rhs = (__v4sf) __B;
+  return (__m128) __builtin_ia32_scalefss_round (__lhs, __rhs, __R);
+}
+#else
+/* Macro forms used when __OPTIMIZE__ is not defined.  Each expansion is
+   wrapped in parentheses so it acts as a single primary expression (a
+   postfix operator after the macro applies to the cast result, not to the
+   builtin call).  */
+#define _mm512_scalef_round_pd(A, B, C) \
+ ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))
+
+#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
+ ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C))
+
+#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
+ ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
+
+#define _mm512_scalef_round_ps(A, B, C) \
+ ((__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))
+
+#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
+ ((__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C))
+
+#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
+ ((__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
+
+#define _mm_scalef_round_sd(A, B, C) \
+ ((__m128d)__builtin_ia32_scalefsd_round(A, B, C))
+
+#define _mm_scalef_round_ss(A, B, C) \
+ ((__m128)__builtin_ia32_scalefss_round(A, B, C))
+#endif
+
+#ifdef __OPTIMIZE__
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
+{
+  /* Unmasked: the three vectors go to the _mask builtin with an all-ones
+     mask; __R is handed over unchanged.  */
+  __v8df __x = (__v8df) __A;
+  __v8df __y = (__v8df) __B;
+  __v8df __z = (__v8df) __C;
+  return (__m512d) __builtin_ia32_vfmaddpd512_mask (__x, __y, __z,
+    (__mmask8) -1, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C,
+  const int __R)
+{
+  /* Uses the _mask builtin; __U sits between __A and __B in the parameter
+     list but is passed as the builtin's fourth argument.  */
+  __v8df __x = (__v8df) __A;
+  __v8df __y = (__v8df) __B;
+  __v8df __z = (__v8df) __C;
+  return (__m512d) __builtin_ia32_vfmaddpd512_mask (__x, __y, __z,
+    (__mmask8) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U,
+  const int __R)
+{
+  /* Uses the _mask3 builtin with the arguments in declaration order.  */
+  __v8df __x = (__v8df) __A;
+  __v8df __y = (__v8df) __B;
+  __v8df __z = (__v8df) __C;
+  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (__x, __y, __z,
+    (__mmask8) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C,
+  const int __R)
+{
+  /* Uses the _maskz builtin; __U leads the parameter list but is passed as
+     the builtin's fourth argument.  */
+  __v8df __x = (__v8df) __A;
+  __v8df __y = (__v8df) __B;
+  __v8df __z = (__v8df) __C;
+  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (__x, __y, __z,
+    (__mmask8) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
+{
+  /* Unmasked: the three vectors go to the _mask builtin with an all-ones
+     mask; __R is handed over unchanged.  */
+  __v16sf __x = (__v16sf) __A;
+  __v16sf __y = (__v16sf) __B;
+  __v16sf __z = (__v16sf) __C;
+  return (__m512) __builtin_ia32_vfmaddps512_mask (__x, __y, __z,
+    (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C,
+  const int __R)
+{
+  /* Uses the _mask builtin; __U sits between __A and __B in the parameter
+     list but is passed as the builtin's fourth argument.  */
+  __v16sf __x = (__v16sf) __A;
+  __v16sf __y = (__v16sf) __B;
+  __v16sf __z = (__v16sf) __C;
+  return (__m512) __builtin_ia32_vfmaddps512_mask (__x, __y, __z,
+    (__mmask16) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
+ __mmask16 __U, const int __R)
+{
+  /* Forwards to the "_mask3" flavour of the vfmaddps512 builtin.  */
+  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A, (__v16sf) __B,
+    (__v16sf) __C, __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ __m512 __C, const int __R)
+{
+  /* Forwards to the "_maskz" flavour of the vfmaddps512 builtin.  */
+  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, (__v16sf) __B,
+    (__v16sf) __C, __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
+{
+  /* Expressed through the vfmaddpd512 builtin by negating __C; all-ones
+     mask.  */
+  const __v8df __neg_c = -(__v8df) __C;
+  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, (__v8df) __B,
+    __neg_c, (__mmask8) -1, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
+ __m512d __C, const int __R)
+{
+  /* vfmaddpd512 "_mask" builtin with __C negated and the caller's mask.  */
+  const __v8df __neg_c = -(__v8df) __C;
+  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, (__v8df) __B,
+    __neg_c, __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
+ __mmask8 __U, const int __R)
+{
+  /* Uses the dedicated vfmsubpd512 "_mask3" builtin; __C is passed as-is.  */
+  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A, (__v8df) __B,
+    (__v8df) __C, __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ __m512d __C, const int __R)
+{
+  /* vfmaddpd512 "_maskz" builtin with __C negated.  */
+  const __v8df __neg_c = -(__v8df) __C;
+  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, (__v8df) __B,
+    __neg_c, __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
+{
+  /* Expressed through the vfmaddps512 builtin by negating __C; all-ones
+     mask.  */
+  const __v16sf __neg_c = -(__v16sf) __C;
+  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, (__v16sf) __B,
+    __neg_c, (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
+ __m512 __C, const int __R)
+{
+  /* vfmaddps512 "_mask" builtin with __C negated and the caller's mask.  */
+  const __v16sf __neg_c = -(__v16sf) __C;
+  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, (__v16sf) __B,
+    __neg_c, __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
+ __mmask16 __U, const int __R)
+{
+  /* Uses the dedicated vfmsubps512 "_mask3" builtin; __C is passed as-is.  */
+  return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A, (__v16sf) __B,
+    (__v16sf) __C, __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ __m512 __C, const int __R)
+{
+  /* vfmaddps512 "_maskz" builtin with __C negated.  */
+  const __v16sf __neg_c = -(__v16sf) __C;
+  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, (__v16sf) __B,
+    __neg_c, __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
+{
+  /* vfmaddsubpd512 "_mask" builtin called with an all-ones mask.  */
+  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, (__v8df) __B,
+    (__v8df) __C, (__mmask8) -1, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
+ __m512d __C, const int __R)
+{
+  /* vfmaddsubpd512 "_mask" builtin with the caller's mask __U.  */
+  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, (__v8df) __B,
+    (__v8df) __C, __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
+ __mmask8 __U, const int __R)
+{
+  /* Forwards to the "_mask3" flavour of the vfmaddsubpd512 builtin.  */
+  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A, (__v8df) __B,
+    (__v8df) __C, __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ __m512d __C, const int __R)
+{
+  /* Forwards to the "_maskz" flavour of the vfmaddsubpd512 builtin.  */
+  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, (__v8df) __B,
+    (__v8df) __C, __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
+{
+  /* vfmaddsubps512 "_mask" builtin called with an all-ones mask.  */
+  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, (__v16sf) __B,
+    (__v16sf) __C, (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
+ __m512 __C, const int __R)
+{
+  /* vfmaddsubps512 "_mask" builtin with the caller's mask __U.  */
+  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, (__v16sf) __B,
+    (__v16sf) __C, __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
+ __mmask16 __U, const int __R)
+{
+  /* Forwards to the "_mask3" flavour of the vfmaddsubps512 builtin.  */
+  return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A, (__v16sf) __B,
+    (__v16sf) __C, __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ __m512 __C, const int __R)
+{
+  /* Forwards to the "_maskz" flavour of the vfmaddsubps512 builtin.  */
+  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, (__v16sf) __B,
+    (__v16sf) __C, __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
+{
+  /* Expressed through the vfmaddsubpd512 builtin by negating __C; all-ones
+     mask.  */
+  const __v8df __neg_c = -(__v8df) __C;
+  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, (__v8df) __B,
+    __neg_c, (__mmask8) -1, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
+ __m512d __C, const int __R)
+{
+  /* vfmaddsubpd512 "_mask" builtin with __C negated and the caller's mask.  */
+  const __v8df __neg_c = -(__v8df) __C;
+  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, (__v8df) __B,
+    __neg_c, __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
+ __mmask8 __U, const int __R)
+{
+  /* Uses the dedicated vfmsubaddpd512 "_mask3" builtin; __C is passed
+     as-is.  */
+  return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A, (__v8df) __B,
+    (__v8df) __C, __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ __m512d __C, const int __R)
+{
+  /* vfmaddsubpd512 "_maskz" builtin with __C negated.  */
+  const __v8df __neg_c = -(__v8df) __C;
+  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, (__v8df) __B,
+    __neg_c, __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
+{
+  /* Expressed through the vfmaddsubps512 builtin by negating __C; all-ones
+     mask.  */
+  const __v16sf __neg_c = -(__v16sf) __C;
+  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, (__v16sf) __B,
+    __neg_c, (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
+ __m512 __C, const int __R)
+{
+  /* vfmaddsubps512 "_mask" builtin with __C negated and the caller's mask.  */
+  const __v16sf __neg_c = -(__v16sf) __C;
+  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, (__v16sf) __B,
+    __neg_c, __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
+ __mmask16 __U, const int __R)
+{
+  /* Uses the dedicated vfmsubaddps512 "_mask3" builtin; __C is passed
+     as-is.  */
+  return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A, (__v16sf) __B,
+    (__v16sf) __C, __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ __m512 __C, const int __R)
+{
+  /* vfmaddsubps512 "_maskz" builtin with __C negated.  */
+  const __v16sf __neg_c = -(__v16sf) __C;
+  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, (__v16sf) __B,
+    __neg_c, __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
+{
+  /* Expressed through the vfmaddpd512 builtin by negating __A; all-ones
+     mask.  */
+  const __v8df __neg_a = -(__v8df) __A;
+  return (__m512d) __builtin_ia32_vfmaddpd512_mask (__neg_a, (__v8df) __B,
+    (__v8df) __C, (__mmask8) -1, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
+ __m512d __C, const int __R)
+{
+  /* Uses the dedicated vfnmaddpd512 "_mask" builtin; __A is passed
+     un-negated.  */
+  return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A, (__v8df) __B,
+    (__v8df) __C, __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
+ __mmask8 __U, const int __R)
+{
+  /* vfmaddpd512 "_mask3" builtin with __A negated.  */
+  const __v8df __neg_a = -(__v8df) __A;
+  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (__neg_a, (__v8df) __B,
+    (__v8df) __C, __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ __m512d __C, const int __R)
+{
+  /* vfmaddpd512 "_maskz" builtin with __A negated.  */
+  const __v8df __neg_a = -(__v8df) __A;
+  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (__neg_a, (__v8df) __B,
+    (__v8df) __C, __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
+{
+  /* Expressed through the vfmaddps512 builtin by negating __A; all-ones
+     mask.  */
+  const __v16sf __neg_a = -(__v16sf) __A;
+  return (__m512) __builtin_ia32_vfmaddps512_mask (__neg_a, (__v16sf) __B,
+    (__v16sf) __C, (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
+ __m512 __C, const int __R)
+{
+  /* Uses the dedicated vfnmaddps512 "_mask" builtin; __A is passed
+     un-negated.  */
+  return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A, (__v16sf) __B,
+    (__v16sf) __C, __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
+ __mmask16 __U, const int __R)
+{
+  /* vfmaddps512 "_mask3" builtin with __A negated.  */
+  const __v16sf __neg_a = -(__v16sf) __A;
+  return (__m512) __builtin_ia32_vfmaddps512_mask3 (__neg_a, (__v16sf) __B,
+    (__v16sf) __C, __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ __m512 __C, const int __R)
+{
+  /* vfmaddps512 "_maskz" builtin with __A negated.  */
+  const __v16sf __neg_a = -(__v16sf) __A;
+  return (__m512) __builtin_ia32_vfmaddps512_maskz (__neg_a, (__v16sf) __B,
+    (__v16sf) __C, __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
+{
+  /* Expressed through the vfmaddpd512 builtin by negating both __A and
+     __C; all-ones mask.  */
+  const __v8df __neg_a = -(__v8df) __A;
+  const __v8df __neg_c = -(__v8df) __C;
+  return (__m512d) __builtin_ia32_vfmaddpd512_mask (__neg_a, (__v8df) __B,
+    __neg_c, (__mmask8) -1, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
+ __m512d __C, const int __R)
+{
+  /* Uses the dedicated vfnmsubpd512 "_mask" builtin; operands are passed
+     un-negated.  */
+  return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A, (__v8df) __B,
+    (__v8df) __C, __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
+ __mmask8 __U, const int __R)
+{
+  /* Uses the dedicated vfnmsubpd512 "_mask3" builtin; operands are passed
+     un-negated.  */
+  return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A, (__v8df) __B,
+    (__v8df) __C, __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ __m512d __C, const int __R)
+{
+  /* vfmaddpd512 "_maskz" builtin with both __A and __C negated.  */
+  const __v8df __neg_a = -(__v8df) __A;
+  const __v8df __neg_c = -(__v8df) __C;
+  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (__neg_a, (__v8df) __B,
+    __neg_c, __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
+{
+  /* Expressed through the vfmaddps512 builtin by negating both __A and
+     __C; all-ones mask.  */
+  const __v16sf __neg_a = -(__v16sf) __A;
+  const __v16sf __neg_c = -(__v16sf) __C;
+  return (__m512) __builtin_ia32_vfmaddps512_mask (__neg_a, (__v16sf) __B,
+    __neg_c, (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
+ __m512 __C, const int __R)
+{
+  /* Uses the dedicated vfnmsubps512 "_mask" builtin; operands are passed
+     un-negated.  */
+  return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A, (__v16sf) __B,
+    (__v16sf) __C, __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
+ __mmask16 __U, const int __R)
+{
+  /* Uses the dedicated vfnmsubps512 "_mask3" builtin; operands are passed
+     un-negated.  */
+  return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A, (__v16sf) __B,
+    (__v16sf) __C, __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ __m512 __C, const int __R)
+{
+  /* vfmaddps512 "_maskz" builtin with both __A and __C negated.  */
+  const __v16sf __neg_a = -(__v16sf) __A;
+  const __v16sf __neg_c = -(__v16sf) __C;
+  return (__m512) __builtin_ia32_vfmaddps512_maskz (__neg_a, (__v16sf) __B,
+    __neg_c, __U, __R);
+}
+#else
+/* Macro forms of the fmadd_round_pd intrinsics, used when __OPTIMIZE__ is
+   not defined.  Each expands straight to the matching vfmaddpd512 builtin;
+   the unmasked form passes -1 as the mask.  */
+#define _mm512_fmadd_round_pd(A, B, C, R) \
+  (__m512d) __builtin_ia32_vfmaddpd512_mask ((A), (B), (C), -1, (R))
+
+#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
+  (__m512d) __builtin_ia32_vfmaddpd512_mask ((A), (B), (C), (U), (R))
+
+#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
+  (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((A), (B), (C), (U), (R))
+
+#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
+  (__m512d) __builtin_ia32_vfmaddpd512_maskz ((A), (B), (C), (U), (R))
+
+/* Macro forms of the fmadd_round_ps intrinsics, used when __OPTIMIZE__ is
+   not defined.  Each expands straight to the matching vfmaddps512 builtin;
+   the unmasked form passes -1 as the mask.  */
+#define _mm512_fmadd_round_ps(A, B, C, R) \
+  (__m512) __builtin_ia32_vfmaddps512_mask ((A), (B), (C), -1, (R))
+
+#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
+  (__m512) __builtin_ia32_vfmaddps512_mask ((A), (B), (C), (U), (R))
+
+#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
+  (__m512) __builtin_ia32_vfmaddps512_mask3 ((A), (B), (C), (U), (R))
+
+#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
+  (__m512) __builtin_ia32_vfmaddps512_maskz ((A), (B), (C), (U), (R))
+
+/* Macro forms of the fmsub_round_pd intrinsics, used when __OPTIMIZE__ is
+   not defined.  All but the mask3 form reuse a vfmaddpd512 builtin with C
+   negated; mask3 expands to the vfmsubpd512 "_mask3" builtin with C as-is.  */
+#define _mm512_fmsub_round_pd(A, B, C, R) \
+  (__m512d) __builtin_ia32_vfmaddpd512_mask ((A), (B), -(C), -1, (R))
+
+#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
+  (__m512d) __builtin_ia32_vfmaddpd512_mask ((A), (B), -(C), (U), (R))
+
+#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
+  (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((A), (B), (C), (U), (R))
+
+#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
+  (__m512d) __builtin_ia32_vfmaddpd512_maskz ((A), (B), -(C), (U), (R))
+
+/* Macro forms of the fmsub_round_ps intrinsics, used when __OPTIMIZE__ is
+   not defined.  All but the mask3 form reuse a vfmaddps512 builtin with C
+   negated; mask3 expands to the vfmsubps512 "_mask3" builtin with C as-is.  */
+#define _mm512_fmsub_round_ps(A, B, C, R) \
+  (__m512) __builtin_ia32_vfmaddps512_mask ((A), (B), -(C), -1, (R))
+
+#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
+  (__m512) __builtin_ia32_vfmaddps512_mask ((A), (B), -(C), (U), (R))
+
+#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
+  (__m512) __builtin_ia32_vfmsubps512_mask3 ((A), (B), (C), (U), (R))
+
+#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
+  (__m512) __builtin_ia32_vfmaddps512_maskz ((A), (B), -(C), (U), (R))
+
+/* Macro form used when __OPTIMIZE__ is not defined: vfmaddsubpd512 "_mask"
+   builtin with -1 passed as the mask.  */
+#define _mm512_fmaddsub_round_pd(A, B, C, R) \
+  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((A), (B), (C), -1, (R))
+
+/* Macro form of the masked fmaddsub_round_pd intrinsic, used when
+   __OPTIMIZE__ is not defined.  It must expand to the vfmaddsubpd512
+   builtin, as the inline definition of the same intrinsic and the ps macro
+   do; expanding to the plain vfmaddpd512 builtin made non-optimized builds
+   compute a different operation than optimized ones.  */
+#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
+ (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, U, R)
+
+/* Macro forms used when __OPTIMIZE__ is not defined: the "_mask3" and
+   "_maskz" flavours of the vfmaddsubpd512 builtin.  */
+#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
+  (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((A), (B), (C), (U), (R))
+
+#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
+  (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((A), (B), (C), (U), (R))
+
+/* Macro forms of the fmaddsub_round_ps intrinsics, used when __OPTIMIZE__
+   is not defined.  Each expands straight to the matching vfmaddsubps512
+   builtin; the unmasked form passes -1 as the mask.  */
+#define _mm512_fmaddsub_round_ps(A, B, C, R) \
+  (__m512) __builtin_ia32_vfmaddsubps512_mask ((A), (B), (C), -1, (R))
+
+#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
+  (__m512) __builtin_ia32_vfmaddsubps512_mask ((A), (B), (C), (U), (R))
+
+#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
+  (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((A), (B), (C), (U), (R))
+
+#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
+  (__m512) __builtin_ia32_vfmaddsubps512_maskz ((A), (B), (C), (U), (R))
+
+/* Macro forms of the fmsubadd_round_pd intrinsics, used when __OPTIMIZE__
+   is not defined.  All but the mask3 form reuse a vfmaddsubpd512 builtin
+   with C negated; mask3 expands to the vfmsubaddpd512 "_mask3" builtin with
+   C as-is.  */
+#define _mm512_fmsubadd_round_pd(A, B, C, R) \
+  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((A), (B), -(C), -1, (R))
+
+#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
+  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((A), (B), -(C), (U), (R))
+
+#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
+  (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((A), (B), (C), (U), (R))
+
+#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
+  (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((A), (B), -(C), (U), (R))
+
+/* Macro forms of the fmsubadd_round_ps intrinsics, used when __OPTIMIZE__
+   is not defined.  All but the mask3 form reuse a vfmaddsubps512 builtin
+   with C negated; mask3 expands to the vfmsubaddps512 "_mask3" builtin with
+   C as-is.  */
+#define _mm512_fmsubadd_round_ps(A, B, C, R) \
+  (__m512) __builtin_ia32_vfmaddsubps512_mask ((A), (B), -(C), -1, (R))
+
+#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
+  (__m512) __builtin_ia32_vfmaddsubps512_mask ((A), (B), -(C), (U), (R))
+
+#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
+  (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((A), (B), (C), (U), (R))
+
+#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
+  (__m512) __builtin_ia32_vfmaddsubps512_maskz ((A), (B), -(C), (U), (R))
+
+/* Macro form used when __OPTIMIZE__ is not defined: vfmaddpd512 "_mask"
+   builtin with A negated and -1 passed as the mask.  */
+#define _mm512_fnmadd_round_pd(A, B, C, R) \
+  (__m512d) __builtin_ia32_vfmaddpd512_mask (-(A), (B), (C), -1, (R))
+
+/* Macro form of the masked fnmadd_round_pd intrinsic, used when
+   __OPTIMIZE__ is not defined.  A is handed to the dedicated vfnmaddpd512
+   builtin unchanged, exactly as the inline definition of the same intrinsic
+   does.  Passing -(A) to this builtin, as the macro previously did, gave
+   non-optimized builds a different first operand than optimized ones.  */
+#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
+ (__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, U, R)
+
+/* Macro forms used when __OPTIMIZE__ is not defined: the "_mask3" and
+   "_maskz" flavours of the vfmaddpd512 builtin, with A negated.  */
+#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
+  (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(A), (B), (C), (U), (R))
+
+#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
+  (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(A), (B), (C), (U), (R))
+
+/* Macro form used when __OPTIMIZE__ is not defined: vfmaddps512 "_mask"
+   builtin with A negated and -1 passed as the mask.  */
+#define _mm512_fnmadd_round_ps(A, B, C, R) \
+  (__m512) __builtin_ia32_vfmaddps512_mask (-(A), (B), (C), -1, (R))
+
+/* Macro form of the masked fnmadd_round_ps intrinsic, used when
+   __OPTIMIZE__ is not defined.  A is handed to the dedicated vfnmaddps512
+   builtin unchanged, exactly as the inline definition of the same intrinsic
+   does.  Passing -(A) to this builtin, as the macro previously did, gave
+   non-optimized builds a different first operand than optimized ones.  */
+#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
+ (__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, U, R)
+
+/* Macro forms used when __OPTIMIZE__ is not defined: the "_mask3" and
+   "_maskz" flavours of the vfmaddps512 builtin, with A negated.  */
+#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
+  (__m512) __builtin_ia32_vfmaddps512_mask3 (-(A), (B), (C), (U), (R))
+
+#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
+  (__m512) __builtin_ia32_vfmaddps512_maskz (-(A), (B), (C), (U), (R))
+
+/* Macro forms of the fnmsub_round_pd intrinsics, used when __OPTIMIZE__ is
+   not defined.  The unmasked and maskz forms reuse a vfmaddpd512 builtin
+   with A and C negated; the mask and mask3 forms expand to the dedicated
+   vfnmsubpd512 builtins with operands as-is.  */
+#define _mm512_fnmsub_round_pd(A, B, C, R) \
+  (__m512d) __builtin_ia32_vfmaddpd512_mask (-(A), (B), -(C), -1, (R))
+
+#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
+  (__m512d) __builtin_ia32_vfnmsubpd512_mask ((A), (B), (C), (U), (R))
+
+#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
+  (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((A), (B), (C), (U), (R))
+
+#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
+  (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(A), (B), -(C), (U), (R))
+
+/* Macro forms of the fnmsub_round_ps intrinsics, used when __OPTIMIZE__ is
+   not defined.  The unmasked and maskz forms reuse a vfmaddps512 builtin
+   with A and C negated; the mask and mask3 forms expand to the dedicated
+   vfnmsubps512 builtins with operands as-is.  */
+#define _mm512_fnmsub_round_ps(A, B, C, R) \
+  (__m512) __builtin_ia32_vfmaddps512_mask (-(A), (B), -(C), -1, (R))
+
+#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
+  (__m512) __builtin_ia32_vfnmsubps512_mask ((A), (B), (C), (U), (R))
+
+#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
+  (__m512) __builtin_ia32_vfnmsubps512_mask3 ((A), (B), (C), (U), (R))
+
+#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
+  (__m512) __builtin_ia32_vfmaddps512_maskz (-(A), (B), -(C), (U), (R))
+#endif
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_abs_epi64 (__m512i __A)
+{
+  /* pabsq512 builtin with an all-ones mask; the second operand is an
+     undefined vector.  */
+  const __v8di __undef = (__v8di) _mm512_undefined_si512 ();
+  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, __undef,
+    (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
+{
+  /* pabsq512 builtin with __W as the second operand and __U as the mask.  */
+  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, (__v8di) __W, __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
+{
+  /* pabsq512 builtin with a zero vector as the second operand.  */
+  const __v8di __zero = (__v8di) _mm512_setzero_si512 ();
+  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, __zero, __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_abs_epi32 (__m512i __A)
+{
+  /* pabsd512 builtin with an all-ones mask; the second operand is an
+     undefined vector.  */
+  const __v16si __undef = (__v16si) _mm512_undefined_si512 ();
+  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, __undef,
+    (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
+{
+  /* pabsd512 builtin with __W as the second operand and __U as the mask.  */
+  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, (__v16si) __W, __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
+{
+  /* pabsd512 builtin with a zero vector as the second operand.  */
+  const __v16si __zero = (__v16si) _mm512_setzero_si512 ();
+  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, __zero, __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcastss_ps (__m128 __A)
+{
+  /* broadcastss512 builtin with an all-ones mask; the second operand is an
+     undefined vector.  */
+  const __v16sf __undef = (__v16sf) _mm512_undefined_ps ();
+  return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A, __undef,
+    (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
+{
+  /* broadcastss512 builtin with __O as the second operand and __M as the
+     mask.  */
+  const __v4sf __src = (__v4sf) __A;
+  return (__m512) __builtin_ia32_broadcastss512 (__src, (__v16sf) __O, __M);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
+{
+  /* broadcastss512 builtin with a zero vector as the second operand.  */
+  const __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+  return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A, __zero, __M);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcastsd_pd (__m128d __A)
+{
+  /* broadcastsd512 builtin with an all-ones mask; the second operand is an
+     undefined vector.  */
+  const __v8df __undef = (__v8df) _mm512_undefined_pd ();
+  return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A, __undef,
+    (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
+{
+  /* broadcastsd512 builtin with __O as the second operand and __M as the
+     mask.  */
+  const __v2df __src = (__v2df) __A;
+  return (__m512d) __builtin_ia32_broadcastsd512 (__src, (__v8df) __O, __M);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
+{
+  /* broadcastsd512 builtin with a zero vector as the second operand.  */
+  const __v8df __zero = (__v8df) _mm512_setzero_pd ();
+  return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A, __zero, __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcastd_epi32 (__m128i __A)
+{
+  /* pbroadcastd512 builtin with an all-ones mask; the second operand is an
+     undefined vector.  */
+  const __v16si __undef = (__v16si) _mm512_undefined_si512 ();
+  return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, __undef,
+    (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
+{
+  /* pbroadcastd512 builtin with __O as the second operand and __M as the
+     mask.  */
+  const __v4si __src = (__v4si) __A;
+  return (__m512i) __builtin_ia32_pbroadcastd512 (__src, (__v16si) __O, __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
+{
+  /* pbroadcastd512 builtin with a zero vector as the second operand.  */
+  const __v16si __zero = (__v16si) _mm512_setzero_si512 ();
+  return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, __zero, __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set1_epi32 (int __A)
+{
+  /* pbroadcastd512 "gpr" builtin fed the int directly, with an all-ones
+     mask; the second operand is an undefined vector.  */
+  const __v16si __undef = (__v16si) _mm512_undefined_si512 ();
+  return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, __undef,
+    (__mmask16)(-1));
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
+{
+  /* pbroadcastd512 "gpr" builtin with __O as the second operand and __M as
+     the mask.  */
+  const __v16si __old = (__v16si) __O;
+  return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, __old, __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
+{
+  /* pbroadcastd512 "gpr" builtin with a zero vector as the second
+     operand.  */
+  const __v16si __zero = (__v16si) _mm512_setzero_si512 ();
+  return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, __zero, __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcastq_epi64 (__m128i __A)
+{
+  /* pbroadcastq512 builtin with an all-ones mask; the second operand is an
+     undefined vector.  */
+  const __v8di __undef = (__v8di) _mm512_undefined_si512 ();
+  return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, __undef,
+    (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
+{
+  /* pbroadcastq512 builtin with __O as the second operand and __M as the
+     mask.  */
+  const __v2di __src = (__v2di) __A;
+  return (__m512i) __builtin_ia32_pbroadcastq512 (__src, (__v8di) __O, __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
+{
+  /* pbroadcastq512 builtin with a zero vector as the second operand.  */
+  const __v8di __zero = (__v8di) _mm512_setzero_si512 ();
+  return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, __zero, __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set1_epi64 (long long __A)
+{
+  /* Pick the builtin on __x86_64__, which the compiler predefines for
+     64-bit x86 targets.  The previous test, TARGET_64BIT, is a macro from
+     the compiler's own sources and is not defined when user code includes
+     this header, so the "gpr" branch was never selected.  Both branches
+     pass an undefined vector as the second operand and an all-ones mask.  */
+#ifdef __x86_64__
+  return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
+    (__v8di) _mm512_undefined_si512 (),
+    (__mmask8)(-1));
+#else
+  return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
+    (__v8di) _mm512_undefined_si512 (),
+    (__mmask8)(-1));
+#endif
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
+{
+  /* Pick the builtin on __x86_64__, which the compiler predefines for
+     64-bit x86 targets.  The previous test, TARGET_64BIT, is a macro from
+     the compiler's own sources and is not defined when user code includes
+     this header, so the "gpr" branch was never selected.  Both branches
+     pass __O as the second operand and __M as the mask.  */
+#ifdef __x86_64__
+  return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
+    __M);
+#else
+  return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, (__v8di) __O,
+    __M);
+#endif
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
+{
+  /* Pick the builtin on __x86_64__, which the compiler predefines for
+     64-bit x86 targets.  The previous test, TARGET_64BIT, is a macro from
+     the compiler's own sources and is not defined when user code includes
+     this header, so the "gpr" branch was never selected.  Both branches
+     pass a zero vector as the second operand and __M as the mask.  */
+#ifdef __x86_64__
+  return (__m512i)
+    __builtin_ia32_pbroadcastq512_gpr_mask (__A,
+      (__v8di) _mm512_setzero_si512 (),
+      __M);
+#else
+  return (__m512i)
+    __builtin_ia32_pbroadcastq512_mem_mask (__A,
+      (__v8di) _mm512_setzero_si512 (),
+      __M);
+#endif
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcast_f32x4 (__m128 __A)
+{
+  /* broadcastf32x4_512 builtin with an all-ones mask; the second operand
+     is an undefined vector.  */
+  const __v16sf __undef = (__v16sf) _mm512_undefined_ps ();
+  return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, __undef,
+    (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
+{
+  /* broadcastf32x4_512 builtin with __O as the second operand and __M as
+     the mask.  */
+  const __v4sf __src = (__v4sf) __A;
+  return (__m512) __builtin_ia32_broadcastf32x4_512 (__src, (__v16sf) __O, __M);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
+{
+  /* broadcastf32x4_512 builtin with a zero vector as the second operand.  */
+  const __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+  return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, __zero, __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcast_i32x4 (__m128i __A)
+{
+  /* broadcasti32x4_512 builtin with an all-ones mask; the second operand
+     is an undefined vector.  */
+  const __v16si __undef = (__v16si) _mm512_undefined_si512 ();
+  return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A, __undef,
+    (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
+{
+  /* broadcasti32x4_512 builtin with __O as the second operand and __M as
+     the mask.  */
+  const __v4si __src = (__v4si) __A;
+  return (__m512i) __builtin_ia32_broadcasti32x4_512 (__src, (__v16si) __O, __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
+{
+  /* broadcasti32x4_512 builtin with a zero vector as the second operand.  */
+  const __v16si __zero = (__v16si) _mm512_setzero_si512 ();
+  return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A, __zero, __M);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcast_f64x4 (__m256d __A)
+{
+  /* broadcastf64x4_512 builtin with an all-ones mask; the second operand
+     is an undefined vector.  */
+  const __v8df __undef = (__v8df) _mm512_undefined_pd ();
+  return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A, __undef,
+    (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
+{
+  /* broadcastf64x4_512 builtin with __O as the second operand and __M as
+     the mask.  */
+  const __v4df __src = (__v4df) __A;
+  return (__m512d) __builtin_ia32_broadcastf64x4_512 (__src, (__v8df) __O, __M);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
+{
+  /* broadcastf64x4_512 builtin with a zero vector as the second operand.  */
+  const __v8df __zero = (__v8df) _mm512_setzero_pd ();
+  return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A, __zero, __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcast_i64x4 (__m256i __A)
+{
+  /* broadcasti64x4_512 builtin with an all-ones mask; the second operand
+     is an undefined vector.  */
+  const __v8di __undef = (__v8di) _mm512_undefined_si512 ();
+  return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A, __undef,
+    (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
+{
+  /* broadcasti64x4_512 builtin with __O as the second operand and __M as
+     the mask.  */
+  const __v4di __src = (__v4di) __A;
+  return (__m512i) __builtin_ia32_broadcasti64x4_512 (__src, (__v8di) __O, __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
+{
+  /* broadcasti64x4_512 builtin with a zero vector as the second operand.  */
+  const __v8di __zero = (__v8di) _mm512_setzero_si512 ();
+  return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A, __zero, __M);
+}
+
+typedef enum
+{
+ _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
+ _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
+ _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
+ _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
+ _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
+ _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
+ _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
+ _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
+ _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
+ _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
+ _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
+ _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
+ _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
+ _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
+ _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
+ _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
+ _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
+ _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
+ _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
+ _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
+ _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
+ _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
+ _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
+ _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
+ _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
+ _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
+ _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
+ _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
+ _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
+ _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
+ _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
+ _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
+ _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
+ _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
+ _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
+ _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
+ _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
+ _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
+ _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
+ _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
+ _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
+ _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
+ _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
+ _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
+ _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
+ _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
+ _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
+ _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
+ _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
+ _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
+ _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
+ _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
+ _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
+ _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
+ _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
+ _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
+ _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
+ _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
+ _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
+ _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
+ _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
+ _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
+ _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
+ _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
+ _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
+ _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
+ _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
+ _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
+ _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
+ _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
+ _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
+ _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
+ _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
+ _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
+ _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
+ _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
+ _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
+ _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
+ _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
+ _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
+ _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
+ _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
+ _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
+ _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
+ _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
+ _MM_PERM_DDDD = 0xFF
+} _MM_PERM_ENUM;
+
+#ifdef __OPTIMIZE__
+extern __inline __m512i /* No-mask variant: forwards __mask as the second argument; passes an undefined vector and (__mmask16) -1. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
+{
+ return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
+ __mask,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i /* Mask variant: passes __W as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
+ _MM_PERM_ENUM __mask)
+{
+ return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
+ __mask,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i /* Maskz variant: passes _mm512_setzero_si512 () as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
+{
+ return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
+ __mask,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i /* No-mask variant: __A and __B viewed as __v8di, __imm forwarded unchanged; undefined vector and (__mmask8) -1. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
+{
+ return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
+ (__v8di) __B, __imm,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i /* Mask variant: passes __W as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
+ __m512i __B, const int __imm)
+{
+ return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
+ (__v8di) __B, __imm,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i /* Maskz variant: passes _mm512_setzero_si512 () as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
+ (__v8di) __B, __imm,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i /* No-mask variant: __A and __B viewed as __v16si, __imm forwarded unchanged; undefined vector and (__mmask16) -1. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
+{
+ return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
+ (__v16si) __B,
+ __imm,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i /* Mask variant: passes __W as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
+ __m512i __B, const int __imm)
+{
+ return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
+ (__v16si) __B,
+ __imm,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i /* Maskz variant: passes _mm512_setzero_si512 () as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
+ (__v16si) __B,
+ __imm,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512d /* No-mask variant: __A and __B viewed as __v8df, __imm forwarded unchanged; _mm512_undefined_pd () and (__mmask8) -1. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
+{
+ return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
+ (__v8df) __B, __imm,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512d /* Mask variant: passes __W as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B, const int __imm)
+{
+ return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
+ (__v8df) __B, __imm,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512d /* Maskz variant: passes _mm512_setzero_pd () as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
+ const int __imm)
+{
+ return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
+ (__v8df) __B, __imm,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512 /* No-mask variant: __A and __B viewed as __v16sf, __imm forwarded unchanged; _mm512_undefined_ps () and (__mmask16) -1. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
+{
+ return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
+ (__v16sf) __B, __imm,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512 /* Mask variant: passes __W as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
+ __m512 __B, const int __imm)
+{
+ return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
+ (__v16sf) __B, __imm,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512 /* Maskz variant: passes _mm512_setzero_ps () as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
+ const int __imm)
+{
+ return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
+ (__v16sf) __B, __imm,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+#else
+#define _mm512_shuffle_epi32(X, C) /* Macro form for builds without __OPTIMIZE__; C is cast to int; undefined vector and (__mmask16)-1. */ \
+ ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
+ (__v16si)(__m512i)_mm512_undefined_si512 (),\
+ (__mmask16)-1))
+
+#define _mm512_mask_shuffle_epi32(W, U, X, C) /* Mask variant: W is the extra vector operand, U the mask. */ \
+ ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
+ (__v16si)(__m512i)(W),\
+ (__mmask16)(U)))
+
+#define _mm512_maskz_shuffle_epi32(U, X, C) /* Maskz variant: zero vector as the extra operand, U the mask. */ \
+ ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
+ (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask16)(U)))
+
+#define _mm512_shuffle_i64x2(X, Y, C) /* Macro form for builds without __OPTIMIZE__; C is cast to int; undefined vector and (__mmask8)-1. */ \
+ ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
+ (__v8di)(__m512i)(Y), (int)(C),\
+ (__v8di)(__m512i)_mm512_undefined_si512 (),\
+ (__mmask8)-1))
+
+#define _mm512_mask_shuffle_i64x2(W, U, X, Y, C) /* Mask variant: W is the extra vector operand, U the mask. */ \
+ ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
+ (__v8di)(__m512i)(Y), (int)(C),\
+ (__v8di)(__m512i)(W),\
+ (__mmask8)(U)))
+
+#define _mm512_maskz_shuffle_i64x2(U, X, Y, C) /* Maskz variant: zero vector as the extra operand, U the mask. */ \
+ ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
+ (__v8di)(__m512i)(Y), (int)(C),\
+ (__v8di)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask8)(U)))
+
+#define _mm512_shuffle_i32x4(X, Y, C) /* Macro form for builds without __OPTIMIZE__; C is cast to int; undefined vector and (__mmask16)-1. */ \
+ ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
+ (__v16si)(__m512i)(Y), (int)(C),\
+ (__v16si)(__m512i)_mm512_undefined_si512 (),\
+ (__mmask16)-1))
+
+#define _mm512_mask_shuffle_i32x4(W, U, X, Y, C) /* Mask variant: W is the extra vector operand, U the mask. */ \
+ ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
+ (__v16si)(__m512i)(Y), (int)(C),\
+ (__v16si)(__m512i)(W),\
+ (__mmask16)(U)))
+
+#define _mm512_maskz_shuffle_i32x4(U, X, Y, C) /* Maskz variant: zero vector as the extra operand, U the mask. */ \
+ ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
+ (__v16si)(__m512i)(Y), (int)(C),\
+ (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask16)(U)))
+
+#define _mm512_shuffle_f64x2(X, Y, C) /* Macro form for builds without __OPTIMIZE__; C is cast to int; _mm512_undefined_pd() and (__mmask8)-1. */ \
+ ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
+ (__v8df)(__m512d)(Y), (int)(C),\
+ (__v8df)(__m512d)_mm512_undefined_pd(),\
+ (__mmask8)-1))
+
+#define _mm512_mask_shuffle_f64x2(W, U, X, Y, C) /* Mask variant: W is the extra vector operand, U the mask. */ \
+ ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
+ (__v8df)(__m512d)(Y), (int)(C),\
+ (__v8df)(__m512d)(W),\
+ (__mmask8)(U)))
+
+#define _mm512_maskz_shuffle_f64x2(U, X, Y, C) /* Maskz variant: _mm512_setzero_pd() as the extra operand, U the mask. */ \
+ ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
+ (__v8df)(__m512d)(Y), (int)(C),\
+ (__v8df)(__m512d)_mm512_setzero_pd(),\
+ (__mmask8)(U)))
+
+#define _mm512_shuffle_f32x4(X, Y, C) /* Macro form for builds without __OPTIMIZE__; C is cast to int; _mm512_undefined_ps() and (__mmask16)-1. */ \
+ ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
+ (__v16sf)(__m512)(Y), (int)(C),\
+ (__v16sf)(__m512)_mm512_undefined_ps(),\
+ (__mmask16)-1))
+
+#define _mm512_mask_shuffle_f32x4(W, U, X, Y, C) /* Mask variant: W is the extra vector operand, U the mask. */ \
+ ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
+ (__v16sf)(__m512)(Y), (int)(C),\
+ (__v16sf)(__m512)(W),\
+ (__mmask16)(U)))
+
+#define _mm512_maskz_shuffle_f32x4(U, X, Y, C) /* Maskz variant: _mm512_setzero_ps() as the extra operand, U the mask. */ \
+ ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
+ (__v16sf)(__m512)(Y), (int)(C),\
+ (__v16sf)(__m512)_mm512_setzero_ps(),\
+ (__mmask16)(U)))
+#endif
+
+extern __inline __m512i /* No-mask variant: __A and __B viewed as __v16si; undefined vector and (__mmask16) -1. Defined regardless of __OPTIMIZE__. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rolv_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i /* Mask variant: passes __W as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i /* Maskz variant: passes _mm512_setzero_si512 () as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i /* No-mask variant: __A and __B viewed as __v16si; undefined vector and (__mmask16) -1. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rorv_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i /* Mask variant: passes __W as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i /* Maskz variant: passes _mm512_setzero_si512 () as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i /* No-mask variant: __A and __B viewed as __v8di; undefined vector and (__mmask8) -1. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rolv_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i /* Mask variant: passes __W as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i /* Maskz variant: passes _mm512_setzero_si512 () as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i /* No-mask variant: __A and __B viewed as __v8di; undefined vector and (__mmask8) -1. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rorv_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i /* Mask variant: passes __W as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i /* Maskz variant: passes _mm512_setzero_si512 () as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m256i /* No-mask variant: passes an undefined __v8si, (__mmask8) -1 and __R (forwarded unchanged) to the builtin. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m256i /* Mask variant: passes __W as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m256i /* Maskz variant: passes _mm256_setzero_si256 () as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m256i /* No-mask variant: same shape as the epi32 form but calls the ...cvttpd2udq512 builtin. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m256i /* Mask variant: passes __W as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m256i /* Maskz variant: passes _mm256_setzero_si256 () as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U, __R);
+}
+#else
+#define _mm512_cvtt_roundpd_epi32(A, B) /* Macro form for builds without __OPTIMIZE__; A and B are passed through without casts. */ \
+ ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
+
+#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) /* Mask variant: W (cast to __v8si) is the extra vector operand, U the mask. */ \
+ ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)(W), U, B))
+
+#define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) /* Maskz variant: zero vector as the extra operand, U the mask. */ \
+ ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
+
+#define _mm512_cvtt_roundpd_epu32(A, B) /* Macro form for builds without __OPTIMIZE__; A and B are passed through without casts. */ \
+ ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
+
+#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) /* Mask variant: W (cast to __v8si) is the extra vector operand, U the mask. */ \
+ ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)(W), U, B))
+
+#define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) /* Maskz variant: zero vector as the extra operand, U the mask. */ \
+ ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
+#endif
+
+#ifdef __OPTIMIZE__
+extern __inline __m256i /* No-mask variant: passes an undefined __v8si, (__mmask8) -1 and __R (forwarded unchanged) to the builtin. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
+{
+ return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m256i /* Mask variant: passes __W as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m256i /* Maskz variant: passes _mm256_setzero_si256 () as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
+{
+ return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m256i /* No-mask variant: same shape as the epi32 form but calls the ...cvtpd2udq512 builtin. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
+{
+ return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m256i /* Mask variant: passes __W as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m256i /* Maskz variant: passes _mm256_setzero_si256 () as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
+{
+ return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U, __R);
+}
+#else
+#define _mm512_cvt_roundpd_epi32(A, B) /* Macro form for builds without __OPTIMIZE__; A and B are passed through without casts. */ \
+ ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
+
+#define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) /* Mask variant: W (cast to __v8si) is the extra vector operand, U the mask. */ \
+ ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)(W), U, B))
+
+#define _mm512_maskz_cvt_roundpd_epi32(U, A, B) /* Maskz variant: zero vector as the extra operand, U the mask. */ \
+ ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
+
+#define _mm512_cvt_roundpd_epu32(A, B) /* Macro form for builds without __OPTIMIZE__; A and B are passed through without casts. */ \
+ ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
+
+#define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) /* Mask variant: W (cast to __v8si) is the extra vector operand, U the mask. */ \
+ ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)(W), U, B))
+
+#define _mm512_maskz_cvt_roundpd_epu32(U, A, B) /* Maskz variant: zero vector as the extra operand, U the mask. */ \
+ ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
+#endif
+
+#ifdef __OPTIMIZE__
+extern __inline __m512i /* No-mask variant: passes an undefined __v16si, (__mmask16) -1 and __R (forwarded unchanged) to the builtin. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1, __R);
+}
+
+extern __inline __m512i /* Mask variant: passes __W as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U, __R);
+}
+
+extern __inline __m512i /* Maskz variant: passes _mm512_setzero_si512 () as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U, __R);
+}
+
+extern __inline __m512i /* No-mask variant: same shape as the epi32 form but calls the ...cvttps2udq512 builtin. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1, __R);
+}
+
+extern __inline __m512i /* Mask variant: passes __W as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U, __R);
+}
+
+extern __inline __m512i /* Maskz variant: passes _mm512_setzero_si512 () as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U, __R);
+}
+#else
+#define _mm512_cvtt_roundps_epi32(A, B) /* Macro form for builds without __OPTIMIZE__; A and B are passed through without casts. */ \
+ ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))
+
+#define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) /* Mask variant: W (cast to __v16si) is the extra vector operand, U the mask. */ \
+ ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)(W), U, B))
+
+#define _mm512_maskz_cvtt_roundps_epi32(U, A, B) /* Maskz variant: zero vector as the extra operand, U the mask. */ \
+ ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
+
+#define _mm512_cvtt_roundps_epu32(A, B) /* Macro form for builds without __OPTIMIZE__; A and B are passed through without casts. */ \
+ ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))
+
+#define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) /* Mask variant: W (cast to __v16si) is the extra vector operand, U the mask. */ \
+ ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)(W), U, B))
+
+#define _mm512_maskz_cvtt_roundps_epu32(U, A, B) /* Maskz variant: zero vector as the extra operand, U the mask. */ \
+ ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
+#endif
+
+#ifdef __OPTIMIZE__
+extern __inline __m512i /* No-mask variant: passes an undefined __v16si, (__mmask16) -1 and __R (forwarded unchanged) to the builtin. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1, __R);
+}
+
+extern __inline __m512i /* Mask variant: passes __W as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U, __R);
+}
+
+extern __inline __m512i /* Maskz variant: passes _mm512_setzero_si512 () as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U, __R);
+}
+
+extern __inline __m512i /* No-mask variant: same shape as the epi32 form but calls the ...cvtps2udq512 builtin. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1, __R);
+}
+
+extern __inline __m512i /* Mask variant: passes __W as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U, __R);
+}
+
+extern __inline __m512i /* Maskz variant: passes _mm512_setzero_si512 () as the extra vector operand and __U as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U, __R);
+}
+#else
+#define _mm512_cvt_roundps_epi32(A, B) /* Macro form for builds without __OPTIMIZE__; A and B are passed through without casts. */ \
+ ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))
+
+#define _mm512_mask_cvt_roundps_epi32(W, U, A, B) /* Mask variant: W (cast to __v16si) is the extra vector operand, U the mask. */ \
+ ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)(W), U, B))
+
+#define _mm512_maskz_cvt_roundps_epi32(U, A, B) /* Maskz variant: zero vector as the extra operand, U the mask. */ \
+ ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
+
+#define _mm512_cvt_roundps_epu32(A, B) /* Macro form for builds without __OPTIMIZE__; A and B are passed through without casts. */ \
+ ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))
+
+#define _mm512_mask_cvt_roundps_epu32(W, U, A, B) /* Mask variant: W (cast to __v16si) is the extra vector operand, U the mask. */ \
+ ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)(W), U, B))
+
+#define _mm512_maskz_cvt_roundps_epu32(U, A, B) /* Maskz variant: zero vector as the extra operand, U the mask. */ \
+ ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
+#endif
+
+extern __inline __m128d /* Forwards __A (as __v2df) and the unsigned __B to __builtin_ia32_cvtusi2sd32; takes no rounding argument. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtu32_sd (__m128d __A, unsigned __B)
+{
+ return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
+}
+
+#ifdef __x86_64__
+#ifdef __OPTIMIZE__
+extern __inline __m128d /* Forwards __A (as __v2df), the unsigned 64-bit __B and __R to __builtin_ia32_cvtusi2sd64. Only compiled under __x86_64__. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
+}
+
+extern __inline __m128d /* Forwards __A (as __v2df), the signed 64-bit __B and __R to __builtin_ia32_cvtsi2sd64. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
+}
+
+extern __inline __m128d /* Second name for the same operation: body is identical to _mm_cvt_roundi64_sd. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
+}
+#else
+#define _mm_cvt_roundu64_sd(A, B, C) /* Macro form for builds without __OPTIMIZE__; expansion fully parenthesized so it is a primary expression like the inline form's call. */ \
+ ((__m128d)__builtin_ia32_cvtusi2sd64(A, B, C))
+
+#define _mm_cvt_roundi64_sd(A, B, C) /* Signed 64-bit source; expansion fully parenthesized. */ \
+ ((__m128d)__builtin_ia32_cvtsi2sd64(A, B, C))
+
+#define _mm_cvt_roundsi64_sd(A, B, C) /* Second name for the same expansion as _mm_cvt_roundi64_sd. */ \
+ ((__m128d)__builtin_ia32_cvtsi2sd64(A, B, C))
+#endif
+
+#endif
+
+#ifdef __OPTIMIZE__
+extern __inline __m128 /* Forwards __A (as __v4sf), the unsigned __B and __R to __builtin_ia32_cvtusi2ss32. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
+{
+ return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
+}
+
+extern __inline __m128 /* Forwards __A (as __v4sf), the int __B and __R to __builtin_ia32_cvtsi2ss32. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
+{
+ return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
+}
+
+extern __inline __m128 /* Second name for the same operation: body is identical to _mm_cvt_roundsi32_ss. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
+{
+ return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
+}
+#else
+#define _mm_cvt_roundu32_ss(A, B, C) /* Macro form for builds without __OPTIMIZE__; expansion fully parenthesized so it is a primary expression like the inline form's call. */ \
+ ((__m128)__builtin_ia32_cvtusi2ss32(A, B, C))
+
+#define _mm_cvt_roundi32_ss(A, B, C) /* Signed 32-bit source; expansion fully parenthesized. */ \
+ ((__m128)__builtin_ia32_cvtsi2ss32(A, B, C))
+
+#define _mm_cvt_roundsi32_ss(A, B, C) /* Second name for the same expansion as _mm_cvt_roundi32_ss. */ \
+ ((__m128)__builtin_ia32_cvtsi2ss32(A, B, C))
+#endif
+
+#ifdef __x86_64__
+#ifdef __OPTIMIZE__
+extern __inline __m128 /* Forwards __A (as __v4sf), the unsigned 64-bit __B and __R to __builtin_ia32_cvtusi2ss64. Only compiled under __x86_64__. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
+{
+ return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
+}
+
+extern __inline __m128 /* Forwards __A (as __v4sf), the signed 64-bit __B and __R to __builtin_ia32_cvtsi2ss64. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
+{
+ return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
+}
+
+extern __inline __m128 /* Second name for the same operation: body is identical to _mm_cvt_roundsi64_ss. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
+{
+ return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
+}
+#else
+#define _mm_cvt_roundu64_ss(A, B, C) /* Macro form for builds without __OPTIMIZE__; expansion fully parenthesized so it is a primary expression like the inline form's call. */ \
+ ((__m128)__builtin_ia32_cvtusi2ss64(A, B, C))
+
+#define _mm_cvt_roundi64_ss(A, B, C) /* Signed 64-bit source; expansion fully parenthesized. */ \
+ ((__m128)__builtin_ia32_cvtsi2ss64(A, B, C))
+
+#define _mm_cvt_roundsi64_ss(A, B, C) /* Second name for the same expansion as _mm_cvt_roundi64_ss. */ \
+ ((__m128)__builtin_ia32_cvtsi2ss64(A, B, C))
+#endif
+
+#endif
+
+extern __inline __m128i /* No-mask variant: __A viewed as __v16si; passes an undefined __v16qi and (__mmask16) -1; result returned as __m128i. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi32_epi8 (__m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
+ (__v16qi)
+ _mm_undefined_si128 (),
+ (__mmask16) -1);
+}
+
+extern __inline void /* Store variant: hands __P (as __v16qi *), __A and __M to the ...mem_mask builtin; returns nothing. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
+{
+ __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
+}
+
+extern __inline __m128i /* Mask variant: passes __O as the extra vector operand and __M as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
+ (__v16qi) __O, __M);
+}
+
+extern __inline __m128i /* Maskz variant: passes _mm_setzero_si128 () as the extra vector operand and __M as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i /* No-mask variant of the pmovsdb512 builtin: undefined __v16qi and (__mmask16) -1. NOTE(review): presumably the signed-saturating narrow -- confirm against the builtin's documentation. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtsepi32_epi8 (__m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
+ (__v16qi)
+ _mm_undefined_si128 (),
+ (__mmask16) -1);
+}
+
+extern __inline void /* Store variant: hands __P (as __v16qi *), __A and __M to the ...mem_mask builtin; returns nothing. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
+{
+ __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
+}
+
+extern __inline __m128i /* Mask variant: passes __O as the extra vector operand and __M as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
+ (__v16qi) __O, __M);
+}
+
+extern __inline __m128i /* Maskz variant: passes _mm_setzero_si128 () as the extra vector operand and __M as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i /* No-mask variant of the pmovusdb512 builtin: undefined __v16qi and (__mmask16) -1. NOTE(review): presumably the unsigned-saturating narrow -- confirm against the builtin's documentation. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtusepi32_epi8 (__m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
+ (__v16qi)
+ _mm_undefined_si128 (),
+ (__mmask16) -1);
+}
+
+extern __inline void /* Store variant: hands __P (as __v16qi *), __A and __M to the ...mem_mask builtin; returns nothing. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
+{
+ __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
+}
+
+extern __inline __m128i /* Mask variant: passes __O as the extra vector operand and __M as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
+ (__v16qi) __O,
+ __M);
+}
+
+extern __inline __m128i /* Maskz variant: passes _mm_setzero_si128 () as the extra vector operand and __M as the mask. */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi32_epi16 (__m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
+ (__v16hi)
+ _mm256_undefined_si256 (),
+ (__mmask16) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
+{
+  /* __P is only retyped here; the builtin receives it together with __A
+     and the mask __M.  */
+  __v16hi *__dst = (__v16hi *) __P;
+
+  __builtin_ia32_pmovdw512mem_mask (__dst, (__v16si) __A, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
+{
+  /* Retype __A and __O for the builtin and forward the mask __M as is.  */
+  __v16si __a = (__v16si) __A;
+  __v16hi __o = (__v16hi) __O;
+
+  return (__m256i) __builtin_ia32_pmovdw512_mask (__a, __o, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
+{
+  /* The second builtin operand is the vector from
+     _mm256_setzero_si256 (); __M is forwarded as the mask.  */
+  __v16hi __zero = (__v16hi) _mm256_setzero_si256 ();
+
+  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, __zero,
+                                                  __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtsepi32_epi16 (__m512i __A)
+{
+  /* Unmasked form: the mask is all ones and the second builtin operand
+     comes from _mm256_undefined_si256 ().  */
+  __v16hi __undef = (__v16hi) _mm256_undefined_si256 ();
+
+  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, __undef,
+                                                   (__mmask16) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
+{
+  /* __P is only retyped here; the builtin receives it together with __A
+     and the mask __M.  */
+  __v16hi *__dst = (__v16hi *) __P;
+
+  __builtin_ia32_pmovsdw512mem_mask (__dst, (__v16si) __A, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
+{
+  /* Retype __A and __O for the builtin and forward the mask __M as is.  */
+  __v16si __a = (__v16si) __A;
+  __v16hi __o = (__v16hi) __O;
+
+  return (__m256i) __builtin_ia32_pmovsdw512_mask (__a, __o, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
+{
+  /* The second builtin operand is the vector from
+     _mm256_setzero_si256 (); __M is forwarded as the mask.  */
+  __v16hi __zero = (__v16hi) _mm256_setzero_si256 ();
+
+  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, __zero,
+                                                   __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtusepi32_epi16 (__m512i __A)
+{
+  /* Unmasked form: the mask is all ones and the second builtin operand
+     comes from _mm256_undefined_si256 ().  */
+  __v16hi __undef = (__v16hi) _mm256_undefined_si256 ();
+
+  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, __undef,
+                                                    (__mmask16) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
+{
+  /* __P is only retyped here; the builtin receives it together with __A
+     and the mask __M.  */
+  __v16hi *__dst = (__v16hi *) __P;
+
+  __builtin_ia32_pmovusdw512mem_mask (__dst, (__v16si) __A, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
+{
+  /* Retype __A and __O for the builtin and forward the mask __M as is.  */
+  __v16si __a = (__v16si) __A;
+  __v16hi __o = (__v16hi) __O;
+
+  return (__m256i) __builtin_ia32_pmovusdw512_mask (__a, __o, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
+{
+  /* The second builtin operand is the vector from
+     _mm256_setzero_si256 (); __M is forwarded as the mask.  */
+  __v16hi __zero = (__v16hi) _mm256_setzero_si256 ();
+
+  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, __zero,
+                                                    __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi64_epi32 (__m512i __A)
+{
+  /* Unmasked form: the mask is all ones and the second builtin operand
+     comes from _mm256_undefined_si256 ().  */
+  __v8si __undef = (__v8si) _mm256_undefined_si256 ();
+
+  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, __undef,
+                                                  (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
+{
+  /* __P is only retyped here; the builtin receives it together with __A
+     and the mask __M.  */
+  __v8si *__dst = (__v8si *) __P;
+
+  __builtin_ia32_pmovqd512mem_mask (__dst, (__v8di) __A, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
+{
+  /* Retype __A and __O for the builtin and forward the mask __M as is.  */
+  __v8di __a = (__v8di) __A;
+  __v8si __o = (__v8si) __O;
+
+  return (__m256i) __builtin_ia32_pmovqd512_mask (__a, __o, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
+{
+  /* The second builtin operand is the vector from
+     _mm256_setzero_si256 (); __M is forwarded as the mask.  */
+  __v8si __zero = (__v8si) _mm256_setzero_si256 ();
+
+  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, __zero,
+                                                  __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtsepi64_epi32 (__m512i __A)
+{
+  /* Unmasked form: the mask is all ones and the second builtin operand
+     comes from _mm256_undefined_si256 ().  No local scratch vector is
+     needed, so none is declared.  */
+  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
+                                                   (__v8si)
+                                                   _mm256_undefined_si256 (),
+                                                   (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
+{
+  /* __P is only retyped here; the builtin receives it together with __A
+     and the mask __M.  */
+  __v8si *__dst = (__v8si *) __P;
+
+  __builtin_ia32_pmovsqd512mem_mask (__dst, (__v8di) __A, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
+{
+  /* Retype __A and __O for the builtin and forward the mask __M as is.  */
+  __v8di __a = (__v8di) __A;
+  __v8si __o = (__v8si) __O;
+
+  return (__m256i) __builtin_ia32_pmovsqd512_mask (__a, __o, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
+{
+  /* The second builtin operand is the vector from
+     _mm256_setzero_si256 (); __M is forwarded as the mask.  */
+  __v8si __zero = (__v8si) _mm256_setzero_si256 ();
+
+  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, __zero,
+                                                   __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtusepi64_epi32 (__m512i __A)
+{
+  /* Unmasked form: the mask is all ones and the second builtin operand
+     comes from _mm256_undefined_si256 ().  */
+  __v8si __undef = (__v8si) _mm256_undefined_si256 ();
+
+  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, __undef,
+                                                    (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
+{
+  /* __P is only retyped here; the builtin receives it together with __A
+     and the mask __M.  */
+  __v8si *__dst = (__v8si *) __P;
+
+  __builtin_ia32_pmovusqd512mem_mask (__dst, (__v8di) __A, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
+{
+  /* Retype __A and __O for the builtin and forward the mask __M as is.  */
+  __v8di __a = (__v8di) __A;
+  __v8si __o = (__v8si) __O;
+
+  return (__m256i) __builtin_ia32_pmovusqd512_mask (__a, __o, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
+{
+  /* The second builtin operand is the vector from
+     _mm256_setzero_si256 (); __M is forwarded as the mask.  */
+  __v8si __zero = (__v8si) _mm256_setzero_si256 ();
+
+  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, __zero,
+                                                    __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi64_epi16 (__m512i __A)
+{
+  /* Unmasked form: the mask is all ones and the second builtin operand
+     comes from _mm_undefined_si128 ().  */
+  __v8hi __undef = (__v8hi) _mm_undefined_si128 ();
+
+  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, __undef,
+                                                  (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
+{
+  /* __P is only retyped here; the builtin receives it together with __A
+     and the mask __M.  */
+  __v8hi *__dst = (__v8hi *) __P;
+
+  __builtin_ia32_pmovqw512mem_mask (__dst, (__v8di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
+{
+  /* Retype __A and __O for the builtin and forward the mask __M as is.  */
+  __v8di __a = (__v8di) __A;
+  __v8hi __o = (__v8hi) __O;
+
+  return (__m128i) __builtin_ia32_pmovqw512_mask (__a, __o, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
+{
+  /* The second builtin operand is the vector from _mm_setzero_si128 ();
+     __M is forwarded as the mask.  */
+  __v8hi __zero = (__v8hi) _mm_setzero_si128 ();
+
+  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, __zero,
+                                                  __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtsepi64_epi16 (__m512i __A)
+{
+  /* Unmasked form: the mask is all ones and the second builtin operand
+     comes from _mm_undefined_si128 ().  */
+  __v8hi __undef = (__v8hi) _mm_undefined_si128 ();
+
+  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, __undef,
+                                                   (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
+{
+  /* __P is only retyped here; the builtin receives it together with __A
+     and the mask __M.  */
+  __v8hi *__dst = (__v8hi *) __P;
+
+  __builtin_ia32_pmovsqw512mem_mask (__dst, (__v8di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
+{
+  /* Retype __A and __O for the builtin and forward the mask __M as is.  */
+  __v8di __a = (__v8di) __A;
+  __v8hi __o = (__v8hi) __O;
+
+  return (__m128i) __builtin_ia32_pmovsqw512_mask (__a, __o, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
+{
+  /* The second builtin operand is the vector from _mm_setzero_si128 ();
+     __M is forwarded as the mask.  */
+  __v8hi __zero = (__v8hi) _mm_setzero_si128 ();
+
+  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, __zero,
+                                                   __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtusepi64_epi16 (__m512i __A)
+{
+  /* Unmasked form: the mask is all ones and the second builtin operand
+     comes from _mm_undefined_si128 ().  */
+  __v8hi __undef = (__v8hi) _mm_undefined_si128 ();
+
+  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, __undef,
+                                                    (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
+{
+  /* __P is only retyped here; the builtin receives it together with __A
+     and the mask __M.  */
+  __v8hi *__dst = (__v8hi *) __P;
+
+  __builtin_ia32_pmovusqw512mem_mask (__dst, (__v8di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
+{
+  /* Retype __A and __O for the builtin and forward the mask __M as is.  */
+  __v8di __a = (__v8di) __A;
+  __v8hi __o = (__v8hi) __O;
+
+  return (__m128i) __builtin_ia32_pmovusqw512_mask (__a, __o, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
+{
+  /* The second builtin operand is the vector from _mm_setzero_si128 ();
+     __M is forwarded as the mask.  */
+  __v8hi __zero = (__v8hi) _mm_setzero_si128 ();
+
+  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, __zero,
+                                                    __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi64_epi8 (__m512i __A)
+{
+  /* Unmasked form: the mask is all ones and the second builtin operand
+     comes from _mm_undefined_si128 ().  */
+  __v16qi __undef = (__v16qi) _mm_undefined_si128 ();
+
+  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, __undef,
+                                                  (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
+{
+  /* __P is only retyped here; the builtin receives it together with __A
+     and the mask __M.  */
+  __v16qi *__dst = (__v16qi *) __P;
+
+  __builtin_ia32_pmovqb512mem_mask (__dst, (__v8di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
+{
+  /* Retype __A and __O for the builtin and forward the mask __M as is.  */
+  __v8di __a = (__v8di) __A;
+  __v16qi __o = (__v16qi) __O;
+
+  return (__m128i) __builtin_ia32_pmovqb512_mask (__a, __o, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
+{
+  /* The second builtin operand is the vector from _mm_setzero_si128 ();
+     __M is forwarded as the mask.  */
+  __v16qi __zero = (__v16qi) _mm_setzero_si128 ();
+
+  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, __zero,
+                                                  __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtsepi64_epi8 (__m512i __A)
+{
+  /* Unmasked form: the mask is all ones and the second builtin operand
+     comes from _mm_undefined_si128 ().  */
+  __v16qi __undef = (__v16qi) _mm_undefined_si128 ();
+
+  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, __undef,
+                                                   (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
+{
+  /* __P is only retyped here; the builtin receives it together with __A
+     and the mask __M.  */
+  __v16qi *__dst = (__v16qi *) __P;
+
+  __builtin_ia32_pmovsqb512mem_mask (__dst, (__v8di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
+{
+  /* Retype __A and __O for the builtin and forward the mask __M as is.  */
+  __v8di __a = (__v8di) __A;
+  __v16qi __o = (__v16qi) __O;
+
+  return (__m128i) __builtin_ia32_pmovsqb512_mask (__a, __o, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
+{
+  /* The second builtin operand is the vector from _mm_setzero_si128 ();
+     __M is forwarded as the mask.  */
+  __v16qi __zero = (__v16qi) _mm_setzero_si128 ();
+
+  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, __zero,
+                                                   __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtusepi64_epi8 (__m512i __A)
+{
+  /* Unmasked form: the mask is all ones and the second builtin operand
+     comes from _mm_undefined_si128 ().  */
+  __v16qi __undef = (__v16qi) _mm_undefined_si128 ();
+
+  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, __undef,
+                                                    (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
+{
+  /* __P is only retyped here; the builtin receives it together with __A
+     and the mask __M.  */
+  __v16qi *__dst = (__v16qi *) __P;
+
+  __builtin_ia32_pmovusqb512mem_mask (__dst, (__v8di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
+{
+  /* Retype __A and __O for the builtin and forward the mask __M as is.  */
+  __v8di __a = (__v8di) __A;
+  __v16qi __o = (__v16qi) __O;
+
+  return (__m128i) __builtin_ia32_pmovusqb512_mask (__a, __o, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
+{
+  /* The second builtin operand is the vector from _mm_setzero_si128 ();
+     __M is forwarded as the mask.  */
+  __v16qi __zero = (__v16qi) _mm_setzero_si128 ();
+
+  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, __zero,
+                                                    __M);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi32_pd (__m256i __A)
+{
+  /* Unmasked form: the mask is all ones and the second builtin operand
+     comes from _mm512_undefined_pd ().  */
+  __v8df __undef = (__v8df) _mm512_undefined_pd ();
+
+  return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A, __undef,
+                                                    (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
+{
+  /* Retype __A and __W for the builtin; __U is passed as the mask.  */
+  __v8si __a = (__v8si) __A;
+  __v8df __w = (__v8df) __W;
+
+  return (__m512d) __builtin_ia32_cvtdq2pd512_mask (__a, __w,
+                                                    (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
+{
+  /* The second builtin operand is the vector from _mm512_setzero_pd ();
+     __U is passed as the mask.  */
+  __v8df __zero = (__v8df) _mm512_setzero_pd ();
+
+  return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A, __zero,
+                                                    (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepu32_pd (__m256i __A)
+{
+  /* Unmasked form: the mask is all ones and the second builtin operand
+     comes from _mm512_undefined_pd ().  */
+  __v8df __undef = (__v8df) _mm512_undefined_pd ();
+
+  return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A, __undef,
+                                                     (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
+{
+  /* Retype __A and __W for the builtin; __U is passed as the mask.  */
+  __v8si __a = (__v8si) __A;
+  __v8df __w = (__v8df) __W;
+
+  return (__m512d) __builtin_ia32_cvtudq2pd512_mask (__a, __w,
+                                                     (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
+{
+  /* The second builtin operand is the vector from _mm512_setzero_pd ();
+     __U is passed as the mask.  */
+  __v8df __zero = (__v8df) _mm512_setzero_pd ();
+
+  return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A, __zero,
+                                                     (__mmask8) __U);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
+{
+  /* Unmasked form: all-ones mask, second operand from
+     _mm512_undefined_ps (); __R is handed to the builtin unchanged.  */
+  __v16sf __undef = (__v16sf) _mm512_undefined_ps ();
+
+  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, __undef,
+                                                   (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
+                               const int __R)
+{
+  /* Retype __A and __W for the builtin; __U is the mask and __R is
+     handed over unchanged.  */
+  __v16si __a = (__v16si) __A;
+  __v16sf __w = (__v16sf) __W;
+
+  return (__m512) __builtin_ia32_cvtdq2ps512_mask (__a, __w,
+                                                   (__mmask16) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
+{
+  /* Second operand from _mm512_setzero_ps (); __U is the mask and __R is
+     handed over unchanged.  */
+  __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+
+  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, __zero,
+                                                   (__mmask16) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
+{
+  /* Unmasked form: all-ones mask, second operand from
+     _mm512_undefined_ps (); __R is handed to the builtin unchanged.  */
+  __v16sf __undef = (__v16sf) _mm512_undefined_ps ();
+
+  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, __undef,
+                                                    (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
+                               const int __R)
+{
+  /* Retype __A and __W for the builtin; __U is the mask and __R is
+     handed over unchanged.  */
+  __v16si __a = (__v16si) __A;
+  __v16sf __w = (__v16sf) __W;
+
+  return (__m512) __builtin_ia32_cvtudq2ps512_mask (__a, __w,
+                                                    (__mmask16) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
+{
+  /* Second operand from _mm512_setzero_ps (); __U is the mask and __R is
+     handed over unchanged.  */
+  __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+
+  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, __zero,
+                                                    (__mmask16) __U, __R);
+}
+
+#else
+/* Macro forms used when __OPTIMIZE__ is not defined.  Each expansion is
+   wrapped in parentheses so it behaves as one expression wherever it is
+   used, and every argument is parenthesized and cast the same way the
+   inline definitions cast their parameters.  */
+#define _mm512_cvt_roundepi32_ps(A, B) \
+  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si)(__m512i) (A), \
+    (__v16sf)(__m512) _mm512_undefined_ps (), \
+    (__mmask16) -1, (B)))
+
+#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \
+  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si)(__m512i) (A), \
+    (__v16sf)(__m512) (W), \
+    (__mmask16) (U), (B)))
+
+#define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \
+  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si)(__m512i) (A), \
+    (__v16sf)(__m512) _mm512_setzero_ps (), \
+    (__mmask16) (U), (B)))
+
+#define _mm512_cvt_roundepu32_ps(A, B) \
+  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si)(__m512i) (A), \
+    (__v16sf)(__m512) _mm512_undefined_ps (), \
+    (__mmask16) -1, (B)))
+
+#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \
+  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si)(__m512i) (A), \
+    (__v16sf)(__m512) (W), \
+    (__mmask16) (U), (B)))
+
+#define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \
+  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si)(__m512i) (A), \
+    (__v16sf)(__m512) _mm512_setzero_ps (), \
+    (__mmask16) (U), (B)))
+#endif
+
+#ifdef __OPTIMIZE__
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_extractf64x4_pd (__m512d __A, const int __imm)
+{
+  /* Unmasked form: all-ones mask, third operand from
+     _mm256_undefined_pd (); __imm is forwarded unchanged.  */
+  __v4df __undef = (__v4df) _mm256_undefined_pd ();
+
+  return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A, __imm,
+                                                     __undef,
+                                                     (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
+                             const int __imm)
+{
+  /* Retype __A and __W for the builtin; __imm is forwarded unchanged and
+     __U is the mask.  */
+  __v8df __a = (__v8df) __A;
+  __v4df __w = (__v4df) __W;
+
+  return (__m256d) __builtin_ia32_extractf64x4_mask (__a, __imm, __w,
+                                                     (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
+{
+  /* Third operand from _mm256_setzero_pd (); __imm is forwarded unchanged
+     and __U is the mask.  */
+  __v4df __zero = (__v4df) _mm256_setzero_pd ();
+
+  return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A, __imm,
+                                                     __zero,
+                                                     (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_extractf32x4_ps (__m512 __A, const int __imm)
+{
+  /* Unmasked form: all-ones mask, third operand from
+     _mm_undefined_ps (); __imm is forwarded unchanged.  */
+  __v4sf __undef = (__v4sf) _mm_undefined_ps ();
+
+  return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A, __imm,
+                                                    __undef,
+                                                    (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
+                             const int __imm)
+{
+  /* Retype __A and __W for the builtin; __imm is forwarded unchanged and
+     __U is the mask.  */
+  __v16sf __a = (__v16sf) __A;
+  __v4sf __w = (__v4sf) __W;
+
+  return (__m128) __builtin_ia32_extractf32x4_mask (__a, __imm, __w,
+                                                    (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
+{
+  /* Third operand from _mm_setzero_ps (); __imm is forwarded unchanged
+     and __U is the mask.  */
+  __v4sf __zero = (__v4sf) _mm_setzero_ps ();
+
+  return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A, __imm,
+                                                    __zero,
+                                                    (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
+{
+  /* Unmasked form: all-ones mask, third operand from
+     _mm256_undefined_si256 (); __imm is forwarded unchanged.  */
+  __v4di __undef = (__v4di) _mm256_undefined_si256 ();
+
+  return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A, __imm,
+                                                     __undef,
+                                                     (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
+                                const int __imm)
+{
+  /* Retype __A and __W for the builtin; __imm is forwarded unchanged and
+     __U is the mask.  */
+  __v8di __a = (__v8di) __A;
+  __v4di __w = (__v4di) __W;
+
+  return (__m256i) __builtin_ia32_extracti64x4_mask (__a, __imm, __w,
+                                                     (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
+{
+  /* Third operand from _mm256_setzero_si256 (); __imm is forwarded
+     unchanged and __U is the mask.  */
+  __v4di __zero = (__v4di) _mm256_setzero_si256 ();
+
+  return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A, __imm,
+                                                     __zero,
+                                                     (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
+{
+  /* Unmasked form: all-ones mask, third operand from
+     _mm_undefined_si128 (); __imm is forwarded unchanged.  */
+  __v4si __undef = (__v4si) _mm_undefined_si128 ();
+
+  return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A, __imm,
+                                                     __undef,
+                                                     (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
+                                const int __imm)
+{
+  /* Retype __A and __W for the builtin; __imm is forwarded unchanged and
+     __U is the mask.  */
+  __v16si __a = (__v16si) __A;
+  __v4si __w = (__v4si) __W;
+
+  return (__m128i) __builtin_ia32_extracti32x4_mask (__a, __imm, __w,
+                                                     (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
+{
+  /* Third operand from _mm_setzero_si128 (); __imm is forwarded unchanged
+     and __U is the mask.  */
+  __v4si __zero = (__v4si) _mm_setzero_si128 ();
+
+  return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A, __imm,
+                                                     __zero,
+                                                     (__mmask8) __U);
+}
+#else
+
+/* Macro forms of the extract intrinsics, selected by the #else branch,
+   i.e. when __OPTIMIZE__ is not defined.  Each macro expands to one
+   parenthesized call of the matching *_mask builtin; X is cast to the
+   builtin's vector type and C is cast to int.  */
+
+/* extractf64x4: X as __v8df, result as __m256d.  The plain form passes
+   _mm256_undefined_pd() with an all-ones mask, the mask form passes W
+   and U, the maskz form passes _mm256_setzero_pd() and U.  */
+#define _mm512_extractf64x4_pd(X, C) \
+ ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
+ (int) (C),\
+ (__v4df)(__m256d)_mm256_undefined_pd(),\
+ (__mmask8)-1))
+
+#define _mm512_mask_extractf64x4_pd(W, U, X, C) \
+ ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
+ (int) (C),\
+ (__v4df)(__m256d)(W),\
+ (__mmask8)(U)))
+
+#define _mm512_maskz_extractf64x4_pd(U, X, C) \
+ ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
+ (int) (C),\
+ (__v4df)(__m256d)_mm256_setzero_pd(),\
+ (__mmask8)(U)))
+
+/* extractf32x4: X as __v16sf, result as __m128.  */
+#define _mm512_extractf32x4_ps(X, C) \
+ ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
+ (int) (C),\
+ (__v4sf)(__m128)_mm_undefined_ps(),\
+ (__mmask8)-1))
+
+#define _mm512_mask_extractf32x4_ps(W, U, X, C) \
+ ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
+ (int) (C),\
+ (__v4sf)(__m128)(W),\
+ (__mmask8)(U)))
+
+#define _mm512_maskz_extractf32x4_ps(U, X, C) \
+ ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
+ (int) (C),\
+ (__v4sf)(__m128)_mm_setzero_ps(),\
+ (__mmask8)(U)))
+
+/* extracti64x4: X as __v8di, result as __m256i.  */
+#define _mm512_extracti64x4_epi64(X, C) \
+ ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
+ (int) (C),\
+ (__v4di)(__m256i)_mm256_undefined_si256 (),\
+ (__mmask8)-1))
+
+#define _mm512_mask_extracti64x4_epi64(W, U, X, C) \
+ ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
+ (int) (C),\
+ (__v4di)(__m256i)(W),\
+ (__mmask8)(U)))
+
+#define _mm512_maskz_extracti64x4_epi64(U, X, C) \
+ ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
+ (int) (C),\
+ (__v4di)(__m256i)_mm256_setzero_si256 (),\
+ (__mmask8)(U)))
+
+/* extracti32x4: X as __v16si, result as __m128i.  */
+#define _mm512_extracti32x4_epi32(X, C) \
+ ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
+ (int) (C),\
+ (__v4si)(__m128i)_mm_undefined_si128 (),\
+ (__mmask8)-1))
+
+#define _mm512_mask_extracti32x4_epi32(W, U, X, C) \
+ ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
+ (int) (C),\
+ (__v4si)(__m128i)(W),\
+ (__mmask8)(U)))
+
+#define _mm512_maskz_extracti32x4_epi32(U, X, C) \
+ ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
+ (int) (C),\
+ (__v4si)(__m128i)_mm_setzero_si128 (),\
+ (__mmask8)(U)))
+#endif
+
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
+{
+  /* __A is passed both as the first and as the fourth builtin operand;
+     __imm is forwarded unchanged.  The all-ones mask is written with the
+     same __mmask16 cast that the macro form of this intrinsic uses.  */
+  __v16si __a = (__v16si) __A;
+
+  return (__m512i) __builtin_ia32_inserti32x4_mask (__a, (__v4si) __B,
+                                                    __imm, __a,
+                                                    (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
+{
+  /* __A is passed both as the first and as the fourth builtin operand;
+     __imm is forwarded unchanged.  The all-ones mask is written with the
+     same __mmask16 cast that the macro form of this intrinsic uses.  */
+  __v16sf __a = (__v16sf) __A;
+
+  return (__m512) __builtin_ia32_insertf32x4_mask (__a, (__v4sf) __B,
+                                                   __imm, __a,
+                                                   (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
+{
+  /* Unmasked form: all-ones mask, fourth operand from
+     _mm512_undefined_si512 (); __imm is forwarded unchanged.  */
+  __v8di __undef = (__v8di) _mm512_undefined_si512 ();
+
+  return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
+                                                    (__v4di) __B, __imm,
+                                                    __undef,
+                                                    (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
+                         __m256i __B, const int __imm)
+{
+  /* Retype __A, __B and __W for the builtin; __imm is forwarded unchanged
+     and __U is the mask.  */
+  __v8di __a = (__v8di) __A;
+  __v4di __b = (__v4di) __B;
+  __v8di __w = (__v8di) __W;
+
+  return (__m512i) __builtin_ia32_inserti64x4_mask (__a, __b, __imm, __w,
+                                                    (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
+                          const int __imm)
+{
+  /* Fourth operand from _mm512_setzero_si512 (); __imm is forwarded
+     unchanged and __U is the mask.  */
+  __v8di __zero = (__v8di) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
+                                                    (__v4di) __B, __imm,
+                                                    __zero,
+                                                    (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
+{
+  /* Unmasked form: all-ones mask, fourth operand from
+     _mm512_undefined_pd (); __imm is forwarded unchanged.  */
+  __v8df __undef = (__v8df) _mm512_undefined_pd ();
+
+  return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
+                                                    (__v4df) __B, __imm,
+                                                    __undef,
+                                                    (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
+                         __m256d __B, const int __imm)
+{
+  /* Retype __A, __B and __W for the builtin; __imm is forwarded unchanged
+     and __U is the mask.  */
+  __v8df __a = (__v8df) __A;
+  __v4df __b = (__v4df) __B;
+  __v8df __w = (__v8df) __W;
+
+  return (__m512d) __builtin_ia32_insertf64x4_mask (__a, __b, __imm, __w,
+                                                    (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
+                          const int __imm)
+{
+  /* Fourth operand from _mm512_setzero_pd (); __imm is forwarded
+     unchanged and __U is the mask.  */
+  __v8df __zero = (__v8df) _mm512_setzero_pd ();
+
+  return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
+                                                    (__v4df) __B, __imm,
+                                                    __zero,
+                                                    (__mmask8) __U);
+}
+#else
+/* Macro forms of the insert intrinsics, selected by the #else branch,
+   i.e. when __OPTIMIZE__ is not defined.  Each macro expands to one
+   parenthesized call of the matching *_mask builtin; X and Y are cast to
+   the builtin's vector types and C is cast to int.  */
+
+/* The 32x4 forms pass X a second time as the fourth builtin operand and
+   use an all-ones __mmask16.  Note that X therefore appears twice in the
+   expansion.  */
+#define _mm512_insertf32x4(X, Y, C) \
+ ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
+ (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (X), (__mmask16)(-1)))
+
+#define _mm512_inserti32x4(X, Y, C) \
+ ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
+ (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (X), (__mmask16)(-1)))
+
+/* insertf64x4: the plain form passes _mm512_undefined_pd() with an
+   all-ones mask, the mask form passes W and U, the maskz form passes
+   _mm512_setzero_pd() and U.  */
+#define _mm512_insertf64x4(X, Y, C) \
+ ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
+ (__v4df)(__m256d) (Y), (int) (C), \
+ (__v8df)(__m512d)_mm512_undefined_pd(), \
+ (__mmask8)-1))
+
+#define _mm512_mask_insertf64x4(W, U, X, Y, C) \
+ ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
+ (__v4df)(__m256d) (Y), (int) (C), \
+ (__v8df)(__m512d)(W), \
+ (__mmask8)(U)))
+
+#define _mm512_maskz_insertf64x4(U, X, Y, C) \
+ ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
+ (__v4df)(__m256d) (Y), (int) (C), \
+ (__v8df)(__m512d)_mm512_setzero_pd(), \
+ (__mmask8)(U)))
+
+/* inserti64x4: same three shapes, with __v8di / __v4di operands.  */
+#define _mm512_inserti64x4(X, Y, C) \
+ ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
+ (__v4di)(__m256i) (Y), (int) (C), \
+ (__v8di)(__m512i)_mm512_undefined_si512 (), \
+ (__mmask8)-1))
+
+#define _mm512_mask_inserti64x4(W, U, X, Y, C) \
+ ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
+ (__v4di)(__m256i) (Y), (int) (C),\
+ (__v8di)(__m512i)(W),\
+ (__mmask8)(U)))
+
+#define _mm512_maskz_inserti64x4(U, X, Y, C) \
+ ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
+ (__v4di)(__m256i) (Y), (int) (C), \
+ (__v8di)(__m512i)_mm512_setzero_si512 (), \
+ (__mmask8)(U)))
+#endif
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadu_pd (void const *__P)
+{
+  /* Unmasked form: all-ones mask, second operand from
+     _mm512_undefined_pd ().  __P is only retyped before the call.  */
+  const __v8df *__src = (const __v8df *) __P;
+
+  return (__m512d) __builtin_ia32_loadupd512_mask (__src,
+                                                   (__v8df)
+                                                   _mm512_undefined_pd (),
+                                                   (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
+{
+  /* __P is only retyped; __W is the second builtin operand and __U the
+     mask.  */
+  const __v8df *__src = (const __v8df *) __P;
+
+  return (__m512d) __builtin_ia32_loadupd512_mask (__src, (__v8df) __W,
+                                                   (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
+{
+  /* __P is only retyped; the second builtin operand comes from
+     _mm512_setzero_pd () and __U is the mask.  */
+  const __v8df *__src = (const __v8df *) __P;
+
+  return (__m512d) __builtin_ia32_loadupd512_mask (__src,
+                                                   (__v8df)
+                                                   _mm512_setzero_pd (),
+                                                   (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_storeu_pd (void *__P, __m512d __A)
+{
+  /* Unmasked form: __P is only retyped and the mask is all ones.  */
+  __v8df *__dst = (__v8df *) __P;
+
+  __builtin_ia32_storeupd512_mask (__dst, (__v8df) __A, (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
+{
+  /* __P is only retyped; __U is passed as the mask.  */
+  __v8df *__dst = (__v8df *) __P;
+
+  __builtin_ia32_storeupd512_mask (__dst, (__v8df) __A, (__mmask8) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadu_ps (void const *__P)
+{
+  /* Unmasked form: all-ones mask, second operand from
+     _mm512_undefined_ps ().  __P is only retyped before the call.  */
+  const __v16sf *__src = (const __v16sf *) __P;
+
+  return (__m512) __builtin_ia32_loadups512_mask (__src,
+                                                  (__v16sf)
+                                                  _mm512_undefined_ps (),
+                                                  (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
+{
+  /* __P is only retyped; __W is the second builtin operand and __U the
+     mask.  */
+  const __v16sf *__src = (const __v16sf *) __P;
+
+  return (__m512) __builtin_ia32_loadups512_mask (__src, (__v16sf) __W,
+                                                  (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
+{
+  /* __P is only retyped; the second builtin operand comes from
+     _mm512_setzero_ps () and __U is the mask.  */
+  const __v16sf *__src = (const __v16sf *) __P;
+
+  return (__m512) __builtin_ia32_loadups512_mask (__src,
+                                                  (__v16sf)
+                                                  _mm512_setzero_ps (),
+                                                  (__mmask16) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_storeu_ps (void *__P, __m512 __A)
+{
+  /* Unmasked form: __P is only retyped and the mask is all ones.  */
+  __v16sf *__dst = (__v16sf *) __P;
+
+  __builtin_ia32_storeups512_mask (__dst, (__v16sf) __A, (__mmask16) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
+{
+  /* __P is only retyped; __U is passed as the mask.  */
+  __v16sf *__dst = (__v16sf *) __P;
+
+  __builtin_ia32_storeups512_mask (__dst, (__v16sf) __A, (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
+{
+  /* __P is only retyped; __W is the second builtin operand and __U the
+     mask.  */
+  const __v8di *__src = (const __v8di *) __P;
+
+  return (__m512i) __builtin_ia32_loaddqudi512_mask (__src, (__v8di) __W,
+                                                     (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
+{
+  /* __P is only retyped; the second builtin operand comes from
+     _mm512_setzero_si512 () and __U is the mask.  */
+  const __v8di *__src = (const __v8di *) __P;
+
+  return (__m512i) __builtin_ia32_loaddqudi512_mask (__src,
+                                                     (__v8di)
+                                                     _mm512_setzero_si512 (),
+                                                     (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
+{
+  /* __P is only retyped; __U is passed as the mask.  */
+  __v8di *__dst = (__v8di *) __P;
+
+  __builtin_ia32_storedqudi512_mask (__dst, (__v8di) __A, (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadu_si512 (void const *__P)
+{
+  /* Unmasked load with an all-ones mask.  The second builtin operand is
+     taken from _mm512_undefined_si512 (), mirroring how _mm512_loadu_pd
+     and _mm512_loadu_ps pass their undefined vectors, instead of asking
+     for a zeroed vector.
+     NOTE(review): this relies on the builtin not reading the second
+     operand when every mask bit is set - confirm against the builtin's
+     documentation.  */
+  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
+                                                     (__v16si)
+                                                     _mm512_undefined_si512 (),
+                                                     (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
+{
+  /* __P is only retyped; __W is the second builtin operand and __U the
+     mask.  */
+  const __v16si *__src = (const __v16si *) __P;
+
+  return (__m512i) __builtin_ia32_loaddqusi512_mask (__src, (__v16si) __W,
+                                                     (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
+{
+  /* __P is only retyped; the second builtin operand comes from
+     _mm512_setzero_si512 () and __U is the mask.  */
+  const __v16si *__src = (const __v16si *) __P;
+
+  return (__m512i) __builtin_ia32_loaddqusi512_mask (__src,
+                                                     (__v16si)
+                                                     _mm512_setzero_si512 (),
+                                                     (__mmask16) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_storeu_si512 (void *__P, __m512i __A)
+{
+  /* Unmasked form: __P is only retyped and the mask is all ones.  */
+  __v16si *__dst = (__v16si *) __P;
+
+  __builtin_ia32_storedqusi512_mask (__dst, (__v16si) __A, (__mmask16) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
+{
+  /* __P is only retyped; __U is passed as the mask.  */
+  __v16si *__dst = (__v16si *) __P;
+
+  __builtin_ia32_storedqusi512_mask (__dst, (__v16si) __A,
+                                     (__mmask16) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutevar_pd (__m512d __A, __m512i __C)
+{
+  /* Unmasked form: all-ones mask, third operand from
+     _mm512_undefined_pd ().  */
+  __v8df __undef = (__v8df) _mm512_undefined_pd ();
+
+  return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
+                                                        (__v8di) __C,
+                                                        __undef,
+                                                        (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
+{
+  /* Retype __A, __C and __W for the builtin; __U is the mask.  */
+  __v8df __a = (__v8df) __A;
+  __v8di __c = (__v8di) __C;
+  __v8df __w = (__v8df) __W;
+
+  return (__m512d) __builtin_ia32_vpermilvarpd512_mask (__a, __c, __w,
+                                                        (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
+{
+  /* Third operand from _mm512_setzero_pd (); __U is the mask.  */
+  __v8df __zero = (__v8df) _mm512_setzero_pd ();
+
+  return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
+                                                        (__v8di) __C,
+                                                        __zero,
+                                                        (__mmask8) __U);
+}
+
+/* Forward to __builtin_ia32_vpermilvarps512_mask: data __A, control __C,
+   undefined pass-through vector, all-ones mask.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutevar_ps (__m512 __A, __m512i __C)
+{
+  __v16sf __src = (__v16sf) __A;
+  __v16si __ctl = (__v16si) __C;
+  __v16sf __undef = (__v16sf) _mm512_undefined_ps ();
+
+  return (__m512) __builtin_ia32_vpermilvarps512_mask (__src, __ctl, __undef,
+                                                      (__mmask16) -1);
+}
+
+/* Forward to __builtin_ia32_vpermilvarps512_mask: data __A, control __C,
+   pass-through vector __W, write mask __U.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
+{
+  __v16sf __src = (__v16sf) __A;
+  __v16si __ctl = (__v16si) __C;
+  __v16sf __merge = (__v16sf) __W;
+
+  return (__m512) __builtin_ia32_vpermilvarps512_mask (__src, __ctl, __merge,
+                                                      (__mmask16) __U);
+}
+
+/* Forward to __builtin_ia32_vpermilvarps512_mask: data __A, control __C,
+   zero pass-through vector, write mask __U.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
+{
+  __v16sf __src = (__v16sf) __A;
+  __v16si __ctl = (__v16si) __C;
+  __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+
+  return (__m512) __builtin_ia32_vpermilvarps512_mask (__src, __ctl, __zero,
+                                                      (__mmask16) __U);
+}
+
+/* Forward to __builtin_ia32_vpermt2varq512_mask with the index vector __I
+   first, then __A and __B, under an all-ones mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
+{
+  __v8di __vi = (__v8di) __I;	/* idx */
+  __v8di __va = (__v8di) __A;
+  __v8di __vb = (__v8di) __B;
+
+  return (__m512i) __builtin_ia32_vpermt2varq512_mask (__vi, __va, __vb,
+                                                      (__mmask8) -1);
+}
+
+/* Forward to __builtin_ia32_vpermt2varq512_mask with the index vector __I
+   first, then __A and __B, under write mask __U.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
+                                __m512i __B)
+{
+  __v8di __vi = (__v8di) __I;	/* idx */
+  __v8di __va = (__v8di) __A;
+  __v8di __vb = (__v8di) __B;
+
+  return (__m512i) __builtin_ia32_vpermt2varq512_mask (__vi, __va, __vb,
+                                                      (__mmask8) __U);
+}
+
+/* Forward to __builtin_ia32_vpermi2varq512_mask with operands in the order
+   __A, index vector __I, __B, under write mask __U.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
+                                 __mmask8 __U, __m512i __B)
+{
+  __v8di __va = (__v8di) __A;
+  __v8di __vi = (__v8di) __I;	/* idx */
+  __v8di __vb = (__v8di) __B;
+
+  return (__m512i) __builtin_ia32_vpermi2varq512_mask (__va, __vi, __vb,
+                                                      (__mmask8) __U);
+}
+
+/* Forward to __builtin_ia32_vpermt2varq512_maskz with the index vector __I
+   first, then __A and __B, under mask __U.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
+                                 __m512i __I, __m512i __B)
+{
+  __v8di __vi = (__v8di) __I;	/* idx */
+  __v8di __va = (__v8di) __A;
+  __v8di __vb = (__v8di) __B;
+
+  return (__m512i) __builtin_ia32_vpermt2varq512_maskz (__vi, __va, __vb,
+                                                       (__mmask8) __U);
+}
+
+/* Forward to __builtin_ia32_vpermt2vard512_mask with the index vector __I
+   first, then __A and __B, under an all-ones mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
+{
+  __v16si __vi = (__v16si) __I;	/* idx */
+  __v16si __va = (__v16si) __A;
+  __v16si __vb = (__v16si) __B;
+
+  return (__m512i) __builtin_ia32_vpermt2vard512_mask (__vi, __va, __vb,
+                                                      (__mmask16) -1);
+}
+
+/* Forward to __builtin_ia32_vpermt2vard512_mask with the index vector __I
+   first, then __A and __B, under write mask __U.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
+                                __m512i __I, __m512i __B)
+{
+  __v16si __vi = (__v16si) __I;	/* idx */
+  __v16si __va = (__v16si) __A;
+  __v16si __vb = (__v16si) __B;
+
+  return (__m512i) __builtin_ia32_vpermt2vard512_mask (__vi, __va, __vb,
+                                                      (__mmask16) __U);
+}
+
+/* Forward to __builtin_ia32_vpermi2vard512_mask with operands in the order
+   __A, index vector __I, __B, under write mask __U.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
+                                 __mmask16 __U, __m512i __B)
+{
+  __v16si __va = (__v16si) __A;
+  __v16si __vi = (__v16si) __I;	/* idx */
+  __v16si __vb = (__v16si) __B;
+
+  return (__m512i) __builtin_ia32_vpermi2vard512_mask (__va, __vi, __vb,
+                                                      (__mmask16) __U);
+}
+
+/* Forward to __builtin_ia32_vpermt2vard512_maskz with the index vector __I
+   first, then __A and __B, under mask __U.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
+                                 __m512i __I, __m512i __B)
+{
+  __v16si __vi = (__v16si) __I;	/* idx */
+  __v16si __va = (__v16si) __A;
+  __v16si __vb = (__v16si) __B;
+
+  return (__m512i) __builtin_ia32_vpermt2vard512_maskz (__vi, __va, __vb,
+                                                       (__mmask16) __U);
+}
+
+/* Forward to __builtin_ia32_vpermt2varpd512_mask with the index vector __I
+   first, then __A and __B, under an all-ones mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
+{
+  __v8di __vi = (__v8di) __I;	/* idx */
+  __v8df __va = (__v8df) __A;
+  __v8df __vb = (__v8df) __B;
+
+  return (__m512d) __builtin_ia32_vpermt2varpd512_mask (__vi, __va, __vb,
+                                                       (__mmask8) -1);
+}
+
+/* Forward to __builtin_ia32_vpermt2varpd512_mask with the index vector __I
+   first, then __A and __B, under write mask __U.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
+                             __m512d __B)
+{
+  __v8di __vi = (__v8di) __I;	/* idx */
+  __v8df __va = (__v8df) __A;
+  __v8df __vb = (__v8df) __B;
+
+  return (__m512d) __builtin_ia32_vpermt2varpd512_mask (__vi, __va, __vb,
+                                                       (__mmask8) __U);
+}
+
+/* Forward to __builtin_ia32_vpermi2varpd512_mask with operands in the order
+   __A, index vector __I, __B, under write mask __U.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
+                              __m512d __B)
+{
+  __v8df __va = (__v8df) __A;
+  __v8di __vi = (__v8di) __I;	/* idx */
+  __v8df __vb = (__v8df) __B;
+
+  return (__m512d) __builtin_ia32_vpermi2varpd512_mask (__va, __vi, __vb,
+                                                       (__mmask8) __U);
+}
+
+/* Forward to __builtin_ia32_vpermt2varpd512_maskz with the index vector __I
+   first, then __A and __B, under mask __U.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
+                              __m512d __B)
+{
+  __v8di __vi = (__v8di) __I;	/* idx */
+  __v8df __va = (__v8df) __A;
+  __v8df __vb = (__v8df) __B;
+
+  return (__m512d) __builtin_ia32_vpermt2varpd512_maskz (__vi, __va, __vb,
+                                                        (__mmask8) __U);
+}
+
+/* Forward to __builtin_ia32_vpermt2varps512_mask with the index vector __I
+   first, then __A and __B, under an all-ones mask.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
+{
+  __v16si __vi = (__v16si) __I;	/* idx */
+  __v16sf __va = (__v16sf) __A;
+  __v16sf __vb = (__v16sf) __B;
+
+  return (__m512) __builtin_ia32_vpermt2varps512_mask (__vi, __va, __vb,
+                                                      (__mmask16) -1);
+}
+
+/* Forward to __builtin_ia32_vpermt2varps512_mask with the index vector __I
+   first, then __A and __B, under write mask __U.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
+{
+  __v16si __vi = (__v16si) __I;	/* idx */
+  __v16sf __va = (__v16sf) __A;
+  __v16sf __vb = (__v16sf) __B;
+
+  return (__m512) __builtin_ia32_vpermt2varps512_mask (__vi, __va, __vb,
+                                                      (__mmask16) __U);
+}
+
+/* Forward to __builtin_ia32_vpermi2varps512_mask with operands in the order
+   __A, index vector __I, __B, under write mask __U.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
+                              __m512 __B)
+{
+  __v16sf __va = (__v16sf) __A;
+  __v16si __vi = (__v16si) __I;	/* idx */
+  __v16sf __vb = (__v16sf) __B;
+
+  return (__m512) __builtin_ia32_vpermi2varps512_mask (__va, __vi, __vb,
+                                                      (__mmask16) __U);
+}
+
+/* Forward to __builtin_ia32_vpermt2varps512_maskz with the index vector __I
+   first, then __A and __B, under mask __U.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
+                              __m512 __B)
+{
+  __v16si __vi = (__v16si) __I;	/* idx */
+  __v16sf __va = (__v16sf) __A;
+  __v16sf __vb = (__v16sf) __B;
+
+  return (__m512) __builtin_ia32_vpermt2varps512_maskz (__vi, __va, __vb,
+                                                       (__mmask16) __U);
+}
+
+#ifdef __OPTIMIZE__
+/* Forward to __builtin_ia32_vpermilpd512_mask: data __X, control __C passed
+   through unchanged, undefined pass-through vector, all-ones mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permute_pd (__m512d __X, const int __C)
+{
+  __v8df __undef = (__v8df) _mm512_undefined_pd ();
+
+  return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
+                                                    __undef, (__mmask8) -1);
+}
+
+/* Forward to __builtin_ia32_vpermilpd512_mask: data __X, control __C,
+   pass-through vector __W, write mask __U.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
+{
+  __v8df __merge = (__v8df) __W;
+
+  return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
+                                                    __merge, (__mmask8) __U);
+}
+
+/* Forward to __builtin_ia32_vpermilpd512_mask: data __X, control __C,
+   zero pass-through vector, write mask __U.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
+{
+  __v8df __zero = (__v8df) _mm512_setzero_pd ();
+
+  return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
+                                                    __zero, (__mmask8) __U);
+}
+
+/* Forward to __builtin_ia32_vpermilps512_mask: data __X, control __C,
+   undefined pass-through vector, all-ones mask.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permute_ps (__m512 __X, const int __C)
+{
+  __v16sf __undef = (__v16sf) _mm512_undefined_ps ();
+
+  return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
+                                                   __undef, (__mmask16) -1);
+}
+
+/* Forward to __builtin_ia32_vpermilps512_mask: data __X, control __C,
+   pass-through vector __W, write mask __U.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
+{
+  __v16sf __merge = (__v16sf) __W;
+
+  return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
+                                                   __merge, (__mmask16) __U);
+}
+
+/* Forward to __builtin_ia32_vpermilps512_mask: data __X, control __C,
+   zero pass-through vector, write mask __U.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
+{
+  __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+
+  return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
+                                                   __zero, (__mmask16) __U);
+}
+#else
+/* Macro forms of the _mm512_*permute_{pd,ps} family, selected when
+   __OPTIMIZE__ is not defined.  Each expands to the same builtin call as
+   the corresponding inline function.  */
+#define _mm512_permute_pd(X, C) \
+  ((__m512d) __builtin_ia32_vpermilpd512_mask ( \
+     (__v8df)(__m512d)(X), \
+     (int)(C), \
+     (__v8df)(__m512d)_mm512_undefined_pd(), \
+     (__mmask8)(-1)))
+
+#define _mm512_mask_permute_pd(W, U, X, C) \
+  ((__m512d) __builtin_ia32_vpermilpd512_mask ( \
+     (__v8df)(__m512d)(X), \
+     (int)(C), \
+     (__v8df)(__m512d)(W), \
+     (__mmask8)(U)))
+
+#define _mm512_maskz_permute_pd(U, X, C) \
+  ((__m512d) __builtin_ia32_vpermilpd512_mask ( \
+     (__v8df)(__m512d)(X), \
+     (int)(C), \
+     (__v8df)(__m512d)_mm512_setzero_pd(), \
+     (__mmask8)(U)))
+
+#define _mm512_permute_ps(X, C) \
+  ((__m512) __builtin_ia32_vpermilps512_mask ( \
+     (__v16sf)(__m512)(X), \
+     (int)(C), \
+     (__v16sf)(__m512)_mm512_undefined_ps(), \
+     (__mmask16)(-1)))
+
+#define _mm512_mask_permute_ps(W, U, X, C) \
+  ((__m512) __builtin_ia32_vpermilps512_mask ( \
+     (__v16sf)(__m512)(X), \
+     (int)(C), \
+     (__v16sf)(__m512)(W), \
+     (__mmask16)(U)))
+
+#define _mm512_maskz_permute_ps(U, X, C) \
+  ((__m512) __builtin_ia32_vpermilps512_mask ( \
+     (__v16sf)(__m512)(X), \
+     (int)(C), \
+     (__v16sf)(__m512)_mm512_setzero_ps(), \
+     (__mmask16)(U)))
+#endif
+
+#ifdef __OPTIMIZE__
+/* Forward to __builtin_ia32_permdi512_mask: data __X, control __I,
+   undefined pass-through vector, all-ones mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutex_epi64 (__m512i __X, const int __I)
+{
+  __v8di __undef = (__v8di) _mm512_undefined_si512 ();
+
+  return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
+                                                 __undef, (__mmask8) (-1));
+}
+
+/* Forward to __builtin_ia32_permdi512_mask: data __X, control __I,
+   pass-through vector __W, write mask __M.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
+                            __m512i __X, const int __I)
+{
+  __v8di __merge = (__v8di) __W;
+
+  return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
+                                                 __merge, (__mmask8) __M);
+}
+
+/* Forward to __builtin_ia32_permdi512_mask: data __X, control __I,
+   zero pass-through vector, write mask __M.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
+{
+  __v8di __zero = (__v8di) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
+                                                 __zero, (__mmask8) __M);
+}
+
+/* Forward to __builtin_ia32_permdf512_mask: data __X, control __M,
+   undefined pass-through vector, all-ones mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutex_pd (__m512d __X, const int __M)
+{
+  __v8df __undef = (__v8df) _mm512_undefined_pd ();
+
+  return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
+                                                 __undef, (__mmask8) -1);
+}
+
+/* Forward to __builtin_ia32_permdf512_mask: data __X, control __M,
+   pass-through vector __W, write mask __U.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
+{
+  __v8df __merge = (__v8df) __W;
+
+  return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
+                                                 __merge, (__mmask8) __U);
+}
+
+/* Forward to __builtin_ia32_permdf512_mask: data __X, control __M,
+   zero pass-through vector, write mask __U.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
+{
+  __v8df __zero = (__v8df) _mm512_setzero_pd ();
+
+  return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
+                                                 __zero, (__mmask8) __U);
+}
+#else
+/* Macro forms of the _mm512_*permutex_{pd,epi64} family, selected when
+   __OPTIMIZE__ is not defined.  Each expands to the same builtin call as
+   the corresponding inline function.  */
+#define _mm512_permutex_pd(X, M) \
+  ((__m512d) __builtin_ia32_permdf512_mask ( \
+     (__v8df)(__m512d)(X), \
+     (int)(M), \
+     (__v8df)(__m512d)_mm512_undefined_pd(), \
+     (__mmask8)-1))
+
+#define _mm512_mask_permutex_pd(W, U, X, M) \
+  ((__m512d) __builtin_ia32_permdf512_mask ( \
+     (__v8df)(__m512d)(X), \
+     (int)(M), \
+     (__v8df)(__m512d)(W), \
+     (__mmask8)(U)))
+
+#define _mm512_maskz_permutex_pd(U, X, M) \
+  ((__m512d) __builtin_ia32_permdf512_mask ( \
+     (__v8df)(__m512d)(X), \
+     (int)(M), \
+     (__v8df)(__m512d)_mm512_setzero_pd(), \
+     (__mmask8)(U)))
+
+#define _mm512_permutex_epi64(X, I) \
+  ((__m512i) __builtin_ia32_permdi512_mask ( \
+     (__v8di)(__m512i)(X), \
+     (int)(I), \
+     (__v8di)(__m512i)(_mm512_undefined_si512 ()), \
+     (__mmask8)(-1)))
+
+#define _mm512_maskz_permutex_epi64(M, X, I) \
+  ((__m512i) __builtin_ia32_permdi512_mask ( \
+     (__v8di)(__m512i)(X), \
+     (int)(I), \
+     (__v8di)(__m512i)(_mm512_setzero_si512 ()), \
+     (__mmask8)(M)))
+
+#define _mm512_mask_permutex_epi64(W, M, X, I) \
+  ((__m512i) __builtin_ia32_permdi512_mask ( \
+     (__v8di)(__m512i)(X), \
+     (int)(I), \
+     (__v8di)(__m512i)(W), \
+     (__mmask8)(M)))
+#endif
+
+/* Forward to __builtin_ia32_permvardi512_mask with operands (__Y, __X),
+   a zero pass-through vector and mask __M.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
+{
+  __v8di __vy = (__v8di) __Y;
+  __v8di __vx = (__v8di) __X;
+  __v8di __zero = (__v8di) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_permvardi512_mask (__vy, __vx, __zero, __M);
+}
+
+/* Forward to __builtin_ia32_permvardi512_mask with operands (__Y, __X),
+   an undefined pass-through vector and an all-ones mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
+{
+  __v8di __vy = (__v8di) __Y;
+  __v8di __vx = (__v8di) __X;
+  __v8di __undef = (__v8di) _mm512_undefined_si512 ();
+
+  return (__m512i) __builtin_ia32_permvardi512_mask (__vy, __vx, __undef,
+                                                    (__mmask8) -1);
+}
+
+/* Forward to __builtin_ia32_permvardi512_mask with operands (__Y, __X),
+   pass-through vector __W and mask __M.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
+                               __m512i __Y)
+{
+  __v8di __vy = (__v8di) __Y;
+  __v8di __vx = (__v8di) __X;
+  __v8di __merge = (__v8di) __W;
+
+  return (__m512i) __builtin_ia32_permvardi512_mask (__vy, __vx, __merge, __M);
+}
+
+/* Forward to __builtin_ia32_permvarsi512_mask with operands (__Y, __X),
+   a zero pass-through vector and mask __M.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
+{
+  __v16si __vy = (__v16si) __Y;
+  __v16si __vx = (__v16si) __X;
+  __v16si __zero = (__v16si) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_permvarsi512_mask (__vy, __vx, __zero, __M);
+}
+
+/* Forward to __builtin_ia32_permvarsi512_mask with operands (__Y, __X),
+   an undefined pass-through vector and an all-ones mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
+{
+  __v16si __vy = (__v16si) __Y;
+  __v16si __vx = (__v16si) __X;
+  __v16si __undef = (__v16si) _mm512_undefined_si512 ();
+
+  return (__m512i) __builtin_ia32_permvarsi512_mask (__vy, __vx, __undef,
+                                                    (__mmask16) -1);
+}
+
+/* Forward to __builtin_ia32_permvarsi512_mask with operands (__Y, __X),
+   pass-through vector __W and mask __M.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
+                               __m512i __Y)
+{
+  __v16si __vy = (__v16si) __Y;
+  __v16si __vx = (__v16si) __X;
+  __v16si __merge = (__v16si) __W;
+
+  return (__m512i) __builtin_ia32_permvarsi512_mask (__vy, __vx, __merge, __M);
+}
+
+/* Forward to __builtin_ia32_permvardf512_mask with operands (__Y, __X),
+   an undefined pass-through vector and an all-ones mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
+{
+  __v8df __vy = (__v8df) __Y;
+  __v8di __vx = (__v8di) __X;
+  __v8df __undef = (__v8df) _mm512_undefined_pd ();
+
+  return (__m512d) __builtin_ia32_permvardf512_mask (__vy, __vx, __undef,
+                                                    (__mmask8) -1);
+}
+
+/* Forward to __builtin_ia32_permvardf512_mask with operands (__Y, __X),
+   pass-through vector __W and mask __U.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
+{
+  __v8df __vy = (__v8df) __Y;
+  __v8di __vx = (__v8di) __X;
+  __v8df __merge = (__v8df) __W;
+
+  return (__m512d) __builtin_ia32_permvardf512_mask (__vy, __vx, __merge,
+                                                    (__mmask8) __U);
+}
+
+/* Forward to __builtin_ia32_permvardf512_mask with operands (__Y, __X),
+   a zero pass-through vector and mask __U.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
+{
+  __v8df __vy = (__v8df) __Y;
+  __v8di __vx = (__v8di) __X;
+  __v8df __zero = (__v8df) _mm512_setzero_pd ();
+
+  return (__m512d) __builtin_ia32_permvardf512_mask (__vy, __vx, __zero,
+                                                    (__mmask8) __U);
+}
+
+/* Forward to __builtin_ia32_permvarsf512_mask with operands (__Y, __X),
+   an undefined pass-through vector and an all-ones mask.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
+{
+  __v16sf __vy = (__v16sf) __Y;
+  __v16si __vx = (__v16si) __X;
+  __v16sf __undef = (__v16sf) _mm512_undefined_ps ();
+
+  return (__m512) __builtin_ia32_permvarsf512_mask (__vy, __vx, __undef,
+                                                   (__mmask16) -1);
+}
+
+/* Forward to __builtin_ia32_permvarsf512_mask with operands (__Y, __X),
+   pass-through vector __W and mask __U.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
+{
+  __v16sf __vy = (__v16sf) __Y;
+  __v16si __vx = (__v16si) __X;
+  __v16sf __merge = (__v16sf) __W;
+
+  return (__m512) __builtin_ia32_permvarsf512_mask (__vy, __vx, __merge,
+                                                   (__mmask16) __U);
+}
+
+/* Forward to __builtin_ia32_permvarsf512_mask with operands (__Y, __X),
+   a zero pass-through vector and mask __U.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
+{
+  __v16sf __vy = (__v16sf) __Y;
+  __v16si __vx = (__v16si) __X;
+  __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+
+  return (__m512) __builtin_ia32_permvarsf512_mask (__vy, __vx, __zero,
+                                                   (__mmask16) __U);
+}
+
+#ifdef __OPTIMIZE__
+/* Forward to __builtin_ia32_shufps512_mask: sources __M and __V, control
+   __imm, undefined pass-through vector, all-ones mask.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
+{
+  __v16sf __undef = (__v16sf) _mm512_undefined_ps ();
+
+  return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M, (__v16sf) __V,
+                                                __imm, __undef,
+                                                (__mmask16) -1);
+}
+
+/* Forward to __builtin_ia32_shufps512_mask: sources __M and __V, control
+   __imm, pass-through vector __W, write mask __U.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
+                        __m512 __V, const int __imm)
+{
+  __v16sf __merge = (__v16sf) __W;
+
+  return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M, (__v16sf) __V,
+                                                __imm, __merge,
+                                                (__mmask16) __U);
+}
+
+/* Forward to __builtin_ia32_shufps512_mask: sources __M and __V, control
+   __imm, zero pass-through vector, write mask __U.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
+{
+  __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+
+  return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M, (__v16sf) __V,
+                                                __imm, __zero,
+                                                (__mmask16) __U);
+}
+
+/* Forward to __builtin_ia32_shufpd512_mask: sources __M and __V, control
+   __imm, undefined pass-through vector, all-ones mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
+{
+  __v8df __undef = (__v8df) _mm512_undefined_pd ();
+
+  return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M, (__v8df) __V,
+                                                 __imm, __undef,
+                                                 (__mmask8) -1);
+}
+
+/* Forward to __builtin_ia32_shufpd512_mask: sources __M and __V, control
+   __imm, pass-through vector __W, write mask __U.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
+                        __m512d __V, const int __imm)
+{
+  __v8df __merge = (__v8df) __W;
+
+  return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M, (__v8df) __V,
+                                                 __imm, __merge,
+                                                 (__mmask8) __U);
+}
+
+/* Forward to __builtin_ia32_shufpd512_mask: sources __M and __V, control
+   __imm, zero pass-through vector, write mask __U.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
+                         const int __imm)
+{
+  __v8df __zero = (__v8df) _mm512_setzero_pd ();
+
+  return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M, (__v8df) __V,
+                                                 __imm, __zero,
+                                                 (__mmask8) __U);
+}
+
+/* Forward to __builtin_ia32_fixupimmpd512_mask with operands __A, __B, __C,
+   control __imm, an all-ones mask and rounding argument __R.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
+                          const int __imm, const int __R)
+{
+  __v8df __va = (__v8df) __A;
+  __v8df __vb = (__v8df) __B;
+  __v8di __vc = (__v8di) __C;
+
+  return (__m512d) __builtin_ia32_fixupimmpd512_mask (__va, __vb, __vc, __imm,
+                                                     (__mmask8) -1, __R);
+}
+
+/* Forward to __builtin_ia32_fixupimmpd512_mask with operands __A, __B, __C,
+   control __imm, write mask __U and rounding argument __R.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
+                               __m512i __C, const int __imm, const int __R)
+{
+  __v8df __va = (__v8df) __A;
+  __v8df __vb = (__v8df) __B;
+  __v8di __vc = (__v8di) __C;
+
+  return (__m512d) __builtin_ia32_fixupimmpd512_mask (__va, __vb, __vc, __imm,
+                                                     (__mmask8) __U, __R);
+}
+
+/* Forward to __builtin_ia32_fixupimmpd512_maskz with operands __A, __B, __C,
+   control __imm, mask __U and rounding argument __R.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
+                                __m512i __C, const int __imm, const int __R)
+{
+  __v8df __va = (__v8df) __A;
+  __v8df __vb = (__v8df) __B;
+  __v8di __vc = (__v8di) __C;
+
+  return (__m512d) __builtin_ia32_fixupimmpd512_maskz (__va, __vb, __vc, __imm,
+                                                      (__mmask8) __U, __R);
+}
+
+/* Forward to __builtin_ia32_fixupimmps512_mask with operands __A, __B, __C,
+   control __imm, an all-ones mask and rounding argument __R.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
+                          const int __imm, const int __R)
+{
+  __v16sf __va = (__v16sf) __A;
+  __v16sf __vb = (__v16sf) __B;
+  __v16si __vc = (__v16si) __C;
+
+  return (__m512) __builtin_ia32_fixupimmps512_mask (__va, __vb, __vc, __imm,
+                                                    (__mmask16) -1, __R);
+}
+
+/* Forward to __builtin_ia32_fixupimmps512_mask with operands __A, __B, __C,
+   control __imm, write mask __U and rounding argument __R.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
+                               __m512i __C, const int __imm, const int __R)
+{
+  __v16sf __va = (__v16sf) __A;
+  __v16sf __vb = (__v16sf) __B;
+  __v16si __vc = (__v16si) __C;
+
+  return (__m512) __builtin_ia32_fixupimmps512_mask (__va, __vb, __vc, __imm,
+                                                    (__mmask16) __U, __R);
+}
+
+/* Forward to __builtin_ia32_fixupimmps512_maskz with operands __A, __B, __C,
+   control __imm, mask __U and rounding argument __R.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+                                __m512i __C, const int __imm, const int __R)
+{
+  __v16sf __va = (__v16sf) __A;
+  __v16sf __vb = (__v16sf) __B;
+  __v16si __vc = (__v16si) __C;
+
+  return (__m512) __builtin_ia32_fixupimmps512_maskz (__va, __vb, __vc, __imm,
+                                                     (__mmask16) __U, __R);
+}
+
+/* Forward to __builtin_ia32_fixupimmsd_mask with operands __A, __B, __C,
+   control __imm, an all-ones mask and rounding argument __R.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
+                       const int __imm, const int __R)
+{
+  __v2df __va = (__v2df) __A;
+  __v2df __vb = (__v2df) __B;
+  __v2di __vc = (__v2di) __C;
+
+  return (__m128d) __builtin_ia32_fixupimmsd_mask (__va, __vb, __vc, __imm,
+                                                  (__mmask8) -1, __R);
+}
+
+/* Forward to __builtin_ia32_fixupimmsd_mask with operands __A, __B, __C,
+   control __imm, write mask __U and rounding argument __R.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
+                            __m128i __C, const int __imm, const int __R)
+{
+  __v2df __va = (__v2df) __A;
+  __v2df __vb = (__v2df) __B;
+  __v2di __vc = (__v2di) __C;
+
+  return (__m128d) __builtin_ia32_fixupimmsd_mask (__va, __vb, __vc, __imm,
+                                                  (__mmask8) __U, __R);
+}
+
+/* Forward to __builtin_ia32_fixupimmsd_maskz with operands __A, __B, __C,
+   control __imm, mask __U and rounding argument __R.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
+                             __m128i __C, const int __imm, const int __R)
+{
+  __v2df __va = (__v2df) __A;
+  __v2df __vb = (__v2df) __B;
+  __v2di __vc = (__v2di) __C;
+
+  return (__m128d) __builtin_ia32_fixupimmsd_maskz (__va, __vb, __vc, __imm,
+                                                   (__mmask8) __U, __R);
+}
+
+/* Forward to __builtin_ia32_fixupimmss_mask with operands __A, __B, __C,
+   control __imm, an all-ones mask and rounding argument __R.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
+                       const int __imm, const int __R)
+{
+  __v4sf __va = (__v4sf) __A;
+  __v4sf __vb = (__v4sf) __B;
+  __v4si __vc = (__v4si) __C;
+
+  return (__m128) __builtin_ia32_fixupimmss_mask (__va, __vb, __vc, __imm,
+                                                 (__mmask8) -1, __R);
+}
+
+/* Forward to __builtin_ia32_fixupimmss_mask with operands __A, __B, __C,
+   control __imm, write mask __U and rounding argument __R.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
+                            __m128i __C, const int __imm, const int __R)
+{
+  __v4sf __va = (__v4sf) __A;
+  __v4sf __vb = (__v4sf) __B;
+  __v4si __vc = (__v4si) __C;
+
+  return (__m128) __builtin_ia32_fixupimmss_mask (__va, __vb, __vc, __imm,
+                                                 (__mmask8) __U, __R);
+}
+
+/* Forward to __builtin_ia32_fixupimmss_maskz with operands __A, __B, __C,
+   control __imm, mask __U and rounding argument __R.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
+                             __m128i __C, const int __imm, const int __R)
+{
+  __v4sf __va = (__v4sf) __A;
+  __v4sf __vb = (__v4sf) __B;
+  __v4si __vc = (__v4si) __C;
+
+  return (__m128) __builtin_ia32_fixupimmss_maskz (__va, __vb, __vc, __imm,
+                                                  (__mmask8) __U, __R);
+}
+
+#else
+/* Macro forms of the _mm512_*shuffle_{pd,ps} family, selected when
+   __OPTIMIZE__ is not defined.  Each expands to the same builtin call as
+   the corresponding inline function.  */
+#define _mm512_shuffle_pd(X, Y, C) \
+  ((__m512d)__builtin_ia32_shufpd512_mask ( \
+     (__v8df)(__m512d)(X), \
+     (__v8df)(__m512d)(Y), \
+     (int)(C), \
+     (__v8df)(__m512d)_mm512_undefined_pd(), \
+     (__mmask8)-1))
+
+#define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
+  ((__m512d)__builtin_ia32_shufpd512_mask ( \
+     (__v8df)(__m512d)(X), \
+     (__v8df)(__m512d)(Y), \
+     (int)(C), \
+     (__v8df)(__m512d)(W), \
+     (__mmask8)(U)))
+
+#define _mm512_maskz_shuffle_pd(U, X, Y, C) \
+  ((__m512d)__builtin_ia32_shufpd512_mask ( \
+     (__v8df)(__m512d)(X), \
+     (__v8df)(__m512d)(Y), \
+     (int)(C), \
+     (__v8df)(__m512d)_mm512_setzero_pd(), \
+     (__mmask8)(U)))
+
+#define _mm512_shuffle_ps(X, Y, C) \
+  ((__m512)__builtin_ia32_shufps512_mask ( \
+     (__v16sf)(__m512)(X), \
+     (__v16sf)(__m512)(Y), \
+     (int)(C), \
+     (__v16sf)(__m512)_mm512_undefined_ps(), \
+     (__mmask16)-1))
+
+#define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
+  ((__m512)__builtin_ia32_shufps512_mask ( \
+     (__v16sf)(__m512)(X), \
+     (__v16sf)(__m512)(Y), \
+     (int)(C), \
+     (__v16sf)(__m512)(W), \
+     (__mmask16)(U)))
+
+#define _mm512_maskz_shuffle_ps(U, X, Y, C) \
+  ((__m512)__builtin_ia32_shufps512_mask ( \
+     (__v16sf)(__m512)(X), \
+     (__v16sf)(__m512)(Y), \
+     (int)(C), \
+     (__v16sf)(__m512)_mm512_setzero_ps(), \
+     (__mmask16)(U)))
+
+/* Macro forms of the fixupimm_round family (512-bit pd/ps and scalar sd/ss),
+   selected when __OPTIMIZE__ is not defined.  Each expands to the same
+   builtin call as the corresponding inline function.  */
+#define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \
+  ((__m512d)__builtin_ia32_fixupimmpd512_mask ( \
+     (__v8df)(__m512d)(X), \
+     (__v8df)(__m512d)(Y), \
+     (__v8di)(__m512i)(Z), \
+     (int)(C), \
+     (__mmask8)(-1), (R)))
+
+#define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \
+  ((__m512d)__builtin_ia32_fixupimmpd512_mask ( \
+     (__v8df)(__m512d)(X), \
+     (__v8df)(__m512d)(Y), \
+     (__v8di)(__m512i)(Z), \
+     (int)(C), \
+     (__mmask8)(U), (R)))
+
+#define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \
+  ((__m512d)__builtin_ia32_fixupimmpd512_maskz ( \
+     (__v8df)(__m512d)(X), \
+     (__v8df)(__m512d)(Y), \
+     (__v8di)(__m512i)(Z), \
+     (int)(C), \
+     (__mmask8)(U), (R)))
+
+#define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \
+  ((__m512)__builtin_ia32_fixupimmps512_mask ( \
+     (__v16sf)(__m512)(X), \
+     (__v16sf)(__m512)(Y), \
+     (__v16si)(__m512i)(Z), \
+     (int)(C), \
+     (__mmask16)(-1), (R)))
+
+#define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \
+  ((__m512)__builtin_ia32_fixupimmps512_mask ( \
+     (__v16sf)(__m512)(X), \
+     (__v16sf)(__m512)(Y), \
+     (__v16si)(__m512i)(Z), \
+     (int)(C), \
+     (__mmask16)(U), (R)))
+
+#define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \
+  ((__m512)__builtin_ia32_fixupimmps512_maskz ( \
+     (__v16sf)(__m512)(X), \
+     (__v16sf)(__m512)(Y), \
+     (__v16si)(__m512i)(Z), \
+     (int)(C), \
+     (__mmask16)(U), (R)))
+
+#define _mm_fixupimm_round_sd(X, Y, Z, C, R) \
+  ((__m128d)__builtin_ia32_fixupimmsd_mask ( \
+     (__v2df)(__m128d)(X), \
+     (__v2df)(__m128d)(Y), \
+     (__v2di)(__m128i)(Z), \
+     (int)(C), \
+     (__mmask8)(-1), (R)))
+
+#define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \
+  ((__m128d)__builtin_ia32_fixupimmsd_mask ( \
+     (__v2df)(__m128d)(X), \
+     (__v2df)(__m128d)(Y), \
+     (__v2di)(__m128i)(Z), \
+     (int)(C), \
+     (__mmask8)(U), (R)))
+
+#define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \
+  ((__m128d)__builtin_ia32_fixupimmsd_maskz ( \
+     (__v2df)(__m128d)(X), \
+     (__v2df)(__m128d)(Y), \
+     (__v2di)(__m128i)(Z), \
+     (int)(C), \
+     (__mmask8)(U), (R)))
+
+#define _mm_fixupimm_round_ss(X, Y, Z, C, R) \
+  ((__m128)__builtin_ia32_fixupimmss_mask ( \
+     (__v4sf)(__m128)(X), \
+     (__v4sf)(__m128)(Y), \
+     (__v4si)(__m128i)(Z), \
+     (int)(C), \
+     (__mmask8)(-1), (R)))
+
+#define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \
+  ((__m128)__builtin_ia32_fixupimmss_mask ( \
+     (__v4sf)(__m128)(X), \
+     (__v4sf)(__m128)(Y), \
+     (__v4si)(__m128i)(Z), \
+     (int)(C), \
+     (__mmask8)(U), (R)))
+
+#define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \
+  ((__m128)__builtin_ia32_fixupimmss_maskz ( \
+     (__v4sf)(__m128)(X), \
+     (__v4sf)(__m128)(Y), \
+     (__v4si)(__m128i)(Z), \
+     (int)(C), \
+     (__mmask8)(U), (R)))
+#endif
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_movehdup_ps (__m512 __A)
+{
+ return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1);
+}
+
+/* Forward to __builtin_ia32_movshdup512_mask: source __A, pass-through
+   vector __W, write mask __U.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+  __v16sf __merge = (__v16sf) __W;
+
+  return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A, __merge,
+                                                  (__mmask16) __U);
+}
+
+/* Forward to __builtin_ia32_movshdup512_mask: source __A, zero pass-through
+   vector, write mask __U.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
+{
+  __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+
+  return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A, __zero,
+                                                  (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_moveldup_ps (__m512 __A)
+{
+ return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1);
+}
+
+/* Forward to __builtin_ia32_movsldup512_mask: source __A, pass-through
+   vector __W, write mask __U.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+  __v16sf __merge = (__v16sf) __W;
+
+  return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A, __merge,
+                                                  (__mmask16) __U);
+}
+
+/* Forward to __builtin_ia32_movsldup512_mask: source __A, zero pass-through
+   vector, write mask __U.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
+{
+  __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+
+  return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A, __zero,
+                                                  (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_or_si512 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_or_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+/* Forward to __builtin_ia32_pord512_mask: operands __A and __B,
+   pass-through vector __W, write mask __U.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
+{
+  __v16si __merge = (__v16si) __W;
+
+  return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A, (__v16si) __B,
+                                               __merge, (__mmask16) __U);
+}
+
+/* Forward to __builtin_ia32_pord512_mask: operands __A and __B, zero
+   pass-through vector, write mask __U.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+  __v16si __zero = (__v16si) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A, (__v16si) __B,
+                                               __zero, (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_or_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1);
+}
+
+/* Forward to __builtin_ia32_porq512_mask: operands __A and __B,
+   pass-through vector __W, write mask __U.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
+{
+  __v8di __merge = (__v8di) __W;
+
+  return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A, (__v8di) __B,
+                                               __merge, (__mmask8) __U);
+}
+
+/* Forward to __builtin_ia32_porq512_mask: operands __A and __B, zero
+   pass-through vector, write mask __U.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+  __v8di __zero = (__v8di) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A, (__v8di) __B,
+                                               __zero, (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_xor_si512 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_xor_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+/* Forward to __builtin_ia32_pxord512_mask: operands __A and __B,
+   pass-through vector __W, write mask __U.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
+{
+  __v16si __merge = (__v16si) __W;
+
+  return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A, (__v16si) __B,
+                                                __merge, (__mmask16) __U);
+}
+
+/* Forward to __builtin_ia32_pxord512_mask: operands __A and __B, zero
+   pass-through vector, write mask __U.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+  __v16si __zero = (__v16si) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A, (__v16si) __B,
+                                                __zero, (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_xor_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1);
+}
+
+/* Forward to __builtin_ia32_pxorq512_mask: operands __A and __B,
+   pass-through vector __W, write mask __U.
+
+   __U is declared __mmask8 to match the (__mmask8) cast handed to the
+   builtin and the sibling _mm512_mask_or_epi64; the previous __mmask16
+   declaration accepted bits that the cast then discarded.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_xor_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+/* Forward to __builtin_ia32_pxorq512_mask: operands __A and __B, zero
+   pass-through vector, write mask __U.
+
+   __U is declared __mmask8 to match the (__mmask8) cast handed to the
+   builtin and the sibling _mm512_maskz_or_epi64; the previous __mmask16
+   declaration accepted bits that the cast then discarded.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_xor_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rol_epi32 (__m512i __A, const int __B)
+{
+ return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+/* Forward to __builtin_ia32_prold512_mask: source __A, count __B,
+   pass-through vector __W, write mask __U.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
+{
+  __v16si __merge = (__v16si) __W;
+
+  return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B, __merge,
+                                                (__mmask16) __U);
+}
+
+/* Forward to __builtin_ia32_prold512_mask: source __A, count __B, zero
+   pass-through vector, write mask __U.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
+{
+  __v16si __zero = (__v16si) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B, __zero,
+                                                (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ror_epi32 (__m512i __A, int __B)
+{
+ return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+/* Forward to __builtin_ia32_prord512_mask: source __A, count __B,
+   pass-through vector __W, write mask __U.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
+{
+  __v16si __merge = (__v16si) __W;
+
+  return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B, __merge,
+                                                (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
+{
+ return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rol_epi64 (__m512i __A, const int __B)
+{
+ return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
+{
+ return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
+{
+ return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ror_epi64 (__m512i __A, int __B)
+{
+ return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
+{
+ return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
+{
+ return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+#else
+#define _mm512_rol_epi32(A, B) \
+ ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
+ (int)(B), \
+ (__v16si)_mm512_undefined_si512 (), \
+ (__mmask16)(-1)))
+#define _mm512_mask_rol_epi32(W, U, A, B) \
+ ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
+ (int)(B), \
+ (__v16si)(__m512i)(W), \
+ (__mmask16)(U)))
+#define _mm512_maskz_rol_epi32(U, A, B) \
+ ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
+ (int)(B), \
+ (__v16si)_mm512_setzero_si512 (), \
+ (__mmask16)(U)))
+#define _mm512_ror_epi32(A, B) \
+ ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
+ (int)(B), \
+ (__v16si)_mm512_undefined_si512 (), \
+ (__mmask16)(-1)))
+#define _mm512_mask_ror_epi32(W, U, A, B) \
+ ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
+ (int)(B), \
+ (__v16si)(__m512i)(W), \
+ (__mmask16)(U)))
+#define _mm512_maskz_ror_epi32(U, A, B) \
+ ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
+ (int)(B), \
+ (__v16si)_mm512_setzero_si512 (), \
+ (__mmask16)(U)))
+#define _mm512_rol_epi64(A, B) \
+ ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
+ (int)(B), \
+ (__v8di)_mm512_undefined_si512 (), \
+ (__mmask8)(-1)))
+#define _mm512_mask_rol_epi64(W, U, A, B) \
+ ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
+ (int)(B), \
+ (__v8di)(__m512i)(W), \
+ (__mmask8)(U)))
+#define _mm512_maskz_rol_epi64(U, A, B) \
+ ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
+ (int)(B), \
+ (__v8di)_mm512_setzero_si512 (), \
+ (__mmask8)(U)))
+
+#define _mm512_ror_epi64(A, B) \
+ ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
+ (int)(B), \
+ (__v8di)_mm512_undefined_si512 (), \
+ (__mmask8)(-1)))
+#define _mm512_mask_ror_epi64(W, U, A, B) \
+ ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
+ (int)(B), \
+ (__v8di)(__m512i)(W), \
+ (__mmask8)(U)))
+#define _mm512_maskz_ror_epi64(U, A, B) \
+ ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
+ (int)(B), \
+ (__v8di)_mm512_setzero_si512 (), \
+ (__mmask8)(U)))
+#endif
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_and_si512 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+/* Same builtin call as _mm512_and_si512: __builtin_ia32_pandd512_mask with
+   an undefined third vector operand and an all-ones 16-bit mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_and_epi32 (__m512i __A, __m512i __B)
+{
+  const __mmask16 __every_lane = (__mmask16) -1;
+  __v16si __unused_src = (__v16si) _mm512_undefined_si512 ();
+
+  return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A, (__v16si) __B,
+                                                 __unused_src, __every_lane);
+}
+
+/* Forwards __A, __B, the vector __W and the mask __U to
+   __builtin_ia32_pandd512_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
+{
+  __v16si __lhs = (__v16si) __A;
+  __v16si __rhs = (__v16si) __B;
+
+  return (__m512i) __builtin_ia32_pandd512_mask (__lhs, __rhs, (__v16si) __W,
+                                                 (__mmask16) __U);
+}
+
+/* Forwards __A, __B, an all-zero vector and the mask __U to
+   __builtin_ia32_pandd512_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+  __v16si __zeros = (__v16si) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A, (__v16si) __B,
+                                                 __zeros, (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_and_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1);
+}
+
+/* Forwards __A, __B, the vector __W and the mask __U to
+   __builtin_ia32_pandq512_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
+{
+  __v8di __lhs = (__v8di) __A;
+  __v8di __rhs = (__v8di) __B;
+
+  return (__m512i) __builtin_ia32_pandq512_mask (__lhs, __rhs,
+                                                 (__v8di) __W, __U);
+}
+
+/* Forwards __A, __B, an all-zero vector and the mask __U to
+   __builtin_ia32_pandq512_mask.  The zero vector is built with the integer
+   helper _mm512_setzero_si512 (as the other integer maskz intrinsics here
+   do) rather than the double-precision _mm512_setzero_pd, and the mask is
+   cast to __mmask8 like its siblings; the bits passed are unchanged.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
+                                                 (__v8di) __B,
+                                                 (__v8di)
+                                                 _mm512_setzero_si512 (),
+                                                 (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_andnot_si512 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+/* Same builtin call as _mm512_andnot_si512: __builtin_ia32_pandnd512_mask
+   with an undefined third vector operand and an all-ones 16-bit mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_andnot_epi32 (__m512i __A, __m512i __B)
+{
+  const __mmask16 __every_lane = (__mmask16) -1;
+  __v16si __unused_src = (__v16si) _mm512_undefined_si512 ();
+
+  return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, (__v16si) __B,
+                                                  __unused_src, __every_lane);
+}
+
+/* Forwards __A, __B, the vector __W and the mask __U to
+   __builtin_ia32_pandnd512_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
+{
+  __v16si __lhs = (__v16si) __A;
+  __v16si __rhs = (__v16si) __B;
+
+  return (__m512i) __builtin_ia32_pandnd512_mask (__lhs, __rhs, (__v16si) __W,
+                                                  (__mmask16) __U);
+}
+
+/* Forwards __A, __B, an all-zero vector and the mask __U to
+   __builtin_ia32_pandnd512_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+  __v16si __zeros = (__v16si) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, (__v16si) __B,
+                                                  __zeros, (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_andnot_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1);
+}
+
+/* Forwards __A, __B, the vector __W and the mask __U to
+   __builtin_ia32_pandnq512_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
+{
+  __v8di __lhs = (__v8di) __A;
+  __v8di __rhs = (__v8di) __B;
+
+  return (__m512i) __builtin_ia32_pandnq512_mask (__lhs, __rhs,
+                                                  (__v8di) __W, __U);
+}
+
+/* Forwards __A, __B, an all-zero vector and the mask __U to
+   __builtin_ia32_pandnq512_mask.  The zero vector is built with the integer
+   helper _mm512_setzero_si512 (as the other integer maskz intrinsics here
+   do) rather than the double-precision _mm512_setzero_pd, and the mask is
+   cast to __mmask8 like its siblings; the bits passed are unchanged.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
+                                                  (__v8di) __B,
+                                                  (__v8di)
+                                                  _mm512_setzero_si512 (),
+                                                  (__mmask8) __U);
+}
+
+/* Returns the 16-bit mask produced by __builtin_ia32_ptestmd512 for __A and
+   __B, called with an all-ones input mask.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_test_epi32_mask (__m512i __A, __m512i __B)
+{
+  const __mmask16 __every_lane = (__mmask16) -1;
+
+  return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A, (__v16si) __B,
+                                                __every_lane);
+}
+
+/* Returns the 16-bit mask produced by __builtin_ia32_ptestmd512 for __A and
+   __B, called with the caller's input mask __U.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
+{
+  __v16si __lhs = (__v16si) __A;
+  __v16si __rhs = (__v16si) __B;
+
+  return (__mmask16) __builtin_ia32_ptestmd512 (__lhs, __rhs, __U);
+}
+
+/* Returns the 8-bit mask produced by __builtin_ia32_ptestmq512 for __A and
+   __B, called with an all-ones input mask.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_test_epi64_mask (__m512i __A, __m512i __B)
+{
+  const __mmask8 __every_lane = (__mmask8) -1;
+
+  return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B,
+                                               __every_lane);
+}
+
+/* Returns the 8-bit mask produced by __builtin_ia32_ptestmq512 for __A and
+   __B, called with the caller's input mask __U.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
+{
+  __v8di __lhs = (__v8di) __A;
+  __v8di __rhs = (__v8di) __B;
+
+  return (__mmask8) __builtin_ia32_ptestmq512 (__lhs, __rhs, __U);
+}
+
+/* Returns the 16-bit mask produced by __builtin_ia32_ptestnmd512 for __A
+   and __B, called with an all-ones input mask.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
+{
+  const __mmask16 __every_lane = (__mmask16) -1;
+
+  return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A, (__v16si) __B,
+                                                 __every_lane);
+}
+
+/* Returns the 16-bit mask produced by __builtin_ia32_ptestnmd512 for __A
+   and __B, called with the caller's input mask __U.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
+{
+  __v16si __lhs = (__v16si) __A;
+  __v16si __rhs = (__v16si) __B;
+
+  return (__mmask16) __builtin_ia32_ptestnmd512 (__lhs, __rhs, __U);
+}
+
+/* Returns the 8-bit mask produced by __builtin_ia32_ptestnmq512 for __A and
+   __B, called with an all-ones input mask.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
+{
+  const __mmask8 __every_lane = (__mmask8) -1;
+
+  return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A, (__v8di) __B,
+                                                __every_lane);
+}
+
+/* Returns the 8-bit mask produced by __builtin_ia32_ptestnmq512 for __A and
+   __B, called with the caller's input mask __U.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
+{
+  __v8di __lhs = (__v8di) __A;
+  __v8di __rhs = (__v8di) __B;
+
+  return (__mmask8) __builtin_ia32_ptestnmq512 (__lhs, __rhs, __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+/* Forwards __A, __B, the vector __W and the mask __U to
+   __builtin_ia32_punpckhdq512_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
+                            __m512i __B)
+{
+  __v16si __lhs = (__v16si) __A;
+  __v16si __rhs = (__v16si) __B;
+
+  return (__m512i) __builtin_ia32_punpckhdq512_mask (__lhs, __rhs,
+                                                     (__v16si) __W,
+                                                     (__mmask16) __U);
+}
+
+/* Forwards __A, __B, an all-zero vector and the mask __U to
+   __builtin_ia32_punpckhdq512_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+  __v16si __zeros = (__v16si) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
+                                                     (__v16si) __B,
+                                                     __zeros,
+                                                     (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1);
+}
+
+/* Forwards __A, __B, the vector __W and the mask __U to
+   __builtin_ia32_punpckhqdq512_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
+{
+  __v8di __lhs = (__v8di) __A;
+  __v8di __rhs = (__v8di) __B;
+
+  return (__m512i) __builtin_ia32_punpckhqdq512_mask (__lhs, __rhs,
+                                                      (__v8di) __W,
+                                                      (__mmask8) __U);
+}
+
+/* Forwards __A, __B, an all-zero vector and the mask __U to
+   __builtin_ia32_punpckhqdq512_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+  __v8di __zeros = (__v8di) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
+                                                      (__v8di) __B,
+                                                      __zeros,
+                                                      (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+/* Forwards __A, __B, the vector __W and the mask __U to
+   __builtin_ia32_punpckldq512_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
+                            __m512i __B)
+{
+  __v16si __lhs = (__v16si) __A;
+  __v16si __rhs = (__v16si) __B;
+
+  return (__m512i) __builtin_ia32_punpckldq512_mask (__lhs, __rhs,
+                                                     (__v16si) __W,
+                                                     (__mmask16) __U);
+}
+
+/* Forwards __A, __B, an all-zero vector and the mask __U to
+   __builtin_ia32_punpckldq512_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+  __v16si __zeros = (__v16si) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
+                                                     (__v16si) __B,
+                                                     __zeros,
+                                                     (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1);
+}
+
+/* Forwards __A, __B, the vector __W and the mask __U to
+   __builtin_ia32_punpcklqdq512_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
+{
+  __v8di __lhs = (__v8di) __A;
+  __v8di __rhs = (__v8di) __B;
+
+  return (__m512i) __builtin_ia32_punpcklqdq512_mask (__lhs, __rhs,
+                                                      (__v8di) __W,
+                                                      (__mmask8) __U);
+}
+
+/* Forwards __A, __B, an all-zero vector and the mask __U to
+   __builtin_ia32_punpcklqdq512_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+  __v8di __zeros = (__v8di) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
+                                                      (__v8di) __B,
+                                                      __zeros,
+                                                      (__mmask8) __U);
+}
+
+/* Scalar float -> 64-bit integer conversions taking an integer __R that is
+   passed unchanged to the builtin.  Only provided when __x86_64__ is
+   defined.  Inline functions are used when __OPTIMIZE__ is defined, macros
+   otherwise.  The _si64 and _i64 spellings call the same builtin.  */
+#ifdef __x86_64__
+#ifdef __OPTIMIZE__
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundss_u64 (__m128 __A, const int __R)
+{
+  return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundss_si64 (__m128 __A, const int __R)
+{
+  return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundss_i64 (__m128 __A, const int __R)
+{
+  return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
+}
+
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundss_u64 (__m128 __A, const int __R)
+{
+  return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundss_i64 (__m128 __A, const int __R)
+{
+  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundss_si64 (__m128 __A, const int __R)
+{
+  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
+}
+#else
+/* Macro fallbacks.  Arguments are cast to the same operand types the
+   inline versions use, so both forms treat their arguments alike.  */
+#define _mm_cvt_roundss_u64(A, B) \
+ ((unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), (int)(B)))
+
+#define _mm_cvt_roundss_si64(A, B) \
+ ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(B)))
+
+#define _mm_cvt_roundss_i64(A, B) \
+ ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(B)))
+
+#define _mm_cvtt_roundss_u64(A, B) \
+ ((unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), (int)(B)))
+
+#define _mm_cvtt_roundss_i64(A, B) \
+ ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(B)))
+
+#define _mm_cvtt_roundss_si64(A, B) \
+ ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(B)))
+#endif
+#endif
+
+/* Scalar float -> 32-bit integer conversions taking an integer __R that is
+   passed unchanged to the builtin.  Inline functions are used when
+   __OPTIMIZE__ is defined, macros otherwise.  The _si32 and _i32 spellings
+   call the same builtin.  */
+#ifdef __OPTIMIZE__
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundss_u32 (__m128 __A, const int __R)
+{
+  return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundss_si32 (__m128 __A, const int __R)
+{
+  return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundss_i32 (__m128 __A, const int __R)
+{
+  return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
+}
+
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundss_u32 (__m128 __A, const int __R)
+{
+  return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundss_i32 (__m128 __A, const int __R)
+{
+  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundss_si32 (__m128 __A, const int __R)
+{
+  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
+}
+#else
+/* Macro fallbacks.  Arguments are cast to the same operand types the
+   inline versions use, so both forms treat their arguments alike.  */
+#define _mm_cvt_roundss_u32(A, B) \
+ ((unsigned)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(B)))
+
+#define _mm_cvt_roundss_si32(A, B) \
+ ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(B)))
+
+#define _mm_cvt_roundss_i32(A, B) \
+ ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(B)))
+
+#define _mm_cvtt_roundss_u32(A, B) \
+ ((unsigned)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(B)))
+
+#define _mm_cvtt_roundss_si32(A, B) \
+ ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(B)))
+
+#define _mm_cvtt_roundss_i32(A, B) \
+ ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(B)))
+#endif
+
+/* Scalar double -> 64-bit integer conversions taking an integer __R that is
+   passed unchanged to the builtin.  Only provided when __x86_64__ is
+   defined.  Inline functions are used when __OPTIMIZE__ is defined, macros
+   otherwise.  The _si64 and _i64 spellings call the same builtin.  */
+#ifdef __x86_64__
+#ifdef __OPTIMIZE__
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsd_u64 (__m128d __A, const int __R)
+{
+  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsd_si64 (__m128d __A, const int __R)
+{
+  return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsd_i64 (__m128d __A, const int __R)
+{
+  return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
+}
+
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
+{
+  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
+{
+  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
+{
+  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
+}
+#else
+/* Macro fallbacks.  Arguments are cast to the same operand types the
+   inline versions use, so both forms treat their arguments alike.  */
+#define _mm_cvt_roundsd_u64(A, B) \
+ ((unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), (int)(B)))
+
+#define _mm_cvt_roundsd_si64(A, B) \
+ ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(B)))
+
+#define _mm_cvt_roundsd_i64(A, B) \
+ ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(B)))
+
+#define _mm_cvtt_roundsd_u64(A, B) \
+ ((unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), (int)(B)))
+
+#define _mm_cvtt_roundsd_si64(A, B) \
+ ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(B)))
+
+#define _mm_cvtt_roundsd_i64(A, B) \
+ ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(B)))
+#endif
+#endif
+
+/* Scalar double -> 32-bit integer conversions taking an integer __R that is
+   passed unchanged to the builtin.  Inline functions are used when
+   __OPTIMIZE__ is defined, macros otherwise.  The _si32 and _i32 spellings
+   call the same builtin.  */
+#ifdef __OPTIMIZE__
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsd_u32 (__m128d __A, const int __R)
+{
+  return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsd_si32 (__m128d __A, const int __R)
+{
+  return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsd_i32 (__m128d __A, const int __R)
+{
+  return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
+}
+
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
+{
+  return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
+{
+  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
+{
+  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
+}
+#else
+/* Macro fallbacks.  Arguments are cast to the same operand types the
+   inline versions use, so both forms treat their arguments alike.  */
+#define _mm_cvt_roundsd_u32(A, B) \
+ ((unsigned)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(B)))
+
+#define _mm_cvt_roundsd_si32(A, B) \
+ ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(B)))
+
+#define _mm_cvt_roundsd_i32(A, B) \
+ ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(B)))
+
+#define _mm_cvtt_roundsd_u32(A, B) \
+ ((unsigned)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(B)))
+
+#define _mm_cvtt_roundsd_si32(A, B) \
+ ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(B)))
+
+#define _mm_cvtt_roundsd_i32(A, B) \
+ ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(B)))
+#endif
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_movedup_pd (__m512d __A)
+{
+ return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1);
+}
+
+/* Forwards __A, the vector __W and the mask __U to
+   __builtin_ia32_movddup512_mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+  __v8df __src = (__v8df) __A;
+
+  return (__m512d) __builtin_ia32_movddup512_mask (__src, (__v8df) __W,
+                                                   (__mmask8) __U);
+}
+
+/* Forwards __A, an all-zero vector and the mask __U to
+   __builtin_ia32_movddup512_mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
+{
+  __v8df __zeros = (__v8df) _mm512_setzero_pd ();
+
+  return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A, __zeros,
+                                                   (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_unpacklo_pd (__m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1);
+}
+
+/* Forwards __A, __B, the vector __W and the mask __U to
+   __builtin_ia32_unpcklpd512_mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
+{
+  __v8df __lhs = (__v8df) __A;
+  __v8df __rhs = (__v8df) __B;
+
+  return (__m512d) __builtin_ia32_unpcklpd512_mask (__lhs, __rhs,
+                                                    (__v8df) __W,
+                                                    (__mmask8) __U);
+}
+
+/* Forwards __A, __B, an all-zero vector and the mask __U to
+   __builtin_ia32_unpcklpd512_mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+  __v8df __zeros = (__v8df) _mm512_setzero_pd ();
+
+  return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
+                                                    (__v8df) __B,
+                                                    __zeros,
+                                                    (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_unpackhi_pd (__m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1);
+}
+
+/* Forwards __A, __B, the vector __W and the mask __U to
+   __builtin_ia32_unpckhpd512_mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
+{
+  __v8df __lhs = (__v8df) __A;
+  __v8df __rhs = (__v8df) __B;
+
+  return (__m512d) __builtin_ia32_unpckhpd512_mask (__lhs, __rhs,
+                                                    (__v8df) __W,
+                                                    (__mmask8) __U);
+}
+
+/* Forwards __A, __B, an all-zero vector and the mask __U to
+   __builtin_ia32_unpckhpd512_mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+  __v8df __zeros = (__v8df) _mm512_setzero_pd ();
+
+  return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
+                                                    (__v8df) __B,
+                                                    __zeros,
+                                                    (__mmask8) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_unpackhi_ps (__m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1);
+}
+
+/* Forwards __A, __B, the vector __W and the mask __U to
+   __builtin_ia32_unpckhps512_mask.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+  __v16sf __lhs = (__v16sf) __A;
+  __v16sf __rhs = (__v16sf) __B;
+
+  return (__m512) __builtin_ia32_unpckhps512_mask (__lhs, __rhs,
+                                                   (__v16sf) __W,
+                                                   (__mmask16) __U);
+}
+
+/* Forwards __A, __B, an all-zero vector and the mask __U to
+   __builtin_ia32_unpckhps512_mask.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+  __v16sf __zeros = (__v16sf) _mm512_setzero_ps ();
+
+  return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
+                                                   (__v16sf) __B,
+                                                   __zeros,
+                                                   (__mmask16) __U);
+}
+
+/* Packed conversions float -> double (cvtps2pd), half -> float (vcvtph2ps)
+   and float -> half (vcvtps2ph), each taking an integer control argument
+   (__R or __I) that is passed unchanged to the builtin.  Inline functions
+   are used when __OPTIMIZE__ is defined, macros otherwise.  */
+#ifdef __OPTIMIZE__
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundps_pd (__m256 __A, const int __R)
+{
+  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
+                                                    (__v8df)
+                                                    _mm512_undefined_pd (),
+                                                    (__mmask8) -1, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
+                            const int __R)
+{
+  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
+                                                    (__v8df) __W,
+                                                    (__mmask8) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
+{
+  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
+                                                    (__v8df)
+                                                    _mm512_setzero_pd (),
+                                                    (__mmask8) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundph_ps (__m256i __A, const int __R)
+{
+  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
+                                                    (__v16sf)
+                                                    _mm512_undefined_ps (),
+                                                    (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
+                            const int __R)
+{
+  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
+                                                    (__v16sf) __W,
+                                                    (__mmask16) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
+{
+  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
+                                                    (__v16sf)
+                                                    _mm512_setzero_ps (),
+                                                    (__mmask16) __U, __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundps_ph (__m512 __A, const int __I)
+{
+  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
+                                                     __I,
+                                                     (__v16hi)
+                                                     _mm256_undefined_si256 (),
+                                                     -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtps_ph (__m512 __A, const int __I)
+{
+  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
+                                                     __I,
+                                                     (__v16hi)
+                                                     _mm256_undefined_si256 (),
+                                                     -1);
+}
+
+/* In the four masked vcvtps2ph wrappers below, the vector operand is named
+   __U and the mask is named __W, the reverse of the naming used by the
+   other masked intrinsics in this header.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
+                            const int __I)
+{
+  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
+                                                     __I,
+                                                     (__v16hi) __U,
+                                                     (__mmask16) __W);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
+{
+  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
+                                                     __I,
+                                                     (__v16hi) __U,
+                                                     (__mmask16) __W);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
+{
+  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
+                                                     __I,
+                                                     (__v16hi)
+                                                     _mm256_setzero_si256 (),
+                                                     (__mmask16) __W);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
+{
+  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
+                                                     __I,
+                                                     (__v16hi)
+                                                     _mm256_setzero_si256 (),
+                                                     (__mmask16) __W);
+}
+#else
+/* Macro fallbacks.  Every expansion is wrapped in an outer pair of
+   parentheses so the leading cast cannot capture a postfix operator that
+   follows the macro invocation, and every macro argument is parenthesized
+   before being cast so an argument containing a low-precedence operator is
+   converted as a whole.  Arguments are cast to the operand types the
+   inline versions use.  */
+#define _mm512_cvt_roundps_pd(A, B) \
+ ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
+ (__v8df)_mm512_undefined_pd(), (__mmask8)-1, (int)(B)))
+
+#define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
+ ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
+ (__v8df)(__m512d)(W), (__mmask8)(U), (int)(B)))
+
+#define _mm512_maskz_cvt_roundps_pd(U, A, B) \
+ ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
+ (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(B)))
+
+#define _mm512_cvt_roundph_ps(A, B) \
+ ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
+ (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, (int)(B)))
+
+#define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
+ ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
+ (__v16sf)(__m512)(W), (__mmask16)(U), (int)(B)))
+
+#define _mm512_maskz_cvt_roundph_ps(U, A, B) \
+ ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
+ (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (int)(B)))
+
+#define _mm512_cvt_roundps_ph(A, I) \
+ ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), (int) (I),\
+ (__v16hi)_mm256_undefined_si256 (), -1))
+#define _mm512_cvtps_ph(A, I) \
+ ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), (int) (I),\
+ (__v16hi)_mm256_undefined_si256 (), -1))
+#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
+ ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), (int) (I),\
+ (__v16hi)(__m256i)(U), (__mmask16) (W)))
+#define _mm512_mask_cvtps_ph(U, W, A, I) \
+ ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), (int) (I),\
+ (__v16hi)(__m256i)(U), (__mmask16) (W)))
+#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
+ ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), (int) (I),\
+ (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
+#define _mm512_maskz_cvtps_ph(W, A, I) \
+ ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), (int) (I),\
+ (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
+#endif
+
+#ifdef __OPTIMIZE__
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundpd_ps (__m512d __A, const int __R)
+{
+ return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
+ (__v8sf)
+ _mm256_undefined_ps (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
+ (__v8sf) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
+{
+ return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
+{
+ return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
+ (__v2df) __B,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
+ (__v4sf) __B,
+ __R);
+}
+#else
+/* Macro form of _mm512_cvt_roundpd_ps.  The expansion is fully parenthesized
+   and the operands carry the same casts as the inline version, so both forms
+   behave alike inside larger expressions.  */
+#define _mm512_cvt_roundpd_ps(A, B) \
+  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df)(__m512d)(A), \
+    (__v8sf) _mm256_undefined_ps (), (__mmask8) -1, (B)))
+
+/* Macro form of _mm512_mask_cvt_roundpd_ps: W is the second vector operand,
+   U the mask, B the rounding immediate.  Fully parenthesized, with the same
+   operand casts as the inline version.  */
+#define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
+  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df)(__m512d)(A), \
+    (__v8sf)(__m256)(W), (__mmask8)(U), (B)))
+
+/* Macro form of _mm512_maskz_cvt_roundpd_ps: an all-zero vector is the
+   second operand, U the mask, B the rounding immediate.  Fully
+   parenthesized, with the same operand casts as the inline version.  */
+#define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
+  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df)(__m512d)(A), \
+    (__v8sf) _mm256_setzero_ps (), (__mmask8)(U), (B)))
+
+/* Macro form of _mm_cvt_roundsd_ss.  Fully parenthesized, with the same
+   operand casts as the inline version.  */
+#define _mm_cvt_roundsd_ss(A, B, C) \
+  ((__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf)(__m128)(A), \
+    (__v2df)(__m128d)(B), (C)))
+
+/* Macro form of _mm_cvt_roundss_sd.  Fully parenthesized, with the same
+   operand casts as the inline version.  */
+#define _mm_cvt_roundss_sd(A, B, C) \
+  ((__m128d) __builtin_ia32_cvtss2sd_round ((__v2df)(__m128d)(A), \
+    (__v4sf)(__m128)(B), (C)))
+#endif
+
+/* Hands __P (viewed as a __v8di pointer) and __A (viewed as __v8di) to the
+   movntdq512 builtin.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_stream_si512 (__m512i * __P, __m512i __A)
+{
+  __v8di *__dst = (__v8di *) __P;
+  __v8di __val = (__v8di) __A;
+
+  __builtin_ia32_movntdq512 (__dst, __val);
+}
+
+/* Hands __P and __A (viewed as __v16sf) to the movntps512 builtin.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_stream_ps (float *__P, __m512 __A)
+{
+  __v16sf __val = (__v16sf) __A;
+
+  __builtin_ia32_movntps512 (__P, __val);
+}
+
+/* Hands __P and __A (viewed as __v8df) to the movntpd512 builtin.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_stream_pd (double *__P, __m512d __A)
+{
+  __v8df __val = (__v8df) __A;
+
+  __builtin_ia32_movntpd512 (__P, __val);
+}
+
+/* Passes __P, cast to a __v8di pointer, to the movntdqa512 builtin and
+   returns that builtin's result.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_stream_load_si512 (void *__P)
+{
+  __v8di *__src = (__v8di *) __P;
+
+  return __builtin_ia32_movntdqa512 (__src);
+}
+
+#ifdef __OPTIMIZE__
+/* Calls the getexpss128_round builtin on __A and __B (both as __v4sf) with
+   the immediate __R.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+  __v4sf __lhs = (__v4sf) __A;
+  __v4sf __rhs = (__v4sf) __B;
+
+  return (__m128) __builtin_ia32_getexpss128_round (__lhs, __rhs, __R);
+}
+
+/* Calls the getexpsd128_round builtin on __A and __B (both as __v2df) with
+   the immediate __R.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+  __v2df __lhs = (__v2df) __A;
+  __v2df __rhs = (__v2df) __B;
+
+  return (__m128d) __builtin_ia32_getexpsd128_round (__lhs, __rhs, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getexp_round_ps (__m512 __A, const int __R)
+{
+ return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1, __R);
+}
+
+/* Calls the getexpps512 builtin on __A, with __W as the second vector
+   operand and __U as the mask; __R is forwarded as the last argument.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
+                             const int __R)
+{
+  __v16sf __in = (__v16sf) __A;
+  __v16sf __w = (__v16sf) __W;
+
+  return (__m512) __builtin_ia32_getexpps512_mask (__in, __w,
+                                                   (__mmask16) __U, __R);
+}
+
+/* Calls the getexpps512 builtin on __A, with an all-zero vector as the
+   second operand and __U as the mask; __R is forwarded unchanged.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
+{
+  __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+
+  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, __zero,
+                                                   (__mmask16) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getexp_round_pd (__m512d __A, const int __R)
+{
+ return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1, __R);
+}
+
+/* Calls the getexppd512 builtin on __A, with __W as the second vector
+   operand and __U as the mask; __R is forwarded as the last argument.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
+                             const int __R)
+{
+  __v8df __in = (__v8df) __A;
+  __v8df __w = (__v8df) __W;
+
+  return (__m512d) __builtin_ia32_getexppd512_mask (__in, __w,
+                                                    (__mmask8) __U, __R);
+}
+
+/* Calls the getexppd512 builtin on __A, with an all-zero vector as the
+   second operand and __U as the mask; __R is forwarded unchanged.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
+{
+  __v8df __zero = (__v8df) _mm512_setzero_pd ();
+
+  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, __zero,
+                                                    (__mmask8) __U, __R);
+}
+
+/* Constants for mantissa extraction */
+typedef enum
+{
+ _MM_MANT_NORM_1_2, /* interval [1, 2) */
+ _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */
+ _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */
+ _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */
+} _MM_MANTISSA_NORM_ENUM;
+
+typedef enum
+{
+ _MM_MANT_SIGN_src, /* sign = sign(SRC) */
+ _MM_MANT_SIGN_zero, /* sign = 0 */
+ _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */
+} _MM_MANTISSA_SIGN_ENUM;
+
+/* Calls the getmantpd512 builtin on __A with immediate (__C << 2) | __B, an
+   undefined third operand and an all-ones mask; __R is forwarded last.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
+                         _MM_MANTISSA_SIGN_ENUM __C, const int __R)
+{
+  __v8df __in = (__v8df) __A;
+
+  return (__m512d) __builtin_ia32_getmantpd512_mask (__in, (__C << 2) | __B,
+                                                     _mm512_undefined_pd (),
+                                                     (__mmask8) -1, __R);
+}
+
+/* Calls the getmantpd512 builtin on __A with immediate (__C << 2) | __B,
+   __W as the third operand and __U as the mask; __R is forwarded last.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
+                              _MM_MANTISSA_NORM_ENUM __B,
+                              _MM_MANTISSA_SIGN_ENUM __C, const int __R)
+{
+  __v8df __in = (__v8df) __A;
+  __v8df __w = (__v8df) __W;
+
+  return (__m512d) __builtin_ia32_getmantpd512_mask (__in, (__C << 2) | __B,
+                                                     __w, __U, __R);
+}
+
+/* Calls the getmantpd512 builtin on __A with immediate (__C << 2) | __B, an
+   all-zero third operand and __U as the mask; __R is forwarded last.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
+                               _MM_MANTISSA_NORM_ENUM __B,
+                               _MM_MANTISSA_SIGN_ENUM __C, const int __R)
+{
+  __v8df __zero = (__v8df) _mm512_setzero_pd ();
+
+  return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
+                                                     (__C << 2) | __B,
+                                                     __zero, __U, __R);
+}
+
+/* Calls the getmantps512 builtin on __A with immediate (__C << 2) | __B, an
+   undefined third operand and an all-ones mask; __R is forwarded last.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
+                         _MM_MANTISSA_SIGN_ENUM __C, const int __R)
+{
+  __v16sf __in = (__v16sf) __A;
+
+  return (__m512) __builtin_ia32_getmantps512_mask (__in, (__C << 2) | __B,
+                                                    _mm512_undefined_ps (),
+                                                    (__mmask16) -1, __R);
+}
+
+/* Calls the getmantps512 builtin on __A with immediate (__C << 2) | __B,
+   __W as the third operand and __U as the mask; __R is forwarded last.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
+                              _MM_MANTISSA_NORM_ENUM __B,
+                              _MM_MANTISSA_SIGN_ENUM __C, const int __R)
+{
+  __v16sf __in = (__v16sf) __A;
+  __v16sf __w = (__v16sf) __W;
+
+  return (__m512) __builtin_ia32_getmantps512_mask (__in, (__C << 2) | __B,
+                                                    __w, __U, __R);
+}
+
+/* Calls the getmantps512 builtin on __A with immediate (__C << 2) | __B, an
+   all-zero third operand and __U as the mask; __R is forwarded last.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
+                               _MM_MANTISSA_NORM_ENUM __B,
+                               _MM_MANTISSA_SIGN_ENUM __C, const int __R)
+{
+  __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+
+  return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
+                                                    (__C << 2) | __B,
+                                                    __zero, __U, __R);
+}
+
+/* Calls the getmantsd_round builtin on __A and __B (both as __v2df) with
+   immediate (__D << 2) | __C, followed by __R.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getmant_round_sd (__m128d __A, __m128d __B,
+                      _MM_MANTISSA_NORM_ENUM __C,
+                      _MM_MANTISSA_SIGN_ENUM __D, const int __R)
+{
+  __v2df __lhs = (__v2df) __A;
+  __v2df __rhs = (__v2df) __B;
+
+  return (__m128d) __builtin_ia32_getmantsd_round (__lhs, __rhs,
+                                                   (__D << 2) | __C, __R);
+}
+
+/* Calls the getmantss_round builtin on __A and __B (both as __v4sf) with
+   immediate (__D << 2) | __C, followed by __R.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getmant_round_ss (__m128 __A, __m128 __B,
+                      _MM_MANTISSA_NORM_ENUM __C,
+                      _MM_MANTISSA_SIGN_ENUM __D, const int __R)
+{
+  __v4sf __lhs = (__v4sf) __A;
+  __v4sf __rhs = (__v4sf) __B;
+
+  return (__m128) __builtin_ia32_getmantss_round (__lhs, __rhs,
+                                                  (__D << 2) | __C, __R);
+}
+
+#else
+/* Macro forms of the 512-bit double-precision getmant intrinsics, used when
+   __OPTIMIZE__ is not defined.  Each expands to the getmantpd512 builtin
+   with immediate ((C)<<2) | (B); the third operand is an undefined vector,
+   W, or an all-zero vector, and the mask is -1 or U respectively.  */
+#define _mm512_getmant_round_pd(X, B, C, R) \
+ ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
+ (int)(((C)<<2) | (B)), \
+ (__v8df)(__m512d)_mm512_undefined_pd(), \
+ (__mmask8)-1,\
+ (R)))
+
+#define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \
+ ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
+ (int)(((C)<<2) | (B)), \
+ (__v8df)(__m512d)(W), \
+ (__mmask8)(U),\
+ (R)))
+
+#define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \
+ ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
+ (int)(((C)<<2) | (B)), \
+ (__v8df)(__m512d)_mm512_setzero_pd(), \
+ (__mmask8)(U),\
+ (R)))
+/* Macro forms of the 512-bit single-precision getmant intrinsics, used when
+   __OPTIMIZE__ is not defined.  Each expands to the getmantps512 builtin
+   with immediate ((C)<<2) | (B); the third operand is an undefined vector,
+   W, or an all-zero vector, and the mask is -1 or U respectively.  */
+#define _mm512_getmant_round_ps(X, B, C, R) \
+ ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
+ (int)(((C)<<2) | (B)), \
+ (__v16sf)(__m512)_mm512_undefined_ps(), \
+ (__mmask16)-1,\
+ (R)))
+
+#define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \
+ ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
+ (int)(((C)<<2) | (B)), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U),\
+ (R)))
+
+#define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \
+ ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
+ (int)(((C)<<2) | (B)), \
+ (__v16sf)(__m512)_mm512_setzero_ps(), \
+ (__mmask16)(U),\
+ (R)))
+/* Macro forms of the scalar getmant intrinsics, used when __OPTIMIZE__ is
+   not defined.  X and Y are the two vector operands; the immediate is
+   ((D)<<2) | (C) and R is passed as the final argument.  */
+#define _mm_getmant_round_sd(X, Y, C, D, R) \
+ ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (int)(((D)<<2) | (C)), \
+ (R)))
+
+#define _mm_getmant_round_ss(X, Y, C, D, R) \
+ ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), \
+ (int)(((D)<<2) | (C)), \
+ (R)))
+
+/* Macro forms of the scalar getexp intrinsics, used when __OPTIMIZE__ is not
+   defined.  A and B are cast to the builtin's vector type; R is passed
+   through textually as the last argument.  */
+#define _mm_getexp_round_ss(A, B, R) \
+ ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), R))
+
+#define _mm_getexp_round_sd(A, B, R) \
+ ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), R))
+
+/* Macro forms of the 512-bit getexp intrinsics, used when __OPTIMIZE__ is
+   not defined.  The plain form passes an undefined vector and mask -1, the
+   mask form passes W and U, and the maskz form passes an all-zero vector and
+   U.  R is passed through textually as the last argument.  */
+#define _mm512_getexp_round_ps(A, R) \
+ ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, R))
+
+#define _mm512_mask_getexp_round_ps(W, U, A, R) \
+ ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(W), (__mmask16)(U), R))
+
+#define _mm512_maskz_getexp_round_ps(U, A, R) \
+ ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), R))
+
+#define _mm512_getexp_round_pd(A, R) \
+ ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_undefined_pd(), (__mmask8)-1, R))
+
+#define _mm512_mask_getexp_round_pd(W, U, A, R) \
+ ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(W), (__mmask8)(U), R))
+
+#define _mm512_maskz_getexp_round_pd(U, A, R) \
+ ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), (__mmask8)(U), R))
+#endif
+
+#ifdef __OPTIMIZE__
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
+{
+ return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ -1, __R);
+}
+
+/* Calls the rndscaleps builtin on __C with immediate __imm, using __A as the
+   third operand and __B as the mask; __R is forwarded last.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
+                                 const int __imm, const int __R)
+{
+  __v16sf __in = (__v16sf) __C;
+  __v16sf __w = (__v16sf) __A;
+
+  return (__m512) __builtin_ia32_rndscaleps_mask (__in, __imm, __w,
+                                                  (__mmask16) __B, __R);
+}
+
+/* Calls the rndscaleps builtin on __B with immediate __imm, an all-zero
+   third operand and __A as the mask; __R is forwarded last.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
+                                  const int __imm, const int __R)
+{
+  __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+
+  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B, __imm,
+                                                  __zero,
+                                                  (__mmask16) __A, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
+{
+ return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
+ (__v8df)
+ _mm512_undefined_pd (),
+ -1, __R);
+}
+
+/* Calls the rndscalepd builtin on __C with immediate __imm, using __A as the
+   third operand and __B as the mask; __R is forwarded last.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
+                                 __m512d __C, const int __imm, const int __R)
+{
+  __v8df __in = (__v8df) __C;
+  __v8df __w = (__v8df) __A;
+
+  return (__m512d) __builtin_ia32_rndscalepd_mask (__in, __imm, __w,
+                                                   (__mmask8) __B, __R);
+}
+
+/* Calls the rndscalepd builtin on __B with immediate __imm, an all-zero
+   third operand and __A as the mask; __R is forwarded last.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
+                                  const int __imm, const int __R)
+{
+  __v8df __zero = (__v8df) _mm512_setzero_pd ();
+
+  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B, __imm,
+                                                   __zero,
+                                                   (__mmask8) __A, __R);
+}
+
+/* Calls the rndscaless_round builtin on __A and __B (both as __v4sf) with
+   the immediates __imm and __R.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
+{
+  __v4sf __lhs = (__v4sf) __A;
+  __v4sf __rhs = (__v4sf) __B;
+
+  return (__m128) __builtin_ia32_rndscaless_round (__lhs, __rhs, __imm, __R);
+}
+
+/* Calls the rndscalesd_round builtin on __A and __B (both as __v2df) with
+   the immediates __imm and __R.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
+                         const int __R)
+{
+  __v2df __lhs = (__v2df) __A;
+  __v2df __rhs = (__v2df) __B;
+
+  return (__m128d) __builtin_ia32_rndscalesd_round (__lhs, __rhs, __imm, __R);
+}
+
+#else
+/* Macro forms of the roundscale intrinsics, used when __OPTIMIZE__ is not
+   defined.  In the mask forms A is the vector operand accompanying mask B
+   and C is the input; in the maskz forms A is the mask and B the input.
+   R is passed through textually as the last argument.  */
+#define _mm512_roundscale_round_ps(A, B, R) \
+ ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
+ (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), R))
+#define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \
+ ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
+ (int)(D), \
+ (__v16sf)(__m512)(A), \
+ (__mmask16)(B), R))
+#define _mm512_maskz_roundscale_round_ps(A, B, C, R) \
+ ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
+ (int)(C), \
+ (__v16sf)_mm512_setzero_ps(),\
+ (__mmask16)(A), R))
+#define _mm512_roundscale_round_pd(A, B, R) \
+ ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
+ (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), R))
+#define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \
+ ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
+ (int)(D), \
+ (__v8df)(__m512d)(A), \
+ (__mmask8)(B), R))
+#define _mm512_maskz_roundscale_round_pd(A, B, C, R) \
+ ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
+ (int)(C), \
+ (__v8df)_mm512_setzero_pd(),\
+ (__mmask8)(A), R))
+#define _mm_roundscale_round_ss(A, B, C, R) \
+ ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), R))
+#define _mm_roundscale_round_sd(A, B, C, R) \
+ ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), R))
+#endif
+
+/* Calls the rndscaleps builtin on __A with immediate _MM_FROUND_FLOOR.  __A
+   is also passed as the third operand, the mask is -1 and the final
+   argument is _MM_FROUND_CUR_DIRECTION.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_floor_ps (__m512 __A)
+{
+  __v16sf __in = (__v16sf) __A;
+
+  return (__m512) __builtin_ia32_rndscaleps_mask (__in, _MM_FROUND_FLOOR,
+                                                  __in, -1,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Calls the rndscalepd builtin on __A with immediate _MM_FROUND_FLOOR.  __A
+   is also passed as the third operand, the mask is -1 and the final
+   argument is _MM_FROUND_CUR_DIRECTION.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_floor_pd (__m512d __A)
+{
+  __v8df __in = (__v8df) __A;
+
+  return (__m512d) __builtin_ia32_rndscalepd_mask (__in, _MM_FROUND_FLOOR,
+                                                   __in, -1,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Calls the rndscaleps builtin on __A with immediate _MM_FROUND_CEIL.  __A
+   is also passed as the third operand, the mask is -1 and the final
+   argument is _MM_FROUND_CUR_DIRECTION.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ceil_ps (__m512 __A)
+{
+  __v16sf __in = (__v16sf) __A;
+
+  return (__m512) __builtin_ia32_rndscaleps_mask (__in, _MM_FROUND_CEIL,
+                                                  __in, -1,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Calls the rndscalepd builtin on __A with immediate _MM_FROUND_CEIL.  __A
+   is also passed as the third operand, the mask is -1 and the final
+   argument is _MM_FROUND_CUR_DIRECTION.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ceil_pd (__m512d __A)
+{
+  __v8df __in = (__v8df) __A;
+
+  return (__m512d) __builtin_ia32_rndscalepd_mask (__in, _MM_FROUND_CEIL,
+                                                   __in, -1,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Calls the rndscaleps builtin on __A with immediate _MM_FROUND_FLOOR, __W
+   as the third operand, __U as the mask and _MM_FROUND_CUR_DIRECTION as the
+   final argument.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+  __v16sf __in = (__v16sf) __A;
+  __v16sf __w = (__v16sf) __W;
+
+  return (__m512) __builtin_ia32_rndscaleps_mask (__in, _MM_FROUND_FLOOR,
+                                                  __w, __U,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Calls the rndscalepd builtin on __A with immediate _MM_FROUND_FLOOR, __W
+   as the third operand, __U as the mask and _MM_FROUND_CUR_DIRECTION as the
+   final argument.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+  __v8df __in = (__v8df) __A;
+  __v8df __w = (__v8df) __W;
+
+  return (__m512d) __builtin_ia32_rndscalepd_mask (__in, _MM_FROUND_FLOOR,
+                                                   __w, __U,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Calls the rndscaleps builtin on __A with immediate _MM_FROUND_CEIL, __W as
+   the third operand, __U as the mask and _MM_FROUND_CUR_DIRECTION as the
+   final argument.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+  __v16sf __in = (__v16sf) __A;
+  __v16sf __w = (__v16sf) __W;
+
+  return (__m512) __builtin_ia32_rndscaleps_mask (__in, _MM_FROUND_CEIL,
+                                                  __w, __U,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Calls the rndscalepd builtin on __A with immediate _MM_FROUND_CEIL, __W as
+   the third operand, __U as the mask and _MM_FROUND_CUR_DIRECTION as the
+   final argument.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+  __v8df __in = (__v8df) __A;
+  __v8df __w = (__v8df) __W;
+
+  return (__m512d) __builtin_ia32_rndscalepd_mask (__in, _MM_FROUND_CEIL,
+                                                   __w, __U,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+/* Calls the alignd512 builtin on __A and __B with immediate __imm, an
+   undefined fourth operand and an all-ones mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
+{
+  __v16si __fill = (__v16si) _mm512_undefined_si512 ();
+
+  return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
+                                                  (__v16si) __B, __imm,
+                                                  __fill, (__mmask16) -1);
+}
+
+/* Calls the alignd512 builtin on __A and __B with immediate __imm, __W as
+   the fourth operand and __U as the mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
+                          __m512i __B, const int __imm)
+{
+  __v16si __lhs = (__v16si) __A;
+  __v16si __rhs = (__v16si) __B;
+  __v16si __w = (__v16si) __W;
+
+  return (__m512i) __builtin_ia32_alignd512_mask (__lhs, __rhs, __imm,
+                                                  __w, (__mmask16) __U);
+}
+
+/* Calls the alignd512 builtin on __A and __B with immediate __imm, an
+   all-zero fourth operand and __U as the mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
+                           const int __imm)
+{
+  __v16si __zero = (__v16si) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
+                                                  (__v16si) __B, __imm,
+                                                  __zero, (__mmask16) __U);
+}
+
+/* Calls the alignq512 builtin on __A and __B with immediate __imm, an
+   undefined fourth operand and an all-ones mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
+{
+  __v8di __fill = (__v8di) _mm512_undefined_si512 ();
+
+  return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
+                                                  (__v8di) __B, __imm,
+                                                  __fill, (__mmask8) -1);
+}
+
+/* Calls the alignq512 builtin on __A and __B with immediate __imm, __W as
+   the fourth operand and __U as the mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
+                          __m512i __B, const int __imm)
+{
+  __v8di __lhs = (__v8di) __A;
+  __v8di __rhs = (__v8di) __B;
+  __v8di __w = (__v8di) __W;
+
+  return (__m512i) __builtin_ia32_alignq512_mask (__lhs, __rhs, __imm,
+                                                  __w, (__mmask8) __U);
+}
+
+/* Calls the alignq512 builtin on __A and __B with immediate __imm, an
+   all-zero fourth operand and __U as the mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
+                           const int __imm)
+{
+  __v8di __zero = (__v8di) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
+                                                  (__v8di) __B, __imm,
+                                                  __zero, (__mmask8) __U);
+}
+#else
+/* Macro forms of the alignr intrinsics, used when __OPTIMIZE__ is not
+   defined.  X and Y are the two inputs and C the immediate; the fourth
+   builtin operand is an undefined vector, W, or an all-zero vector, and the
+   mask is -1 or U respectively.  */
+#define _mm512_alignr_epi32(X, Y, C) \
+ ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
+ (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_undefined_si512 (),\
+ (__mmask16)-1))
+
+#define _mm512_mask_alignr_epi32(W, U, X, Y, C) \
+ ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
+ (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(W), \
+ (__mmask16)(U)))
+
+#define _mm512_maskz_alignr_epi32(U, X, Y, C) \
+ ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
+ (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_setzero_si512 (),\
+ (__mmask16)(U)))
+
+#define _mm512_alignr_epi64(X, Y, C) \
+ ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
+ (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_undefined_si512 (), \
+ (__mmask8)-1))
+
+#define _mm512_mask_alignr_epi64(W, U, X, Y, C) \
+ ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
+ (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(W), (__mmask8)(U)))
+
+#define _mm512_maskz_alignr_epi64(U, X, Y, C) \
+ ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
+ (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_setzero_si512 (),\
+ (__mmask8)(U)))
+#endif
+
+/* Calls the pcmpeqd512 builtin on __A and __B (as __v16si) with an all-ones
+   mask and returns the result as __mmask16.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
+{
+  __v16si __lhs = (__v16si) __A;
+  __v16si __rhs = (__v16si) __B;
+
+  return (__mmask16) __builtin_ia32_pcmpeqd512_mask (__lhs, __rhs,
+                                                     (__mmask16) -1);
+}
+
+/* Calls the pcmpeqd512 builtin on __A and __B (as __v16si) with __U as the
+   mask argument.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
+{
+  __v16si __lhs = (__v16si) __A;
+  __v16si __rhs = (__v16si) __B;
+
+  return (__mmask16) __builtin_ia32_pcmpeqd512_mask (__lhs, __rhs, __U);
+}
+
+/* Calls the pcmpeqq512 builtin on __A and __B (as __v8di) with __U as the
+   mask argument.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
+{
+  __v8di __lhs = (__v8di) __A;
+  __v8di __rhs = (__v8di) __B;
+
+  return (__mmask8) __builtin_ia32_pcmpeqq512_mask (__lhs, __rhs, __U);
+}
+
+/* Calls the pcmpeqq512 builtin on __A and __B (as __v8di) with an all-ones
+   mask and returns the result as __mmask8.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
+{
+  __v8di __lhs = (__v8di) __A;
+  __v8di __rhs = (__v8di) __B;
+
+  return (__mmask8) __builtin_ia32_pcmpeqq512_mask (__lhs, __rhs,
+                                                    (__mmask8) -1);
+}
+
+/* Calls the pcmpgtd512 builtin on __A and __B (as __v16si) with an all-ones
+   mask and returns the result as __mmask16.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
+{
+  __v16si __lhs = (__v16si) __A;
+  __v16si __rhs = (__v16si) __B;
+
+  return (__mmask16) __builtin_ia32_pcmpgtd512_mask (__lhs, __rhs,
+                                                     (__mmask16) -1);
+}
+
+/* Calls the pcmpgtd512 builtin on __A and __B (as __v16si) with __U as the
+   mask argument.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
+{
+  __v16si __lhs = (__v16si) __A;
+  __v16si __rhs = (__v16si) __B;
+
+  return (__mmask16) __builtin_ia32_pcmpgtd512_mask (__lhs, __rhs, __U);
+}
+
+/* Calls the pcmpgtq512 builtin on __A and __B (as __v8di) with __U as the
+   mask argument.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
+{
+  __v8di __lhs = (__v8di) __A;
+  __v8di __rhs = (__v8di) __B;
+
+  return (__mmask8) __builtin_ia32_pcmpgtq512_mask (__lhs, __rhs, __U);
+}
+
+/* Calls the pcmpgtq512 builtin on __A and __B (as __v8di) with an all-ones
+   mask and returns the result as __mmask8.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
+{
+  __v8di __lhs = (__v8di) __A;
+  __v8di __rhs = (__v8di) __B;
+
+  return (__mmask8) __builtin_ia32_pcmpgtq512_mask (__lhs, __rhs,
+                                                    (__mmask8) -1);
+}
+
+/* Calls the cmpd512 builtin on __X and __Y with predicate 5 (the value this
+   file names _MM_CMPINT_NLT / _MM_CMPINT_GE) and an all-ones mask.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
+{
+  __v16si __lhs = (__v16si) __X;
+  __v16si __rhs = (__v16si) __Y;
+
+  return (__mmask16) __builtin_ia32_cmpd512_mask (__lhs, __rhs, 5,
+                                                  (__mmask16) -1);
+}
+
+/* Calls the ucmpd512 builtin on __X and __Y with predicate 5 (the value this
+   file names _MM_CMPINT_NLT / _MM_CMPINT_GE) and an all-ones mask.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
+{
+  __v16si __lhs = (__v16si) __X;
+  __v16si __rhs = (__v16si) __Y;
+
+  return (__mmask16) __builtin_ia32_ucmpd512_mask (__lhs, __rhs, 5,
+                                                   (__mmask16) -1);
+}
+
+/* Calls the cmpq512 builtin on __X and __Y with predicate 5 (the value this
+   file names _MM_CMPINT_NLT / _MM_CMPINT_GE) and an all-ones mask.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
+{
+  __v8di __lhs = (__v8di) __X;
+  __v8di __rhs = (__v8di) __Y;
+
+  return (__mmask8) __builtin_ia32_cmpq512_mask (__lhs, __rhs, 5,
+                                                 (__mmask8) -1);
+}
+
+/* Calls the ucmpq512 builtin on __X and __Y with predicate 5 (the value this
+   file names _MM_CMPINT_NLT / _MM_CMPINT_GE) and an all-ones mask.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
+{
+  __v8di __lhs = (__v8di) __X;
+  __v8di __rhs = (__v8di) __Y;
+
+  return (__mmask8) __builtin_ia32_ucmpq512_mask (__lhs, __rhs, 5,
+                                                  (__mmask8) -1);
+}
+
+/* Calls the cmpd512 builtin on __X and __Y with predicate 2 (the value this
+   file names _MM_CMPINT_LE) and an all-ones mask.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
+{
+  __v16si __lhs = (__v16si) __X;
+  __v16si __rhs = (__v16si) __Y;
+
+  return (__mmask16) __builtin_ia32_cmpd512_mask (__lhs, __rhs, 2,
+                                                  (__mmask16) -1);
+}
+
+/* Calls the ucmpd512 builtin on __X and __Y with predicate 2 (the value this
+   file names _MM_CMPINT_LE) and an all-ones mask.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
+{
+  __v16si __lhs = (__v16si) __X;
+  __v16si __rhs = (__v16si) __Y;
+
+  return (__mmask16) __builtin_ia32_ucmpd512_mask (__lhs, __rhs, 2,
+                                                   (__mmask16) -1);
+}
+
+/* Calls the cmpq512 builtin on __X and __Y with predicate 2 (the value this
+   file names _MM_CMPINT_LE) and an all-ones mask.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
+{
+  __v8di __lhs = (__v8di) __X;
+  __v8di __rhs = (__v8di) __Y;
+
+  return (__mmask8) __builtin_ia32_cmpq512_mask (__lhs, __rhs, 2,
+                                                 (__mmask8) -1);
+}
+
+/* Calls the ucmpq512 builtin on __X and __Y with predicate 2 (the value this
+   file names _MM_CMPINT_LE) and an all-ones mask.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
+{
+  __v8di __lhs = (__v8di) __X;
+  __v8di __rhs = (__v8di) __Y;
+
+  return (__mmask8) __builtin_ia32_ucmpq512_mask (__lhs, __rhs, 2,
+                                                  (__mmask8) -1);
+}
+
+/* Calls the cmpd512 builtin on __X and __Y with predicate 1 (the value this
+   file names _MM_CMPINT_LT) and an all-ones mask.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
+{
+  __v16si __lhs = (__v16si) __X;
+  __v16si __rhs = (__v16si) __Y;
+
+  return (__mmask16) __builtin_ia32_cmpd512_mask (__lhs, __rhs, 1,
+                                                  (__mmask16) -1);
+}
+
+/* Calls the ucmpd512 builtin on __X and __Y with predicate 1 (the value this
+   file names _MM_CMPINT_LT) and an all-ones mask.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
+{
+  __v16si __lhs = (__v16si) __X;
+  __v16si __rhs = (__v16si) __Y;
+
+  return (__mmask16) __builtin_ia32_ucmpd512_mask (__lhs, __rhs, 1,
+                                                   (__mmask16) -1);
+}
+
+/* Calls the cmpq512 builtin on __X and __Y with predicate 1 (the value this
+   file names _MM_CMPINT_LT) and an all-ones mask.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
+{
+  __v8di __lhs = (__v8di) __X;
+  __v8di __rhs = (__v8di) __Y;
+
+  return (__mmask8) __builtin_ia32_cmpq512_mask (__lhs, __rhs, 1,
+                                                 (__mmask8) -1);
+}
+
+/* Calls the ucmpq512 builtin on __X and __Y with predicate 1 (the value this
+   file names _MM_CMPINT_LT) and an all-ones mask.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
+{
+  __v8di __lhs = (__v8di) __X;
+  __v8di __rhs = (__v8di) __Y;
+
+  return (__mmask8) __builtin_ia32_ucmpq512_mask (__lhs, __rhs, 1,
+                                                  (__mmask8) -1);
+}
+
+/* Calls the cmpd512 builtin on __X and __Y with predicate 4 (the value this
+   file names _MM_CMPINT_NE) and an all-ones mask.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
+{
+  __v16si __lhs = (__v16si) __X;
+  __v16si __rhs = (__v16si) __Y;
+
+  return (__mmask16) __builtin_ia32_cmpd512_mask (__lhs, __rhs, 4,
+                                                  (__mmask16) -1);
+}
+
+/* Calls the ucmpd512 builtin on __X and __Y with predicate 4 (the value this
+   file names _MM_CMPINT_NE) and an all-ones mask.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
+{
+  __v16si __lhs = (__v16si) __X;
+  __v16si __rhs = (__v16si) __Y;
+
+  return (__mmask16) __builtin_ia32_ucmpd512_mask (__lhs, __rhs, 4,
+                                                   (__mmask16) -1);
+}
+
+/* Calls the cmpq512 builtin on __X and __Y with predicate 4 (the value this
+   file names _MM_CMPINT_NE) and an all-ones mask.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
+{
+  __v8di __lhs = (__v8di) __X;
+  __v8di __rhs = (__v8di) __Y;
+
+  return (__mmask8) __builtin_ia32_cmpq512_mask (__lhs, __rhs, 4,
+                                                 (__mmask8) -1);
+}
+
+/* Calls the ucmpq512 builtin on __X and __Y with predicate 4 (the value this
+   file names _MM_CMPINT_NE) and an all-ones mask.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
+{
+  __v8di __lhs = (__v8di) __X;
+  __v8di __rhs = (__v8di) __Y;
+
+  return (__mmask8) __builtin_ia32_ucmpq512_mask (__lhs, __rhs, 4,
+                                                  (__mmask8) -1);
+}
+
+/* Named predicate values for integer comparisons; presumably the values
+   accepted for the __P argument of the _mm512_cmp_ep[iu]{32,64}_mask
+   intrinsics (the fixed-predicate wrappers in this file pass 1, 2, 4 and 5
+   as literals).  NLT/GE share 0x5 and NLE/GT share 0x6.  */
+#define _MM_CMPINT_EQ 0x0
+#define _MM_CMPINT_LT 0x1
+#define _MM_CMPINT_LE 0x2
+#define _MM_CMPINT_UNUSED 0x3
+#define _MM_CMPINT_NE 0x4
+#define _MM_CMPINT_NLT 0x5
+#define _MM_CMPINT_GE 0x5
+#define _MM_CMPINT_NLE 0x6
+#define _MM_CMPINT_GT 0x6
+
+#ifdef __OPTIMIZE__
+/* Calls the cmpq512 builtin on __X and __Y with the caller's predicate __P
+   and an all-ones mask.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
+{
+  __v8di __lhs = (__v8di) __X;
+  __v8di __rhs = (__v8di) __Y;
+
+  return (__mmask8) __builtin_ia32_cmpq512_mask (__lhs, __rhs, __P,
+                                                 (__mmask8) -1);
+}
+
+/* Calls the cmpd512 builtin on __X and __Y with the caller's predicate __P
+   and an all-ones mask.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
+{
+  __v16si __lhs = (__v16si) __X;
+  __v16si __rhs = (__v16si) __Y;
+
+  return (__mmask16) __builtin_ia32_cmpd512_mask (__lhs, __rhs, __P,
+                                                  (__mmask16) -1);
+}
+
+/* Calls the ucmpq512 builtin on __X and __Y with the caller's predicate __P
+   and an all-ones mask.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
+{
+  __v8di __lhs = (__v8di) __X;
+  __v8di __rhs = (__v8di) __Y;
+
+  return (__mmask8) __builtin_ia32_ucmpq512_mask (__lhs, __rhs, __P,
+                                                  (__mmask8) -1);
+}
+
+/* Calls the ucmpd512 builtin on __X and __Y with the caller's predicate __P
+   and an all-ones mask.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
+{
+  __v16si __lhs = (__v16si) __X;
+  __v16si __rhs = (__v16si) __Y;
+
+  return (__mmask16) __builtin_ia32_ucmpd512_mask (__lhs, __rhs, __P,
+                                                   (__mmask16) -1);
+}
+
+/* Calls the cmppd512 builtin on __X and __Y with predicate __P, an all-ones
+   mask and __R as the final argument.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
+                          const int __R)
+{
+  __v8df __lhs = (__v8df) __X;
+  __v8df __rhs = (__v8df) __Y;
+
+  return (__mmask8) __builtin_ia32_cmppd512_mask (__lhs, __rhs, __P,
+                                                  (__mmask8) -1, __R);
+}
+
+/* Calls the cmpps512 builtin on __X and __Y with predicate __P, an all-ones
+   mask and __R as the final argument.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
+{
+  __v16sf __lhs = (__v16sf) __X;
+  __v16sf __rhs = (__v16sf) __Y;
+
+  return (__mmask16) __builtin_ia32_cmpps512_mask (__lhs, __rhs, __P,
+                                                   (__mmask16) -1, __R);
+}
+
+/* Calls the cmpq512 builtin on __X and __Y with predicate __P and __U as the
+   mask argument.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
+                            const int __P)
+{
+  __v8di __lhs = (__v8di) __X;
+  __v8di __rhs = (__v8di) __Y;
+
+  return (__mmask8) __builtin_ia32_cmpq512_mask (__lhs, __rhs, __P,
+                                                 (__mmask8) __U);
+}
+
+/* Calls the cmpd512 builtin on __X and __Y with predicate __P and __U as the
+   mask argument.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
+                            const int __P)
+{
+  __v16si __lhs = (__v16si) __X;
+  __v16si __rhs = (__v16si) __Y;
+
+  return (__mmask16) __builtin_ia32_cmpd512_mask (__lhs, __rhs, __P,
+                                                  (__mmask16) __U);
+}
+
+/* Calls the ucmpq512 builtin on __X and __Y with predicate __P and __U as
+   the mask argument.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
+                            const int __P)
+{
+  __v8di __lhs = (__v8di) __X;
+  __v8di __rhs = (__v8di) __Y;
+
+  return (__mmask8) __builtin_ia32_ucmpq512_mask (__lhs, __rhs, __P,
+                                                  (__mmask8) __U);
+}
+
+/* Calls the ucmpd512 builtin on __X and __Y with predicate __P and __U as
+   the mask argument.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
+                            const int __P)
+{
+  __v16si __lhs = (__v16si) __X;
+  __v16si __rhs = (__v16si) __Y;
+
+  return (__mmask16) __builtin_ia32_ucmpd512_mask (__lhs, __rhs, __P,
+                                                   (__mmask16) __U);
+}
+
+/* Calls the cmppd512 builtin on __X and __Y with predicate __P, __U as the
+   mask argument and __R as the final argument.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
+                               const int __P, const int __R)
+{
+  __v8df __lhs = (__v8df) __X;
+  __v8df __rhs = (__v8df) __Y;
+
+  return (__mmask8) __builtin_ia32_cmppd512_mask (__lhs, __rhs, __P,
+                                                  (__mmask8) __U, __R);
+}
+
+/* Calls the cmpps512 builtin on __X and __Y with predicate __P, __U as the
+   mask argument and __R as the final argument.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
+                               const int __P, const int __R)
+{
+  __v16sf __lhs = (__v16sf) __X;
+  __v16sf __rhs = (__v16sf) __Y;
+
+  return (__mmask16) __builtin_ia32_cmpps512_mask (__lhs, __rhs, __P,
+                                                   (__mmask16) __U, __R);
+}
+
+/* Calls the cmpsd builtin on __X and __Y (as __v2df) with predicate __P, an
+   all-ones mask and __R as the final argument.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
+{
+  __v2df __lhs = (__v2df) __X;
+  __v2df __rhs = (__v2df) __Y;
+
+  return (__mmask8) __builtin_ia32_cmpsd_mask (__lhs, __rhs, __P,
+                                               (__mmask8) -1, __R);
+}
+
+/* Calls the cmpsd builtin on __X and __Y (as __v2df) with predicate __P,
+   __M as the mask argument and __R as the final argument.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
+                            const int __P, const int __R)
+{
+  __v2df __lhs = (__v2df) __X;
+  __v2df __rhs = (__v2df) __Y;
+
+  return (__mmask8) __builtin_ia32_cmpsd_mask (__lhs, __rhs, __P,
+                                               (__mmask8) __M, __R);
+}
+
+/* Calls the cmpss builtin on __X and __Y (as __v4sf) with predicate __P, an
+   all-ones mask and __R as the final argument.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
+{
+  __v4sf __lhs = (__v4sf) __X;
+  __v4sf __rhs = (__v4sf) __Y;
+
+  return (__mmask8) __builtin_ia32_cmpss_mask (__lhs, __rhs, __P,
+                                               (__mmask8) -1, __R);
+}
+
+/* Calls the cmpss builtin on __X and __Y (as __v4sf) with predicate __P,
+   __M as the mask argument and __R as the final argument.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
+                            const int __P, const int __R)
+{
+  __v4sf __lhs = (__v4sf) __X;
+  __v4sf __rhs = (__v4sf) __Y;
+
+  return (__mmask8) __builtin_ia32_cmpss_mask (__lhs, __rhs, __P,
+                                               (__mmask8) __M, __R);
+}
+
+#else
+/* Macro form of _mm512_cmp_epi64_mask, used when __OPTIMIZE__ is not
+   defined: calls the cmpq512 builtin with predicate P and mask -1.  */
+#define _mm512_cmp_epi64_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
+ (__v8di)(__m512i)(Y), (int)(P),\
+ (__mmask8)-1))
+
+/* Macro form of _mm512_cmp_epi32_mask.  The cmpd512 builtin compares
+   __v16si operands under a __mmask16, so the result is cast to __mmask16 as
+   in the inline version; a __mmask8 cast would truncate the 16-lane
+   result.  */
+#define _mm512_cmp_epi32_mask(X, Y, P) \
+  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
+    (__v16si)(__m512i)(Y), (int)(P),\
+    (__mmask16)-1))
+
+/* Macro form of _mm512_cmp_epu64_mask, used when __OPTIMIZE__ is not
+   defined: calls the ucmpq512 builtin with predicate P and mask -1.  */
+#define _mm512_cmp_epu64_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
+ (__v8di)(__m512i)(Y), (int)(P),\
+ (__mmask8)-1))
+
+/* Macro form of _mm512_cmp_epu32_mask.  The ucmpd512 builtin compares
+   __v16si operands under a __mmask16, so the result is cast to __mmask16 as
+   in the inline version; a __mmask8 cast would truncate the 16-lane
+   result.  */
+#define _mm512_cmp_epu32_mask(X, Y, P) \
+  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
+    (__v16si)(__m512i)(Y), (int)(P),\
+    (__mmask16)-1))
+
+/* Macro forms of _mm512_cmp_round_pd_mask and _mm512_cmp_round_ps_mask, used
+   when __OPTIMIZE__ is not defined.  Both call the builtin with predicate P
+   and mask -1; R is passed through textually as the last argument.  */
+#define _mm512_cmp_round_pd_mask(X, Y, P, R) \
+ ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
+ (__v8df)(__m512d)(Y), (int)(P),\
+ (__mmask8)-1, R))
+
+#define _mm512_cmp_round_ps_mask(X, Y, P, R) \
+ ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
+ (__v16sf)(__m512)(Y), (int)(P),\
+ (__mmask16)-1, R))
+
+/* Macro form of _mm512_mask_cmp_epi64_mask.  M is parenthesized so that the
+   __mmask8 cast applies to the whole mask expression (for example a
+   conditional expression), as it does in the inline version.  */
+#define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \
+  ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
+    (__v8di)(__m512i)(Y), (int)(P),\
+    (__mmask8)(M)))
+
+/* Macro form of _mm512_mask_cmp_epi32_mask.  The result is cast to
+   __mmask16, matching the inline version (a __mmask8 cast would truncate
+   the 16-lane result), and M is parenthesized so that its cast covers the
+   whole mask expression.  */
+#define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \
+  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
+    (__v16si)(__m512i)(Y), (int)(P),\
+    (__mmask16)(M)))
+
+/* Macro form of _mm512_mask_cmp_epu64_mask.  M is parenthesized so that the
+   __mmask8 cast applies to the whole mask expression (for example a
+   conditional expression), as it does in the inline version.  */
+#define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \
+  ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
+    (__v8di)(__m512i)(Y), (int)(P),\
+    (__mmask8)(M)))
+
+/* Macro form of _mm512_mask_cmp_epu32_mask.  The result is cast to
+   __mmask16, matching the inline version (a __mmask8 cast would truncate
+   the 16-lane result), and M is parenthesized so that its cast covers the
+   whole mask expression.  */
+#define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \
+  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
+    (__v16si)(__m512i)(Y), (int)(P),\
+    (__mmask16)(M)))
+
+#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \
+ ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
+ (__v8df)(__m512d)(Y), (int)(P),\
+ (__mmask8)M, R))
+
+#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \
+ ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
+ (__v16sf)(__m512)(Y), (int)(P),\
+ (__mmask16)M, R))
+
+#define _mm_cmp_round_sd_mask(X, Y, P, R) \
+ ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P),\
+ (__mmask8)-1, R))
+
+#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
+ ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P),\
+ (M), R))
+
+#define _mm_cmp_round_ss_mask(X, Y, P, R) \
+ ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P), \
+ (__mmask8)-1, R))
+
+#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
+ ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P), \
+ (M), R))
+#endif
+
+#ifdef __OPTIMIZE__
+/* Unmasked gather: calls __builtin_ia32_gathersiv16sf with all sixteen mask
+   bits set (0xFFFF) and an undefined vector as the first operand.  Locals
+   carry reserved (double-underscore) names so that a user macro such as
+   "mask" or "v1_old" cannot rewrite this header.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i32gather_ps (__m512i __index, float const *__addr, int __scale)
+{
+  __m512 __v1_old = _mm512_undefined_ps ();
+  __mmask16 __mask = 0xFFFF;
+
+  return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
+                                                __addr,
+                                                (__v16si) __index,
+                                                __mask, __scale);
+}
+
+/* Masked gather: forwards the caller's vector __v1_old, the address, the
+   index vector, __mask and __scale to __builtin_ia32_gathersiv16sf.  The
+   first parameter uses a reserved name (it was plain "v1_old") so a user
+   macro of that name cannot break this header.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i32gather_ps (__m512 __v1_old, __mmask16 __mask,
+                          __m512i __index, float const *__addr, int __scale)
+{
+  return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
+                                                __addr,
+                                                (__v16si) __index,
+                                                __mask, __scale);
+}
+
+/* Unmasked gather: calls __builtin_ia32_gathersiv8df with all eight mask
+   bits set (0xFF) and an undefined vector as the first operand.  Locals use
+   reserved names so user macros cannot collide with them.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i32gather_pd (__m256i __index, double const *__addr, int __scale)
+{
+  __m512d __v1_old = _mm512_undefined_pd ();
+  __mmask8 __mask = 0xFF;
+
+  return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
+                                                __addr,
+                                                (__v8si) __index, __mask,
+                                                __scale);
+}
+
+/* Masked gather: passes __v1_old, __addr, the 256-bit index vector, __mask
+   and __scale straight to __builtin_ia32_gathersiv8df.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
+                          __m256i __index, double const *__addr, int __scale)
+{
+  __m512d __gathered
+    = (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old, __addr,
+                                             (__v8si) __index, __mask,
+                                             __scale);
+  return __gathered;
+}
+
+/* Unmasked gather: calls __builtin_ia32_gatherdiv16sf with all eight mask
+   bits set (0xFF) and an undefined 256-bit vector as the first operand.
+   Locals use reserved names so user macros cannot collide with them.  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i64gather_ps (__m512i __index, float const *__addr, int __scale)
+{
+  __m256 __v1_old = _mm256_undefined_ps ();
+  __mmask8 __mask = 0xFF;
+
+  return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
+                                                __addr,
+                                                (__v8di) __index, __mask,
+                                                __scale);
+}
+
+/* Masked gather: passes __v1_old, __addr, the index vector, __mask and
+   __scale straight to __builtin_ia32_gatherdiv16sf.  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
+                          __m512i __index, float const *__addr, int __scale)
+{
+  __m256 __gathered
+    = (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old, __addr,
+                                             (__v8di) __index, __mask,
+                                             __scale);
+  return __gathered;
+}
+
+/* Unmasked gather: calls __builtin_ia32_gatherdiv8df with all eight mask
+   bits set (0xFF) and an undefined vector as the first operand.  Locals use
+   reserved names so user macros cannot collide with them.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i64gather_pd (__m512i __index, double const *__addr, int __scale)
+{
+  __m512d __v1_old = _mm512_undefined_pd ();
+  __mmask8 __mask = 0xFF;
+
+  return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
+                                                __addr,
+                                                (__v8di) __index, __mask,
+                                                __scale);
+}
+
+/* Masked gather: passes __v1_old, __addr, the index vector, __mask and
+   __scale straight to __builtin_ia32_gatherdiv8df.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
+                          __m512i __index, double const *__addr, int __scale)
+{
+  __m512d __gathered
+    = (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old, __addr,
+                                             (__v8di) __index, __mask,
+                                             __scale);
+  return __gathered;
+}
+
+/* Unmasked gather: calls __builtin_ia32_gathersiv16si with all sixteen mask
+   bits set (0xFFFF) and an undefined vector as the first operand.  Locals
+   use reserved names so user macros cannot collide with them.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i32gather_epi32 (__m512i __index, int const *__addr, int __scale)
+{
+  __m512i __v1_old = _mm512_undefined_si512 ();
+  __mmask16 __mask = 0xFFFF;
+
+  return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
+                                                 __addr,
+                                                 (__v16si) __index,
+                                                 __mask, __scale);
+}
+
+/* Masked gather: passes __v1_old, __addr, the index vector, __mask and
+   __scale straight to __builtin_ia32_gathersiv16si.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
+                             __m512i __index, int const *__addr, int __scale)
+{
+  __m512i __gathered
+    = (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old, __addr,
+                                              (__v16si) __index, __mask,
+                                              __scale);
+  return __gathered;
+}
+
+/* Unmasked gather: calls __builtin_ia32_gathersiv8di with all eight mask
+   bits set (0xFF) and an undefined vector as the first operand.  Locals use
+   reserved names so user macros cannot collide with them.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i32gather_epi64 (__m256i __index, long long const *__addr, int __scale)
+{
+  __m512i __v1_old = _mm512_undefined_si512 ();
+  __mmask8 __mask = 0xFF;
+
+  return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
+                                                __addr,
+                                                (__v8si) __index, __mask,
+                                                __scale);
+}
+
+/* Masked gather: passes __v1_old, __addr, the 256-bit index vector, __mask
+   and __scale straight to __builtin_ia32_gathersiv8di.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
+                             __m256i __index, long long const *__addr,
+                             int __scale)
+{
+  __m512i __gathered
+    = (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old, __addr,
+                                             (__v8si) __index, __mask,
+                                             __scale);
+  return __gathered;
+}
+
+/* Unmasked gather: calls __builtin_ia32_gatherdiv16si with all eight mask
+   bits set (0xFF) and an undefined 256-bit vector as the first operand.
+   Locals use reserved names so user macros cannot collide with them.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i64gather_epi32 (__m512i __index, int const *__addr, int __scale)
+{
+  __m256i __v1_old = _mm256_undefined_si256 ();
+  __mmask8 __mask = 0xFF;
+
+  return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
+                                                 __addr,
+                                                 (__v8di) __index,
+                                                 __mask, __scale);
+}
+
+/* Masked gather: passes __v1_old, __addr, the index vector, __mask and
+   __scale straight to __builtin_ia32_gatherdiv16si.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
+                             __m512i __index, int const *__addr, int __scale)
+{
+  __m256i __gathered
+    = (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old, __addr,
+                                              (__v8di) __index, __mask,
+                                              __scale);
+  return __gathered;
+}
+
+/* Unmasked gather: calls __builtin_ia32_gatherdiv8di with all eight mask
+   bits set (0xFF) and an undefined vector as the first operand.  Locals use
+   reserved names so user macros cannot collide with them.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i64gather_epi64 (__m512i __index, long long const *__addr, int __scale)
+{
+  __m512i __v1_old = _mm512_undefined_si512 ();
+  __mmask8 __mask = 0xFF;
+
+  return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
+                                                __addr,
+                                                (__v8di) __index, __mask,
+                                                __scale);
+}
+
+/* Masked gather: passes __v1_old, __addr, the index vector, __mask and
+   __scale straight to __builtin_ia32_gatherdiv8di.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
+                             __m512i __index, long long const *__addr,
+                             int __scale)
+{
+  __m512i __gathered
+    = (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old, __addr,
+                                             (__v8di) __index, __mask,
+                                             __scale);
+  return __gathered;
+}
+
+/* Unmasked scatter: calls __builtin_ia32_scattersiv16sf with all sixteen
+   mask bits set (0xFFFF).  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i32scatter_ps (float *__addr, __m512i __index, __m512 __v1, int __scale)
+{
+  __v16si __idx = (__v16si) __index;
+  __v16sf __val = (__v16sf) __v1;
+
+  __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF, __idx, __val,
+                                 __scale);
+}
+
+/* Masked scatter: forwards __addr, __mask, the index vector, the data
+   vector and __scale to __builtin_ia32_scattersiv16sf.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i32scatter_ps (float *__addr, __mmask16 __mask,
+                           __m512i __index, __m512 __v1, int __scale)
+{
+  __v16si __idx = (__v16si) __index;
+  __v16sf __val = (__v16sf) __v1;
+
+  __builtin_ia32_scattersiv16sf (__addr, __mask, __idx, __val, __scale);
+}
+
+/* Unmasked scatter: calls __builtin_ia32_scattersiv8df with all eight mask
+   bits set (0xFF).  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i32scatter_pd (double *__addr, __m256i __index, __m512d __v1,
+                      int __scale)
+{
+  __v8si __idx = (__v8si) __index;
+  __v8df __val = (__v8df) __v1;
+
+  __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF, __idx, __val,
+                                __scale);
+}
+
+/* Masked scatter: forwards __addr, __mask, the 256-bit index vector, the
+   data vector and __scale to __builtin_ia32_scattersiv8df.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
+                           __m256i __index, __m512d __v1, int __scale)
+{
+  __v8si __idx = (__v8si) __index;
+  __v8df __val = (__v8df) __v1;
+
+  __builtin_ia32_scattersiv8df (__addr, __mask, __idx, __val, __scale);
+}
+
+/* Unmasked scatter: calls __builtin_ia32_scatterdiv16sf with all eight mask
+   bits set (0xFF); the data operand is a 256-bit vector.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i64scatter_ps (float *__addr, __m512i __index, __m256 __v1, int __scale)
+{
+  __v8di __idx = (__v8di) __index;
+  __v8sf __val = (__v8sf) __v1;
+
+  __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF, __idx, __val,
+                                 __scale);
+}
+
+/* Masked scatter: forwards __addr, __mask, the index vector, the 256-bit
+   data vector and __scale to __builtin_ia32_scatterdiv16sf.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
+                           __m512i __index, __m256 __v1, int __scale)
+{
+  __v8di __idx = (__v8di) __index;
+  __v8sf __val = (__v8sf) __v1;
+
+  __builtin_ia32_scatterdiv16sf (__addr, __mask, __idx, __val, __scale);
+}
+
+/* Unmasked scatter: calls __builtin_ia32_scatterdiv8df with all eight mask
+   bits set (0xFF).  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i64scatter_pd (double *__addr, __m512i __index, __m512d __v1,
+                      int __scale)
+{
+  __v8di __idx = (__v8di) __index;
+  __v8df __val = (__v8df) __v1;
+
+  __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF, __idx, __val,
+                                __scale);
+}
+
+/* Masked scatter: forwards __addr, __mask, the index vector, the data
+   vector and __scale to __builtin_ia32_scatterdiv8df.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
+                           __m512i __index, __m512d __v1, int __scale)
+{
+  __v8di __idx = (__v8di) __index;
+  __v8df __val = (__v8df) __v1;
+
+  __builtin_ia32_scatterdiv8df (__addr, __mask, __idx, __val, __scale);
+}
+
+/* Unmasked scatter: calls __builtin_ia32_scattersiv16si with all sixteen
+   mask bits set (0xFFFF).  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i32scatter_epi32 (int *__addr, __m512i __index,
+                         __m512i __v1, int __scale)
+{
+  __v16si __idx = (__v16si) __index;
+  __v16si __val = (__v16si) __v1;
+
+  __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF, __idx, __val,
+                                 __scale);
+}
+
+/* Masked scatter: forwards __addr, __mask, the index vector, the data
+   vector and __scale to __builtin_ia32_scattersiv16si.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i32scatter_epi32 (int *__addr, __mmask16 __mask,
+                              __m512i __index, __m512i __v1, int __scale)
+{
+  __v16si __idx = (__v16si) __index;
+  __v16si __val = (__v16si) __v1;
+
+  __builtin_ia32_scattersiv16si (__addr, __mask, __idx, __val, __scale);
+}
+
+/* Unmasked scatter: calls __builtin_ia32_scattersiv8di with all eight mask
+   bits set (0xFF); the index operand is a 256-bit vector.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i32scatter_epi64 (long long *__addr, __m256i __index,
+                         __m512i __v1, int __scale)
+{
+  __v8si __idx = (__v8si) __index;
+  __v8di __val = (__v8di) __v1;
+
+  __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF, __idx, __val,
+                                __scale);
+}
+
+/* Masked scatter: forwards __addr, __mask, the 256-bit index vector, the
+   data vector and __scale to __builtin_ia32_scattersiv8di.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
+                              __m256i __index, __m512i __v1, int __scale)
+{
+  __v8si __idx = (__v8si) __index;
+  __v8di __val = (__v8di) __v1;
+
+  __builtin_ia32_scattersiv8di (__addr, __mask, __idx, __val, __scale);
+}
+
+/* Unmasked scatter: calls __builtin_ia32_scatterdiv16si with all eight mask
+   bits set (0xFF); the data operand is a 256-bit vector.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i64scatter_epi32 (int *__addr, __m512i __index,
+                         __m256i __v1, int __scale)
+{
+  __v8di __idx = (__v8di) __index;
+  __v8si __val = (__v8si) __v1;
+
+  __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF, __idx, __val,
+                                 __scale);
+}
+
+/* Masked scatter: forwards __addr, __mask, the index vector, the 256-bit
+   data vector and __scale to __builtin_ia32_scatterdiv16si.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
+                              __m512i __index, __m256i __v1, int __scale)
+{
+  __v8di __idx = (__v8di) __index;
+  __v8si __val = (__v8si) __v1;
+
+  __builtin_ia32_scatterdiv16si (__addr, __mask, __idx, __val, __scale);
+}
+
+/* Unmasked scatter: calls __builtin_ia32_scatterdiv8di with all eight mask
+   bits set (0xFF).  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i64scatter_epi64 (long long *__addr, __m512i __index,
+                         __m512i __v1, int __scale)
+{
+  __v8di __idx = (__v8di) __index;
+  __v8di __val = (__v8di) __v1;
+
+  __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF, __idx, __val,
+                                __scale);
+}
+
+/* Masked scatter: forwards __addr, __mask, the index vector, the data
+   vector and __scale to __builtin_ia32_scatterdiv8di.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
+                              __m512i __index, __m512i __v1, int __scale)
+{
+  __v8di __idx = (__v8di) __index;
+  __v8di __val = (__v8di) __v1;
+
+  __builtin_ia32_scatterdiv8di (__addr, __mask, __idx, __val, __scale);
+}
+#else
+/* Macro forms of the gather intrinsics, used when __OPTIMIZE__ is not
+   defined.  Every macro argument is parenthesized before it is cast (a
+   cast binds tighter than + or |, so "(float const *)p + 4" would cast p
+   first), and each expansion is wrapped in parentheses so it behaves as a
+   single expression at the point of use.  The unmasked forms pass an
+   undefined vector and an all-ones mask.  */
+#define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \
+  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(), \
+                                          (float const *)(ADDR), \
+                                          (__v16si)(__m512i)(INDEX), \
+                                          (__mmask16)0xFFFF, (int)(SCALE)))
+
+#define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
+  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512)(V1OLD), \
+                                          (float const *)(ADDR), \
+                                          (__v16si)(__m512i)(INDEX), \
+                                          (__mmask16)(MASK), (int)(SCALE)))
+
+#define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \
+  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(), \
+                                          (double const *)(ADDR), \
+                                          (__v8si)(__m256i)(INDEX), \
+                                          (__mmask8)0xFF, (int)(SCALE)))
+
+#define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
+  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d)(V1OLD), \
+                                          (double const *)(ADDR), \
+                                          (__v8si)(__m256i)(INDEX), \
+                                          (__mmask8)(MASK), (int)(SCALE)))
+
+#define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \
+  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(), \
+                                          (float const *)(ADDR), \
+                                          (__v8di)(__m512i)(INDEX), \
+                                          (__mmask8)0xFF, (int)(SCALE)))
+
+#define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
+  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256)(V1OLD), \
+                                          (float const *)(ADDR), \
+                                          (__v8di)(__m512i)(INDEX), \
+                                          (__mmask8)(MASK), (int)(SCALE)))
+
+#define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \
+  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(), \
+                                          (double const *)(ADDR), \
+                                          (__v8di)(__m512i)(INDEX), \
+                                          (__mmask8)0xFF, (int)(SCALE)))
+
+#define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
+  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d)(V1OLD), \
+                                          (double const *)(ADDR), \
+                                          (__v8di)(__m512i)(INDEX), \
+                                          (__mmask8)(MASK), (int)(SCALE)))
+
+#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \
+  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_si512 (), \
+                                           (int const *)(ADDR), \
+                                           (__v16si)(__m512i)(INDEX), \
+                                           (__mmask16)0xFFFF, (int)(SCALE)))
+
+#define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
+  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i)(V1OLD), \
+                                           (int const *)(ADDR), \
+                                           (__v16si)(__m512i)(INDEX), \
+                                           (__mmask16)(MASK), (int)(SCALE)))
+
+#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \
+  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_si512 (), \
+                                          (long long const *)(ADDR), \
+                                          (__v8si)(__m256i)(INDEX), \
+                                          (__mmask8)0xFF, (int)(SCALE)))
+
+#define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
+  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i)(V1OLD), \
+                                          (long long const *)(ADDR), \
+                                          (__v8si)(__m256i)(INDEX), \
+                                          (__mmask8)(MASK), (int)(SCALE)))
+
+#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \
+  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(), \
+                                           (int const *)(ADDR), \
+                                           (__v8di)(__m512i)(INDEX), \
+                                           (__mmask8)0xFF, (int)(SCALE)))
+
+#define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
+  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i)(V1OLD), \
+                                           (int const *)(ADDR), \
+                                           (__v8di)(__m512i)(INDEX), \
+                                           (__mmask8)(MASK), (int)(SCALE)))
+
+#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \
+  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_si512 (), \
+                                          (long long const *)(ADDR), \
+                                          (__v8di)(__m512i)(INDEX), \
+                                          (__mmask8)0xFF, (int)(SCALE)))
+
+#define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
+  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i)(V1OLD), \
+                                          (long long const *)(ADDR), \
+                                          (__v8di)(__m512i)(INDEX), \
+                                          (__mmask8)(MASK), (int)(SCALE)))
+
+/* Macro forms of the scatter intrinsics, used when __OPTIMIZE__ is not
+   defined.  Every macro argument is parenthesized before it is cast, so
+   expressions such as "base + off" or "m1 | m2" are cast as a whole.  The
+   mask cast matches the inline forms: __mmask16 for the sixteen-element
+   scatters and __mmask8 for the eight-element ones, which includes
+   _mm512_mask_i64scatter_ps (it previously cast to __mmask16).  */
+#define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
+  __builtin_ia32_scattersiv16sf ((float *)(ADDR), (__mmask16)0xFFFF, \
+                                 (__v16si)(__m512i)(INDEX), \
+                                 (__v16sf)(__m512)(V1), (int)(SCALE))
+
+#define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
+  __builtin_ia32_scattersiv16sf ((float *)(ADDR), (__mmask16)(MASK), \
+                                 (__v16si)(__m512i)(INDEX), \
+                                 (__v16sf)(__m512)(V1), (int)(SCALE))
+
+#define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
+  __builtin_ia32_scattersiv8df ((double *)(ADDR), (__mmask8)0xFF, \
+                                (__v8si)(__m256i)(INDEX), \
+                                (__v8df)(__m512d)(V1), (int)(SCALE))
+
+#define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
+  __builtin_ia32_scattersiv8df ((double *)(ADDR), (__mmask8)(MASK), \
+                                (__v8si)(__m256i)(INDEX), \
+                                (__v8df)(__m512d)(V1), (int)(SCALE))
+
+#define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
+  __builtin_ia32_scatterdiv16sf ((float *)(ADDR), (__mmask8)0xFF, \
+                                 (__v8di)(__m512i)(INDEX), \
+                                 (__v8sf)(__m256)(V1), (int)(SCALE))
+
+#define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
+  __builtin_ia32_scatterdiv16sf ((float *)(ADDR), (__mmask8)(MASK), \
+                                 (__v8di)(__m512i)(INDEX), \
+                                 (__v8sf)(__m256)(V1), (int)(SCALE))
+
+#define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
+  __builtin_ia32_scatterdiv8df ((double *)(ADDR), (__mmask8)0xFF, \
+                                (__v8di)(__m512i)(INDEX), \
+                                (__v8df)(__m512d)(V1), (int)(SCALE))
+
+#define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
+  __builtin_ia32_scatterdiv8df ((double *)(ADDR), (__mmask8)(MASK), \
+                                (__v8di)(__m512i)(INDEX), \
+                                (__v8df)(__m512d)(V1), (int)(SCALE))
+
+#define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
+  __builtin_ia32_scattersiv16si ((int *)(ADDR), (__mmask16)0xFFFF, \
+                                 (__v16si)(__m512i)(INDEX), \
+                                 (__v16si)(__m512i)(V1), (int)(SCALE))
+
+#define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
+  __builtin_ia32_scattersiv16si ((int *)(ADDR), (__mmask16)(MASK), \
+                                 (__v16si)(__m512i)(INDEX), \
+                                 (__v16si)(__m512i)(V1), (int)(SCALE))
+
+#define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
+  __builtin_ia32_scattersiv8di ((long long *)(ADDR), (__mmask8)0xFF, \
+                                (__v8si)(__m256i)(INDEX), \
+                                (__v8di)(__m512i)(V1), (int)(SCALE))
+
+#define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
+  __builtin_ia32_scattersiv8di ((long long *)(ADDR), (__mmask8)(MASK), \
+                                (__v8si)(__m256i)(INDEX), \
+                                (__v8di)(__m512i)(V1), (int)(SCALE))
+
+#define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
+  __builtin_ia32_scatterdiv16si ((int *)(ADDR), (__mmask8)0xFF, \
+                                 (__v8di)(__m512i)(INDEX), \
+                                 (__v8si)(__m256i)(V1), (int)(SCALE))
+
+#define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
+  __builtin_ia32_scatterdiv16si ((int *)(ADDR), (__mmask8)(MASK), \
+                                 (__v8di)(__m512i)(INDEX), \
+                                 (__v8si)(__m256i)(V1), (int)(SCALE))
+
+#define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
+  __builtin_ia32_scatterdiv8di ((long long *)(ADDR), (__mmask8)0xFF, \
+                                (__v8di)(__m512i)(INDEX), \
+                                (__v8di)(__m512i)(V1), (int)(SCALE))
+
+#define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
+  __builtin_ia32_scatterdiv8di ((long long *)(ADDR), (__mmask8)(MASK), \
+                                (__v8di)(__m512i)(INDEX), \
+                                (__v8di)(__m512i)(V1), (int)(SCALE))
+#endif
+
+/* Hands __A, __W and the mask __U (in that order) to
+   __builtin_ia32_compressdf512_mask and returns the result as __m512d.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+  __m512d __res
+    = (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, (__v8df) __W,
+                                                   (__mmask8) __U);
+  return __res;
+}
+
+/* Same builtin as the _mask_ form, but a zero vector from
+   _mm512_setzero_pd () takes the place of the caller-supplied __W.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
+{
+  __v8df __zero = (__v8df) _mm512_setzero_pd ();
+
+  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, __zero,
+                                                      (__mmask8) __U);
+}
+
+/* Passes the destination pointer (viewed as __v8df *), the vector __A and
+   the mask __U to __builtin_ia32_compressstoredf512_mask.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
+{
+  __v8df *__dst = (__v8df *) __P;
+
+  __builtin_ia32_compressstoredf512_mask (__dst, (__v8df) __A, (__mmask8) __U);
+}
+
+/* Hands __A, __W and the mask __U (in that order) to
+   __builtin_ia32_compresssf512_mask and returns the result as __m512.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+  __m512 __res
+    = (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, (__v16sf) __W,
+                                                  (__mmask16) __U);
+  return __res;
+}
+
+/* Same builtin as the _mask_ form, but a zero vector from
+   _mm512_setzero_ps () takes the place of the caller-supplied __W.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
+{
+  __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+
+  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, __zero,
+                                                     (__mmask16) __U);
+}
+
+/* Passes the destination pointer (viewed as __v16sf *), the vector __A and
+   the mask __U to __builtin_ia32_compressstoresf512_mask.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
+{
+  __v16sf *__dst = (__v16sf *) __P;
+
+  __builtin_ia32_compressstoresf512_mask (__dst, (__v16sf) __A,
+                                          (__mmask16) __U);
+}
+
+/* Hands __A, __W and the mask __U (in that order) to
+   __builtin_ia32_compressdi512_mask and returns the result as __m512i.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
+{
+  __m512i __res
+    = (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, (__v8di) __W,
+                                                   (__mmask8) __U);
+  return __res;
+}
+
+/* Same builtin as the _mask_ form, but a zero vector from
+   _mm512_setzero_si512 () takes the place of the caller-supplied __W.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
+{
+  __v8di __zero = (__v8di) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, __zero,
+                                                      (__mmask8) __U);
+}
+
+/* Passes the destination pointer (viewed as __v8di *), the vector __A and
+   the mask __U to __builtin_ia32_compressstoredi512_mask.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
+{
+  __v8di *__dst = (__v8di *) __P;
+
+  __builtin_ia32_compressstoredi512_mask (__dst, (__v8di) __A, (__mmask8) __U);
+}
+
+/* Hands __A, __W and the mask __U (in that order) to
+   __builtin_ia32_compresssi512_mask and returns the result as __m512i.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
+{
+  __m512i __res
+    = (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
+                                                   (__v16si) __W,
+                                                   (__mmask16) __U);
+  return __res;
+}
+
+/* Same builtin as the _mask_ form, but a zero vector from
+   _mm512_setzero_si512 () takes the place of the caller-supplied __W.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
+{
+  __v16si __zero = (__v16si) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, __zero,
+                                                      (__mmask16) __U);
+}
+
+/* Passes the destination pointer (viewed as __v16si *), the vector __A and
+   the mask __U to __builtin_ia32_compressstoresi512_mask.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
+{
+  __v16si *__dst = (__v16si *) __P;
+
+  __builtin_ia32_compressstoresi512_mask (__dst, (__v16si) __A,
+                                          (__mmask16) __U);
+}
+
+/* Hands __A, __W and the mask __U (in that order) to
+   __builtin_ia32_expanddf512_mask and returns the result as __m512d.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+  __m512d __res
+    = (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, (__v8df) __W,
+                                                 (__mmask8) __U);
+  return __res;
+}
+
+/* Calls the _maskz builtin __builtin_ia32_expanddf512_maskz with __A, a
+   zero vector from _mm512_setzero_pd () and the mask __U.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
+{
+  __v8df __zero = (__v8df) _mm512_setzero_pd ();
+
+  return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A, __zero,
+                                                     (__mmask8) __U);
+}
+
+/* Passes the source pointer (viewed as const __v8df *), __W and the mask
+   __U to __builtin_ia32_expandloaddf512_mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
+{
+  const __v8df *__src = (const __v8df *) __P;
+
+  return (__m512d) __builtin_ia32_expandloaddf512_mask (__src, (__v8df) __W,
+                                                        (__mmask8) __U);
+}
+
+/* Passes the source pointer (viewed as const __v8df *), a zero vector and
+   the mask __U to __builtin_ia32_expandloaddf512_maskz.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
+{
+  const __v8df *__src = (const __v8df *) __P;
+  __v8df __zero = (__v8df) _mm512_setzero_pd ();
+
+  return (__m512d) __builtin_ia32_expandloaddf512_maskz (__src, __zero,
+                                                         (__mmask8) __U);
+}
+
+/* Hands __A, __W and the mask __U (in that order) to
+   __builtin_ia32_expandsf512_mask and returns the result as __m512.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+  __m512 __res
+    = (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, (__v16sf) __W,
+                                                (__mmask16) __U);
+  return __res;
+}
+
+/* Calls the _maskz builtin __builtin_ia32_expandsf512_maskz with __A, a
+   zero vector from _mm512_setzero_ps () and the mask __U.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
+{
+  __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+
+  return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A, __zero,
+                                                    (__mmask16) __U);
+}
+
+/* Passes the source pointer (viewed as const __v16sf *), __W and the mask
+   __U to __builtin_ia32_expandloadsf512_mask.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
+{
+  const __v16sf *__src = (const __v16sf *) __P;
+
+  return (__m512) __builtin_ia32_expandloadsf512_mask (__src, (__v16sf) __W,
+                                                       (__mmask16) __U);
+}
+
+/* Passes the source pointer (viewed as const __v16sf *), a zero vector and
+   the mask __U to __builtin_ia32_expandloadsf512_maskz.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
+{
+  const __v16sf *__src = (const __v16sf *) __P;
+  __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+
+  return (__m512) __builtin_ia32_expandloadsf512_maskz (__src, __zero,
+                                                        (__mmask16) __U);
+}
+
+/* Hands __A, __W and the mask __U (in that order) to
+   __builtin_ia32_expanddi512_mask and returns the result as __m512i.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
+{
+  __m512i __res
+    = (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, (__v8di) __W,
+                                                 (__mmask8) __U);
+  return __res;
+}
+
+/* Calls the _maskz builtin __builtin_ia32_expanddi512_maskz with __A, a
+   zero vector from _mm512_setzero_si512 () and the mask __U.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
+{
+  __v8di __zero = (__v8di) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A, __zero,
+                                                     (__mmask8) __U);
+}
+
+/* Passes the source pointer (viewed as const __v8di *), __W and the mask
+   __U to __builtin_ia32_expandloaddi512_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
+{
+  const __v8di *__src = (const __v8di *) __P;
+
+  return (__m512i) __builtin_ia32_expandloaddi512_mask (__src, (__v8di) __W,
+                                                        (__mmask8) __U);
+}
+
+/* Passes the source pointer (viewed as const __v8di *), a zero vector and
+   the mask __U to __builtin_ia32_expandloaddi512_maskz.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
+{
+  const __v8di *__src = (const __v8di *) __P;
+  __v8di __zero = (__v8di) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_expandloaddi512_maskz (__src, __zero,
+                                                         (__mmask8) __U);
+}
+
+/* Hands __A, __W and the mask __U (in that order) to
+   __builtin_ia32_expandsi512_mask and returns the result as __m512i.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
+{
+  __m512i __res
+    = (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, (__v16si) __W,
+                                                 (__mmask16) __U);
+  return __res;
+}
+
+/* Calls the _maskz builtin __builtin_ia32_expandsi512_maskz with __A, a
+   zero vector from _mm512_setzero_si512 () and the mask __U.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
+{
+  __v16si __zero = (__v16si) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A, __zero,
+                                                     (__mmask16) __U);
+}
+
+/* Passes the source pointer (viewed as const __v16si *), __W and the mask
+   __U to __builtin_ia32_expandloadsi512_mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
+{
+  const __v16si *__src = (const __v16si *) __P;
+
+  return (__m512i) __builtin_ia32_expandloadsi512_mask (__src, (__v16si) __W,
+                                                        (__mmask16) __U);
+}
+
+/* Passes the source pointer (viewed as const __v16si *), a zero vector and
+   the mask __U to __builtin_ia32_expandloadsi512_maskz.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
+{
+  const __v16si *__src = (const __v16si *) __P;
+  __v16si __zero = (__v16si) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_expandloadsi512_maskz (__src, __zero,
+                                                         (__mmask16) __U);
+}
+
+/* Mask arithmetic operations */
+/* Returns the result of __builtin_ia32_kandhi applied to __A and __B.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_kand (__mmask16 __A, __mmask16 __B)
+{
+  __mmask16 __k
+    = (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
+  return __k;
+}
+
+/* Returns the result of __builtin_ia32_kandnhi applied to __A and __B.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_kandn (__mmask16 __A, __mmask16 __B)
+{
+  __mmask16 __k
+    = (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
+  return __k;
+}
+
+/* Returns the result of __builtin_ia32_korhi applied to __A and __B.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_kor (__mmask16 __A, __mmask16 __B)
+{
+  __mmask16 __k
+    = (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
+  return __k;
+}
+
+/* Calls __builtin_ia32_kortestzhi on __A and __B; the builtin's result is
+   cast to __mmask16 and then returned as int.  */
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_kortestz (__mmask16 __A, __mmask16 __B)
+{
+  __mmask16 __flag
+    = (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
+  return __flag;
+}
+
+/* Calls __builtin_ia32_kortestchi on __A and __B; the builtin's result is
+   cast to __mmask16 and then returned as int.  */
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_kortestc (__mmask16 __A, __mmask16 __B)
+{
+  __mmask16 __flag
+    = (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
+  return __flag;
+}
+
+/* Returns the result of __builtin_ia32_kxnorhi applied to __A and __B.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_kxnor (__mmask16 __A, __mmask16 __B)
+{
+  __mmask16 __k
+    = (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
+  return __k;
+}
+
+/* Returns the result of __builtin_ia32_kxorhi applied to __A and __B.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_kxor (__mmask16 __A, __mmask16 __B)
+{
+  __mmask16 __k
+    = (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
+  return __k;
+}
+
+/* Returns the result of __builtin_ia32_knothi applied to __A.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_knot (__mmask16 __A)
+{
+  __mmask16 __k = (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
+  return __k;
+}
+
+/* Returns the result of __builtin_ia32_kunpckhi applied to __A and __B.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
+{
+  __mmask16 __k
+    = (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
+  return __k;
+}
+
+#ifdef __OPTIMIZE__
+/* Calls __builtin_ia32_inserti32x4_mask with __C, the 128-bit vector __D,
+   the immediate __imm, a zero vector and the sixteen-bit mask __B.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
+                          const int __imm)
+{
+  __v16si __zero = (__v16si) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
+                                                    (__v4si) __D, __imm,
+                                                    __zero, __B);
+}
+
+/* Calls __builtin_ia32_insertf32x4_mask with __C, the 128-bit vector __D,
+   the immediate __imm, a zero vector and the sixteen-bit mask __B.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
+                          const int __imm)
+{
+  __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+
+  return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
+                                                   (__v4sf) __D, __imm,
+                                                   __zero, __B);
+}
+
+/* Calls __builtin_ia32_inserti32x4_mask with __C, the 128-bit vector __D,
+   the immediate __imm, the caller's vector __A and the mask __B.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
+                         __m128i __D, const int __imm)
+{
+  __m512i __res
+    = (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C, (__v4si) __D,
+                                                 __imm, (__v16si) __A, __B);
+  return __res;
+}
+
+/* Calls __builtin_ia32_insertf32x4_mask with __C, the 128-bit vector __D,
+   the immediate __imm, the caller's vector __A and the mask __B.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
+                         __m128 __D, const int __imm)
+{
+  __m512 __res
+    = (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C, (__v4sf) __D,
+                                                __imm, (__v16sf) __A, __B);
+  return __res;
+}
+#else
+/* Macro forms of the masked 32x4 insert intrinsics, used when __OPTIMIZE__
+   is not defined.  The mask argument is cast to __mmask16, the type the
+   inline forms declare for it; a cast to __mmask8 would discard mask bits
+   8..15 before they reach the builtin.  */
+#define _mm512_maskz_insertf32x4(A, X, Y, C) \
+  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
+    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(), \
+    (__mmask16)(A)))
+
+#define _mm512_maskz_inserti32x4(A, X, Y, C) \
+  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
+    (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
+    (__mmask16)(A)))
+
+#define _mm512_mask_insertf32x4(A, B, X, Y, C) \
+  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
+    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A), \
+    (__mmask16)(B)))
+
+#define _mm512_mask_inserti32x4(A, B, X, Y, C) \
+  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
+    (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A), \
+    (__mmask16)(B)))
+#endif
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1);
+}
+
+/* Calls __builtin_ia32_pmaxsq512_mask with a zero vector as the third
+   operand and the caller's mask __M.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
+{
+  __v8di __zero = (__v8di) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, (__v8di) __B,
+                                                  __zero, __M);
+}
+
+/* Calls __builtin_ia32_pmaxsq512_mask with the caller's vector __W as the
+   third operand and the mask __M.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
+{
+  __m512i __res
+    = (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, (__v8di) __B,
+                                               (__v8di) __W, __M);
+  return __res;
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1);
+}
+
+/* Calls __builtin_ia32_pminsq512_mask with the caller's vector __W as the
+   third operand and the mask __M.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
+{
+  __m512i __res
+    = (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, (__v8di) __B,
+                                               (__v8di) __W, __M);
+  return __res;
+}
+
+/* Calls __builtin_ia32_pminsq512_mask with a zero vector as the third
+   operand and the caller's mask __M.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
+{
+  __v8di __zero = (__v8di) _mm512_setzero_si512 ();
+
+  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, (__v8di) __B,
+                                                  __zero, __M);
+}
+
+extern __inline __m512i /* Unmasked form: __builtin_ia32_pmaxuq512_mask (__A, __B, _mm512_undefined_si512 (), (__mmask8) -1).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_epu64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i /* Maskz form: __builtin_ia32_pmaxuq512_mask (__A, __B, _mm512_setzero_si512 (), __M).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i /* Mask form: __builtin_ia32_pmaxuq512_mask (__A, __B, __W, __M).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W, __M);
+}
+
+extern __inline __m512i /* Unmasked form: __builtin_ia32_pminuq512_mask (__A, __B, _mm512_undefined_si512 (), (__mmask8) -1).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_epu64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i /* Mask form: __builtin_ia32_pminuq512_mask (__A, __B, __W, __M).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W, __M);
+}
+
+extern __inline __m512i /* Maskz form: __builtin_ia32_pminuq512_mask (__A, __B, _mm512_setzero_si512 (), __M).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i /* Unmasked form: __builtin_ia32_pmaxsd512_mask (__A, __B, _mm512_undefined_si512 (), (__mmask16) -1).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i /* Maskz form: __builtin_ia32_pmaxsd512_mask (__A, __B, _mm512_setzero_si512 (), __M).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i /* Mask form: __builtin_ia32_pmaxsd512_mask (__A, __B, __W, __M).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W, __M);
+}
+
+extern __inline __m512i /* Unmasked form: __builtin_ia32_pminsd512_mask (__A, __B, _mm512_undefined_si512 (), (__mmask16) -1).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i /* Maskz form: __builtin_ia32_pminsd512_mask (__A, __B, _mm512_setzero_si512 (), __M).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i /* Mask form: __builtin_ia32_pminsd512_mask (__A, __B, __W, __M).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W, __M);
+}
+
+extern __inline __m512i /* Unmasked form: __builtin_ia32_pmaxud512_mask (__A, __B, _mm512_undefined_si512 (), (__mmask16) -1).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_epu32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i /* Maskz form: __builtin_ia32_pmaxud512_mask (__A, __B, _mm512_setzero_si512 (), __M).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i /* Mask form: __builtin_ia32_pmaxud512_mask (__A, __B, __W, __M).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W, __M);
+}
+
+extern __inline __m512i /* Unmasked form: __builtin_ia32_pminud512_mask (__A, __B, _mm512_undefined_si512 (), (__mmask16) -1).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_epu32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i /* Maskz form: __builtin_ia32_pminud512_mask (__A, __B, _mm512_setzero_si512 (), __M).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i /* Mask form: __builtin_ia32_pminud512_mask (__A, __B, __W, __M).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W, __M);
+}
+
+extern __inline __m512 /* Unmasked form: __builtin_ia32_unpcklps512_mask (__A, __B, _mm512_undefined_ps (), (__mmask16) -1).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_unpacklo_ps (__m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512 /* Mask form: __builtin_ia32_unpcklps512_mask (__A, __B, __W, __U).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512 /* Maskz form: __builtin_ia32_unpcklps512_mask (__A, __B, _mm512_setzero_ps (), __U).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d /* Inline form (only under __OPTIMIZE__): forwards __A, __B and __R to __builtin_ia32_maxsd_round.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
+ (__v2df) __B,
+ __R);
+}
+
+extern __inline __m128 /* Inline form (only under __OPTIMIZE__): forwards __A, __B and __R to __builtin_ia32_maxss_round.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __R);
+}
+
+extern __inline __m128d /* Inline form (only under __OPTIMIZE__): forwards __A, __B and __R to __builtin_ia32_minsd_round.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
+ (__v2df) __B,
+ __R);
+}
+
+extern __inline __m128 /* Inline form (only under __OPTIMIZE__): forwards __A, __B and __R to __builtin_ia32_minss_round.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __R);
+}
+
+#else
+/* Macro forms of the scalar max/min round intrinsics, used when
+   __OPTIMIZE__ is not defined.  Each expands to the same builtin that
+   the inline definition of the same name calls (maxsd/maxss/minsd/minss),
+   so optimized and unoptimized builds compute the same operation.  */
+#define _mm_max_round_sd(A, B, C) \
+ (__m128d)__builtin_ia32_maxsd_round(A, B, C)
+
+#define _mm_max_round_ss(A, B, C) \
+ (__m128)__builtin_ia32_maxss_round(A, B, C)
+
+#define _mm_min_round_sd(A, B, C) \
+ (__m128d)__builtin_ia32_minsd_round(A, B, C)
+
+#define _mm_min_round_ss(A, B, C) \
+ (__m128)__builtin_ia32_minss_round(A, B, C)
+#endif
+
+extern __inline __m512d /* Forwards to __builtin_ia32_blendmpd_512_mask (__A, __W, __U), in that operand order.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
+{
+ return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512 /* Forwards to __builtin_ia32_blendmps_512_mask (__A, __W, __U), in that operand order.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
+{
+ return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i /* Forwards to __builtin_ia32_blendmq_512_mask (__A, __W, __U), in that operand order.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
+{
+ return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i /* Forwards to __builtin_ia32_blendmd_512_mask (__A, __W, __U), in that operand order.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
+{
+ return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d /* Inline form (only under __OPTIMIZE__): __builtin_ia32_vfmaddsd3_round (__W, __A, __B, __R).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
+ (__v2df) __A,
+ (__v2df) __B,
+ __R);
+}
+
+extern __inline __m128 /* Inline form (only under __OPTIMIZE__): __builtin_ia32_vfmaddss3_round (__W, __A, __B, __R).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
+ (__v4sf) __A,
+ (__v4sf) __B,
+ __R);
+}
+
+extern __inline __m128d /* Inline form (only under __OPTIMIZE__): reuses __builtin_ia32_vfmaddsd3_round with __B negated.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
+ (__v2df) __A,
+ -(__v2df) __B,
+ __R);
+}
+
+extern __inline __m128 /* Inline form (only under __OPTIMIZE__): reuses __builtin_ia32_vfmaddss3_round with __B negated.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
+ (__v4sf) __A,
+ -(__v4sf) __B,
+ __R);
+}
+
+extern __inline __m128d /* Inline form (only under __OPTIMIZE__): reuses __builtin_ia32_vfmaddsd3_round with __A negated.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
+ -(__v2df) __A,
+ (__v2df) __B,
+ __R);
+}
+
+extern __inline __m128 /* Inline form (only under __OPTIMIZE__): reuses __builtin_ia32_vfmaddss3_round with __A negated.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
+ -(__v4sf) __A,
+ (__v4sf) __B,
+ __R);
+}
+
+extern __inline __m128d /* Inline form (only under __OPTIMIZE__): reuses __builtin_ia32_vfmaddsd3_round with both __A and __B negated.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
+ -(__v2df) __A,
+ -(__v2df) __B,
+ __R);
+}
+
+extern __inline __m128 /* Inline form (only under __OPTIMIZE__): reuses __builtin_ia32_vfmaddss3_round with both __A and __B negated.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
+ -(__v4sf) __A,
+ -(__v4sf) __B,
+ __R);
+}
+#else
+#define _mm_fmadd_round_sd(A, B, C, R) \
+ (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, C, R) /* Macro forms used when __OPTIMIZE__ is not defined; all eight expand to the vfmadd*3_round builtins.  */
+/* fmadd, single: operands passed through unchanged.  */
+#define _mm_fmadd_round_ss(A, B, C, R) \
+ (__m128)__builtin_ia32_vfmaddss3_round(A, B, C, R)
+/* fmsub, double: third operand C is negated.  */
+#define _mm_fmsub_round_sd(A, B, C, R) \
+ (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, -(C), R)
+/* fmsub, single: third operand C is negated.  */
+#define _mm_fmsub_round_ss(A, B, C, R) \
+ (__m128)__builtin_ia32_vfmaddss3_round(A, B, -(C), R)
+/* fnmadd, double: second operand B is negated.  */
+#define _mm_fnmadd_round_sd(A, B, C, R) \
+ (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), C, R)
+/* fnmadd, single: second operand B is negated.  */
+#define _mm_fnmadd_round_ss(A, B, C, R) \
+ (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), C, R)
+/* fnmsub, double: both B and C are negated.  */
+#define _mm_fnmsub_round_sd(A, B, C, R) \
+ (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), -(C), R)
+/* fnmsub, single: both B and C are negated.  */
+#define _mm_fnmsub_round_ss(A, B, C, R) \
+ (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), -(C), R)
+#endif
+
+#ifdef __OPTIMIZE__
+extern __inline int /* Inline form (only under __OPTIMIZE__): returns __builtin_ia32_vcomiss (__A, __B, __P, __R).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
+{
+ return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
+}
+
+extern __inline int /* Inline form (only under __OPTIMIZE__): returns __builtin_ia32_vcomisd (__A, __B, __P, __R).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
+{
+ return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
+}
+#else
+#define _mm_comi_round_ss(A, B, C, D)\
+__builtin_ia32_vcomiss(A, B, C, D) /* Macro form used when __OPTIMIZE__ is not defined; arguments are passed through uncast.  */
+#define _mm_comi_round_sd(A, B, C, D)\
+__builtin_ia32_vcomisd(A, B, C, D) /* Macro form used when __OPTIMIZE__ is not defined; arguments are passed through uncast.  */
+#endif
+
+extern __inline __m512d /* Unmasked form: __builtin_ia32_sqrtpd512_mask (__A, _mm512_undefined_pd (), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sqrt_pd (__m512d __A)
+{
+ return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Mask form: __builtin_ia32_sqrtpd512_mask (__A, __W, __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Maskz form: __builtin_ia32_sqrtpd512_mask (__A, _mm512_setzero_pd (), __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Unmasked form: __builtin_ia32_sqrtps512_mask (__A, _mm512_undefined_ps (), (__mmask16) -1, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sqrt_ps (__m512 __A)
+{
+ return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Mask form: __builtin_ia32_sqrtps512_mask (__A, __W, __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Maskz form: __builtin_ia32_sqrtps512_mask (__A, _mm512_setzero_ps (), __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Unmasked form: __builtin_ia32_addpd512_mask (__A, __B, _mm512_undefined_pd (), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_add_pd (__m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Mask form: __builtin_ia32_addpd512_mask (__A, __B, __W, __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Maskz form: __builtin_ia32_addpd512_mask (__A, __B, _mm512_setzero_pd (), __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Unmasked form: __builtin_ia32_addps512_mask (__A, __B, _mm512_undefined_ps (), (__mmask16) -1, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_add_ps (__m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Mask form: __builtin_ia32_addps512_mask (__A, __B, __W, __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Maskz form: __builtin_ia32_addps512_mask (__A, __B, _mm512_setzero_ps (), __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Unmasked form: __builtin_ia32_subpd512_mask (__A, __B, _mm512_undefined_pd (), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sub_pd (__m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Mask form: __builtin_ia32_subpd512_mask (__A, __B, __W, __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Maskz form: __builtin_ia32_subpd512_mask (__A, __B, _mm512_setzero_pd (), __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Unmasked form: __builtin_ia32_subps512_mask (__A, __B, _mm512_undefined_ps (), (__mmask16) -1, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sub_ps (__m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Mask form: __builtin_ia32_subps512_mask (__A, __B, __W, __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Maskz form: __builtin_ia32_subps512_mask (__A, __B, _mm512_setzero_ps (), __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Unmasked form: __builtin_ia32_mulpd512_mask (__A, __B, _mm512_undefined_pd (), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mul_pd (__m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Mask form: __builtin_ia32_mulpd512_mask (__A, __B, __W, __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Maskz form: __builtin_ia32_mulpd512_mask (__A, __B, _mm512_setzero_pd (), __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Unmasked form: __builtin_ia32_mulps512_mask (__A, __B, _mm512_undefined_ps (), (__mmask16) -1, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mul_ps (__m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Mask form: __builtin_ia32_mulps512_mask (__A, __B, __W, __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Maskz form: __builtin_ia32_mulps512_mask (__A, __B, _mm512_setzero_ps (), __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Unmasked form: __builtin_ia32_divpd512_mask (__M, __V, _mm512_undefined_pd (), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); here __M is a vector operand, not a mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_div_pd (__m512d __M, __m512d __V)
+{
+ return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
+ (__v8df) __V,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Mask form: __builtin_ia32_divpd512_mask (__M, __V, __W, __U, _MM_FROUND_CUR_DIRECTION); __U is the mask, __M a vector operand.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
+{
+ return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
+ (__v8df) __V,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Maskz form: __builtin_ia32_divpd512_mask (__M, __V, _mm512_setzero_pd (), __U, _MM_FROUND_CUR_DIRECTION); __U is the mask, __M a vector operand.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
+{
+ return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
+ (__v8df) __V,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Unmasked form: __builtin_ia32_divps512_mask (__A, __B, _mm512_undefined_ps (), (__mmask16) -1, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_div_ps (__m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Mask form: __builtin_ia32_divps512_mask (__A, __B, __W, __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Maskz form: __builtin_ia32_divps512_mask (__A, __B, _mm512_setzero_ps (), __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Unmasked form: __builtin_ia32_maxpd512_mask (__A, __B, _mm512_undefined_pd (), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_pd (__m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Mask form: __builtin_ia32_maxpd512_mask (__A, __B, __W, __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Maskz form: __builtin_ia32_maxpd512_mask (__A, __B, _mm512_setzero_pd (), __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Unmasked form: __builtin_ia32_maxps512_mask (__A, __B, _mm512_undefined_ps (), (__mmask16) -1, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_ps (__m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Mask form: __builtin_ia32_maxps512_mask (__A, __B, __W, __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Maskz form: __builtin_ia32_maxps512_mask (__A, __B, _mm512_setzero_ps (), __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Unmasked form: __builtin_ia32_minpd512_mask (__A, __B, _mm512_undefined_pd (), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_pd (__m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Mask form: __builtin_ia32_minpd512_mask (__A, __B, __W, __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Maskz form: __builtin_ia32_minpd512_mask (__A, __B, _mm512_setzero_pd (), __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Unmasked form: __builtin_ia32_minps512_mask (__A, __B, _mm512_undefined_ps (), (__mmask16) -1, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_ps (__m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Mask form: __builtin_ia32_minps512_mask (__A, __B, __W, __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Maskz form: __builtin_ia32_minps512_mask (__A, __B, _mm512_setzero_ps (), __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Unmasked form: __builtin_ia32_scalefpd512_mask (__A, __B, _mm512_undefined_pd (), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_scalef_pd (__m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Mask form: __builtin_ia32_scalefpd512_mask (__A, __B, __W, __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Maskz form: __builtin_ia32_scalefpd512_mask (__A, __B, _mm512_setzero_pd (), __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Unmasked form: __builtin_ia32_scalefps512_mask (__A, __B, _mm512_undefined_ps (), (__mmask16) -1, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_scalef_ps (__m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Mask form: __builtin_ia32_scalefps512_mask (__A, __B, __W, __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Maskz form: __builtin_ia32_scalefps512_mask (__A, __B, _mm512_setzero_ps (), __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d /* Forwards to __builtin_ia32_scalefsd_round (__A, __B, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
+ (__v2df) __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128 /* Forwards to __builtin_ia32_scalefss_round (__A, __B, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Unmasked form: __builtin_ia32_vfmaddpd512_mask (__A, __B, __C, (__mmask8) -1, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Mask form: __builtin_ia32_vfmaddpd512_mask (__A, __B, __C, __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Mask3 form: uses the separate builtin __builtin_ia32_vfmaddpd512_mask3 (__A, __B, __C, __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Maskz form: uses the separate builtin __builtin_ia32_vfmaddpd512_maskz (__A, __B, __C, __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Unmasked form: __builtin_ia32_vfmaddps512_mask (__A, __B, __C, (__mmask16) -1, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Mask form: __builtin_ia32_vfmaddps512_mask (__A, __B, __C, __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Mask3 form: uses the separate builtin __builtin_ia32_vfmaddps512_mask3 (__A, __B, __C, __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Maskz form: uses the separate builtin __builtin_ia32_vfmaddps512_maskz (__A, __B, __C, __U, _MM_FROUND_CUR_DIRECTION).  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Unmasked form: reuses __builtin_ia32_vfmaddpd512_mask with __C negated; mask (__mmask8) -1, _MM_FROUND_CUR_DIRECTION.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Mask form: reuses __builtin_ia32_vfmaddpd512_mask with __C negated; mask __U, _MM_FROUND_CUR_DIRECTION.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Mask3 form: unlike its siblings, calls the dedicated __builtin_ia32_vfmsubpd512_mask3 with __C passed un-negated.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
+{
+ return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Maskz form: reuses __builtin_ia32_vfmaddpd512_maskz with __C negated; mask __U, _MM_FROUND_CUR_DIRECTION.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Unmasked form: reuses __builtin_ia32_vfmaddps512_mask with __C negated; mask (__mmask16) -1, _MM_FROUND_CUR_DIRECTION.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Mask form: reuses __builtin_ia32_vfmaddps512_mask with __C negated; mask __U, _MM_FROUND_CUR_DIRECTION.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
+{
+ return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Unmasked fmaddsub: vfmaddsub mask builtin with an all-ones mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Masked by __U: same builtin as the unmasked form, with the caller's mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* mask3 form: vfmaddsub mask3 builtin; the mask is the last parameter.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
+{
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* maskz form: vfmaddsub maskz builtin; the mask is the first parameter.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Unmasked fmaddsub (single precision): vfmaddsub mask builtin, all-ones mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Masked by __U: same builtin as the unmasked form, with the caller's mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* mask3 form: vfmaddsub mask3 builtin; the mask is the last parameter.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
+{
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* maskz form: vfmaddsub maskz builtin; the mask is the first parameter.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Unmasked fmsubadd: vfmaddsub builtin with __C negated and an all-ones mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Masked by __U: vfmaddsub mask builtin with __C negated.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* mask3 form: dedicated vfmsubadd mask3 builtin; __C is passed un-negated.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
+{
+ return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* maskz form: vfmaddsub maskz builtin with __C negated.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Unmasked fmsubadd (single precision): vfmaddsub builtin with __C negated, all-ones mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Masked by __U: vfmaddsub mask builtin with __C negated.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* mask3 form: dedicated vfmsubadd mask3 builtin; __C is passed un-negated.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
+{
+ return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* maskz form: vfmaddsub maskz builtin with __C negated.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Unmasked fnmadd: vfmadd builtin with __A negated and an all-ones mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Masked by __U: dedicated vfnmadd mask builtin; __A is passed un-negated.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A, /* NOTE(review): presumably __A doubles as the pass-through here, hence no negation -- confirm.  */
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* mask3 form: vfmadd mask3 builtin with __A negated.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* maskz form: vfmadd maskz builtin with __A negated.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Unmasked fnmadd (single precision): vfmadd builtin with __A negated, all-ones mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Masked by __U: dedicated vfnmadd mask builtin; __A is passed un-negated.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* mask3 form: vfmadd mask3 builtin with __A negated.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* maskz form: vfmadd maskz builtin with __A negated.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Unmasked fnmsub: vfmadd builtin with both __A and __C negated, all-ones mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Masked by __U: dedicated vfnmsub mask builtin; operands are passed un-negated.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* mask3 form: dedicated vfnmsub mask3 builtin; operands are passed un-negated.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
+{
+ return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* maskz form: vfmadd maskz builtin with both __A and __C negated.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Unmasked fnmsub (single precision): vfmadd builtin with __A and __C negated, all-ones mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Masked by __U: dedicated vfnmsub mask builtin; operands are passed un-negated.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* mask3 form: dedicated vfnmsub mask3 builtin; operands are passed un-negated.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
+{
+ return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* maskz form: vfmadd maskz builtin with both __A and __C negated.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256i /* Unmasked: cvttpd2dq512 builtin on __A with an undefined second operand and an all-ones mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvttpd_epi32 (__m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256i /* Masked: __W is the second operand (presumably the pass-through -- confirm), mask __U.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256i /* maskz: a zero vector is the second operand, mask __U.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256i /* Unmasked: cvttpd2udq512 builtin on __A with an undefined second operand and an all-ones mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvttpd_epu32 (__m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256i /* Masked: __W is the second operand, mask __U.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256i /* maskz: a zero vector is the second operand, mask __U.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256i /* Unmasked: cvtpd2dq512 builtin on __A with an undefined second operand and an all-ones mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtpd_epi32 (__m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256i /* Masked: __W is the second operand, mask __U.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256i /* maskz: a zero vector is the second operand, mask __U.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256i /* Unmasked: cvtpd2udq512 builtin on __A with an undefined second operand and an all-ones mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtpd_epu32 (__m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256i /* Masked: __W is the second operand, mask __U.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256i /* maskz: a zero vector is the second operand, mask __U.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i /* Unmasked: cvttps2dq512 builtin on __A with an undefined second operand and an all-ones mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvttps_epi32 (__m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i /* Masked: __W is the second operand, mask __U.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i /* maskz: a zero vector is the second operand, mask __U.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i /* Unmasked: cvttps2udq512 builtin on __A with an undefined second operand and an all-ones mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvttps_epu32 (__m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i /* Masked: __W is the second operand, mask __U.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i /* maskz: a zero vector is the second operand, mask __U.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i /* Unmasked: cvtps2dq512 builtin on __A with an undefined second operand and an all-ones mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtps_epi32 (__m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i /* Masked: __W is the second operand, mask __U.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i /* maskz: a zero vector is the second operand, mask __U.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i /* Unmasked: cvtps2udq512 builtin on __A with an undefined second operand and an all-ones mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtps_epu32 (__m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i /* Masked: __W is the second operand, mask __U.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i /* maskz: a zero vector is the second operand, mask __U.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __x86_64__
+extern __inline __m128 /* Passes __A and the unsigned 64-bit __B to the cvtusi2ss64 builtin, current rounding direction.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
+{
+ return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d /* Passes __A and the unsigned 64-bit __B to the cvtusi2sd64 builtin, current rounding direction.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
+{
+ return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+#endif /* __x86_64__ */
+
+extern __inline __m128 /* Passes __A and the unsigned 32-bit __B to the cvtusi2ss32 builtin, current rounding direction.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtu32_ss (__m128 __A, unsigned __B)
+{
+ return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Unmasked: cvtdq2ps512 builtin on __A with an undefined second operand and an all-ones mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi32_ps (__m512i __A)
+{
+ return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Masked: __W is the second operand, mask __U.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
+{
+ return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* maskz: a zero vector is the second operand, mask __U.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
+{
+ return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Unmasked: cvtudq2ps512 builtin on __A with an undefined second operand and an all-ones mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepu32_ps (__m512i __A)
+{
+ return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Masked: __W is the second operand, mask __U.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
+{
+ return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* maskz: a zero vector is the second operand, mask __U.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
+{
+ return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512d /* Unmasked: fixupimmpd512 mask builtin with an all-ones mask; __imm is forwarded unchanged.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
+{
+ return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8di) __C,
+ __imm,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Masked by __U: same builtin as the unmasked form, with the caller's mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
+ __m512i __C, const int __imm)
+{
+ return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8di) __C,
+ __imm,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* maskz form: fixupimmpd512 maskz builtin; the mask is the first parameter.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ __m512i __C, const int __imm)
+{
+ return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
+ (__v8df) __B,
+ (__v8di) __C,
+ __imm,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Unmasked: fixupimmps512 mask builtin with an all-ones mask; __imm is forwarded unchanged.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
+{
+ return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16si) __C,
+ __imm,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Masked by __U: same builtin as the unmasked form, with the caller's mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
+ __m512i __C, const int __imm)
+{
+ return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16si) __C,
+ __imm,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* maskz form: fixupimmps512 maskz builtin; the mask is the first parameter.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ __m512i __C, const int __imm)
+{
+ return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16si) __C,
+ __imm,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d /* Unmasked scalar-double form: fixupimmsd mask builtin with an all-ones mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
+{
+ return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2di) __C, __imm,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d /* Masked by __U: same builtin as the unmasked form, with the caller's mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
+ __m128i __C, const int __imm)
+{
+ return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2di) __C, __imm,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d /* maskz form: fixupimmsd maskz builtin; the mask is the first parameter.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ __m128i __C, const int __imm)
+{
+ return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
+ (__v2df) __B,
+ (__v2di) __C,
+ __imm,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128 /* Unmasked scalar-single form: fixupimmss mask builtin with an all-ones mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
+{
+ return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4si) __C, __imm,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128 /* Masked by __U: same builtin as the unmasked form, with the caller's mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
+ __m128i __C, const int __imm)
+{
+ return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4si) __C, __imm,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128 /* maskz form: fixupimmss maskz builtin; the mask is the first parameter.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ __m128i __C, const int __imm)
+{
+ return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4si) __C, __imm,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+#else
+#define _mm512_fixupimm_pd(X, Y, Z, C) \
+ ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
+ (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
+ (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION)) /* Macro form used when __OPTIMIZE__ is not defined; NOTE(review): presumably so C stays a compile-time immediate -- confirm.  */
+
+#define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
+ ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
+ (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
+ (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) /* Masked by U.  */
+
+#define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
+ ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
+ (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
+ (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) /* maskz builtin; mask is the first macro argument.  */
+
+#define _mm512_fixupimm_ps(X, Y, Z, C) \
+ ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
+ (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
+ (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION)) /* Unmasked: all-ones 16-bit mask.  */
+
+#define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
+ ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
+ (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
+ (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) /* Masked by U.  */
+
+#define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
+ ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
+ (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
+ (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) /* maskz builtin; mask is the first macro argument.  */
+
+#define _mm_fixupimm_sd(X, Y, Z, C) \
+ ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
+ (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION)) /* Unmasked scalar-double form.  */
+
+#define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
+ ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
+ (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) /* Masked by U.  */
+
+#define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
+ ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
+ (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) /* maskz builtin; mask is the first macro argument.  */
+
+#define _mm_fixupimm_ss(X, Y, Z, C) \
+ ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
+ (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION)) /* Unmasked scalar-single form.  */
+
+#define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
+ ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
+ (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) /* Masked by U.  */
+
+#define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
+ ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
+ (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) /* maskz builtin; mask is the first macro argument.  */
+#endif
+
+#ifdef __x86_64__
+extern __inline unsigned long long /* Returns the vcvtss2usi64 builtin result for __A, current rounding direction.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_u64 (__m128 __A)
+{
+ return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
+ __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline unsigned long long /* Same shape as _mm_cvtss_u64 but calls the vcvttss2usi64 builtin.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_u64 (__m128 __A)
+{
+ return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
+ __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline long long /* Signed 64-bit result from the vcvttss2si64 builtin.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_i64 (__m128 __A)
+{
+ return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+#endif /* __x86_64__ */
+
+extern __inline unsigned /* Returns the vcvtss2usi32 builtin result for __A, current rounding direction.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_u32 (__m128 __A)
+{
+ return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline unsigned /* Same shape as _mm_cvtss_u32 but calls the vcvttss2usi32 builtin.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_u32 (__m128 __A)
+{
+ return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline int /* Signed 32-bit result from the vcvttss2si32 builtin.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_i32 (__m128 __A)
+{
+ return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __x86_64__
+extern __inline unsigned long long /* Returns the vcvtsd2usi64 builtin result for __A, current rounding direction.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_u64 (__m128d __A)
+{
+ return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
+ __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline unsigned long long /* Same shape as _mm_cvtsd_u64 but calls the vcvttsd2usi64 builtin.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsd_u64 (__m128d __A)
+{
+ return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
+ __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline long long /* Signed 64-bit result from the vcvttsd2si64 builtin.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsd_i64 (__m128d __A)
+{
+ return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+#endif /* __x86_64__ */
+
+extern __inline unsigned /* Returns the vcvtsd2usi32 builtin result for __A, current rounding direction.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_u32 (__m128d __A)
+{
+ return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline unsigned /* Same shape as _mm_cvtsd_u32 but calls the vcvttsd2usi32 builtin.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsd_u32 (__m128d __A)
+{
+ return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline int /* Signed 32-bit result from the vcvttsd2si32 builtin.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsd_i32 (__m128d __A)
+{
+ return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Unmasked: cvtps2pd512 builtin on the 256-bit __A with an undefined second operand, all-ones mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtps_pd (__m256 __A)
+{
+ return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* Masked: __W is the second operand, mask __U.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
+{
+ return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d /* maskz: a zero vector is the second operand, mask __U.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
+{
+ return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Unmasked: vcvtph2ps512 builtin on __A (cast to __v16hi) with an undefined second operand, all-ones mask.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtph_ps (__m256i __A)
+{
+ return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* Masked: __W is the second operand, mask __U.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
+{
+ return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512 /* maskz: a zero vector is the second operand, mask __U.  */
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
+{
+ return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Unmasked wrapper around __builtin_ia32_cvtpd2ps512_mask: all-ones
+   mask, undefined second operand, current rounding direction.  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtpd_ps (__m512d __A)
+{
+  const __v8sf __merge = (__v8sf) _mm256_undefined_ps ();
+  const __mmask8 __all = (__mmask8) -1;
+
+  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, __merge,
+                                                   __all,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Masked wrapper around __builtin_ia32_cvtpd2ps512_mask: __W is passed
+   as the second operand and __U as the mask.  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
+{
+  const __v8df __src = (__v8df) __A;
+  const __v8sf __merge = (__v8sf) __W;
+
+  return (__m256) __builtin_ia32_cvtpd2ps512_mask (__src, __merge,
+                                                   (__mmask8) __U,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Zero-masked wrapper around __builtin_ia32_cvtpd2ps512_mask: the
+   second operand is _mm256_setzero_ps () and __U is the mask.  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
+{
+  const __v8df __src = (__v8df) __A;
+  const __v8sf __zero = (__v8sf) _mm256_setzero_ps ();
+
+  return (__m256) __builtin_ia32_cvtpd2ps512_mask (__src, __zero,
+                                                   (__mmask8) __U,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+/* Unmasked wrapper around __builtin_ia32_getexpps512_mask: all-ones
+   mask, undefined second operand, current rounding direction.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getexp_ps (__m512 __A)
+{
+  const __v16sf __merge = (__v16sf) _mm512_undefined_ps ();
+  const __mmask16 __all = (__mmask16) -1;
+
+  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, __merge,
+                                                   __all,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Masked wrapper around __builtin_ia32_getexpps512_mask: __W is passed
+   as the second operand and __U as the mask.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+  const __v16sf __src = (__v16sf) __A;
+  const __v16sf __merge = (__v16sf) __W;
+
+  return (__m512) __builtin_ia32_getexpps512_mask (__src, __merge,
+                                                   (__mmask16) __U,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Zero-masked wrapper around __builtin_ia32_getexpps512_mask: the
+   second operand is _mm512_setzero_ps () and __U is the mask.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
+{
+  const __v16sf __src = (__v16sf) __A;
+  const __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+
+  return (__m512) __builtin_ia32_getexpps512_mask (__src, __zero,
+                                                   (__mmask16) __U,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Unmasked wrapper around __builtin_ia32_getexppd512_mask: all-ones
+   mask, undefined second operand, current rounding direction.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getexp_pd (__m512d __A)
+{
+  const __v8df __merge = (__v8df) _mm512_undefined_pd ();
+  const __mmask8 __all = (__mmask8) -1;
+
+  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, __merge,
+                                                    __all,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Masked wrapper around __builtin_ia32_getexppd512_mask: __W is passed
+   as the second operand and __U as the mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+  const __v8df __src = (__v8df) __A;
+  const __v8df __merge = (__v8df) __W;
+
+  return (__m512d) __builtin_ia32_getexppd512_mask (__src, __merge,
+                                                    (__mmask8) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Zero-masked wrapper around __builtin_ia32_getexppd512_mask: the
+   second operand is _mm512_setzero_pd () and __U is the mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
+{
+  const __v8df __src = (__v8df) __A;
+  const __v8df __zero = (__v8df) _mm512_setzero_pd ();
+
+  return (__m512d) __builtin_ia32_getexppd512_mask (__src, __zero,
+                                                    (__mmask8) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Forward __A and __B to __builtin_ia32_getexpss128_round using the
+   current rounding direction.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getexp_ss (__m128 __A, __m128 __B)
+{
+  const __v4sf __first = (__v4sf) __A;
+  const __v4sf __second = (__v4sf) __B;
+
+  return (__m128) __builtin_ia32_getexpss128_round (__first, __second,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Forward __A and __B to __builtin_ia32_getexpsd128_round using the
+   current rounding direction.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getexp_sd (__m128d __A, __m128d __B)
+{
+  const __v2df __first = (__v2df) __A;
+  const __v2df __second = (__v2df) __B;
+
+  return (__m128d) __builtin_ia32_getexpsd128_round (__first, __second,
+                                                     _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Unmasked wrapper around __builtin_ia32_getmantpd512_mask.  The
+   immediate operand is built as (__C << 2) | __B; the mask is all ones
+   and the third operand is undefined.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
+                   _MM_MANTISSA_SIGN_ENUM __C)
+{
+  const __v8df __src = (__v8df) __A;
+
+  return (__m512d) __builtin_ia32_getmantpd512_mask (__src,
+                                                     (__C << 2) | __B,
+                                                     _mm512_undefined_pd (),
+                                                     (__mmask8) -1,
+                                                     _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Masked wrapper around __builtin_ia32_getmantpd512_mask.  The
+   immediate operand is (__C << 2) | __B; __W is the third operand and
+   __U the mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
+                        _MM_MANTISSA_NORM_ENUM __B,
+                        _MM_MANTISSA_SIGN_ENUM __C)
+{
+  const __v8df __src = (__v8df) __A;
+  const __v8df __merge = (__v8df) __W;
+
+  return (__m512d) __builtin_ia32_getmantpd512_mask (__src,
+                                                     (__C << 2) | __B,
+                                                     __merge, __U,
+                                                     _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Zero-masked wrapper around __builtin_ia32_getmantpd512_mask.  The
+   immediate operand is (__C << 2) | __B; the third operand is
+   _mm512_setzero_pd () and __U is the mask.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
+                         _MM_MANTISSA_NORM_ENUM __B,
+                         _MM_MANTISSA_SIGN_ENUM __C)
+{
+  const __v8df __src = (__v8df) __A;
+  const __v8df __zero = (__v8df) _mm512_setzero_pd ();
+
+  return (__m512d) __builtin_ia32_getmantpd512_mask (__src,
+                                                     (__C << 2) | __B,
+                                                     __zero, __U,
+                                                     _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Unmasked wrapper around __builtin_ia32_getmantps512_mask.  The
+   immediate operand is built as (__C << 2) | __B; the mask is all ones
+   and the third operand is undefined.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
+                   _MM_MANTISSA_SIGN_ENUM __C)
+{
+  const __v16sf __src = (__v16sf) __A;
+
+  return (__m512) __builtin_ia32_getmantps512_mask (__src,
+                                                    (__C << 2) | __B,
+                                                    _mm512_undefined_ps (),
+                                                    (__mmask16) -1,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Masked wrapper around __builtin_ia32_getmantps512_mask.  The
+   immediate operand is (__C << 2) | __B; __W is the third operand and
+   __U the mask.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
+                        _MM_MANTISSA_NORM_ENUM __B,
+                        _MM_MANTISSA_SIGN_ENUM __C)
+{
+  const __v16sf __src = (__v16sf) __A;
+  const __v16sf __merge = (__v16sf) __W;
+
+  return (__m512) __builtin_ia32_getmantps512_mask (__src,
+                                                    (__C << 2) | __B,
+                                                    __merge, __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Zero-masked wrapper around __builtin_ia32_getmantps512_mask.  The
+   immediate operand is (__C << 2) | __B; the third operand is
+   _mm512_setzero_ps () and __U is the mask.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
+                         _MM_MANTISSA_NORM_ENUM __B,
+                         _MM_MANTISSA_SIGN_ENUM __C)
+{
+  const __v16sf __src = (__v16sf) __A;
+  const __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+
+  return (__m512) __builtin_ia32_getmantps512_mask (__src,
+                                                    (__C << 2) | __B,
+                                                    __zero, __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Forward __A, __B and the immediate (__D << 2) | __C to
+   __builtin_ia32_getmantsd_round with the current rounding direction.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
+                _MM_MANTISSA_SIGN_ENUM __D)
+{
+  const __v2df __first = (__v2df) __A;
+  const __v2df __second = (__v2df) __B;
+
+  return (__m128d) __builtin_ia32_getmantsd_round (__first, __second,
+                                                   (__D << 2) | __C,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Forward __A, __B and the immediate (__D << 2) | __C to
+   __builtin_ia32_getmantss_round with the current rounding direction.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
+                _MM_MANTISSA_SIGN_ENUM __D)
+{
+  const __v4sf __first = (__v4sf) __A;
+  const __v4sf __second = (__v4sf) __B;
+
+  return (__m128) __builtin_ia32_getmantss_round (__first, __second,
+                                                  (__D << 2) | __C,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+#else
+/* Macro forms of the packed-double getmant intrinsics, used when
+   __OPTIMIZE__ is not defined.  The immediate is ((C) << 2) | (B).  */
+#define _mm512_getmant_pd(X, B, C) \
+  ((__m512d)__builtin_ia32_getmantpd512_mask ( \
+      (__v8df)(__m512d)(X), \
+      (int)(((C)<<2) | (B)), \
+      (__v8df)_mm512_undefined_pd(), \
+      (__mmask8)-1, \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_getmant_pd(W, U, X, B, C) \
+  ((__m512d)__builtin_ia32_getmantpd512_mask ( \
+      (__v8df)(__m512d)(X), \
+      (int)(((C)<<2) | (B)), \
+      (__v8df)(__m512d)(W), \
+      (__mmask8)(U), \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_getmant_pd(U, X, B, C) \
+  ((__m512d)__builtin_ia32_getmantpd512_mask ( \
+      (__v8df)(__m512d)(X), \
+      (int)(((C)<<2) | (B)), \
+      (__v8df)_mm512_setzero_pd(), \
+      (__mmask8)(U), \
+      _MM_FROUND_CUR_DIRECTION))
+/* Macro forms of the packed-single getmant intrinsics, used when
+   __OPTIMIZE__ is not defined.  The immediate is ((C) << 2) | (B).  */
+#define _mm512_getmant_ps(X, B, C) \
+  ((__m512)__builtin_ia32_getmantps512_mask ( \
+      (__v16sf)(__m512)(X), \
+      (int)(((C)<<2) | (B)), \
+      (__v16sf)_mm512_undefined_ps(), \
+      (__mmask16)-1, \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_getmant_ps(W, U, X, B, C) \
+  ((__m512)__builtin_ia32_getmantps512_mask ( \
+      (__v16sf)(__m512)(X), \
+      (int)(((C)<<2) | (B)), \
+      (__v16sf)(__m512)(W), \
+      (__mmask16)(U), \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_getmant_ps(U, X, B, C) \
+  ((__m512)__builtin_ia32_getmantps512_mask ( \
+      (__v16sf)(__m512)(X), \
+      (int)(((C)<<2) | (B)), \
+      (__v16sf)_mm512_setzero_ps(), \
+      (__mmask16)(U), \
+      _MM_FROUND_CUR_DIRECTION))
+/* Macro forms of the scalar getmant intrinsics, used when __OPTIMIZE__
+   is not defined.  The immediate is ((D) << 2) | (C).  */
+#define _mm_getmant_sd(X, Y, C, D) \
+  ((__m128d)__builtin_ia32_getmantsd_round ( \
+      (__v2df)(__m128d)(X), \
+      (__v2df)(__m128d)(Y), \
+      (int)(((D)<<2) | (C)), \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_getmant_ss(X, Y, C, D) \
+  ((__m128)__builtin_ia32_getmantss_round ( \
+      (__v4sf)(__m128)(X), \
+      (__v4sf)(__m128)(Y), \
+      (int)(((D)<<2) | (C)), \
+      _MM_FROUND_CUR_DIRECTION))
+
+/* Macro form of _mm_getexp_ss, used when __OPTIMIZE__ is not defined.
+   It must call the same three-operand builtin as the inline version
+   (__builtin_ia32_getexpss128_round); the previous `_mask' spelling did
+   not match that (A, B, rounding) argument list.  */
+#define _mm_getexp_ss(A, B) \
+  ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), \
+                                            (__v4sf)(__m128)(B), \
+                                            _MM_FROUND_CUR_DIRECTION))
+
+/* Macro form of _mm_getexp_sd, used when __OPTIMIZE__ is not defined.
+   It must call the same three-operand builtin as the inline version
+   (__builtin_ia32_getexpsd128_round); the previous `_mask' spelling did
+   not match that (A, B, rounding) argument list.  */
+#define _mm_getexp_sd(A, B) \
+  ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), \
+                                             (__v2df)(__m128d)(B), \
+                                             _MM_FROUND_CUR_DIRECTION))
+
+/* Macro forms of the packed-single getexp intrinsics, used when
+   __OPTIMIZE__ is not defined.  */
+#define _mm512_getexp_ps(A) \
+  ((__m512)__builtin_ia32_getexpps512_mask( \
+      (__v16sf)(__m512)(A), \
+      (__v16sf)_mm512_undefined_ps(), \
+      (__mmask16)-1, \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_getexp_ps(W, U, A) \
+  ((__m512)__builtin_ia32_getexpps512_mask( \
+      (__v16sf)(__m512)(A), \
+      (__v16sf)(__m512)(W), \
+      (__mmask16)(U), \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_getexp_ps(U, A) \
+  ((__m512)__builtin_ia32_getexpps512_mask( \
+      (__v16sf)(__m512)(A), \
+      (__v16sf)_mm512_setzero_ps(), \
+      (__mmask16)(U), \
+      _MM_FROUND_CUR_DIRECTION))
+
+/* Macro forms of the packed-double getexp intrinsics, used when
+   __OPTIMIZE__ is not defined.  */
+#define _mm512_getexp_pd(A) \
+  ((__m512d)__builtin_ia32_getexppd512_mask( \
+      (__v8df)(__m512d)(A), \
+      (__v8df)_mm512_undefined_pd(), \
+      (__mmask8)-1, \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_getexp_pd(W, U, A) \
+  ((__m512d)__builtin_ia32_getexppd512_mask( \
+      (__v8df)(__m512d)(A), \
+      (__v8df)(__m512d)(W), \
+      (__mmask8)(U), \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_getexp_pd(U, A) \
+  ((__m512d)__builtin_ia32_getexppd512_mask( \
+      (__v8df)(__m512d)(A), \
+      (__v8df)_mm512_setzero_pd(), \
+      (__mmask8)(U), \
+      _MM_FROUND_CUR_DIRECTION))
+#endif
+
+#ifdef __OPTIMIZE__
+/* Unmasked wrapper around __builtin_ia32_rndscaleps_mask: immediate
+   __imm, undefined third operand, mask -1, current rounding direction.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_roundscale_ps (__m512 __A, const int __imm)
+{
+  const __v16sf __src = (__v16sf) __A;
+  const __v16sf __merge = (__v16sf) _mm512_undefined_ps ();
+
+  return (__m512) __builtin_ia32_rndscaleps_mask (__src, __imm, __merge,
+                                                  -1,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Masked wrapper around __builtin_ia32_rndscaleps_mask: __C is the
+   source, __A the third operand, __B the mask and __imm the immediate.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
+                           const int __imm)
+{
+  const __v16sf __src = (__v16sf) __C;
+  const __v16sf __merge = (__v16sf) __A;
+
+  return (__m512) __builtin_ia32_rndscaleps_mask (__src, __imm, __merge,
+                                                  (__mmask16) __B,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Zero-masked wrapper around __builtin_ia32_rndscaleps_mask: __B is
+   the source, the third operand is _mm512_setzero_ps (), __A is the
+   mask and __imm the immediate.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
+{
+  const __v16sf __src = (__v16sf) __B;
+  const __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+
+  return (__m512) __builtin_ia32_rndscaleps_mask (__src, __imm, __zero,
+                                                  (__mmask16) __A,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Unmasked wrapper around __builtin_ia32_rndscalepd_mask: immediate
+   __imm, undefined third operand, mask -1, current rounding direction.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_roundscale_pd (__m512d __A, const int __imm)
+{
+  const __v8df __src = (__v8df) __A;
+  const __v8df __merge = (__v8df) _mm512_undefined_pd ();
+
+  return (__m512d) __builtin_ia32_rndscalepd_mask (__src, __imm, __merge,
+                                                   -1,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Masked wrapper around __builtin_ia32_rndscalepd_mask: __C is the
+   source, __A the third operand, __B the mask and __imm the immediate.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
+                           const int __imm)
+{
+  const __v8df __src = (__v8df) __C;
+  const __v8df __merge = (__v8df) __A;
+
+  return (__m512d) __builtin_ia32_rndscalepd_mask (__src, __imm, __merge,
+                                                   (__mmask8) __B,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Zero-masked wrapper around __builtin_ia32_rndscalepd_mask: __B is
+   the source, the third operand is _mm512_setzero_pd (), __A is the
+   mask and __imm the immediate.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
+{
+  const __v8df __src = (__v8df) __B;
+  const __v8df __zero = (__v8df) _mm512_setzero_pd ();
+
+  return (__m512d) __builtin_ia32_rndscalepd_mask (__src, __imm, __zero,
+                                                   (__mmask8) __A,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Forward __A, __B and the immediate __imm to
+   __builtin_ia32_rndscaless_round with the current rounding direction.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
+{
+  const __v4sf __first = (__v4sf) __A;
+  const __v4sf __second = (__v4sf) __B;
+
+  return (__m128) __builtin_ia32_rndscaless_round (__first, __second, __imm,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Forward __A, __B and the immediate __imm to
+   __builtin_ia32_rndscalesd_round with the current rounding direction.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
+{
+  const __v2df __first = (__v2df) __A;
+  const __v2df __second = (__v2df) __B;
+
+  return (__m128d) __builtin_ia32_rndscalesd_round (__first, __second, __imm,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+#else
+/* Macro forms of the packed-single roundscale intrinsics, used when
+   __OPTIMIZE__ is not defined.  */
+#define _mm512_roundscale_ps(A, B) \
+  ((__m512) __builtin_ia32_rndscaleps_mask ( \
+      (__v16sf)(__m512)(A), \
+      (int)(B), \
+      (__v16sf)_mm512_undefined_ps(), \
+      (__mmask16)(-1), \
+      _MM_FROUND_CUR_DIRECTION))
+#define _mm512_mask_roundscale_ps(A, B, C, D) \
+  ((__m512) __builtin_ia32_rndscaleps_mask ( \
+      (__v16sf)(__m512)(C), \
+      (int)(D), \
+      (__v16sf)(__m512)(A), \
+      (__mmask16)(B), \
+      _MM_FROUND_CUR_DIRECTION))
+#define _mm512_maskz_roundscale_ps(A, B, C) \
+  ((__m512) __builtin_ia32_rndscaleps_mask ( \
+      (__v16sf)(__m512)(B), \
+      (int)(C), \
+      (__v16sf)_mm512_setzero_ps(), \
+      (__mmask16)(A), \
+      _MM_FROUND_CUR_DIRECTION))
+/* Macro forms of the packed-double roundscale intrinsics, used when
+   __OPTIMIZE__ is not defined.  */
+#define _mm512_roundscale_pd(A, B) \
+  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
+      (__v8df)(__m512d)(A), \
+      (int)(B), \
+      (__v8df)_mm512_undefined_pd(), \
+      (__mmask8)(-1), \
+      _MM_FROUND_CUR_DIRECTION))
+#define _mm512_mask_roundscale_pd(A, B, C, D) \
+  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
+      (__v8df)(__m512d)(C), \
+      (int)(D), \
+      (__v8df)(__m512d)(A), \
+      (__mmask8)(B), \
+      _MM_FROUND_CUR_DIRECTION))
+#define _mm512_maskz_roundscale_pd(A, B, C) \
+  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
+      (__v8df)(__m512d)(B), \
+      (int)(C), \
+      (__v8df)_mm512_setzero_pd(), \
+      (__mmask8)(A), \
+      _MM_FROUND_CUR_DIRECTION))
+/* Macro forms of the scalar roundscale intrinsics, used when
+   __OPTIMIZE__ is not defined.  */
+#define _mm_roundscale_ss(A, B, C) \
+  ((__m128) __builtin_ia32_rndscaless_round ( \
+      (__v4sf)(__m128)(A), \
+      (__v4sf)(__m128)(B), \
+      (int)(C), \
+      _MM_FROUND_CUR_DIRECTION))
+#define _mm_roundscale_sd(A, B, C) \
+  ((__m128d) __builtin_ia32_rndscalesd_round ( \
+      (__v2df)(__m128d)(A), \
+      (__v2df)(__m128d)(B), \
+      (int)(C), \
+      _MM_FROUND_CUR_DIRECTION))
+#endif
+
+#ifdef __OPTIMIZE__
+/* Unmasked wrapper around __builtin_ia32_cmppd512_mask: operands __X
+   and __Y, predicate immediate __P, all-ones input mask.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
+{
+  const __v8df __lhs = (__v8df) __X;
+  const __v8df __rhs = (__v8df) __Y;
+
+  return (__mmask8) __builtin_ia32_cmppd512_mask (__lhs, __rhs, __P,
+                                                  (__mmask8) -1,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Unmasked wrapper around __builtin_ia32_cmpps512_mask: operands __X
+   and __Y, predicate immediate __P, all-ones input mask.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
+{
+  const __v16sf __lhs = (__v16sf) __X;
+  const __v16sf __rhs = (__v16sf) __Y;
+
+  return (__mmask16) __builtin_ia32_cmpps512_mask (__lhs, __rhs, __P,
+                                                   (__mmask16) -1,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Masked wrapper around __builtin_ia32_cmpps512_mask: operands __X and
+   __Y, predicate immediate __P, input mask __U.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
+                         const int __P)
+{
+  const __v16sf __lhs = (__v16sf) __X;
+  const __v16sf __rhs = (__v16sf) __Y;
+
+  return (__mmask16) __builtin_ia32_cmpps512_mask (__lhs, __rhs, __P,
+                                                   (__mmask16) __U,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Masked wrapper around __builtin_ia32_cmppd512_mask: operands __X and
+   __Y, predicate immediate __P, input mask __U.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
+                         const int __P)
+{
+  const __v8df __lhs = (__v8df) __X;
+  const __v8df __rhs = (__v8df) __Y;
+
+  return (__mmask8) __builtin_ia32_cmppd512_mask (__lhs, __rhs, __P,
+                                                  (__mmask8) __U,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Unmasked wrapper around __builtin_ia32_cmpsd_mask: operands __X and
+   __Y, predicate immediate __P, all-ones input mask.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
+{
+  const __v2df __lhs = (__v2df) __X;
+  const __v2df __rhs = (__v2df) __Y;
+
+  return (__mmask8) __builtin_ia32_cmpsd_mask (__lhs, __rhs, __P,
+                                               (__mmask8) -1,
+                                               _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Masked wrapper around __builtin_ia32_cmpsd_mask: operands __X and
+   __Y, predicate immediate __P, input mask __M.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
+                      const int __P)
+{
+  const __v2df __lhs = (__v2df) __X;
+  const __v2df __rhs = (__v2df) __Y;
+
+  return (__mmask8) __builtin_ia32_cmpsd_mask (__lhs, __rhs, __P,
+                                               (__mmask8) __M,
+                                               _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Unmasked wrapper around __builtin_ia32_cmpss_mask: operands __X and
+   __Y, predicate immediate __P, all-ones input mask.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
+{
+  const __v4sf __lhs = (__v4sf) __X;
+  const __v4sf __rhs = (__v4sf) __Y;
+
+  return (__mmask8) __builtin_ia32_cmpss_mask (__lhs, __rhs, __P,
+                                               (__mmask8) -1,
+                                               _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Masked wrapper around __builtin_ia32_cmpss_mask: operands __X and
+   __Y, predicate immediate __P, input mask __M.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
+                      const int __P)
+{
+  const __v4sf __lhs = (__v4sf) __X;
+  const __v4sf __rhs = (__v4sf) __Y;
+
+  return (__mmask8) __builtin_ia32_cmpss_mask (__lhs, __rhs, __P,
+                                               (__mmask8) __M,
+                                               _MM_FROUND_CUR_DIRECTION);
+}
+
+#else
+/* Macro forms of the unmasked 512-bit compare intrinsics, used when
+   __OPTIMIZE__ is not defined.  */
+#define _mm512_cmp_pd_mask(X, Y, P) \
+  ((__mmask8) __builtin_ia32_cmppd512_mask ( \
+      (__v8df)(__m512d)(X), \
+      (__v8df)(__m512d)(Y), \
+      (int)(P), \
+      (__mmask8)-1, \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_cmp_ps_mask(X, Y, P) \
+  ((__mmask16) __builtin_ia32_cmpps512_mask ( \
+      (__v16sf)(__m512)(X), \
+      (__v16sf)(__m512)(Y), \
+      (int)(P), \
+      (__mmask16)-1, \
+      _MM_FROUND_CUR_DIRECTION))
+
+/* Macro form of _mm512_mask_cmp_pd_mask, used when __OPTIMIZE__ is not
+   defined.  M is parenthesized before the cast so that a compound
+   argument (for example `c ? a : b') is converted as a whole instead of
+   having only its first operand narrowed to __mmask8.  */
+#define _mm512_mask_cmp_pd_mask(M, X, Y, P) \
+  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
+                                            (__v8df)(__m512d)(Y), (int)(P),\
+                                            (__mmask8)(M), \
+                                            _MM_FROUND_CUR_DIRECTION))
+
+/* Macro form of _mm512_mask_cmp_ps_mask, used when __OPTIMIZE__ is not
+   defined.  M is parenthesized before the cast so that a compound
+   argument (for example `c ? a : b') is converted as a whole instead of
+   having only its first operand narrowed to __mmask16.  */
+#define _mm512_mask_cmp_ps_mask(M, X, Y, P) \
+  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
+                                             (__v16sf)(__m512)(Y), (int)(P),\
+                                             (__mmask16)(M), \
+                                             _MM_FROUND_CUR_DIRECTION))
+
+/* Macro form of _mm_cmp_sd_mask, used when __OPTIMIZE__ is not
+   defined: all-ones input mask.  */
+#define _mm_cmp_sd_mask(X, Y, P) \
+  ((__mmask8) __builtin_ia32_cmpsd_mask ( \
+      (__v2df)(__m128d)(X), \
+      (__v2df)(__m128d)(Y), \
+      (int)(P), \
+      (__mmask8)-1, \
+      _MM_FROUND_CUR_DIRECTION))
+
+/* Macro form of _mm_mask_cmp_sd_mask, used when __OPTIMIZE__ is not
+   defined: M is forwarded as the input mask.  */
+#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
+  ((__mmask8) __builtin_ia32_cmpsd_mask ( \
+      (__v2df)(__m128d)(X), \
+      (__v2df)(__m128d)(Y), \
+      (int)(P), \
+      (M), \
+      _MM_FROUND_CUR_DIRECTION))
+
+/* Macro form of _mm_cmp_ss_mask, used when __OPTIMIZE__ is not
+   defined: all-ones input mask.  */
+#define _mm_cmp_ss_mask(X, Y, P) \
+  ((__mmask8) __builtin_ia32_cmpss_mask ( \
+      (__v4sf)(__m128)(X), \
+      (__v4sf)(__m128)(Y), \
+      (int)(P), \
+      (__mmask8)-1, \
+      _MM_FROUND_CUR_DIRECTION))
+
+/* Macro form of _mm_mask_cmp_ss_mask, used when __OPTIMIZE__ is not
+   defined: M is forwarded as the input mask.  */
+#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
+  ((__mmask8) __builtin_ia32_cmpss_mask ( \
+      (__v4sf)(__m128)(X), \
+      (__v4sf)(__m128)(Y), \
+      (int)(P), \
+      (M), \
+      _MM_FROUND_CUR_DIRECTION))
+#endif
+
+/* Pass the 16-bit mask __A through __builtin_ia32_kmov16 and return
+   the builtin's result.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_kmov (__mmask16 __A)
+{
+  const __mmask16 __result = __builtin_ia32_kmov16 (__A);
+
+  return __result;
+}
+
+#ifdef __DISABLE_AVX512F__
+#undef __DISABLE_AVX512F__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512F__ */
+
+#endif /* _AVX512FINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/avx512pfintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/avx512pfintrin.h
new file mode 100644
index 0000000..bc7598e
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/avx512pfintrin.h
@@ -0,0 +1,212 @@
+/* Copyright (C) 2013-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512pfintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512PFINTRIN_H_INCLUDED
+#define _AVX512PFINTRIN_H_INCLUDED
+
+#ifndef __AVX512PF__
+#pragma GCC push_options
+#pragma GCC target("avx512pf")
+#define __DISABLE_AVX512PF__
+#endif /* __AVX512PF__ */
+
+/* Internal data types for implementing the intrinsics. */
+typedef long long __v8di __attribute__ ((__vector_size__ (64))); /* 64-byte vector of long long */
+typedef int __v16si __attribute__ ((__vector_size__ (64))); /* 64-byte vector of int */
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__)); /* public 64-byte integer vector type */
+
+typedef unsigned char __mmask8; /* mask type held in an unsigned char */
+typedef unsigned short __mmask16; /* mask type held in an unsigned short */
+
+#ifdef __OPTIMIZE__
+/* Forward a masked gather-prefetch request to __builtin_ia32_gatherpfdpd.
+   __index is reinterpreted as __v8si, __addr as a pointer to const
+   long long; __mask, __scale and __hint are passed through unchanged.
+   Parameters use reserved (double-underscore) names so that a user
+   macro called `index', `mask', `addr', `scale' or `hint' cannot break
+   this header.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask,
+                                   void *__addr, int __scale, int __hint)
+{
+  __builtin_ia32_gatherpfdpd (__mask, (__v8si) __index,
+                              (long long const *) __addr, __scale, __hint);
+}
+
+/* Forward a masked gather-prefetch request to __builtin_ia32_gatherpfdps.
+   __index is reinterpreted as __v16si, __addr as a pointer to const int;
+   __mask, __scale and __hint are passed through unchanged.
+   Parameters use reserved (double-underscore) names so that user macros
+   such as `index' or `mask' cannot break this header.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_prefetch_i32gather_ps (__m512i __index, __mmask16 __mask,
+                                   void *__addr, int __scale, int __hint)
+{
+  __builtin_ia32_gatherpfdps (__mask, (__v16si) __index,
+                              (int const *) __addr, __scale, __hint);
+}
+
+/* Forward a masked gather-prefetch request to __builtin_ia32_gatherpfqpd.
+   __index is reinterpreted as __v8di, __addr as a pointer to const
+   long long; __mask, __scale and __hint are passed through unchanged.
+   Parameters use reserved (double-underscore) names so that user macros
+   such as `index' or `mask' cannot break this header.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_prefetch_i64gather_pd (__m512i __index, __mmask8 __mask,
+                                   void *__addr, int __scale, int __hint)
+{
+  __builtin_ia32_gatherpfqpd (__mask, (__v8di) __index,
+                              (long long const *) __addr, __scale, __hint);
+}
+
+/* Forward a masked gather-prefetch request to __builtin_ia32_gatherpfqps.
+   __index is reinterpreted as __v8di, __addr as a pointer to const int;
+   __mask, __scale and __hint are passed through unchanged.
+   Parameters use reserved (double-underscore) names so that user macros
+   such as `index' or `mask' cannot break this header.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_prefetch_i64gather_ps (__m512i __index, __mmask8 __mask,
+                                   void *__addr, int __scale, int __hint)
+{
+  __builtin_ia32_gatherpfqps (__mask, (__v8di) __index,
+                              (int const *) __addr, __scale, __hint);
+}
+
+/* Unmasked scatter-prefetch: calls __builtin_ia32_scatterpfdpd with the
+   constant mask 0xFF.  __index is reinterpreted as __v8si, __addr as a
+   pointer to const long long; __scale and __hint pass through unchanged.
+   Parameters use reserved (double-underscore) names so that user macros
+   such as `index' or `addr' cannot break this header.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_prefetch_i32scatter_pd (void *__addr, __m256i __index, int __scale,
+                               int __hint)
+{
+  __builtin_ia32_scatterpfdpd ((__mmask8) 0xFF, (__v8si) __index,
+                               (long long const *) __addr, __scale, __hint);
+}
+
+/* Unmasked scatter-prefetch: calls __builtin_ia32_scatterpfdps with the
+   constant mask 0xFFFF.  __index is reinterpreted as __v16si, __addr as
+   a pointer to const int; __scale and __hint pass through unchanged.
+   Parameters use reserved (double-underscore) names so that user macros
+   such as `index' or `addr' cannot break this header.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_prefetch_i32scatter_ps (void *__addr, __m512i __index, int __scale,
+                               int __hint)
+{
+  __builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF, (__v16si) __index,
+                               (int const *) __addr, __scale, __hint);
+}
+
+/* Masked scatter-prefetch: forwards __mask to
+   __builtin_ia32_scatterpfdpd.  __index is reinterpreted as __v8si,
+   __addr as a pointer to const long long; __scale and __hint pass
+   through unchanged.  Parameters use reserved (double-underscore) names
+   so that user macros such as `index' or `mask' cannot break this
+   header.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_prefetch_i32scatter_pd (void *__addr, __mmask8 __mask,
+                                    __m256i __index, int __scale, int __hint)
+{
+  __builtin_ia32_scatterpfdpd (__mask, (__v8si) __index,
+                               (long long const *) __addr, __scale, __hint);
+}
+
+/* Masked scatter-prefetch: forwards __mask to
+   __builtin_ia32_scatterpfdps.  __index is reinterpreted as __v16si,
+   __addr as a pointer to const int; __scale and __hint pass through
+   unchanged.  Parameters use reserved (double-underscore) names so that
+   user macros such as `index' or `mask' cannot break this header.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_prefetch_i32scatter_ps (void *__addr, __mmask16 __mask,
+                                    __m512i __index, int __scale, int __hint)
+{
+  __builtin_ia32_scatterpfdps (__mask, (__v16si) __index,
+                               (int const *) __addr, __scale, __hint);
+}
+
+/* Unmasked scatter-prefetch: calls __builtin_ia32_scatterpfqpd with the
+   constant mask 0xFF.  __index is reinterpreted as __v8di, __addr as a
+   pointer to const long long; __scale and __hint pass through unchanged.
+   Parameters use reserved (double-underscore) names so that user macros
+   such as `index' or `addr' cannot break this header.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_prefetch_i64scatter_pd (void *__addr, __m512i __index, int __scale,
+                               int __hint)
+{
+  __builtin_ia32_scatterpfqpd ((__mmask8) 0xFF, (__v8di) __index,
+                               (long long const *) __addr, __scale, __hint);
+}
+
+/* Unmasked scatter-prefetch: calls __builtin_ia32_scatterpfqps with the
+   constant mask 0xFF.  __index is reinterpreted as __v8di, __addr as a
+   pointer to const int; __scale and __hint pass through unchanged.
+   Parameters use reserved (double-underscore) names so that user macros
+   such as `index' or `addr' cannot break this header.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_prefetch_i64scatter_ps (void *__addr, __m512i __index, int __scale,
+                               int __hint)
+{
+  __builtin_ia32_scatterpfqps ((__mmask8) 0xFF, (__v8di) __index,
+                               (int const *) __addr, __scale, __hint);
+}
+
+/* Masked scatter-prefetch: forwards __mask to
+   __builtin_ia32_scatterpfqpd.  __index is reinterpreted as __v8di,
+   __addr as a pointer to const long long; __scale and __hint pass
+   through unchanged.  Parameters use reserved (double-underscore) names
+   so that user macros such as `index' or `mask' cannot break this
+   header.
+   NOTE(review): the parameter is declared __mmask16 while the macro
+   form of this intrinsic casts its mask to __mmask8; the declared type
+   is kept for source compatibility and the narrowing is made explicit
+   here -- confirm the intended width.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_prefetch_i64scatter_pd (void *__addr, __mmask16 __mask,
+                                    __m512i __index, int __scale, int __hint)
+{
+  __builtin_ia32_scatterpfqpd ((__mmask8) __mask, (__v8di) __index,
+                               (long long const *) __addr, __scale, __hint);
+}
+
+/* Masked scatter-prefetch: forwards __mask to
+   __builtin_ia32_scatterpfqps.  __index is reinterpreted as __v8di,
+   __addr as a pointer to const int; __scale and __hint pass through
+   unchanged.  Parameters use reserved (double-underscore) names so that
+   user macros such as `index' or `mask' cannot break this header.
+   NOTE(review): the parameter is declared __mmask16 while the macro
+   form of this intrinsic casts its mask to __mmask8; the declared type
+   is kept for source compatibility and the narrowing is made explicit
+   here -- confirm the intended width.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_prefetch_i64scatter_ps (void *__addr, __mmask16 __mask,
+                                    __m512i __index, int __scale, int __hint)
+{
+  __builtin_ia32_scatterpfqps ((__mmask8) __mask, (__v8di) __index,
+                               (int const *) __addr, __scale, __hint);
+}
+
+#else
+/* Macro forms of the gather-prefetch intrinsics, used when __OPTIMIZE__
+   is not defined.  Every macro argument is parenthesized before it is
+   cast: without that, an argument such as `base + 1' for ADDR would
+   have only `base' converted and the offset applied to the converted
+   pointer, unlike the inline-function forms.  */
+#define _mm512_mask_prefetch_i32gather_pd(INDEX, MASK, ADDR, SCALE, HINT) \
+  __builtin_ia32_gatherpfdpd ((__mmask8)(MASK), (__v8si)(__m256i)(INDEX), \
+                              (long long const *)(ADDR), (int)(SCALE), \
+                              (int)(HINT))
+
+#define _mm512_mask_prefetch_i32gather_ps(INDEX, MASK, ADDR, SCALE, HINT) \
+  __builtin_ia32_gatherpfdps ((__mmask16)(MASK), (__v16si)(__m512i)(INDEX), \
+                              (int const *)(ADDR), (int)(SCALE), \
+                              (int)(HINT))
+
+#define _mm512_mask_prefetch_i64gather_pd(INDEX, MASK, ADDR, SCALE, HINT) \
+  __builtin_ia32_gatherpfqpd ((__mmask8)(MASK), (__v8di)(__m512i)(INDEX), \
+                              (long long const *)(ADDR), (int)(SCALE), \
+                              (int)(HINT))
+
+#define _mm512_mask_prefetch_i64gather_ps(INDEX, MASK, ADDR, SCALE, HINT) \
+  __builtin_ia32_gatherpfqps ((__mmask8)(MASK), (__v8di)(__m512i)(INDEX), \
+                              (int const *)(ADDR), (int)(SCALE), \
+                              (int)(HINT))
+
+/* Macro forms of the 32-bit-index scatter-prefetch intrinsics, used
+   when __OPTIMIZE__ is not defined.  Every macro argument is
+   parenthesized before it is cast: without that, an argument such as
+   `base + 1' for ADDR would have only `base' converted and the offset
+   applied to the converted pointer, unlike the inline-function forms.  */
+#define _mm512_prefetch_i32scatter_pd(ADDR, INDEX, SCALE, HINT) \
+  __builtin_ia32_scatterpfdpd ((__mmask8)0xFF, (__v8si)(__m256i)(INDEX), \
+                               (long long const *)(ADDR), (int)(SCALE), \
+                               (int)(HINT))
+
+#define _mm512_prefetch_i32scatter_ps(ADDR, INDEX, SCALE, HINT) \
+  __builtin_ia32_scatterpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i)(INDEX), \
+                               (int const *)(ADDR), (int)(SCALE), \
+                               (int)(HINT))
+
+#define _mm512_mask_prefetch_i32scatter_pd(ADDR, MASK, INDEX, SCALE, HINT) \
+  __builtin_ia32_scatterpfdpd ((__mmask8)(MASK), (__v8si)(__m256i)(INDEX), \
+                               (long long const *)(ADDR), (int)(SCALE), \
+                               (int)(HINT))
+
+#define _mm512_mask_prefetch_i32scatter_ps(ADDR, MASK, INDEX, SCALE, HINT) \
+  __builtin_ia32_scatterpfdps ((__mmask16)(MASK), (__v16si)(__m512i)(INDEX), \
+                               (int const *)(ADDR), (int)(SCALE), \
+                               (int)(HINT))
+
+/* Macro forms of the 64-bit-index scatter-prefetch intrinsics, used
+   when __OPTIMIZE__ is not defined.  Every macro argument is
+   parenthesized before it is cast: without that, an argument such as
+   `base + 1' for ADDR would have only `base' converted and the offset
+   applied to the converted pointer, unlike the inline-function forms.  */
+#define _mm512_prefetch_i64scatter_pd(ADDR, INDEX, SCALE, HINT) \
+  __builtin_ia32_scatterpfqpd ((__mmask8)0xFF, (__v8di)(__m512i)(INDEX), \
+                               (long long const *)(ADDR), (int)(SCALE), \
+                               (int)(HINT))
+
+#define _mm512_prefetch_i64scatter_ps(ADDR, INDEX, SCALE, HINT) \
+  __builtin_ia32_scatterpfqps ((__mmask8)0xFF, (__v8di)(__m512i)(INDEX), \
+                               (int const *)(ADDR), (int)(SCALE), \
+                               (int)(HINT))
+
+#define _mm512_mask_prefetch_i64scatter_pd(ADDR, MASK, INDEX, SCALE, HINT) \
+  __builtin_ia32_scatterpfqpd ((__mmask8)(MASK), (__v8di)(__m512i)(INDEX), \
+                               (long long const *)(ADDR), (int)(SCALE), \
+                               (int)(HINT))
+
+#define _mm512_mask_prefetch_i64scatter_ps(ADDR, MASK, INDEX, SCALE, HINT) \
+  __builtin_ia32_scatterpfqps ((__mmask8)(MASK), (__v8di)(__m512i)(INDEX), \
+                               (int const *)(ADDR), (int)(SCALE), \
+                               (int)(HINT))
+#endif
+
+#ifdef __DISABLE_AVX512PF__
+#undef __DISABLE_AVX512PF__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512PF__ */
+
+#endif /* _AVX512PFINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/avxintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/avxintrin.h
new file mode 100644
index 0000000..2ea327c
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/avxintrin.h
@@ -0,0 +1,1463 @@
+/* Copyright (C) 2008-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 11.0. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+# error "Never use <avxintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVXINTRIN_H_INCLUDED
+#define _AVXINTRIN_H_INCLUDED
+
+#ifndef __AVX__
+#pragma GCC push_options
+#pragma GCC target("avx")
+#define __DISABLE_AVX__
+#endif /* __AVX__ */
+
+/* Internal data types for implementing the intrinsics. */
+typedef double __v4df __attribute__ ((__vector_size__ (32)));
+typedef float __v8sf __attribute__ ((__vector_size__ (32)));
+typedef long long __v4di __attribute__ ((__vector_size__ (32)));
+typedef int __v8si __attribute__ ((__vector_size__ (32)));
+typedef short __v16hi __attribute__ ((__vector_size__ (32)));
+typedef char __v32qi __attribute__ ((__vector_size__ (32)));
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+typedef float __m256 __attribute__ ((__vector_size__ (32),
+ __may_alias__));
+typedef long long __m256i __attribute__ ((__vector_size__ (32),
+ __may_alias__));
+typedef double __m256d __attribute__ ((__vector_size__ (32),
+ __may_alias__));
+
+/* Compare predicates for scalar and packed compare intrinsics. */
+
+/* Equal (ordered, non-signaling) */
+#define _CMP_EQ_OQ 0x00
+/* Less-than (ordered, signaling) */
+#define _CMP_LT_OS 0x01
+/* Less-than-or-equal (ordered, signaling) */
+#define _CMP_LE_OS 0x02
+/* Unordered (non-signaling) */
+#define _CMP_UNORD_Q 0x03
+/* Not-equal (unordered, non-signaling) */
+#define _CMP_NEQ_UQ 0x04
+/* Not-less-than (unordered, signaling) */
+#define _CMP_NLT_US 0x05
+/* Not-less-than-or-equal (unordered, signaling) */
+#define _CMP_NLE_US 0x06
+/* Ordered (non-signaling) */
+#define _CMP_ORD_Q 0x07
+/* Equal (unordered, non-signaling) */
+#define _CMP_EQ_UQ 0x08
+/* Not-greater-than-or-equal (unordered, signaling) */
+#define _CMP_NGE_US 0x09
+/* Not-greater-than (unordered, signaling) */
+#define _CMP_NGT_US 0x0a
+/* False (ordered, non-signaling) */
+#define _CMP_FALSE_OQ 0x0b
+/* Not-equal (ordered, non-signaling) */
+#define _CMP_NEQ_OQ 0x0c
+/* Greater-than-or-equal (ordered, signaling) */
+#define _CMP_GE_OS 0x0d
+/* Greater-than (ordered, signaling) */
+#define _CMP_GT_OS 0x0e
+/* True (unordered, non-signaling) */
+#define _CMP_TRUE_UQ 0x0f
+/* Equal (ordered, signaling) */
+#define _CMP_EQ_OS 0x10
+/* Less-than (ordered, non-signaling) */
+#define _CMP_LT_OQ 0x11
+/* Less-than-or-equal (ordered, non-signaling) */
+#define _CMP_LE_OQ 0x12
+/* Unordered (signaling) */
+#define _CMP_UNORD_S 0x13
+/* Not-equal (unordered, signaling) */
+#define _CMP_NEQ_US 0x14
+/* Not-less-than (unordered, non-signaling) */
+#define _CMP_NLT_UQ 0x15
+/* Not-less-than-or-equal (unordered, non-signaling) */
+#define _CMP_NLE_UQ 0x16
+/* Ordered (signaling) */
+#define _CMP_ORD_S 0x17
+/* Equal (unordered, signaling) */
+#define _CMP_EQ_US 0x18
+/* Not-greater-than-or-equal (unordered, non-signaling) */
+#define _CMP_NGE_UQ 0x19
+/* Not-greater-than (unordered, non-signaling) */
+#define _CMP_NGT_UQ 0x1a
+/* False (ordered, signaling) */
+#define _CMP_FALSE_OS 0x1b
+/* Not-equal (ordered, signaling) */
+#define _CMP_NEQ_OS 0x1c
+/* Greater-than-or-equal (ordered, non-signaling) */
+#define _CMP_GE_OQ 0x1d
+/* Greater-than (ordered, non-signaling) */
+#define _CMP_GT_OQ 0x1e
+/* True (unordered, signaling) */
+#define _CMP_TRUE_US 0x1f
+
+/* Hand both operands, viewed as __v4df, to the addpd256 builtin and
+   return its result as __m256d.  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_add_pd (__m256d __A, __m256d __B)
+{
+  __v4df __lhs = (__v4df)__A;
+  __v4df __rhs = (__v4df)__B;
+  return (__m256d) __builtin_ia32_addpd256 (__lhs, __rhs);
+}
+
+/* Hand both operands, viewed as __v8sf, to the addps256 builtin and
+   return its result as __m256.  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_add_ps (__m256 __A, __m256 __B)
+{
+  __v8sf __lhs = (__v8sf)__A;
+  __v8sf __rhs = (__v8sf)__B;
+  return (__m256) __builtin_ia32_addps256 (__lhs, __rhs);
+}
+
+/* Hand both operands, viewed as __v4df, to the addsubpd256 builtin and
+   return its result as __m256d.  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_addsub_pd (__m256d __A, __m256d __B)
+{
+  __v4df __lhs = (__v4df)__A;
+  __v4df __rhs = (__v4df)__B;
+  return (__m256d) __builtin_ia32_addsubpd256 (__lhs, __rhs);
+}
+
+/* Hand both operands, viewed as __v8sf, to the addsubps256 builtin and
+   return its result as __m256.  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_addsub_ps (__m256 __A, __m256 __B)
+{
+  __v8sf __lhs = (__v8sf)__A;
+  __v8sf __rhs = (__v8sf)__B;
+  return (__m256) __builtin_ia32_addsubps256 (__lhs, __rhs);
+}
+
+
+/* Hand both operands, viewed as __v4df, to the andpd256 builtin and
+   return its result as __m256d.  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_and_pd (__m256d __A, __m256d __B)
+{
+  __v4df __lhs = (__v4df)__A;
+  __v4df __rhs = (__v4df)__B;
+  return (__m256d) __builtin_ia32_andpd256 (__lhs, __rhs);
+}
+
+/* Hand both operands, viewed as __v8sf, to the andps256 builtin and
+   return its result as __m256.  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_and_ps (__m256 __A, __m256 __B)
+{
+  __v8sf __lhs = (__v8sf)__A;
+  __v8sf __rhs = (__v8sf)__B;
+  return (__m256) __builtin_ia32_andps256 (__lhs, __rhs);
+}
+
+/* Hand both operands, viewed as __v4df and in the order (__A, __B), to
+   the andnpd256 builtin and return its result as __m256d.  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_andnot_pd (__m256d __A, __m256d __B)
+{
+  __v4df __lhs = (__v4df)__A;
+  __v4df __rhs = (__v4df)__B;
+  return (__m256d) __builtin_ia32_andnpd256 (__lhs, __rhs);
+}
+
+/* Hand both operands, viewed as __v8sf and in the order (__A, __B), to
+   the andnps256 builtin and return its result as __m256.  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_andnot_ps (__m256 __A, __m256 __B)
+{
+  __v8sf __lhs = (__v8sf)__A;
+  __v8sf __rhs = (__v8sf)__B;
+  return (__m256) __builtin_ia32_andnps256 (__lhs, __rhs);
+}
+
+/* Double/single precision floating point blend instructions - select
+ data from 2 sources using constant/variable mask. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_blend_pd (__m256d __X, __m256d __Y, const int __M)
+{
+ return (__m256d) __builtin_ia32_blendpd256 ((__v4df)__X,
+ (__v4df)__Y,
+ __M);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_blend_ps (__m256 __X, __m256 __Y, const int __M)
+{
+ return (__m256) __builtin_ia32_blendps256 ((__v8sf)__X,
+ (__v8sf)__Y,
+ __M);
+}
+#else
+#define _mm256_blend_pd(X, Y, M) \
+ ((__m256d) __builtin_ia32_blendpd256 ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), (int)(M)))
+
+#define _mm256_blend_ps(X, Y, M) \
+ ((__m256) __builtin_ia32_blendps256 ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), (int)(M)))
+#endif
+
+/* Variable-mask blend: forwards __X, __Y and the vector mask __M, each
+   viewed as __v4df, to the blendvpd256 builtin.  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_blendv_pd (__m256d __X, __m256d __Y, __m256d __M)
+{
+  __v4df __first = (__v4df)__X;
+  __v4df __second = (__v4df)__Y;
+  __v4df __sel = (__v4df)__M;
+  return (__m256d) __builtin_ia32_blendvpd256 (__first, __second, __sel);
+}
+
+/* Variable-mask blend: forwards __X, __Y and the vector mask __M, each
+   viewed as __v8sf, to the blendvps256 builtin.  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_blendv_ps (__m256 __X, __m256 __Y, __m256 __M)
+{
+  __v8sf __first = (__v8sf)__X;
+  __v8sf __second = (__v8sf)__Y;
+  __v8sf __sel = (__v8sf)__M;
+  return (__m256) __builtin_ia32_blendvps256 (__first, __second, __sel);
+}
+
+/* Hand (__A, __B), viewed as __v4df, to the divpd256 builtin and return
+   its result as __m256d.  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_div_pd (__m256d __A, __m256d __B)
+{
+  __v4df __lhs = (__v4df)__A;
+  __v4df __rhs = (__v4df)__B;
+  return (__m256d) __builtin_ia32_divpd256 (__lhs, __rhs);
+}
+
+/* Hand (__A, __B), viewed as __v8sf, to the divps256 builtin and return
+   its result as __m256.  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_div_ps (__m256 __A, __m256 __B)
+{
+  __v8sf __lhs = (__v8sf)__A;
+  __v8sf __rhs = (__v8sf)__B;
+  return (__m256) __builtin_ia32_divps256 (__lhs, __rhs);
+}
+
+/* Dot product instructions with mask-defined summing and zeroing parts
+ of result. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_dp_ps (__m256 __X, __m256 __Y, const int __M)
+{
+ return (__m256) __builtin_ia32_dpps256 ((__v8sf)__X,
+ (__v8sf)__Y,
+ __M);
+}
+#else
+#define _mm256_dp_ps(X, Y, M) \
+ ((__m256) __builtin_ia32_dpps256 ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), (int)(M)))
+#endif
+
+/* Hand (__X, __Y), viewed as __v4df, to the haddpd256 builtin and return
+   its result as __m256d.  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_hadd_pd (__m256d __X, __m256d __Y)
+{
+  __v4df __lhs = (__v4df)__X;
+  __v4df __rhs = (__v4df)__Y;
+  return (__m256d) __builtin_ia32_haddpd256 (__lhs, __rhs);
+}
+
+/* Hand (__X, __Y), viewed as __v8sf, to the haddps256 builtin and return
+   its result as __m256.  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_hadd_ps (__m256 __X, __m256 __Y)
+{
+  __v8sf __lhs = (__v8sf)__X;
+  __v8sf __rhs = (__v8sf)__Y;
+  return (__m256) __builtin_ia32_haddps256 (__lhs, __rhs);
+}
+
+/* Hand (__X, __Y), viewed as __v4df, to the hsubpd256 builtin and return
+   its result as __m256d.  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_hsub_pd (__m256d __X, __m256d __Y)
+{
+  __v4df __lhs = (__v4df)__X;
+  __v4df __rhs = (__v4df)__Y;
+  return (__m256d) __builtin_ia32_hsubpd256 (__lhs, __rhs);
+}
+
+/* Hand (__X, __Y), viewed as __v8sf, to the hsubps256 builtin and return
+   its result as __m256.  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_hsub_ps (__m256 __X, __m256 __Y)
+{
+  __v8sf __lhs = (__v8sf)__X;
+  __v8sf __rhs = (__v8sf)__Y;
+  return (__m256) __builtin_ia32_hsubps256 (__lhs, __rhs);
+}
+
+/* Hand (__A, __B), viewed as __v4df, to the maxpd256 builtin and return
+   its result as __m256d.  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_max_pd (__m256d __A, __m256d __B)
+{
+  __v4df __lhs = (__v4df)__A;
+  __v4df __rhs = (__v4df)__B;
+  return (__m256d) __builtin_ia32_maxpd256 (__lhs, __rhs);
+}
+
+/* Hand (__A, __B), viewed as __v8sf, to the maxps256 builtin and return
+   its result as __m256.  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_max_ps (__m256 __A, __m256 __B)
+{
+  __v8sf __lhs = (__v8sf)__A;
+  __v8sf __rhs = (__v8sf)__B;
+  return (__m256) __builtin_ia32_maxps256 (__lhs, __rhs);
+}
+
+/* Hand (__A, __B), viewed as __v4df, to the minpd256 builtin and return
+   its result as __m256d.  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_min_pd (__m256d __A, __m256d __B)
+{
+  __v4df __lhs = (__v4df)__A;
+  __v4df __rhs = (__v4df)__B;
+  return (__m256d) __builtin_ia32_minpd256 (__lhs, __rhs);
+}
+
+/* Hand (__A, __B), viewed as __v8sf, to the minps256 builtin and return
+   its result as __m256.  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_min_ps (__m256 __A, __m256 __B)
+{
+  __v8sf __lhs = (__v8sf)__A;
+  __v8sf __rhs = (__v8sf)__B;
+  return (__m256) __builtin_ia32_minps256 (__lhs, __rhs);
+}
+
+/* Hand (__A, __B), viewed as __v4df, to the mulpd256 builtin and return
+   its result as __m256d.  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mul_pd (__m256d __A, __m256d __B)
+{
+  __v4df __lhs = (__v4df)__A;
+  __v4df __rhs = (__v4df)__B;
+  return (__m256d) __builtin_ia32_mulpd256 (__lhs, __rhs);
+}
+
+/* Hand (__A, __B), viewed as __v8sf, to the mulps256 builtin and return
+   its result as __m256.  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mul_ps (__m256 __A, __m256 __B)
+{
+  __v8sf __lhs = (__v8sf)__A;
+  __v8sf __rhs = (__v8sf)__B;
+  return (__m256) __builtin_ia32_mulps256 (__lhs, __rhs);
+}
+
+/* Hand (__A, __B), viewed as __v4df, to the orpd256 builtin and return
+   its result as __m256d.  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_or_pd (__m256d __A, __m256d __B)
+{
+  __v4df __lhs = (__v4df)__A;
+  __v4df __rhs = (__v4df)__B;
+  return (__m256d) __builtin_ia32_orpd256 (__lhs, __rhs);
+}
+
+/* Hand (__A, __B), viewed as __v8sf, to the orps256 builtin and return
+   its result as __m256.  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_or_ps (__m256 __A, __m256 __B)
+{
+  __v8sf __lhs = (__v8sf)__A;
+  __v8sf __rhs = (__v8sf)__B;
+  return (__m256) __builtin_ia32_orps256 (__lhs, __rhs);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_shuffle_pd (__m256d __A, __m256d __B, const int __mask)
+{
+ return (__m256d) __builtin_ia32_shufpd256 ((__v4df)__A, (__v4df)__B,
+ __mask);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_shuffle_ps (__m256 __A, __m256 __B, const int __mask)
+{
+ return (__m256) __builtin_ia32_shufps256 ((__v8sf)__A, (__v8sf)__B,
+ __mask);
+}
+#else
+#define _mm256_shuffle_pd(A, B, N) \
+ ((__m256d)__builtin_ia32_shufpd256 ((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), (int)(N)))
+
+#define _mm256_shuffle_ps(A, B, N) \
+ ((__m256) __builtin_ia32_shufps256 ((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), (int)(N)))
+#endif
+
+/* Hand (__A, __B), viewed as __v4df, to the subpd256 builtin and return
+   its result as __m256d.  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sub_pd (__m256d __A, __m256d __B)
+{
+  __v4df __lhs = (__v4df)__A;
+  __v4df __rhs = (__v4df)__B;
+  return (__m256d) __builtin_ia32_subpd256 (__lhs, __rhs);
+}
+
+/* Hand (__A, __B), viewed as __v8sf, to the subps256 builtin and return
+   its result as __m256.  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sub_ps (__m256 __A, __m256 __B)
+{
+  __v8sf __lhs = (__v8sf)__A;
+  __v8sf __rhs = (__v8sf)__B;
+  return (__m256) __builtin_ia32_subps256 (__lhs, __rhs);
+}
+
+/* Hand (__A, __B), viewed as __v4df, to the xorpd256 builtin and return
+   its result as __m256d.  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_xor_pd (__m256d __A, __m256d __B)
+{
+  __v4df __lhs = (__v4df)__A;
+  __v4df __rhs = (__v4df)__B;
+  return (__m256d) __builtin_ia32_xorpd256 (__lhs, __rhs);
+}
+
+/* Hand (__A, __B), viewed as __v8sf, to the xorps256 builtin and return
+   its result as __m256.  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_xor_ps (__m256 __A, __m256 __B)
+{
+  __v8sf __lhs = (__v8sf)__A;
+  __v8sf __rhs = (__v8sf)__B;
+  return (__m256) __builtin_ia32_xorps256 (__lhs, __rhs);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_pd (__m128d __X, __m128d __Y, const int __P)
+{
+ return (__m128d) __builtin_ia32_cmppd ((__v2df)__X, (__v2df)__Y, __P);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_ps (__m128 __X, __m128 __Y, const int __P)
+{
+ return (__m128) __builtin_ia32_cmpps ((__v4sf)__X, (__v4sf)__Y, __P);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmp_pd (__m256d __X, __m256d __Y, const int __P)
+{
+ return (__m256d) __builtin_ia32_cmppd256 ((__v4df)__X, (__v4df)__Y,
+ __P);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmp_ps (__m256 __X, __m256 __Y, const int __P)
+{
+ return (__m256) __builtin_ia32_cmpps256 ((__v8sf)__X, (__v8sf)__Y,
+ __P);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_sd (__m128d __X, __m128d __Y, const int __P)
+{
+ return (__m128d) __builtin_ia32_cmpsd ((__v2df)__X, (__v2df)__Y, __P);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_ss (__m128 __X, __m128 __Y, const int __P)
+{
+ return (__m128) __builtin_ia32_cmpss ((__v4sf)__X, (__v4sf)__Y, __P);
+}
+#else
+#define _mm_cmp_pd(X, Y, P) \
+ ((__m128d) __builtin_ia32_cmppd ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P)))
+
+#define _mm_cmp_ps(X, Y, P) \
+ ((__m128) __builtin_ia32_cmpps ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P)))
+
+#define _mm256_cmp_pd(X, Y, P) \
+ ((__m256d) __builtin_ia32_cmppd256 ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), (int)(P)))
+
+#define _mm256_cmp_ps(X, Y, P) \
+ ((__m256) __builtin_ia32_cmpps256 ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), (int)(P)))
+
+#define _mm_cmp_sd(X, Y, P) \
+ ((__m128d) __builtin_ia32_cmpsd ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P)))
+
+#define _mm_cmp_ss(X, Y, P) \
+ ((__m128) __builtin_ia32_cmpss ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P)))
+#endif
+
+/* Convert through the cvtdq2pd256 builtin: the 128-bit integer argument
+   is viewed as __v4si and the result is returned as __m256d.  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi32_pd (__m128i __A)
+{
+  __v4si __src = (__v4si) __A;
+  return (__m256d) __builtin_ia32_cvtdq2pd256 (__src);
+}
+
+/* Convert through the cvtdq2ps256 builtin: the argument is viewed as
+   __v8si and the result is returned as __m256.  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi32_ps (__m256i __A)
+{
+  __v8si __src = (__v8si) __A;
+  return (__m256) __builtin_ia32_cvtdq2ps256 (__src);
+}
+
+/* Convert through the cvtpd2ps256 builtin: the argument is viewed as
+   __v4df and the (128-bit) result is returned as __m128.  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtpd_ps (__m256d __A)
+{
+  __v4df __src = (__v4df) __A;
+  return (__m128) __builtin_ia32_cvtpd2ps256 (__src);
+}
+
+/* Convert through the cvtps2dq256 builtin: the argument is viewed as
+   __v8sf and the result is returned as __m256i.  */
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtps_epi32 (__m256 __A)
+{
+  __v8sf __src = (__v8sf) __A;
+  return (__m256i) __builtin_ia32_cvtps2dq256 (__src);
+}
+
+/* Convert through the cvtps2pd256 builtin: the 128-bit argument is
+   viewed as __v4sf and the result is returned as __m256d.  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtps_pd (__m128 __A)
+{
+  __v4sf __src = (__v4sf) __A;
+  return (__m256d) __builtin_ia32_cvtps2pd256 (__src);
+}
+
+/* Convert through the cvttpd2dq256 builtin: the argument is viewed as
+   __v4df and the (128-bit) result is returned as __m128i.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvttpd_epi32 (__m256d __A)
+{
+  __v4df __src = (__v4df) __A;
+  return (__m128i) __builtin_ia32_cvttpd2dq256 (__src);
+}
+
+/* Convert through the cvtpd2dq256 builtin: the argument is viewed as
+   __v4df and the (128-bit) result is returned as __m128i.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtpd_epi32 (__m256d __A)
+{
+  __v4df __src = (__v4df) __A;
+  return (__m128i) __builtin_ia32_cvtpd2dq256 (__src);
+}
+
+/* Convert through the cvttps2dq256 builtin: the argument is viewed as
+   __v8sf and the result is returned as __m256i.  */
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvttps_epi32 (__m256 __A)
+{
+  __v8sf __src = (__v8sf) __A;
+  return (__m256i) __builtin_ia32_cvttps2dq256 (__src);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extractf128_pd (__m256d __X, const int __N)
+{
+ return (__m128d) __builtin_ia32_vextractf128_pd256 ((__v4df)__X, __N);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extractf128_ps (__m256 __X, const int __N)
+{
+ return (__m128) __builtin_ia32_vextractf128_ps256 ((__v8sf)__X, __N);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extractf128_si256 (__m256i __X, const int __N)
+{
+ return (__m128i) __builtin_ia32_vextractf128_si256 ((__v8si)__X, __N);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extract_epi32 (__m256i __X, int const __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 2);
+ return _mm_extract_epi32 (__Y, __N % 4);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extract_epi16 (__m256i __X, int const __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 3);
+ return _mm_extract_epi16 (__Y, __N % 8);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extract_epi8 (__m256i __X, int const __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 4);
+ return _mm_extract_epi8 (__Y, __N % 16);
+}
+
+#ifdef __x86_64__
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extract_epi64 (__m256i __X, const int __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 1);
+ return _mm_extract_epi64 (__Y, __N % 2);
+}
+#endif
+#else
+#define _mm256_extractf128_pd(X, N) \
+ ((__m128d) __builtin_ia32_vextractf128_pd256 ((__v4df)(__m256d)(X), \
+ (int)(N)))
+
+#define _mm256_extractf128_ps(X, N) \
+ ((__m128) __builtin_ia32_vextractf128_ps256 ((__v8sf)(__m256)(X), \
+ (int)(N)))
+
+#define _mm256_extractf128_si256(X, N) \
+ ((__m128i) __builtin_ia32_vextractf128_si256 ((__v8si)(__m256i)(X), \
+ (int)(N)))
+
+#define _mm256_extract_epi32(X, N) \
+ (__extension__ \
+ ({ \
+ __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 2); \
+ _mm_extract_epi32 (__Y, (N) % 4); \
+ }))
+
+#define _mm256_extract_epi16(X, N) \
+ (__extension__ \
+ ({ \
+ __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 3); \
+ _mm_extract_epi16 (__Y, (N) % 8); \
+ }))
+
+#define _mm256_extract_epi8(X, N) \
+ (__extension__ \
+ ({ \
+ __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 4); \
+ _mm_extract_epi8 (__Y, (N) % 16); \
+ }))
+
+#ifdef __x86_64__
+#define _mm256_extract_epi64(X, N) \
+ (__extension__ \
+ ({ \
+ __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 1); \
+ _mm_extract_epi64 (__Y, (N) % 2); \
+ }))
+#endif
+#endif
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_zeroall (void)
+{
+ __builtin_ia32_vzeroall ();
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_zeroupper (void)
+{
+ __builtin_ia32_vzeroupper ();
+}
+
+/* Variable permute: __A (as __v2df) and the control vector __C (as
+   __v2di) are forwarded to the vpermilvarpd builtin.  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permutevar_pd (__m128d __A, __m128i __C)
+{
+  __v2df __src = (__v2df)__A;
+  __v2di __ctl = (__v2di)__C;
+  return (__m128d) __builtin_ia32_vpermilvarpd (__src, __ctl);
+}
+
+/* Variable permute: __A (as __v4df) and the control vector __C (as
+   __v4di) are forwarded to the vpermilvarpd256 builtin.  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutevar_pd (__m256d __A, __m256i __C)
+{
+  __v4df __src = (__v4df)__A;
+  __v4di __ctl = (__v4di)__C;
+  return (__m256d) __builtin_ia32_vpermilvarpd256 (__src, __ctl);
+}
+
+/* Variable permute: __A (as __v4sf) and the control vector __C (as
+   __v4si) are forwarded to the vpermilvarps builtin.  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permutevar_ps (__m128 __A, __m128i __C)
+{
+  __v4sf __src = (__v4sf)__A;
+  __v4si __ctl = (__v4si)__C;
+  return (__m128) __builtin_ia32_vpermilvarps (__src, __ctl);
+}
+
+/* Variable permute: __A (as __v8sf) and the control vector __C (as
+   __v8si) are forwarded to the vpermilvarps256 builtin.  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutevar_ps (__m256 __A, __m256i __C)
+{
+  __v8sf __src = (__v8sf)__A;
+  __v8si __ctl = (__v8si)__C;
+  return (__m256) __builtin_ia32_vpermilvarps256 (__src, __ctl);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permute_pd (__m128d __X, const int __C)
+{
+ return (__m128d) __builtin_ia32_vpermilpd ((__v2df)__X, __C);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute_pd (__m256d __X, const int __C)
+{
+ return (__m256d) __builtin_ia32_vpermilpd256 ((__v4df)__X, __C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permute_ps (__m128 __X, const int __C)
+{
+ return (__m128) __builtin_ia32_vpermilps ((__v4sf)__X, __C);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute_ps (__m256 __X, const int __C)
+{
+ return (__m256) __builtin_ia32_vpermilps256 ((__v8sf)__X, __C);
+}
+#else
+#define _mm_permute_pd(X, C) \
+ ((__m128d) __builtin_ia32_vpermilpd ((__v2df)(__m128d)(X), (int)(C)))
+
+#define _mm256_permute_pd(X, C) \
+ ((__m256d) __builtin_ia32_vpermilpd256 ((__v4df)(__m256d)(X), (int)(C)))
+
+#define _mm_permute_ps(X, C) \
+ ((__m128) __builtin_ia32_vpermilps ((__v4sf)(__m128)(X), (int)(C)))
+
+#define _mm256_permute_ps(X, C) \
+ ((__m256) __builtin_ia32_vpermilps256 ((__v8sf)(__m256)(X), (int)(C)))
+#endif
+
+#ifdef __OPTIMIZE__
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute2f128_pd (__m256d __X, __m256d __Y, const int __C)
+{
+ return (__m256d) __builtin_ia32_vperm2f128_pd256 ((__v4df)__X,
+ (__v4df)__Y,
+ __C);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute2f128_ps (__m256 __X, __m256 __Y, const int __C)
+{
+ return (__m256) __builtin_ia32_vperm2f128_ps256 ((__v8sf)__X,
+ (__v8sf)__Y,
+ __C);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute2f128_si256 (__m256i __X, __m256i __Y, const int __C)
+{
+ return (__m256i) __builtin_ia32_vperm2f128_si256 ((__v8si)__X,
+ (__v8si)__Y,
+ __C);
+}
+#else
+#define _mm256_permute2f128_pd(X, Y, C) \
+ ((__m256d) __builtin_ia32_vperm2f128_pd256 ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), \
+ (int)(C)))
+
+#define _mm256_permute2f128_ps(X, Y, C) \
+ ((__m256) __builtin_ia32_vperm2f128_ps256 ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), \
+ (int)(C)))
+
+#define _mm256_permute2f128_si256(X, Y, C) \
+ ((__m256i) __builtin_ia32_vperm2f128_si256 ((__v8si)(__m256i)(X), \
+ (__v8si)(__m256i)(Y), \
+ (int)(C)))
+#endif
+
+/* Pass the float pointer __X to the vbroadcastss builtin and return the
+   result as __m128.  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_broadcast_ss (float const *__X)
+{
+  __m128 __res = (__m128) __builtin_ia32_vbroadcastss (__X);
+  return __res;
+}
+
+/* Pass the double pointer __X to the vbroadcastsd256 builtin and return
+   the result as __m256d.  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcast_sd (double const *__X)
+{
+  __m256d __res = (__m256d) __builtin_ia32_vbroadcastsd256 (__X);
+  return __res;
+}
+
+/* Pass the float pointer __X to the vbroadcastss256 builtin and return
+   the result as __m256.  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcast_ss (float const *__X)
+{
+  __m256 __res = (__m256) __builtin_ia32_vbroadcastss256 (__X);
+  return __res;
+}
+
+/* Pass the __m128d pointer __X to the vbroadcastf128_pd256 builtin and
+   return the result as __m256d.  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcast_pd (__m128d const *__X)
+{
+  __m256d __res = (__m256d) __builtin_ia32_vbroadcastf128_pd256 (__X);
+  return __res;
+}
+
+/* Pass the __m128 pointer __X to the vbroadcastf128_ps256 builtin and
+   return the result as __m256.  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcast_ps (__m128 const *__X)
+{
+  __m256 __res = (__m256) __builtin_ia32_vbroadcastf128_ps256 (__X);
+  return __res;
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insertf128_pd (__m256d __X, __m128d __Y, const int __O)
+{
+ return (__m256d) __builtin_ia32_vinsertf128_pd256 ((__v4df)__X,
+ (__v2df)__Y,
+ __O);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insertf128_ps (__m256 __X, __m128 __Y, const int __O)
+{
+ return (__m256) __builtin_ia32_vinsertf128_ps256 ((__v8sf)__X,
+ (__v4sf)__Y,
+ __O);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insertf128_si256 (__m256i __X, __m128i __Y, const int __O)
+{
+ return (__m256i) __builtin_ia32_vinsertf128_si256 ((__v8si)__X,
+ (__v4si)__Y,
+ __O);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insert_epi32 (__m256i __X, int __D, int const __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 2);
+ __Y = _mm_insert_epi32 (__Y, __D, __N % 4);
+ return _mm256_insertf128_si256 (__X, __Y, __N >> 2);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insert_epi16 (__m256i __X, int __D, int const __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 3);
+ __Y = _mm_insert_epi16 (__Y, __D, __N % 8);
+ return _mm256_insertf128_si256 (__X, __Y, __N >> 3);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insert_epi8 (__m256i __X, int __D, int const __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 4);
+ __Y = _mm_insert_epi8 (__Y, __D, __N % 16);
+ return _mm256_insertf128_si256 (__X, __Y, __N >> 4);
+}
+
+#ifdef __x86_64__
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insert_epi64 (__m256i __X, long long __D, int const __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 1);
+ __Y = _mm_insert_epi64 (__Y, __D, __N % 2);
+ return _mm256_insertf128_si256 (__X, __Y, __N >> 1);
+}
+#endif
+#else
+#define _mm256_insertf128_pd(X, Y, O) \
+ ((__m256d) __builtin_ia32_vinsertf128_pd256 ((__v4df)(__m256d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (int)(O)))
+
+#define _mm256_insertf128_ps(X, Y, O) \
+ ((__m256) __builtin_ia32_vinsertf128_ps256 ((__v8sf)(__m256)(X), \
+ (__v4sf)(__m128)(Y), \
+ (int)(O)))
+
+#define _mm256_insertf128_si256(X, Y, O) \
+ ((__m256i) __builtin_ia32_vinsertf128_si256 ((__v8si)(__m256i)(X), \
+ (__v4si)(__m128i)(Y), \
+ (int)(O)))
+
+/* Macro form of _mm256_insert_epi32: extract the 128-bit half selected
+   by N >> 2, insert D at position N % 4 in it, then put that half back.
+   X is captured once in a local so that an argument with side effects
+   is evaluated a single time, as in the inline-function form.  */
+#define _mm256_insert_epi32(X, D, N) \
+  (__extension__ \
+   ({ \
+      __m256i __X256 = (__m256i)(X); \
+      __m128i __Y = _mm256_extractf128_si256 (__X256, (N) >> 2); \
+      __Y = _mm_insert_epi32 (__Y, (D), (N) % 4); \
+      _mm256_insertf128_si256 (__X256, __Y, (N) >> 2); \
+    }))
+
+/* Macro form of _mm256_insert_epi16: extract the 128-bit half selected
+   by N >> 3, insert D at position N % 8 in it, then put that half back.
+   X is captured once in a local so that an argument with side effects
+   is evaluated a single time, as in the inline-function form.  */
+#define _mm256_insert_epi16(X, D, N) \
+  (__extension__ \
+   ({ \
+      __m256i __X256 = (__m256i)(X); \
+      __m128i __Y = _mm256_extractf128_si256 (__X256, (N) >> 3); \
+      __Y = _mm_insert_epi16 (__Y, (D), (N) % 8); \
+      _mm256_insertf128_si256 (__X256, __Y, (N) >> 3); \
+    }))
+
+/* Macro form of _mm256_insert_epi8: extract the 128-bit half selected
+   by N >> 4, insert D at position N % 16 in it, then put that half back.
+   X is captured once in a local so that an argument with side effects
+   is evaluated a single time, as in the inline-function form.  */
+#define _mm256_insert_epi8(X, D, N) \
+  (__extension__ \
+   ({ \
+      __m256i __X256 = (__m256i)(X); \
+      __m128i __Y = _mm256_extractf128_si256 (__X256, (N) >> 4); \
+      __Y = _mm_insert_epi8 (__Y, (D), (N) % 16); \
+      _mm256_insertf128_si256 (__X256, __Y, (N) >> 4); \
+    }))
+
+#ifdef __x86_64__
+/* Macro form of _mm256_insert_epi64: extract the 128-bit half selected
+   by N >> 1, insert D at position N % 2 in it, then put that half back.
+   X is captured once in a local so that an argument with side effects
+   is evaluated a single time, as in the inline-function form.  */
+#define _mm256_insert_epi64(X, D, N) \
+  (__extension__ \
+   ({ \
+      __m256i __X256 = (__m256i)(X); \
+      __m128i __Y = _mm256_extractf128_si256 (__X256, (N) >> 1); \
+      __Y = _mm_insert_epi64 (__Y, (D), (N) % 2); \
+      _mm256_insertf128_si256 (__X256, __Y, (N) >> 1); \
+    }))
+#endif
+#endif
+
+/* Load by dereferencing __P as a pointer to __m256d.  The cast keeps the
+   const qualifier of __P instead of silently discarding it.  */
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_load_pd (double const *__P)
+{
+  return *(__m256d const *)__P;
+}
+
+/* Store __A by assigning through __P reinterpreted as a pointer to
+   __m256d.  */
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_store_pd (double *__P, __m256d __A)
+{
+  __m256d *__dst = (__m256d *)__P;
+  *__dst = __A;
+}
+
+/* Load by dereferencing __P as a pointer to __m256.  The cast keeps the
+   const qualifier of __P instead of silently discarding it.  */
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_load_ps (float const *__P)
+{
+  return *(__m256 const *)__P;
+}
+
+/* Store __A by assigning through __P reinterpreted as a pointer to
+   __m256.  */
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_store_ps (float *__P, __m256 __A)
+{
+  __m256 *__dst = (__m256 *)__P;
+  *__dst = __A;
+}
+
+/* Load through the loadupd256 builtin rather than a direct dereference;
+   the result is returned as __m256d.  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_loadu_pd (double const *__P)
+{
+  __m256d __val = (__m256d) __builtin_ia32_loadupd256 (__P);
+  return __val;
+}
+
+/* Store through the storeupd256 builtin; __A is passed as __v4df.  */
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_storeu_pd (double *__P, __m256d __A)
+{
+  __v4df __val = (__v4df)__A;
+  __builtin_ia32_storeupd256 (__P, __val);
+}
+
+/* Load through the loadups256 builtin rather than a direct dereference;
+   the result is returned as __m256.  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_loadu_ps (float const *__P)
+{
+  __m256 __val = (__m256) __builtin_ia32_loadups256 (__P);
+  return __val;
+}
+
+/* Store through the storeups256 builtin; __A is passed as __v8sf.  */
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_storeu_ps (float *__P, __m256 __A)
+{
+  __v8sf __val = (__v8sf)__A;
+  __builtin_ia32_storeups256 (__P, __val);
+}
+
+/* Load by plain dereference of the __m256i pointer __P.  */
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_load_si256 (__m256i const *__P)
+{
+  __m256i __val = *__P;
+  return __val;
+}
+
+/* Store __A by plain assignment through the __m256i pointer __P.  */
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_store_si256 (__m256i *__P, __m256i __A)
+{
+  __P[0] = __A;
+}
+
+/* Load through the loaddqu256 builtin, which takes the address as a
+   `char const *'; the result is returned as __m256i.  */
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_loadu_si256 (__m256i const *__P)
+{
+  char const *__src = (char const *)__P;
+  return (__m256i) __builtin_ia32_loaddqu256 (__src);
+}
+
+/* Store through the storedqu256 builtin, which takes the address as a
+   `char *' and the value as __v32qi.  */
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_storeu_si256 (__m256i *__P, __m256i __A)
+{
+  char *__dst = (char *)__P;
+  __v32qi __val = (__v32qi)__A;
+  __builtin_ia32_storedqu256 (__dst, __val);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskload_pd (double const *__P, __m128i __M)
+{
+ return (__m128d) __builtin_ia32_maskloadpd ((const __v2df *)__P,
+ (__v2di)__M);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskstore_pd (double *__P, __m128i __M, __m128d __A)
+{
+ __builtin_ia32_maskstorepd ((__v2df *)__P, (__v2di)__M, (__v2df)__A);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskload_pd (double const *__P, __m256i __M)
+{
+ return (__m256d) __builtin_ia32_maskloadpd256 ((const __v4df *)__P,
+ (__v4di)__M);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskstore_pd (double *__P, __m256i __M, __m256d __A)
+{
+ __builtin_ia32_maskstorepd256 ((__v4df *)__P, (__v4di)__M, (__v4df)__A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskload_ps (float const *__P, __m128i __M)
+{
+ return (__m128) __builtin_ia32_maskloadps ((const __v4sf *)__P,
+ (__v4si)__M);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskstore_ps (float *__P, __m128i __M, __m128 __A)
+{
+ __builtin_ia32_maskstoreps ((__v4sf *)__P, (__v4si)__M, (__v4sf)__A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskload_ps (float const *__P, __m256i __M)
+{
+ return (__m256) __builtin_ia32_maskloadps256 ((const __v8sf *)__P,
+ (__v8si)__M);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskstore_ps (float *__P, __m256i __M, __m256 __A)
+{
+ __builtin_ia32_maskstoreps256 ((__v8sf *)__P, (__v8si)__M, (__v8sf)__A);
+}
+/* Duplication wrappers: each passes __X (viewed as __v8sf or __v4df) to the movshdup256, movsldup256 or movddup256 builtin.  */
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movehdup_ps (__m256 __X)
+{
+ return (__m256) __builtin_ia32_movshdup256 ((__v8sf)__X);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_moveldup_ps (__m256 __X)
+{
+ return (__m256) __builtin_ia32_movsldup256 ((__v8sf)__X);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movedup_pd (__m256d __X)
+{
+ return (__m256d) __builtin_ia32_movddup256 ((__v4df)__X);
+}
+/* lddqu256 reads through __P viewed as char const *; the movnt*256 wrappers hand the pointer and the re-typed vector to their builtin.  */
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_lddqu_si256 (__m256i const *__P)
+{
+ return (__m256i) __builtin_ia32_lddqu256 ((char const *)__P);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_stream_si256 (__m256i *__A, __m256i __B)
+{
+ __builtin_ia32_movntdq256 ((__v4di *)__A, (__v4di)__B);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_stream_pd (double *__A, __m256d __B)
+{
+ __builtin_ia32_movntpd256 (__A, (__v4df)__B);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_stream_ps (float *__P, __m256 __A)
+{
+ __builtin_ia32_movntps256 (__P, (__v8sf)__A);
+}
+/* Unary math wrappers: __A is re-typed and passed to the rcpps256, rsqrtps256, sqrtpd256 or sqrtps256 builtin; the result is cast back.  */
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rcp_ps (__m256 __A)
+{
+ return (__m256) __builtin_ia32_rcpps256 ((__v8sf)__A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rsqrt_ps (__m256 __A)
+{
+ return (__m256) __builtin_ia32_rsqrtps256 ((__v8sf)__A);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sqrt_pd (__m256d __A)
+{
+ return (__m256d) __builtin_ia32_sqrtpd256 ((__v4df)__A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sqrt_ps (__m256 __A)
+{
+ return (__m256) __builtin_ia32_sqrtps256 ((__v8sf)__A);
+}
+/* Rounding: inline functions when __OPTIMIZE__ is defined, same-call macros otherwise.  NOTE(review): presumably so __M stays a constant -- confirm.  */
+#ifdef __OPTIMIZE__
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_round_pd (__m256d __V, const int __M)
+{
+ return (__m256d) __builtin_ia32_roundpd256 ((__v4df)__V, __M);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_round_ps (__m256 __V, const int __M)
+{
+ return (__m256) __builtin_ia32_roundps256 ((__v8sf)__V, __M);
+}
+#else
+#define _mm256_round_pd(V, M) \
+ ((__m256d) __builtin_ia32_roundpd256 ((__v4df)(__m256d)(V), (int)(M)))
+
+#define _mm256_round_ps(V, M) \
+ ((__m256) __builtin_ia32_roundps256 ((__v8sf)(__m256)(V), (int)(M)))
+#endif
+/* Ceil/floor forms: _mm256_round_pd/_ps with the mode argument fixed to _MM_FROUND_CEIL or _MM_FROUND_FLOOR.  */
+#define _mm256_ceil_pd(V) _mm256_round_pd ((V), _MM_FROUND_CEIL)
+#define _mm256_floor_pd(V) _mm256_round_pd ((V), _MM_FROUND_FLOOR)
+#define _mm256_ceil_ps(V) _mm256_round_ps ((V), _MM_FROUND_CEIL)
+#define _mm256_floor_ps(V) _mm256_round_ps ((V), _MM_FROUND_FLOOR)
+/* Unpack wrappers: __A and __B are re-typed and passed, in that order, to the unpckh/unpckl pd256/ps256 builtin.  */
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpackhi_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_unpckhpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpacklo_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_unpcklpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpackhi_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_unpckhps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpacklo_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_unpcklps256 ((__v8sf)__A, (__v8sf)__B);
+}
+/* 128-bit test wrappers: return the int produced by the vtestz/vtestc/vtestnzc pd/ps builtin for (__M, __V), unchanged.  */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testz_pd (__m128d __M, __m128d __V)
+{
+ return __builtin_ia32_vtestzpd ((__v2df)__M, (__v2df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testc_pd (__m128d __M, __m128d __V)
+{
+ return __builtin_ia32_vtestcpd ((__v2df)__M, (__v2df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testnzc_pd (__m128d __M, __m128d __V)
+{
+ return __builtin_ia32_vtestnzcpd ((__v2df)__M, (__v2df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testz_ps (__m128 __M, __m128 __V)
+{
+ return __builtin_ia32_vtestzps ((__v4sf)__M, (__v4sf)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testc_ps (__m128 __M, __m128 __V)
+{
+ return __builtin_ia32_vtestcps ((__v4sf)__M, (__v4sf)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testnzc_ps (__m128 __M, __m128 __V)
+{
+ return __builtin_ia32_vtestnzcps ((__v4sf)__M, (__v4sf)__V);
+}
+/* 256-bit test wrappers: return the int produced by the vtestz/vtestc/vtestnzc pd256/ps256 builtin for (__M, __V), unchanged.  */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testz_pd (__m256d __M, __m256d __V)
+{
+ return __builtin_ia32_vtestzpd256 ((__v4df)__M, (__v4df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testc_pd (__m256d __M, __m256d __V)
+{
+ return __builtin_ia32_vtestcpd256 ((__v4df)__M, (__v4df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testnzc_pd (__m256d __M, __m256d __V)
+{
+ return __builtin_ia32_vtestnzcpd256 ((__v4df)__M, (__v4df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testz_ps (__m256 __M, __m256 __V)
+{
+ return __builtin_ia32_vtestzps256 ((__v8sf)__M, (__v8sf)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testc_ps (__m256 __M, __m256 __V)
+{
+ return __builtin_ia32_vtestcps256 ((__v8sf)__M, (__v8sf)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testnzc_ps (__m256 __M, __m256 __V)
+{
+ return __builtin_ia32_vtestnzcps256 ((__v8sf)__M, (__v8sf)__V);
+}
+/* Integer test wrappers: both operands are viewed as __v4di and the int result of ptestz256/ptestc256/ptestnzc256 is returned.  */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testz_si256 (__m256i __M, __m256i __V)
+{
+ return __builtin_ia32_ptestz256 ((__v4di)__M, (__v4di)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testc_si256 (__m256i __M, __m256i __V)
+{
+ return __builtin_ia32_ptestc256 ((__v4di)__M, (__v4di)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testnzc_si256 (__m256i __M, __m256i __V)
+{
+ return __builtin_ia32_ptestnzc256 ((__v4di)__M, (__v4di)__V);
+}
+/* Movemask wrappers: return the int produced by the movmskpd256 / movmskps256 builtin for the re-typed __A.  */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movemask_pd (__m256d __A)
+{
+ return __builtin_ia32_movmskpd256 ((__v4df)__A);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movemask_ps (__m256 __A)
+{
+ return __builtin_ia32_movmskps256 ((__v8sf)__A);
+}
+/* Return a vector with unspecified contents: __Y is initialised from itself.  NOTE(review): self-init looks deliberate -- confirm.  */
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_undefined_pd (void)
+{
+ __m256d __Y = __Y;
+ return __Y;
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_undefined_ps (void)
+{
+ __m256 __Y = __Y;
+ return __Y;
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_undefined_si256 (void)
+{
+ __m256i __Y = __Y;
+ return __Y;
+}
+/* Return an all-zero vector, written as a literal with every element 0 (the si256 form goes through __v4di).  */
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setzero_pd (void)
+{
+ return __extension__ (__m256d){ 0.0, 0.0, 0.0, 0.0 };
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setzero_ps (void)
+{
+ return __extension__ (__m256){ 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0 };
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setzero_si256 (void)
+{
+ return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 };
+}
+
+/* Create the vector [A B C D]: __D is stored as element 0 and __A as element 3. */
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set_pd (double __A, double __B, double __C, double __D)
+{
+ return __extension__ (__m256d){ __D, __C, __B, __A };
+}
+
+/* Create the vector [A B C D E F G H]: __H is stored as element 0 and __A as element 7. */
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set_ps (float __A, float __B, float __C, float __D,
+ float __E, float __F, float __G, float __H)
+{
+ return __extension__ (__m256){ __H, __G, __F, __E,
+ __D, __C, __B, __A };
+}
+
+/* Create the vector [A B C D E F G H]: __H is stored as element 0 and __A as element 7. */
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set_epi32 (int __A, int __B, int __C, int __D,
+ int __E, int __F, int __G, int __H)
+{
+ return __extension__ (__m256i)(__v8si){ __H, __G, __F, __E,
+ __D, __C, __B, __A };
+}
+/* Build a 16 x short vector; arguments run from element 15 (__q15, first) down to element 0 (__q00, last).  */
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set_epi16 (short __q15, short __q14, short __q13, short __q12,
+ short __q11, short __q10, short __q09, short __q08,
+ short __q07, short __q06, short __q05, short __q04,
+ short __q03, short __q02, short __q01, short __q00)
+{
+ return __extension__ (__m256i)(__v16hi){
+ __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
+ __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
+ };
+}
+/* Build a 32 x char vector; arguments run from element 31 (__q31, first) down to element 0 (__q00, last).  */
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set_epi8 (char __q31, char __q30, char __q29, char __q28,
+ char __q27, char __q26, char __q25, char __q24,
+ char __q23, char __q22, char __q21, char __q20,
+ char __q19, char __q18, char __q17, char __q16,
+ char __q15, char __q14, char __q13, char __q12,
+ char __q11, char __q10, char __q09, char __q08,
+ char __q07, char __q06, char __q05, char __q04,
+ char __q03, char __q02, char __q01, char __q00)
+{
+ return __extension__ (__m256i)(__v32qi){
+ __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
+ __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
+ __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
+ __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31
+ };
+}
+/* Build a 4 x long long vector; __D is stored as element 0 and __A as element 3.  */
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set_epi64x (long long __A, long long __B, long long __C,
+ long long __D)
+{
+ return __extension__ (__m256i)(__v4di){ __D, __C, __B, __A };
+}
+
+/* Create a vector with all four double elements equal to __A. */
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_pd (double __A)
+{
+ return __extension__ (__m256d){ __A, __A, __A, __A };
+}
+
+/* Create a vector with all eight float elements equal to __A. */
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_ps (float __A)
+{
+ return __extension__ (__m256){ __A, __A, __A, __A,
+ __A, __A, __A, __A };
+}
+
+/* Create a vector with all eight int elements equal to __A. */
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_epi32 (int __A)
+{
+ return __extension__ (__m256i)(__v8si){ __A, __A, __A, __A,
+ __A, __A, __A, __A };
+}
+/* Create a vector with all sixteen short elements equal to __A, written directly as a __v16hi literal.  */
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_epi16 (short __A)
+{
+ return __extension__ (__m256i)(__v16hi){ __A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A };
+}
+/* Create a vector with all thirty-two char elements equal to __A, written directly as a __v32qi literal.  */
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_epi8 (char __A)
+{
+ return __extension__ (__m256i)(__v32qi){
+ __A, __A, __A, __A, __A, __A, __A, __A, __A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A, __A, __A, __A, __A, __A, __A, __A, __A
+ };
+}
+/* Create a vector with all four long long elements equal to __A.  */
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_epi64x (long long __A)
+{
+ return __extension__ (__m256i)(__v4di){ __A, __A, __A, __A };
+}
+
+/* Create vectors whose elements follow argument order: the first argument
+ becomes element 0, the reverse of the _mm256_set_XXX functions. */
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setr_pd (double __A, double __B, double __C, double __D)
+{
+ return __extension__ (__m256d){ __A, __B, __C, __D };
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setr_ps (float __A, float __B, float __C, float __D,
+ float __E, float __F, float __G, float __H)
+{
+ return __extension__ (__m256){ __A, __B, __C, __D, __E, __F, __G, __H };
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setr_epi32 (int __A, int __B, int __C, int __D,
+ int __E, int __F, int __G, int __H)
+{
+ return __extension__ (__m256i)(__v8si){ __A, __B, __C, __D, __E, __F, __G, __H };
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setr_epi16 (short __q15, short __q14, short __q13, short __q12,
+ short __q11, short __q10, short __q09, short __q08,
+ short __q07, short __q06, short __q05, short __q04,
+ short __q03, short __q02, short __q01, short __q00)
+{
+ return __extension__ (__m256i)(__v16hi){
+ __q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08,
+ __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00
+ };
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setr_epi8 (char __q31, char __q30, char __q29, char __q28,
+ char __q27, char __q26, char __q25, char __q24,
+ char __q23, char __q22, char __q21, char __q20,
+ char __q19, char __q18, char __q17, char __q16,
+ char __q15, char __q14, char __q13, char __q12,
+ char __q11, char __q10, char __q09, char __q08,
+ char __q07, char __q06, char __q05, char __q04,
+ char __q03, char __q02, char __q01, char __q00)
+{
+ /* The parameters are named highest-first (__q31 is the first argument), so
+ listing them in declaration order puts the first argument in element 0. */
+ return __extension__ (__m256i)(__v32qi){
+ __q31, __q30, __q29, __q28, __q27, __q26, __q25, __q24,
+ __q23, __q22, __q21, __q20, __q19, __q18, __q17, __q16,
+ __q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08,
+ __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00
+ };
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setr_epi64x (long long __A, long long __B, long long __C,
+ long long __D)
+{
+ return __extension__ (__m256i)(__v4di){ __A, __B, __C, __D };
+}
+
+/* Casts between the SP, DP and INT 256-bit vector types. These only change
+ the type of __A through a vector cast; no element values are converted. */
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castpd_ps (__m256d __A)
+{
+ return (__m256) __A;
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castpd_si256 (__m256d __A)
+{
+ return (__m256i) __A;
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castps_pd (__m256 __A)
+{
+ return (__m256d) __A;
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castps_si256(__m256 __A)
+{
+ return (__m256i) __A;
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castsi256_ps (__m256i __A)
+{
+ return (__m256) __A;
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castsi256_pd (__m256i __A)
+{
+ return (__m256d) __A;
+}
+/* 256-bit to 128-bit casts, done through the pd_pd256 / ps_ps256 / si_si256 builtins rather than a plain C cast.  */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castpd256_pd128 (__m256d __A)
+{
+ return (__m128d) __builtin_ia32_pd_pd256 ((__v4df)__A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castps256_ps128 (__m256 __A)
+{
+ return (__m128) __builtin_ia32_ps_ps256 ((__v8sf)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castsi256_si128 (__m256i __A)
+{
+ return (__m128i) __builtin_ia32_si_si256 ((__v8si)__A);
+}
+
+/* When a cast is done from a 128-bit to a 256-bit type, the low 128 bits
+ of the 256-bit result contain the source parameter value and the upper
+ 128 bits of the result are undefined. These intrinsics should not
+ generate any extra moves. */
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castpd128_pd256 (__m128d __A)
+{
+ return (__m256d) __builtin_ia32_pd256_pd ((__v2df)__A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castps128_ps256 (__m128 __A)
+{
+ return (__m256) __builtin_ia32_ps256_ps ((__v4sf)__A);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castsi128_si256 (__m128i __A)
+{
+ return (__m256i) __builtin_ia32_si256_si ((__v4si)__A);
+}
+
+#ifdef __DISABLE_AVX__
+#undef __DISABLE_AVX__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX__ */
+
+#endif /* _AVXINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/bmi2intrin.h b/lib/gcc/x86_64-linux-android/4.9/include/bmi2intrin.h
new file mode 100644
index 0000000..ff96296
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/bmi2intrin.h
@@ -0,0 +1,109 @@
+/* Copyright (C) 2011-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED
+# error "Never use <bmi2intrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef _BMI2INTRIN_H_INCLUDED
+#define _BMI2INTRIN_H_INCLUDED
+
+#ifndef __BMI2__
+#pragma GCC push_options
+#pragma GCC target("bmi2")
+#define __DISABLE_BMI2__
+#endif /* __BMI2__ */
+/* 32-bit BMI2 wrappers: each passes (__X, __Y) unchanged to the bzhi_si, pdep_si or pext_si builtin and returns its result.  */
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_bzhi_u32 (unsigned int __X, unsigned int __Y)
+{
+ return __builtin_ia32_bzhi_si (__X, __Y);
+}
+
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_pdep_u32 (unsigned int __X, unsigned int __Y)
+{
+ return __builtin_ia32_pdep_si (__X, __Y);
+}
+
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_pext_u32 (unsigned int __X, unsigned int __Y)
+{
+ return __builtin_ia32_pext_si (__X, __Y);
+}
+
+#ifdef __x86_64__
+/* 64-bit BMI2 wrappers (only under __x86_64__): each passes (__X, __Y) unchanged to the bzhi_di, pdep_di or pext_di builtin.  */
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_bzhi_u64 (unsigned long long __X, unsigned long long __Y)
+{
+ return __builtin_ia32_bzhi_di (__X, __Y);
+}
+
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_pdep_u64 (unsigned long long __X, unsigned long long __Y)
+{
+ return __builtin_ia32_pdep_di (__X, __Y);
+}
+
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_pext_u64 (unsigned long long __X, unsigned long long __Y)
+{
+ return __builtin_ia32_pext_di (__X, __Y);
+}
+/* Full 64x64 -> 128-bit unsigned multiply: the high 64 bits are stored through __P, the low 64 bits are returned.  */
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mulx_u64 (unsigned long long __X, unsigned long long __Y,
+ unsigned long long *__P)
+{
+ const unsigned __int128 __wide = __X * (unsigned __int128) __Y;
+ *__P = (unsigned long long) (__wide >> 64);
+ return (unsigned long long) (__wide & 0xffffffffffffffffULL);
+}
+
+#else /* !__x86_64__ */
+/* Full 32x32 -> 64-bit unsigned multiply: the high 32 bits are stored through __P, the low 32 bits are returned.  */
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P)
+{
+ const unsigned long long __wide = __X * (unsigned long long) __Y;
+ *__P = (unsigned int) (__wide >> 32);
+ return (unsigned int) (__wide & 0xffffffffULL);
+}
+
+#endif /* !__x86_64__ */
+
+#ifdef __DISABLE_BMI2__
+#undef __DISABLE_BMI2__
+#pragma GCC pop_options
+#endif /* __DISABLE_BMI2__ */
+
+#endif /* _BMI2INTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/bmiintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/bmiintrin.h
new file mode 100644
index 0000000..b2d7c60
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/bmiintrin.h
@@ -0,0 +1,184 @@
+/* Copyright (C) 2010-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED
+# error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef _BMIINTRIN_H_INCLUDED
+#define _BMIINTRIN_H_INCLUDED
+
+#ifndef __BMI__
+#pragma GCC push_options
+#pragma GCC target("bmi")
+#define __DISABLE_BMI__
+#endif /* __BMI__ */
+/* Returns __builtin_ctzs (__X) unchanged.  NOTE(review): the result for __X == 0 depends on that builtin -- confirm it yields 16.  */
+extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__tzcnt_u16 (unsigned short __X)
+{
+ return __builtin_ctzs (__X);
+}
+/* AND-NOT: returns the bits of __Y that are not set in __X.  */
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__andn_u32 (unsigned int __X, unsigned int __Y)
+{
+ return __Y & ~__X;
+}
+/* Passes __X and the caller-built control word __Y straight to __builtin_ia32_bextr_u32.  */
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__bextr_u32 (unsigned int __X, unsigned int __Y)
+{
+ return __builtin_ia32_bextr_u32 (__X, __Y);
+}
+/* Packs the low 8 bits of __Y into bits 0-7 and the low 8 bits of __Z into bits 8-15 of the control word given to the bextr builtin.  */
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_bextr_u32 (unsigned int __X, unsigned int __Y, unsigned __Z)
+{
+ return __builtin_ia32_bextr_u32 (__X, (((__Z & 0xff) << 8) | (__Y & 0xff)));
+}
+/* 32-bit lowest-set-bit helpers: blsi = X & -X, blsmsk = X ^ (X - 1), blsr = X & (X - 1); each '_' alias computes the same value.  */
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsi_u32 (unsigned int __X)
+{
+ return -__X & __X;
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsi_u32 (unsigned int __X)
+{
+ return -__X & __X;
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsmsk_u32 (unsigned int __X)
+{
+ return (__X - 1) ^ __X;
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsmsk_u32 (unsigned int __X)
+{
+ return (__X - 1) ^ __X;
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsr_u32 (unsigned int __X)
+{
+ return (__X - 1) & __X;
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsr_u32 (unsigned int __X)
+{
+ return (__X - 1) & __X;
+}
+/* Count trailing zero bits of __X.  __builtin_ctz is undefined for 0, so zero is handled explicitly and yields 32.  */
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__tzcnt_u32 (unsigned int __X)
+{
+ return __X ? (unsigned int) __builtin_ctz (__X) : 32u;
+}
+/* Count trailing zero bits of __X.  __builtin_ctz is undefined for 0, so zero is handled explicitly and yields 32.  */
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_tzcnt_u32 (unsigned int __X)
+{
+ return __X ? (unsigned int) __builtin_ctz (__X) : 32u;
+}
+
+
+#ifdef __x86_64__
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__andn_u64 (unsigned long long __X, unsigned long long __Y)
+{
+ return __Y & ~__X; /* AND-NOT: the bits of __Y that are not set in __X.  */
+}
+/* Passes __X and the caller-built control word __Y straight to __builtin_ia32_bextr_u64.  */
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__bextr_u64 (unsigned long long __X, unsigned long long __Y)
+{
+ return __builtin_ia32_bextr_u64 (__X, __Y);
+}
+/* Packs the low 8 bits of __Y into bits 0-7 and the low 8 bits of __Z into bits 8-15 of the control word given to the bextr builtin.  */
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_bextr_u64 (unsigned long long __X, unsigned int __Y, unsigned int __Z)
+{
+ return __builtin_ia32_bextr_u64 (__X, (((__Z & 0xff) << 8) | (__Y & 0xff)));
+}
+/* 64-bit lowest-set-bit helpers: blsi = X & -X, blsmsk = X ^ (X - 1), blsr = X & (X - 1); each '_' alias computes the same value.  */
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsi_u64 (unsigned long long __X)
+{
+ return -__X & __X;
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsi_u64 (unsigned long long __X)
+{
+ return -__X & __X;
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsmsk_u64 (unsigned long long __X)
+{
+ return (__X - 1) ^ __X;
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsmsk_u64 (unsigned long long __X)
+{
+ return (__X - 1) ^ __X;
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsr_u64 (unsigned long long __X)
+{
+ return (__X - 1) & __X;
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsr_u64 (unsigned long long __X)
+{
+ return (__X - 1) & __X;
+}
+/* Count trailing zero bits of __X.  __builtin_ctzll is undefined for 0, so zero is handled explicitly and yields 64.  */
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__tzcnt_u64 (unsigned long long __X)
+{
+ return __X ? (unsigned long long) __builtin_ctzll (__X) : 64ULL;
+}
+/* Count trailing zero bits of __X.  __builtin_ctzll is undefined for 0, so zero is handled explicitly and yields 64.  */
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_tzcnt_u64 (unsigned long long __X)
+{
+ return __X ? (unsigned long long) __builtin_ctzll (__X) : 64ULL;
+}
+
+#endif /* __x86_64__ */
+
+#ifdef __DISABLE_BMI__
+#undef __DISABLE_BMI__
+#pragma GCC pop_options
+#endif /* __DISABLE_BMI__ */
+
+#endif /* _BMIINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/bmmintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/bmmintrin.h
new file mode 100644
index 0000000..24cf26e
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/bmmintrin.h
@@ -0,0 +1,29 @@
+/* Copyright (C) 2007-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _BMMINTRIN_H_INCLUDED
+#define _BMMINTRIN_H_INCLUDED
+
+# error "SSE5 instruction set removed from compiler"
+
+#endif /* _BMMINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/cpuid.h b/lib/gcc/x86_64-linux-android/4.9/include/cpuid.h
new file mode 100644
index 0000000..8c323ae
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/cpuid.h
@@ -0,0 +1,277 @@
+/*
+ * Copyright (C) 2007-2014 Free Software Foundation, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+/* %ecx */
+#define bit_SSE3 (1 << 0)
+#define bit_PCLMUL (1 << 1)
+#define bit_LZCNT (1 << 5)
+#define bit_SSSE3 (1 << 9)
+#define bit_FMA (1 << 12)
+#define bit_CMPXCHG16B (1 << 13)
+#define bit_SSE4_1 (1 << 19)
+#define bit_SSE4_2 (1 << 20)
+#define bit_MOVBE (1 << 22)
+#define bit_POPCNT (1 << 23)
+#define bit_AES (1 << 25)
+#define bit_XSAVE (1 << 26)
+#define bit_OSXSAVE (1 << 27)
+#define bit_AVX (1 << 28)
+#define bit_F16C (1 << 29)
+#define bit_RDRND (1 << 30)
+
+/* %edx */
+#define bit_CMPXCHG8B (1 << 8)
+#define bit_CMOV (1 << 15)
+#define bit_MMX (1 << 23)
+#define bit_FXSAVE (1 << 24)
+#define bit_SSE (1 << 25)
+#define bit_SSE2 (1 << 26)
+
+/* Extended Features */
+/* %ecx */
+#define bit_LAHF_LM (1 << 0)
+#define bit_ABM (1 << 5)
+#define bit_SSE4a (1 << 6)
+#define bit_PRFCHW (1 << 8)
+#define bit_XOP (1 << 11)
+#define bit_LWP (1 << 15)
+#define bit_FMA4 (1 << 16)
+#define bit_TBM (1 << 21)
+
+/* %edx */
+#define bit_MMXEXT (1 << 22)
+#define bit_LM (1 << 29)
+#define bit_3DNOWP (1 << 30)
+#define bit_3DNOW (1 << 31)
+
+/* Extended Features (%eax == 7) */
+/* %ebx */
+#define bit_FSGSBASE (1 << 0)
+#define bit_BMI (1 << 3)
+#define bit_HLE (1 << 4)
+#define bit_AVX2 (1 << 5)
+#define bit_BMI2 (1 << 8)
+#define bit_RTM (1 << 11)
+#define bit_AVX512F (1 << 16)
+#define bit_RDSEED (1 << 18)
+#define bit_ADX (1 << 19)
+#define bit_AVX512PF (1 << 26)
+#define bit_AVX512ER (1 << 27)
+#define bit_AVX512CD (1 << 28)
+#define bit_SHA (1 << 29)
+
+/* %ecx */
+#define bit_PREFETCHWT1 (1 << 0)
+
+/* Extended State Enumeration Sub-leaf (%eax == 13, %ecx == 1) */
+#define bit_XSAVEOPT (1 << 0)
+
+/* Signatures for different CPU implementations as returned in uses
+ of cpuid with level 0. */
+#define signature_AMD_ebx 0x68747541
+#define signature_AMD_ecx 0x444d4163
+#define signature_AMD_edx 0x69746e65
+
+#define signature_CENTAUR_ebx 0x746e6543
+#define signature_CENTAUR_ecx 0x736c7561
+#define signature_CENTAUR_edx 0x48727561
+
+#define signature_CYRIX_ebx 0x69727943
+#define signature_CYRIX_ecx 0x64616574
+#define signature_CYRIX_edx 0x736e4978
+
+#define signature_INTEL_ebx 0x756e6547
+#define signature_INTEL_ecx 0x6c65746e
+#define signature_INTEL_edx 0x49656e69
+
+#define signature_TM1_ebx 0x6e617254
+#define signature_TM1_ecx 0x55504361
+#define signature_TM1_edx 0x74656d73
+
+#define signature_TM2_ebx 0x756e6547
+#define signature_TM2_ecx 0x3638784d
+#define signature_TM2_edx 0x54656e69
+
+#define signature_NSC_ebx 0x646f6547
+#define signature_NSC_ecx 0x43534e20
+#define signature_NSC_edx 0x79622065
+
+#define signature_NEXGEN_ebx 0x4778654e
+#define signature_NEXGEN_ecx 0x6e657669
+#define signature_NEXGEN_edx 0x72446e65
+
+#define signature_RISE_ebx 0x65736952
+#define signature_RISE_ecx 0x65736952
+#define signature_RISE_edx 0x65736952
+
+#define signature_SIS_ebx 0x20536953
+#define signature_SIS_ecx 0x20536953
+#define signature_SIS_edx 0x20536953
+
+#define signature_UMC_ebx 0x20434d55
+#define signature_UMC_ecx 0x20434d55
+#define signature_UMC_edx 0x20434d55
+
+#define signature_VIA_ebx 0x20414956
+#define signature_VIA_ecx 0x20414956
+#define signature_VIA_edx 0x20414956
+
+#define signature_VORTEX_ebx 0x74726f56
+#define signature_VORTEX_ecx 0x436f5320
+#define signature_VORTEX_edx 0x36387865
+
+#if defined(__i386__) && defined(__PIC__)
+/* %ebx may be the PIC register: swap it with operand 1 (b) around cpuid so b receives cpuid's %ebx and %ebx itself is preserved. */
+#if __GNUC__ >= 3
+#define __cpuid(level, a, b, c, d) \
+ __asm__ ("xchg{l}\t{%%}ebx, %k1\n\t" \
+ "cpuid\n\t" \
+ "xchg{l}\t{%%}ebx, %k1\n\t" \
+ : "=a" (a), "=&r" (b), "=c" (c), "=d" (d) \
+ : "0" (level))
+
+#define __cpuid_count(level, count, a, b, c, d) \
+ __asm__ ("xchg{l}\t{%%}ebx, %k1\n\t" \
+ "cpuid\n\t" \
+ "xchg{l}\t{%%}ebx, %k1\n\t" \
+ : "=a" (a), "=&r" (b), "=c" (c), "=d" (d) \
+ : "0" (level), "2" (count))
+#else /* __GNUC__ < 3 */
+/* Host GCCs older than 3.0 supported neither Intel asm syntax
+ nor dialect alternatives in i386 code, so plain AT&T mnemonics are used. */
+#define __cpuid(level, a, b, c, d) \
+ __asm__ ("xchgl\t%%ebx, %k1\n\t" \
+ "cpuid\n\t" \
+ "xchgl\t%%ebx, %k1\n\t" \
+ : "=a" (a), "=&r" (b), "=c" (c), "=d" (d) \
+ : "0" (level))
+
+#define __cpuid_count(level, count, a, b, c, d) \
+ __asm__ ("xchgl\t%%ebx, %k1\n\t" \
+ "cpuid\n\t" \
+ "xchgl\t%%ebx, %k1\n\t" \
+ : "=a" (a), "=&r" (b), "=c" (c), "=d" (d) \
+ : "0" (level), "2" (count))
+#endif /* __GNUC__ >= 3 */
+#elif defined(__x86_64__) && (defined(__code_model_medium__) || defined(__code_model_large__)) && defined(__PIC__)
+/* %rbx may be the PIC register: same swap as above, done on the full 64-bit register (%q1). */
+#define __cpuid(level, a, b, c, d) \
+ __asm__ ("xchg{q}\t{%%}rbx, %q1\n\t" \
+ "cpuid\n\t" \
+ "xchg{q}\t{%%}rbx, %q1\n\t" \
+ : "=a" (a), "=&r" (b), "=c" (c), "=d" (d) \
+ : "0" (level))
+
+#define __cpuid_count(level, count, a, b, c, d) \
+ __asm__ ("xchg{q}\t{%%}rbx, %q1\n\t" \
+ "cpuid\n\t" \
+ "xchg{q}\t{%%}rbx, %q1\n\t" \
+ : "=a" (a), "=&r" (b), "=c" (c), "=d" (d) \
+ : "0" (level), "2" (count))
+#else /* no swap needed: cpuid's %ebx output is bound directly with "=b" */
+#define __cpuid(level, a, b, c, d) \
+ __asm__ ("cpuid\n\t" \
+ : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
+ : "0" (level))
+
+#define __cpuid_count(level, count, a, b, c, d) \
+ __asm__ ("cpuid\n\t" \
+ : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
+ : "0" (level), "2" (count))
+#endif /* PIC register handling */
+
+/* Return highest supported input value for cpuid instruction. ext can
+ be either 0x0 or 0x80000000 to return highest supported value for
+ basic or extended cpuid information. Function returns 0 if cpuid
+ is not supported or whatever cpuid returns in eax register. If sig
+ pointer is non-null, then first four bytes of the signature
+ (as found in ebx register) are returned in location pointed by sig. */
+
+static __inline unsigned int
+__get_cpuid_max (unsigned int __ext, unsigned int *__sig)
+{
+ unsigned int __eax, __ebx, __ecx, __edx;
+
+#ifndef __x86_64__
+ /* See if we can use cpuid (can EFLAGS bit 21, 0x00200000, be toggled?). On AMD64 we always can. */
+#if __GNUC__ >= 3
+ __asm__ ("pushf{l|d}\n\t" /* save original EFLAGS for the final popf */
+ "pushf{l|d}\n\t"
+ "pop{l}\t%0\n\t" /* %0 = current EFLAGS */
+ "mov{l}\t{%0, %1|%1, %0}\n\t" /* %1 = unmodified copy */
+ "xor{l}\t{%2, %0|%0, %2}\n\t" /* flip bit 21 in %0 */
+ "push{l}\t%0\n\t"
+ "popf{l|d}\n\t" /* try to install the modified flags */
+ "pushf{l|d}\n\t"
+ "pop{l}\t%0\n\t" /* %0 = EFLAGS as actually stored */
+ "popf{l|d}\n\t" /* restore original EFLAGS */
+ : "=&r" (__eax), "=&r" (__ebx)
+ : "i" (0x00200000));
+#else /* __GNUC__ < 3 */
+/* Host GCCs older than 3.0 supported neither Intel asm syntax
+ nor dialect alternatives in i386 code; same sequence in plain AT&T form. */
+ __asm__ ("pushfl\n\t"
+ "pushfl\n\t"
+ "popl\t%0\n\t"
+ "movl\t%0, %1\n\t"
+ "xorl\t%2, %0\n\t"
+ "pushl\t%0\n\t"
+ "popfl\n\t"
+ "pushfl\n\t"
+ "popl\t%0\n\t"
+ "popfl\n\t"
+ : "=&r" (__eax), "=&r" (__ebx)
+ : "i" (0x00200000));
+#endif /* __GNUC__ >= 3 */
+
+ if (!((__eax ^ __ebx) & 0x00200000)) /* bit 21 did not change: cpuid unavailable */
+ return 0;
+#endif /* !__x86_64__ */
+
+ /* Host supports cpuid. Return highest supported cpuid input value. */
+ __cpuid (__ext, __eax, __ebx, __ecx, __edx);
+
+ if (__sig) /* optional output: raw %ebx value from the query above */
+ *__sig = __ebx;
+
+ return __eax;
+}
+
+/* Return cpuid data for requested cpuid level, as found in returned
+ eax, ebx, ecx and edx registers. Returns 1 with all four outputs
+ written, or 0 (outputs untouched) when cpuid is unavailable or the
+ level is above the maximum reported. All pointers must be non-null. */
+
+static __inline int
+__get_cpuid (unsigned int __level,
+ unsigned int *__eax, unsigned int *__ebx,
+ unsigned int *__ecx, unsigned int *__edx)
+{
+ unsigned int __ext = __level & 0x80000000; /* bit 31 selects basic vs. extended range */
+ unsigned int __maxlevel = __get_cpuid_max (__ext, 0);
+ if (__maxlevel == 0 || __maxlevel < __level) /* 0: cpuid unavailable, so do not execute it even for level 0 */
+ return 0;
+
+ __cpuid (__level, *__eax, *__ebx, *__ecx, *__edx);
+ return 1;
+}
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/cross-stdarg.h b/lib/gcc/x86_64-linux-android/4.9/include/cross-stdarg.h
new file mode 100644
index 0000000..d16cef8
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/cross-stdarg.h
@@ -0,0 +1,72 @@
+/* Copyright (C) 2002-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef __CROSS_STDARG_H_INCLUDED
+#define __CROSS_STDARG_H_INCLUDED
+
+/* Make sure that for non x64 targets cross builtins are defined. */
+#ifndef __x86_64__
+/* Call abi ms_abi. */
+#define __builtin_ms_va_list __builtin_va_list
+#define __builtin_ms_va_copy __builtin_va_copy
+#define __builtin_ms_va_start __builtin_va_start
+#define __builtin_ms_va_end __builtin_va_end
+
+/* Call abi sysv_abi. */
+#define __builtin_sysv_va_list __builtin_va_list
+#define __builtin_sysv_va_copy __builtin_va_copy
+#define __builtin_sysv_va_start __builtin_va_start
+#define __builtin_sysv_va_end __builtin_va_end
+#endif
+
+#define __ms_va_copy(__d,__s) __builtin_ms_va_copy(__d,__s)
+#define __ms_va_start(__v,__l) __builtin_ms_va_start(__v,__l)
+#define __ms_va_arg(__v,__l) __builtin_va_arg(__v,__l)
+#define __ms_va_end(__v) __builtin_ms_va_end(__v)
+
+#define __sysv_va_copy(__d,__s) __builtin_sysv_va_copy(__d,__s)
+#define __sysv_va_start(__v,__l) __builtin_sysv_va_start(__v,__l)
+#define __sysv_va_arg(__v,__l) __builtin_va_arg(__v,__l)
+#define __sysv_va_end(__v) __builtin_sysv_va_end(__v)
+
+#ifndef __GNUC_SYSV_VA_LIST
+#define __GNUC_SYSV_VA_LIST
+ typedef __builtin_sysv_va_list __gnuc_sysv_va_list;
+#endif
+
+#ifndef _SYSV_VA_LIST_DEFINED
+#define _SYSV_VA_LIST_DEFINED
+ typedef __gnuc_sysv_va_list sysv_va_list;
+#endif
+
+#ifndef __GNUC_MS_VA_LIST
+#define __GNUC_MS_VA_LIST
+ typedef __builtin_ms_va_list __gnuc_ms_va_list;
+#endif
+
+#ifndef _MS_VA_LIST_DEFINED
+#define _MS_VA_LIST_DEFINED
+ typedef __gnuc_ms_va_list ms_va_list;
+#endif
+
+#endif /* __CROSS_STDARG_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/emmintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/emmintrin.h
new file mode 100644
index 0000000..a2bdf0e
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/emmintrin.h
@@ -0,0 +1,1541 @@
+/* Copyright (C) 2003-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 9.0. */
+
+#ifndef _EMMINTRIN_H_INCLUDED
+#define _EMMINTRIN_H_INCLUDED
+
+/* We need definitions from the SSE header files. */
+#include <xmmintrin.h>
+
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_SSE2__
+#endif /* __SSE2__ */
+
+/* SSE2 */
+typedef double __v2df __attribute__ ((__vector_size__ (16)));
+typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+typedef int __v4si __attribute__ ((__vector_size__ (16)));
+typedef short __v8hi __attribute__ ((__vector_size__ (16)));
+typedef char __v16qi __attribute__ ((__vector_size__ (16)));
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
+
+/* Create a selector for use with the SHUFPD instruction. */
+#define _MM_SHUFFLE2(fp1,fp0) \
+ (((fp1) << 1) | (fp0))
+
+/* Create a vector with element 0 as F and the rest zero. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_sd (double __F)
+{
+ return __extension__ (__m128d){ __F, 0.0 };
+}
+
+/* Create a vector with both elements equal to F. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_pd (double __F)
+{
+ return __extension__ (__m128d){ __F, __F };
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_pd1 (double __F) /* same result as _mm_set1_pd: both elements equal to __F */
+{
+ return __extension__ (__m128d){ __F, __F };
+}
+
+/* Create a vector with the lower value X and upper value W. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_pd (double __W, double __X)
+{
+ return __extension__ (__m128d){ __X, __W };
+}
+
+/* Create a vector with the lower value W and upper value X. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_pd (double __W, double __X)
+{
+ return __extension__ (__m128d){ __W, __X };
+}
+
+/* Create an undefined vector: the returned contents are unspecified and must not be relied upon. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_undefined_pd (void)
+{
+ __m128d __Y = __Y; /* deliberate self-initialisation; presumably written this way to avoid an uninitialized-use warning -- confirm */
+ return __Y;
+}
+
+/* Create a vector of zeros. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setzero_pd (void)
+{
+ return __extension__ (__m128d){ 0.0, 0.0 };
+}
+
+/* Sets the low DPFP value of A from the low value of B. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_move_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
+}
+
+/* Load two DPFP values from P. The address must be 16-byte aligned. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_pd (double const *__P)
+{
+ return *(__m128d *)__P;
+}
+
+/* Load two DPFP values from P. The address need not be 16-byte aligned. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadu_pd (double const *__P)
+{
+ return __builtin_ia32_loadupd (__P);
+}
+
+/* Create a vector with both elements equal to *P. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load1_pd (double const *__P)
+{
+ return _mm_set1_pd (*__P);
+}
+
+/* Create a vector with element 0 as *P and the rest zero. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_sd (double const *__P)
+{
+ return _mm_set_sd (*__P);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_pd1 (double const *__P) /* same result as _mm_load1_pd: both elements equal to *__P */
+{
+ return _mm_set1_pd (*__P);
+}
+
+/* Load two DPFP values in reverse order. The address must be aligned. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadr_pd (double const *__P)
+{
+ __m128d __tmp = _mm_load_pd (__P); /* aligned load in memory order */
+ return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1)); /* selector 1: swap the two elements of __tmp */
+}
+
+/* Store two DPFP values. The address must be 16-byte aligned. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_pd (double *__P, __m128d __A)
+{
+ *(__m128d *)__P = __A;
+}
+
+/* Store two DPFP values. The address need not be 16-byte aligned. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeu_pd (double *__P, __m128d __A)
+{
+ __builtin_ia32_storeupd (__P, __A);
+}
+
+/* Stores the lower DPFP value. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_sd (double *__P, __m128d __A)
+{
+ *__P = __builtin_ia32_vec_ext_v2df (__A, 0);
+}
+
+extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_f64 (__m128d __A)
+{
+ return __builtin_ia32_vec_ext_v2df (__A, 0);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storel_pd (double *__P, __m128d __A) /* same effect as _mm_store_sd: store element 0 of __A */
+{
+ *__P = __builtin_ia32_vec_ext_v2df (__A, 0);
+}
+
+/* Stores the upper DPFP value. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeh_pd (double *__P, __m128d __A)
+{
+ *__P = __builtin_ia32_vec_ext_v2df (__A, 1);
+}
+
+/* Store the lower DPFP value across two words.
+ The address must be 16-byte aligned. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store1_pd (double *__P, __m128d __A)
+{
+ _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0))); /* selector 0: element 0 of __A in both positions */
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_pd1 (double *__P, __m128d __A) /* same effect as _mm_store1_pd: element 0 of __A written to both slots */
+{
+ _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0)));
+}
+
+/* Store two DPFP values in reverse order. The address must be aligned. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storer_pd (double *__P, __m128d __A)
+{
+ _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1))); /* selector 1: elements of __A swapped before the aligned store */
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi128_si32 (__m128i __A)
+{
+ return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0);
+}
+
+#ifdef __x86_64__
+/* Intel intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi128_si64 (__m128i __A)
+{
+ return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0);
+}
+
+/* Microsoft intrinsic: returns the same value as _mm_cvtsi128_si64 (element 0 of __A as a 64-bit integer). */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi128_si64x (__m128i __A)
+{
+ return _mm_cvtsi128_si64 (__A);
+}
+#endif
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_divpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_pd (__m128d __A)
+{
+ return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
+}
+
+/* Return pair {sqrt (B[0]), A[1]}. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_sd (__m128d __A, __m128d __B)
+{
+ __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B); /* { B[0], A[1] } */
+ return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp); /* square root applied to the low element */
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_and_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_andnot_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_or_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_xor_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnlt_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnle_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpngt_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnge_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpord_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpunord_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_sd (__m128d __A, __m128d __B)
+{ /* A > B on the low elements is evaluated as B < A (operands swapped). */
+ return (__m128d) __builtin_ia32_movsd ((__v2df) __A, /* movsd: low element from the compare result, upper element from __A */
+ (__v2df)
+ __builtin_ia32_cmpltsd ((__v2df) __B,
+ (__v2df)
+ __A));
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_sd (__m128d __A, __m128d __B)
+{ /* A >= B on the low elements is evaluated as B <= A (operands swapped). */
+ return (__m128d) __builtin_ia32_movsd ((__v2df) __A, /* movsd: low element from the compare result, upper element from __A */
+ (__v2df)
+ __builtin_ia32_cmplesd ((__v2df) __B,
+ (__v2df)
+ __A));
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnlt_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnle_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpngt_sd (__m128d __A, __m128d __B)
+{ /* !(A > B) on the low elements is evaluated as !(B < A) (operands swapped). */
+ return (__m128d) __builtin_ia32_movsd ((__v2df) __A, /* movsd: low element from the compare result, upper element from __A */
+ (__v2df)
+ __builtin_ia32_cmpnltsd ((__v2df) __B,
+ (__v2df)
+ __A));
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnge_sd (__m128d __A, __m128d __B)
+{ /* !(A >= B) on the low elements is evaluated as !(B <= A) (operands swapped). */
+ return (__m128d) __builtin_ia32_movsd ((__v2df) __A, /* movsd: low element from the compare result, upper element from __A */
+ (__v2df)
+ __builtin_ia32_cmpnlesd ((__v2df) __B,
+ (__v2df)
+ __A));
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpord_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpunord_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comieq_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comilt_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comile_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comigt_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comige_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comineq_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomieq_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomilt_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomile_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomigt_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomige_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomineq_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
+}
+
+/* Create a vector of Qi, where i is the element number. */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_epi64x (long long __q1, long long __q0)
+{
+ return __extension__ (__m128i)(__v2di){ __q0, __q1 };
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_epi64 (__m64 __q1, __m64 __q0)
+{
+ return _mm_set_epi64x ((long long)__q1, (long long)__q0);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
+{
+ return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_epi16 (short __q7, short __q6, short __q5, short __q4,
+ short __q3, short __q2, short __q1, short __q0)
+{
+ return __extension__ (__m128i)(__v8hi){
+ __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 };
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_epi8 (char __q15, char __q14, char __q13, char __q12,
+ char __q11, char __q10, char __q09, char __q08,
+ char __q07, char __q06, char __q05, char __q04,
+ char __q03, char __q02, char __q01, char __q00)
+{
+ return __extension__ (__m128i)(__v16qi){
+ __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
+ __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
+ };
+}
+
+/* Set all of the elements of the vector to A. */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_epi64x (long long __A)
+{
+ return _mm_set_epi64x (__A, __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_epi64 (__m64 __A)
+{
+ return _mm_set_epi64 (__A, __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_epi32 (int __A)
+{
+ return _mm_set_epi32 (__A, __A, __A, __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_epi16 (short __A)
+{
+ return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_epi8 (char __A)
+{
+ return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A);
+}
+
+/* Create a vector of Qi, where i is the element number.
+ The parameter order is reversed from the _mm_set_epi* functions. */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_epi64 (__m64 __q0, __m64 __q1)
+{
+ return _mm_set_epi64 (__q1, __q0);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3)
+{
+ return _mm_set_epi32 (__q3, __q2, __q1, __q0);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3,
+ short __q4, short __q5, short __q6, short __q7)
+{
+ return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03,
+ char __q04, char __q05, char __q06, char __q07,
+ char __q08, char __q09, char __q10, char __q11,
+ char __q12, char __q13, char __q14, char __q15)
+{
+ return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08,
+ __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00);
+}
+
+/* Load a vector from *P: all 128 bits (load/loadu), or 64 bits into element 0 with the rest zero (loadl). */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_si128 (__m128i const *__P)
+{
+ return *__P;
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadu_si128 (__m128i const *__P)
+{
+ return (__m128i) __builtin_ia32_loaddqu ((char const *)__P);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadl_epi64 (__m128i const *__P)
+{
+ return _mm_set_epi64 ((__m64)0LL, *(__m64 *)__P);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_si128 (__m128i *__P, __m128i __B)
+{
+ *__P = __B;
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeu_si128 (__m128i *__P, __m128i __B)
+{
+ __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storel_epi64 (__m128i *__P, __m128i __B)
+{
+ *(long long *)__P = __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movepi64_pi64 (__m128i __B)
+{
+ return (__m64) __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movpi64_epi64 (__m64 __A)
+{
+ return _mm_set_epi64 ((__m64)0LL, __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_move_epi64 (__m128i __A)
+{
+ return (__m128i)__builtin_ia32_movq128 ((__v2di) __A);
+}
+
+/* Create an undefined vector. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_undefined_si128 (void)
+{
+ __m128i __Y = __Y;
+ return __Y;
+}
+
+/* Create a vector of zeros. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setzero_si128 (void)
+{
+ return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 };
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi32_pd (__m128i __A)
+{
+ return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi32_ps (__m128i __A)
+{
+ return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpd_epi32 (__m128d __A)
+{
+ return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpd_pi32 (__m128d __A)
+{
+ return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpd_ps (__m128d __A)
+{
+ return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttpd_epi32 (__m128d __A)
+{
+ return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttpd_pi32 (__m128d __A)
+{
+ return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpi32_pd (__m64 __A)
+{
+ return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_epi32 (__m128 __A)
+{
+ return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttps_epi32 (__m128 __A)
+{
+ return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_pd (__m128 __A)
+{
+ return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_si32 (__m128d __A)
+{
+ return __builtin_ia32_cvtsd2si ((__v2df) __A);
+}
+
+#ifdef __x86_64__
+/* Intel intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_si64 (__m128d __A)
+{
+ return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
+}
+
+/* Microsoft intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_si64x (__m128d __A)
+{
+ return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
+}
+#endif
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsd_si32 (__m128d __A)
+{
+ return __builtin_ia32_cvttsd2si ((__v2df) __A);
+}
+
+#ifdef __x86_64__
+/* Intel intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsd_si64 (__m128d __A)
+{
+ return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
+}
+
+/* Microsoft intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsd_si64x (__m128d __A)
+{
+ return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
+}
+#endif
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_ss (__m128 __A, __m128d __B)
+{
+ return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi32_sd (__m128d __A, int __B)
+{
+ return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
+}
+
+#ifdef __x86_64__
+/* Intel intrinsic. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64_sd (__m128d __A, long long __B)
+{
+ return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
+}
+
+/* Microsoft intrinsic. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64x_sd (__m128d __A, long long __B)
+{
+ return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
+}
+#endif
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_sd (__m128d __A, __m128 __B)
+{
+ return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask)
+{
+ return (__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, __mask);
+}
+#else
+#define _mm_shuffle_pd(A, B, N) \
+ ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(N)))
+#endif
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadh_pd (__m128d __A, double const *__B)
+{
+ return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadl_pd (__m128d __A, double const *__B)
+{
+ return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movemask_pd (__m128d __A)
+{
+ return __builtin_ia32_movmskpd ((__v2df)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_packs_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_packs_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_packus_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddd128 ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_adds_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_adds_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_adds_epu8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_adds_epu16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubd128 ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_subs_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_subs_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_subs_epu8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_subs_epu16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_madd_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulhi_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mullo_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pmullw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_su32 (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_epu32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_slli_epi16 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_slli_epi32 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_slli_epi64 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srai_epi16 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srai_epi32 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srli_si128 (__m128i __A, const int __N)
+{
+ return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_slli_si128 (__m128i __A, const int __N)
+{
+ return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
+}
+#else
+#define _mm_srli_si128(A, N) \
+ ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
+#define _mm_slli_si128(A, N) \
+ ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
+#endif
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srli_epi16 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srli_epi32 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srli_epi64 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sll_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sll_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sll_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sra_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sra_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srl_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srl_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srl_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_and_si128 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_andnot_si128 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_or_si128 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_xor_si128 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pxor128 ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__B, (__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__B, (__v8hi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__B, (__v4si)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extract_epi16 (__m128i const __A, int const __N)
+{
+ return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_epi16 (__m128i const __A, int const __D, int const __N)
+{
+ return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N);
+}
+#else
+#define _mm_extract_epi16(A, N) \
+ ((int) (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N)))
+#define _mm_insert_epi16(A, D, N) \
+ ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A), \
+ (int)(D), (int)(N)))
+#endif
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epu8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epu8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movemask_epi8 (__m128i __A)
+{
+ return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulhi_epu16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shufflehi_epi16 (__m128i __A, const int __mask)
+{
+ return (__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __mask);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shufflelo_epi16 (__m128i __A, const int __mask)
+{
+ return (__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __mask);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_epi32 (__m128i __A, const int __mask)
+{
+ return (__m128i)__builtin_ia32_pshufd ((__v4si)__A, __mask);
+}
+#else
+#define _mm_shufflehi_epi16(A, N) \
+ ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)(A), (int)(N)))
+#define _mm_shufflelo_epi16(A, N) \
+ ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)(A), (int)(N)))
+#define _mm_shuffle_epi32(A, N) \
+ ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)(A), (int)(N)))
+#endif
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
+{
+ __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_avg_epu8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_avg_epu16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sad_epu8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_si32 (int *__A, int __B)
+{
+ __builtin_ia32_movnti (__A, __B);
+}
+
+#ifdef __x86_64__
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_si64 (long long int *__A, long long int __B)
+{
+ __builtin_ia32_movnti64 (__A, __B);
+}
+#endif
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_si128 (__m128i *__A, __m128i __B)
+{
+ __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_pd (double *__A, __m128d __B)
+{
+ __builtin_ia32_movntpd (__A, (__v2df)__B);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_clflush (void const *__A)
+{
+ __builtin_ia32_clflush (__A);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_lfence (void)
+{
+ __builtin_ia32_lfence ();
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mfence (void)
+{
+ __builtin_ia32_mfence ();
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi32_si128 (int __A)
+{
+ return _mm_set_epi32 (0, 0, 0, __A);
+}
+
+#ifdef __x86_64__
+/* Intel intrinsic. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64_si128 (long long __A)
+{
+ return _mm_set_epi64x (0, __A);
+}
+
+/* Microsoft intrinsic. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64x_si128 (long long __A)
+{
+ return _mm_set_epi64x (0, __A);
+}
+#endif
+
+/* Casts between various SP, DP, INT vector types. Note that these do no
+ conversion of values, they just change the type. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castpd_ps(__m128d __A)
+{
+ return (__m128) __A;
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castpd_si128(__m128d __A)
+{
+ return (__m128i) __A;
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castps_pd(__m128 __A)
+{
+ return (__m128d) __A;
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castps_si128(__m128 __A)
+{
+ return (__m128i) __A;
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castsi128_ps(__m128i __A)
+{
+ return (__m128) __A;
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castsi128_pd(__m128i __A)
+{
+ return (__m128d) __A;
+}
+
+#ifdef __DISABLE_SSE2__
+#undef __DISABLE_SSE2__
+#pragma GCC pop_options
+#endif /* __DISABLE_SSE2__ */
+
+#endif /* _EMMINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/f16cintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/f16cintrin.h
new file mode 100644
index 0000000..1181f8b
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/f16cintrin.h
@@ -0,0 +1,98 @@
+/* Copyright (C) 2011-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Reject direct inclusion: one of the umbrella headers must already have
+   defined its include-guard macro.  The diagnostic names this header
+   (f16cintrin.h).  */
+#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED
+# error "Never use <f16cintrin.h> directly; include <x86intrin.h> or <immintrin.h> instead."
+#endif
+
+#ifndef _F16CINTRIN_H_INCLUDED
+#define _F16CINTRIN_H_INCLUDED
+
+#ifndef __F16C__
+#pragma GCC push_options
+#pragma GCC target("f16c")
+#define __DISABLE_F16C__
+#endif /* __F16C__ */
+
+/* Puts __S (cast to short) in element 0 of an otherwise zero __v8hi, feeds
+   that vector to __builtin_ia32_vcvtph2ps and returns element 0 of the
+   resulting __v4sf.  */
+extern __inline float
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_cvtsh_ss (unsigned short __S)
+{
+  const __v8hi __half = __extension__ (__v8hi){ (short) __S, 0, 0, 0, 0, 0, 0, 0 };
+  return __builtin_ia32_vec_ext_v4sf (__builtin_ia32_vcvtph2ps (__half), 0);
+}
+
+/* Views __A as __v8hi and returns the result of __builtin_ia32_vcvtph2ps
+   on it, typed as __m128.  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtph_ps (__m128i __A)
+{
+  const __v8hi __half = (__v8hi) __A;
+  return (__m128) __builtin_ia32_vcvtph2ps (__half);
+}
+
+/* Views __A as __v8hi and returns the result of
+   __builtin_ia32_vcvtph2ps256 on it, typed as __m256.  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtph_ps (__m128i __A)
+{
+  const __v8hi __half = (__v8hi) __A;
+  return (__m256) __builtin_ia32_vcvtph2ps256 (__half);
+}
+
+#ifdef __OPTIMIZE__
+/* Inline-function form, compiled only when __OPTIMIZE__ is defined.  Puts
+   __F in element 0 of an otherwise zero __v4sf, passes it together with
+   __I to __builtin_ia32_vcvtps2ph and returns element 0 of the resulting
+   __v8hi as unsigned short.  __I is handed to the builtin unmodified.  */
+extern __inline unsigned short
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_cvtss_sh (float __F, const int __I)
+{
+  const __v4sf __single = __extension__ (__v4sf){ __F, 0, 0, 0 };
+  const __v8hi __half = __builtin_ia32_vcvtps2ph (__single, __I);
+  return (unsigned short) __builtin_ia32_vec_ext_v8hi (__half, 0);
+}
+
+/* Inline-function form, compiled only when __OPTIMIZE__ is defined.  Views
+   __A as __v4sf and returns __builtin_ia32_vcvtps2ph (__A, __I) typed as
+   __m128i.  __I is handed to the builtin unmodified.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_ph (__m128 __A, const int __I)
+{
+  const __v4sf __single = (__v4sf) __A;
+  return (__m128i) __builtin_ia32_vcvtps2ph (__single, __I);
+}
+
+/* Inline-function form, compiled only when __OPTIMIZE__ is defined.  Views
+   __A as __v8sf and returns __builtin_ia32_vcvtps2ph256 (__A, __I) typed
+   as __m128i.  __I is handed to the builtin unmodified.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtps_ph (__m256 __A, const int __I)
+{
+  const __v8sf __single = (__v8sf) __A;
+  return (__m128i) __builtin_ia32_vcvtps2ph256 (__single, __I);
+}
+#else
+/* Macro form of _cvtss_sh, used when __OPTIMIZE__ is not defined.  It is a
+   GNU statement expression: __F goes into element 0 of an otherwise zero
+   __v4sf, that vector and __I are passed to __builtin_ia32_vcvtps2ph, and
+   the value of the expression is element 0 of the resulting __v8hi cast
+   to unsigned short.  The temporaries __A and __H are declared inside the
+   statement expression, so an argument that itself mentions an identifier
+   spelled __A or __H would refer to these temporaries.  */
+#define _cvtss_sh(__F, __I) \
+ (__extension__ \
+ ({ \
+ __v4sf __A = __extension__ (__v4sf){ __F, 0, 0, 0 }; \
+ __v8hi __H = __builtin_ia32_vcvtps2ph (__A, __I); \
+ (unsigned short) __builtin_ia32_vec_ext_v8hi (__H, 0); \
+ }))
+
+/* Macro form of _mm_cvtps_ph, used when __OPTIMIZE__ is not defined.
+   Expands to __builtin_ia32_vcvtps2ph on A (cast to __m128, then __v4sf)
+   and I (cast to int), typed as __m128i.  Both arguments are
+   parenthesized so the casts apply to the whole argument expression, not
+   just its first operand (e.g. when A is `c ? x : y').  */
+#define _mm_cvtps_ph(A, I) \
+ ((__m128i) __builtin_ia32_vcvtps2ph ((__v4sf)(__m128) (A), (int) (I)))
+
+/* Macro form of _mm256_cvtps_ph, used when __OPTIMIZE__ is not defined.
+   Expands to __builtin_ia32_vcvtps2ph256 on A (cast to __m256, then
+   __v8sf) and I (cast to int), typed as __m128i.  Both arguments are
+   parenthesized so the casts apply to the whole argument expression, not
+   just its first operand (e.g. when A is `c ? x : y').  */
+#define _mm256_cvtps_ph(A, I) \
+ ((__m128i) __builtin_ia32_vcvtps2ph256 ((__v8sf)(__m256) (A), (int) (I)))
+#endif /* __OPTIMIZE__ */
+
+#ifdef __DISABLE_F16C__
+#undef __DISABLE_F16C__
+#pragma GCC pop_options
+#endif /* __DISABLE_F16C__ */
+
+#endif /* _F16CINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/float.h b/lib/gcc/x86_64-linux-android/4.9/include/float.h
new file mode 100644
index 0000000..a8e05bf
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/float.h
@@ -0,0 +1,277 @@
+/* Copyright (C) 2002-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/*
+ * ISO C Standard: 5.2.4.2.2 Characteristics of floating types <float.h>
+ */
+
+#ifndef _FLOAT_H___
+#define _FLOAT_H___
+
+/* Radix of exponent representation, b. */
+#undef FLT_RADIX
+#define FLT_RADIX __FLT_RADIX__
+
+/* Number of base-FLT_RADIX digits in the significand, p. */
+#undef FLT_MANT_DIG
+#undef DBL_MANT_DIG
+#undef LDBL_MANT_DIG
+#define FLT_MANT_DIG __FLT_MANT_DIG__
+#define DBL_MANT_DIG __DBL_MANT_DIG__
+#define LDBL_MANT_DIG __LDBL_MANT_DIG__
+
+/* Number of decimal digits, q, such that any floating-point number with q
+ decimal digits can be rounded into a floating-point number with p radix b
+ digits and back again without change to the q decimal digits,
+
+ p * log10(b) if b is a power of 10
+ floor((p - 1) * log10(b)) otherwise
+*/
+#undef FLT_DIG
+#undef DBL_DIG
+#undef LDBL_DIG
+#define FLT_DIG __FLT_DIG__
+#define DBL_DIG __DBL_DIG__
+#define LDBL_DIG __LDBL_DIG__
+
+/* Minimum int x such that FLT_RADIX**(x-1) is a normalized float, emin. */
+#undef FLT_MIN_EXP
+#undef DBL_MIN_EXP
+#undef LDBL_MIN_EXP
+#define FLT_MIN_EXP __FLT_MIN_EXP__
+#define DBL_MIN_EXP __DBL_MIN_EXP__
+#define LDBL_MIN_EXP __LDBL_MIN_EXP__
+
+/* Minimum negative integer such that 10 raised to that power is in the
+ range of normalized floating-point numbers,
+
+ ceil(log10(b) * (emin - 1))
+*/
+#undef FLT_MIN_10_EXP
+#undef DBL_MIN_10_EXP
+#undef LDBL_MIN_10_EXP
+#define FLT_MIN_10_EXP __FLT_MIN_10_EXP__
+#define DBL_MIN_10_EXP __DBL_MIN_10_EXP__
+#define LDBL_MIN_10_EXP __LDBL_MIN_10_EXP__
+
+/* Maximum int x such that FLT_RADIX**(x-1) is a representable float, emax. */
+#undef FLT_MAX_EXP
+#undef DBL_MAX_EXP
+#undef LDBL_MAX_EXP
+#define FLT_MAX_EXP __FLT_MAX_EXP__
+#define DBL_MAX_EXP __DBL_MAX_EXP__
+#define LDBL_MAX_EXP __LDBL_MAX_EXP__
+
+/* Maximum integer such that 10 raised to that power is in the range of
+ representable finite floating-point numbers,
+
+ floor(log10((1 - b**-p) * b**emax))
+*/
+#undef FLT_MAX_10_EXP
+#undef DBL_MAX_10_EXP
+#undef LDBL_MAX_10_EXP
+#define FLT_MAX_10_EXP __FLT_MAX_10_EXP__
+#define DBL_MAX_10_EXP __DBL_MAX_10_EXP__
+#define LDBL_MAX_10_EXP __LDBL_MAX_10_EXP__
+
+/* Maximum representable finite floating-point number,
+
+ (1 - b**-p) * b**emax
+*/
+#undef FLT_MAX
+#undef DBL_MAX
+#undef LDBL_MAX
+#define FLT_MAX __FLT_MAX__
+#define DBL_MAX __DBL_MAX__
+#define LDBL_MAX __LDBL_MAX__
+
+/* The difference between 1 and the least value greater than 1 that is
+ representable in the given floating point type, b**(1-p). */
+#undef FLT_EPSILON
+#undef DBL_EPSILON
+#undef LDBL_EPSILON
+#define FLT_EPSILON __FLT_EPSILON__
+#define DBL_EPSILON __DBL_EPSILON__
+#define LDBL_EPSILON __LDBL_EPSILON__
+
+/* Minimum normalized positive floating-point number, b**(emin - 1). */
+#undef FLT_MIN
+#undef DBL_MIN
+#undef LDBL_MIN
+#define FLT_MIN __FLT_MIN__
+#define DBL_MIN __DBL_MIN__
+#define LDBL_MIN __LDBL_MIN__
+
+/* Addition rounds to 0: zero, 1: nearest, 2: +inf, 3: -inf, -1: unknown. */
+/* ??? This is supposed to change with calls to fesetround in <fenv.h>. */
+#undef FLT_ROUNDS
+#define FLT_ROUNDS 1
+
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+/* The floating-point expression evaluation method.
+ -1 indeterminate
+ 0 evaluate all operations and constants just to the range and
+ precision of the type
+ 1 evaluate operations and constants of type float and double
+ to the range and precision of the double type, evaluate
+ long double operations and constants to the range and
+ precision of the long double type
+ 2 evaluate all operations and constants to the range and
+ precision of the long double type
+
+ ??? This ought to change with the setting of the fp control word;
+ the value provided by the compiler assumes the widest setting. */
+#undef FLT_EVAL_METHOD
+#define FLT_EVAL_METHOD __FLT_EVAL_METHOD__
+
+/* Number of decimal digits, n, such that any floating-point number in the
+ widest supported floating type with pmax radix b digits can be rounded
+ to a floating-point number with n decimal digits and back again without
+ change to the value,
+
+ pmax * log10(b) if b is a power of 10
+ ceil(1 + pmax * log10(b)) otherwise
+*/
+#undef DECIMAL_DIG
+#define DECIMAL_DIG __DECIMAL_DIG__
+
+#endif /* C99 */
+
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
+/* Versions of DECIMAL_DIG for each floating-point type. */
+#undef FLT_DECIMAL_DIG
+#undef DBL_DECIMAL_DIG
+#undef LDBL_DECIMAL_DIG
+#define FLT_DECIMAL_DIG __FLT_DECIMAL_DIG__
+#define DBL_DECIMAL_DIG __DBL_DECIMAL_DIG__
+#define LDBL_DECIMAL_DIG __DECIMAL_DIG__
+
+/* Whether types support subnormal numbers. */
+#undef FLT_HAS_SUBNORM
+#undef DBL_HAS_SUBNORM
+#undef LDBL_HAS_SUBNORM
+#define FLT_HAS_SUBNORM __FLT_HAS_DENORM__
+#define DBL_HAS_SUBNORM __DBL_HAS_DENORM__
+#define LDBL_HAS_SUBNORM __LDBL_HAS_DENORM__
+
+/* Minimum positive values, including subnormals. */
+#undef FLT_TRUE_MIN
+#undef DBL_TRUE_MIN
+#undef LDBL_TRUE_MIN
+#if __FLT_HAS_DENORM__
+#define FLT_TRUE_MIN __FLT_DENORM_MIN__
+#else
+#define FLT_TRUE_MIN __FLT_MIN__
+#endif
+#if __DBL_HAS_DENORM__
+#define DBL_TRUE_MIN __DBL_DENORM_MIN__
+#else
+#define DBL_TRUE_MIN __DBL_MIN__
+#endif
+#if __LDBL_HAS_DENORM__
+#define LDBL_TRUE_MIN __LDBL_DENORM_MIN__
+#else
+#define LDBL_TRUE_MIN __LDBL_MIN__
+#endif
+
+#endif /* C11 */
+
+#ifdef __STDC_WANT_DEC_FP__
+/* Draft Technical Report 24732, extension for decimal floating-point
+ arithmetic: Characteristic of decimal floating types <float.h>. */
+
+/* Number of base-FLT_RADIX digits in the significand, p. */
+#undef DEC32_MANT_DIG
+#undef DEC64_MANT_DIG
+#undef DEC128_MANT_DIG
+#define DEC32_MANT_DIG __DEC32_MANT_DIG__
+#define DEC64_MANT_DIG __DEC64_MANT_DIG__
+#define DEC128_MANT_DIG __DEC128_MANT_DIG__
+
+/* Minimum exponent. */
+#undef DEC32_MIN_EXP
+#undef DEC64_MIN_EXP
+#undef DEC128_MIN_EXP
+#define DEC32_MIN_EXP __DEC32_MIN_EXP__
+#define DEC64_MIN_EXP __DEC64_MIN_EXP__
+#define DEC128_MIN_EXP __DEC128_MIN_EXP__
+
+/* Maximum exponent. */
+#undef DEC32_MAX_EXP
+#undef DEC64_MAX_EXP
+#undef DEC128_MAX_EXP
+#define DEC32_MAX_EXP __DEC32_MAX_EXP__
+#define DEC64_MAX_EXP __DEC64_MAX_EXP__
+#define DEC128_MAX_EXP __DEC128_MAX_EXP__
+
+/* Maximum representable finite decimal floating-point number
+ (there are 6, 15, and 33 9s after the decimal points respectively). */
+#undef DEC32_MAX
+#undef DEC64_MAX
+#undef DEC128_MAX
+#define DEC32_MAX __DEC32_MAX__
+#define DEC64_MAX __DEC64_MAX__
+#define DEC128_MAX __DEC128_MAX__
+
+/* The difference between 1 and the least value greater than 1 that is
+ representable in the given floating point type. */
+#undef DEC32_EPSILON
+#undef DEC64_EPSILON
+#undef DEC128_EPSILON
+#define DEC32_EPSILON __DEC32_EPSILON__
+#define DEC64_EPSILON __DEC64_EPSILON__
+#define DEC128_EPSILON __DEC128_EPSILON__
+
+/* Minimum normalized positive floating-point number. */
+#undef DEC32_MIN
+#undef DEC64_MIN
+#undef DEC128_MIN
+#define DEC32_MIN __DEC32_MIN__
+#define DEC64_MIN __DEC64_MIN__
+#define DEC128_MIN __DEC128_MIN__
+
+/* Minimum subnormal positive floating-point number. */
+#undef DEC32_SUBNORMAL_MIN
+#undef DEC64_SUBNORMAL_MIN
+#undef DEC128_SUBNORMAL_MIN
+#define DEC32_SUBNORMAL_MIN __DEC32_SUBNORMAL_MIN__
+#define DEC64_SUBNORMAL_MIN __DEC64_SUBNORMAL_MIN__
+#define DEC128_SUBNORMAL_MIN __DEC128_SUBNORMAL_MIN__
+
+/* The floating-point expression evaluation method.
+ -1 indeterminate
+ 0 evaluate all operations and constants just to the range and
+ precision of the type
+ 1 evaluate operations and constants of type _Decimal32
+ and _Decimal64 to the range and precision of the _Decimal64
+ type, evaluate _Decimal128 operations and constants to the
+ range and precision of the _Decimal128 type;
+ 2 evaluate all operations and constants to the range and
+ precision of the _Decimal128 type. */
+
+#undef DEC_EVAL_METHOD
+#define DEC_EVAL_METHOD __DEC_EVAL_METHOD__
+
+#endif /* __STDC_WANT_DEC_FP__ */
+
+#endif /* _FLOAT_H___ */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/fma4intrin.h b/lib/gcc/x86_64-linux-android/4.9/include/fma4intrin.h
new file mode 100644
index 0000000..e1bdef7
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/fma4intrin.h
@@ -0,0 +1,241 @@
+/* Copyright (C) 2007-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _X86INTRIN_H_INCLUDED
+# error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef _FMA4INTRIN_H_INCLUDED
+#define _FMA4INTRIN_H_INCLUDED
+
+/* We need definitions from the SSE4A, SSE3, SSE2 and SSE header files. */
+#include <ammintrin.h>
+
+#ifndef __FMA4__
+#pragma GCC push_options
+#pragma GCC target("fma4")
+#define __DISABLE_FMA4__
+#endif /* __FMA4__ */
+
+/* 128b Floating point multiply/add type instructions. */
+/* Returns __builtin_ia32_vfmaddps applied to __A, __B and __C, each viewed
+   as __v4sf and passed through unchanged.  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macc_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+  const __v4sf __op1 = (__v4sf) __A;
+  const __v4sf __op2 = (__v4sf) __B;
+  const __v4sf __op3 = (__v4sf) __C;
+  return (__m128) __builtin_ia32_vfmaddps (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddpd applied to __A, __B and __C, each viewed
+   as __v2df and passed through unchanged.  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macc_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+  const __v2df __op1 = (__v2df) __A;
+  const __v2df __op2 = (__v2df) __B;
+  const __v2df __op3 = (__v2df) __C;
+  return (__m128d) __builtin_ia32_vfmaddpd (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddss applied to __A, __B and __C, each viewed
+   as __v4sf and passed through unchanged.  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macc_ss (__m128 __A, __m128 __B, __m128 __C)
+{
+  const __v4sf __op1 = (__v4sf) __A;
+  const __v4sf __op2 = (__v4sf) __B;
+  const __v4sf __op3 = (__v4sf) __C;
+  return (__m128) __builtin_ia32_vfmaddss (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddsd applied to __A, __B and __C, each viewed
+   as __v2df and passed through unchanged.  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macc_sd (__m128d __A, __m128d __B, __m128d __C)
+{
+  const __v2df __op1 = (__v2df) __A;
+  const __v2df __op2 = (__v2df) __B;
+  const __v2df __op3 = (__v2df) __C;
+  return (__m128d) __builtin_ia32_vfmaddsd (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddps applied to __A, __B and the negation of
+   __C (operands viewed as __v4sf).  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_msub_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+  const __v4sf __op1 = (__v4sf) __A;
+  const __v4sf __op2 = (__v4sf) __B;
+  const __v4sf __op3 = -(__v4sf) __C;
+  return (__m128) __builtin_ia32_vfmaddps (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddpd applied to __A, __B and the negation of
+   __C (operands viewed as __v2df).  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_msub_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+  const __v2df __op1 = (__v2df) __A;
+  const __v2df __op2 = (__v2df) __B;
+  const __v2df __op3 = -(__v2df) __C;
+  return (__m128d) __builtin_ia32_vfmaddpd (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddss applied to __A, __B and the negation of
+   __C (operands viewed as __v4sf).  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_msub_ss (__m128 __A, __m128 __B, __m128 __C)
+{
+  const __v4sf __op1 = (__v4sf) __A;
+  const __v4sf __op2 = (__v4sf) __B;
+  const __v4sf __op3 = -(__v4sf) __C;
+  return (__m128) __builtin_ia32_vfmaddss (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddsd applied to __A, __B and the negation of
+   __C (operands viewed as __v2df).  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_msub_sd (__m128d __A, __m128d __B, __m128d __C)
+{
+  const __v2df __op1 = (__v2df) __A;
+  const __v2df __op2 = (__v2df) __B;
+  const __v2df __op3 = -(__v2df) __C;
+  return (__m128d) __builtin_ia32_vfmaddsd (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddps applied to the negation of __A, then __B
+   and __C (operands viewed as __v4sf).  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_nmacc_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+  const __v4sf __op1 = -(__v4sf) __A;
+  const __v4sf __op2 = (__v4sf) __B;
+  const __v4sf __op3 = (__v4sf) __C;
+  return (__m128) __builtin_ia32_vfmaddps (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddpd applied to the negation of __A, then __B
+   and __C (operands viewed as __v2df).  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_nmacc_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+  const __v2df __op1 = -(__v2df) __A;
+  const __v2df __op2 = (__v2df) __B;
+  const __v2df __op3 = (__v2df) __C;
+  return (__m128d) __builtin_ia32_vfmaddpd (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddss applied to the negation of __A, then __B
+   and __C (operands viewed as __v4sf).  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_nmacc_ss (__m128 __A, __m128 __B, __m128 __C)
+{
+  const __v4sf __op1 = -(__v4sf) __A;
+  const __v4sf __op2 = (__v4sf) __B;
+  const __v4sf __op3 = (__v4sf) __C;
+  return (__m128) __builtin_ia32_vfmaddss (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddsd applied to the negation of __A, then __B
+   and __C (operands viewed as __v2df).  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_nmacc_sd (__m128d __A, __m128d __B, __m128d __C)
+{
+  const __v2df __op1 = -(__v2df) __A;
+  const __v2df __op2 = (__v2df) __B;
+  const __v2df __op3 = (__v2df) __C;
+  return (__m128d) __builtin_ia32_vfmaddsd (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddps applied to -__A, __B and -__C (operands
+   viewed as __v4sf).  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_nmsub_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+  const __v4sf __op1 = -(__v4sf) __A;
+  const __v4sf __op2 = (__v4sf) __B;
+  const __v4sf __op3 = -(__v4sf) __C;
+  return (__m128) __builtin_ia32_vfmaddps (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddpd applied to -__A, __B and -__C (operands
+   viewed as __v2df).  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_nmsub_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+  const __v2df __op1 = -(__v2df) __A;
+  const __v2df __op2 = (__v2df) __B;
+  const __v2df __op3 = -(__v2df) __C;
+  return (__m128d) __builtin_ia32_vfmaddpd (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddss applied to -__A, __B and -__C (operands
+   viewed as __v4sf).  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_nmsub_ss (__m128 __A, __m128 __B, __m128 __C)
+{
+  const __v4sf __op1 = -(__v4sf) __A;
+  const __v4sf __op2 = (__v4sf) __B;
+  const __v4sf __op3 = -(__v4sf) __C;
+  return (__m128) __builtin_ia32_vfmaddss (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddsd applied to -__A, __B and -__C (operands
+   viewed as __v2df).  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_nmsub_sd (__m128d __A, __m128d __B, __m128d __C)
+{
+  const __v2df __op1 = -(__v2df) __A;
+  const __v2df __op2 = (__v2df) __B;
+  const __v2df __op3 = -(__v2df) __C;
+  return (__m128d) __builtin_ia32_vfmaddsd (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddsubps applied to __A, __B and __C, each
+   viewed as __v4sf and passed through unchanged.  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maddsub_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+  const __v4sf __op1 = (__v4sf) __A;
+  const __v4sf __op2 = (__v4sf) __B;
+  const __v4sf __op3 = (__v4sf) __C;
+  return (__m128) __builtin_ia32_vfmaddsubps (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddsubpd applied to __A, __B and __C, each
+   viewed as __v2df and passed through unchanged.  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maddsub_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+  const __v2df __op1 = (__v2df) __A;
+  const __v2df __op2 = (__v2df) __B;
+  const __v2df __op3 = (__v2df) __C;
+  return (__m128d) __builtin_ia32_vfmaddsubpd (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddsubps applied to __A, __B and the negation
+   of __C (operands viewed as __v4sf).  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_msubadd_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+  const __v4sf __op1 = (__v4sf) __A;
+  const __v4sf __op2 = (__v4sf) __B;
+  const __v4sf __op3 = -(__v4sf) __C;
+  return (__m128) __builtin_ia32_vfmaddsubps (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddsubpd applied to __A, __B and the negation
+   of __C (operands viewed as __v2df).  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_msubadd_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+  const __v2df __op1 = (__v2df) __A;
+  const __v2df __op2 = (__v2df) __B;
+  const __v2df __op3 = -(__v2df) __C;
+  return (__m128d) __builtin_ia32_vfmaddsubpd (__op1, __op2, __op3);
+}
+
+/* 256b Floating point multiply/add type instructions. */
+/* Returns __builtin_ia32_vfmaddps256 applied to __A, __B and __C, each
+   viewed as __v8sf and passed through unchanged.  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_macc_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+  const __v8sf __op1 = (__v8sf) __A;
+  const __v8sf __op2 = (__v8sf) __B;
+  const __v8sf __op3 = (__v8sf) __C;
+  return (__m256) __builtin_ia32_vfmaddps256 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddpd256 applied to __A, __B and __C, each
+   viewed as __v4df and passed through unchanged.  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_macc_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+  const __v4df __op1 = (__v4df) __A;
+  const __v4df __op2 = (__v4df) __B;
+  const __v4df __op3 = (__v4df) __C;
+  return (__m256d) __builtin_ia32_vfmaddpd256 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddps256 applied to __A, __B and the negation
+   of __C (operands viewed as __v8sf).  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_msub_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+  const __v8sf __op1 = (__v8sf) __A;
+  const __v8sf __op2 = (__v8sf) __B;
+  const __v8sf __op3 = -(__v8sf) __C;
+  return (__m256) __builtin_ia32_vfmaddps256 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddpd256 applied to __A, __B and the negation
+   of __C (operands viewed as __v4df).  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_msub_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+  const __v4df __op1 = (__v4df) __A;
+  const __v4df __op2 = (__v4df) __B;
+  const __v4df __op3 = -(__v4df) __C;
+  return (__m256d) __builtin_ia32_vfmaddpd256 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddps256 applied to the negation of __A, then
+   __B and __C (operands viewed as __v8sf).  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_nmacc_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+  const __v8sf __op1 = -(__v8sf) __A;
+  const __v8sf __op2 = (__v8sf) __B;
+  const __v8sf __op3 = (__v8sf) __C;
+  return (__m256) __builtin_ia32_vfmaddps256 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddpd256 applied to the negation of __A, then
+   __B and __C (operands viewed as __v4df).  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_nmacc_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+  const __v4df __op1 = -(__v4df) __A;
+  const __v4df __op2 = (__v4df) __B;
+  const __v4df __op3 = (__v4df) __C;
+  return (__m256d) __builtin_ia32_vfmaddpd256 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddps256 applied to -__A, __B and -__C
+   (operands viewed as __v8sf).  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_nmsub_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+  const __v8sf __op1 = -(__v8sf) __A;
+  const __v8sf __op2 = (__v8sf) __B;
+  const __v8sf __op3 = -(__v8sf) __C;
+  return (__m256) __builtin_ia32_vfmaddps256 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddpd256 applied to -__A, __B and -__C
+   (operands viewed as __v4df).  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_nmsub_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+  const __v4df __op1 = -(__v4df) __A;
+  const __v4df __op2 = (__v4df) __B;
+  const __v4df __op3 = -(__v4df) __C;
+  return (__m256d) __builtin_ia32_vfmaddpd256 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddsubps256 applied to __A, __B and __C, each
+   viewed as __v8sf and passed through unchanged.  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maddsub_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+  const __v8sf __op1 = (__v8sf) __A;
+  const __v8sf __op2 = (__v8sf) __B;
+  const __v8sf __op3 = (__v8sf) __C;
+  return (__m256) __builtin_ia32_vfmaddsubps256 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddsubpd256 applied to __A, __B and __C, each
+   viewed as __v4df and passed through unchanged.  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maddsub_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+  const __v4df __op1 = (__v4df) __A;
+  const __v4df __op2 = (__v4df) __B;
+  const __v4df __op3 = (__v4df) __C;
+  return (__m256d) __builtin_ia32_vfmaddsubpd256 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddsubps256 applied to __A, __B and the
+   negation of __C (operands viewed as __v8sf).  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_msubadd_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+  const __v8sf __op1 = (__v8sf) __A;
+  const __v8sf __op2 = (__v8sf) __B;
+  const __v8sf __op3 = -(__v8sf) __C;
+  return (__m256) __builtin_ia32_vfmaddsubps256 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddsubpd256 applied to __A, __B and the
+   negation of __C (operands viewed as __v4df).  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_msubadd_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+  const __v4df __op1 = (__v4df) __A;
+  const __v4df __op2 = (__v4df) __B;
+  const __v4df __op3 = -(__v4df) __C;
+  return (__m256d) __builtin_ia32_vfmaddsubpd256 (__op1, __op2, __op3);
+}
+
+#ifdef __DISABLE_FMA4__
+#undef __DISABLE_FMA4__
+#pragma GCC pop_options
+#endif /* __DISABLE_FMA4__ */
+
+#endif
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/fmaintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/fmaintrin.h
new file mode 100644
index 0000000..bfbb75d
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/fmaintrin.h
@@ -0,0 +1,302 @@
+/* Copyright (C) 2011-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+# error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _FMAINTRIN_H_INCLUDED
+#define _FMAINTRIN_H_INCLUDED
+
+#ifndef __FMA__
+#pragma GCC push_options
+#pragma GCC target("fma")
+#define __DISABLE_FMA__
+#endif /* __FMA__ */
+
+/* Returns __builtin_ia32_vfmaddpd applied to __A, __B and __C, each viewed
+   as __v2df and passed through unchanged.  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+  const __v2df __op1 = (__v2df) __A;
+  const __v2df __op2 = (__v2df) __B;
+  const __v2df __op3 = (__v2df) __C;
+  return (__m128d) __builtin_ia32_vfmaddpd (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddpd256 applied to __A, __B and __C, each
+   viewed as __v4df and passed through unchanged.  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmadd_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+  const __v4df __op1 = (__v4df) __A;
+  const __v4df __op2 = (__v4df) __B;
+  const __v4df __op3 = (__v4df) __C;
+  return (__m256d) __builtin_ia32_vfmaddpd256 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddps applied to __A, __B and __C, each viewed
+   as __v4sf and passed through unchanged.  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+  const __v4sf __op1 = (__v4sf) __A;
+  const __v4sf __op2 = (__v4sf) __B;
+  const __v4sf __op3 = (__v4sf) __C;
+  return (__m128) __builtin_ia32_vfmaddps (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddps256 applied to __A, __B and __C, each
+   viewed as __v8sf and passed through unchanged.  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+  const __v8sf __op1 = (__v8sf) __A;
+  const __v8sf __op2 = (__v8sf) __B;
+  const __v8sf __op3 = (__v8sf) __C;
+  return (__m256) __builtin_ia32_vfmaddps256 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddsd3 applied to __A, __B and __C, each viewed
+   as __v2df and passed through unchanged.  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_sd (__m128d __A, __m128d __B, __m128d __C)
+{
+  const __v2df __op1 = (__v2df) __A;
+  const __v2df __op2 = (__v2df) __B;
+  const __v2df __op3 = (__v2df) __C;
+  return (__m128d) __builtin_ia32_vfmaddsd3 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddss3 applied to __A, __B and __C, each viewed
+   as __v4sf and passed through unchanged.  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_ss (__m128 __A, __m128 __B, __m128 __C)
+{
+  const __v4sf __op1 = (__v4sf) __A;
+  const __v4sf __op2 = (__v4sf) __B;
+  const __v4sf __op3 = (__v4sf) __C;
+  return (__m128) __builtin_ia32_vfmaddss3 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddpd applied to __A, __B and the negation of
+   __C (operands viewed as __v2df).  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsub_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+  const __v2df __op1 = (__v2df) __A;
+  const __v2df __op2 = (__v2df) __B;
+  const __v2df __op3 = -(__v2df) __C;
+  return (__m128d) __builtin_ia32_vfmaddpd (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddpd256 applied to __A, __B and the negation
+   of __C (operands viewed as __v4df).  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmsub_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+  const __v4df __op1 = (__v4df) __A;
+  const __v4df __op2 = (__v4df) __B;
+  const __v4df __op3 = -(__v4df) __C;
+  return (__m256d) __builtin_ia32_vfmaddpd256 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddps applied to __A, __B and the negation of
+   __C (operands viewed as __v4sf).  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsub_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+  const __v4sf __op1 = (__v4sf) __A;
+  const __v4sf __op2 = (__v4sf) __B;
+  const __v4sf __op3 = -(__v4sf) __C;
+  return (__m128) __builtin_ia32_vfmaddps (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddps256 applied to __A, __B and the negation
+   of __C (operands viewed as __v8sf).  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmsub_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+  const __v8sf __op1 = (__v8sf) __A;
+  const __v8sf __op2 = (__v8sf) __B;
+  const __v8sf __op3 = -(__v8sf) __C;
+  return (__m256) __builtin_ia32_vfmaddps256 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddsd3 applied to __A, __B and the negation of
+   __C (operands viewed as __v2df).  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsub_sd (__m128d __A, __m128d __B, __m128d __C)
+{
+  const __v2df __op1 = (__v2df) __A;
+  const __v2df __op2 = (__v2df) __B;
+  const __v2df __op3 = -(__v2df) __C;
+  return (__m128d) __builtin_ia32_vfmaddsd3 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddss3 applied to __A, __B and the negation of
+   __C (operands viewed as __v4sf).  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsub_ss (__m128 __A, __m128 __B, __m128 __C)
+{
+  const __v4sf __op1 = (__v4sf) __A;
+  const __v4sf __op2 = (__v4sf) __B;
+  const __v4sf __op3 = -(__v4sf) __C;
+  return (__m128) __builtin_ia32_vfmaddss3 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddpd applied to the negation of __A, then __B
+   and __C (operands viewed as __v2df).  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+  const __v2df __op1 = -(__v2df) __A;
+  const __v2df __op2 = (__v2df) __B;
+  const __v2df __op3 = (__v2df) __C;
+  return (__m128d) __builtin_ia32_vfmaddpd (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddpd256 applied to the negation of __A, then
+   __B and __C (operands viewed as __v4df).  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+  const __v4df __op1 = -(__v4df) __A;
+  const __v4df __op2 = (__v4df) __B;
+  const __v4df __op3 = (__v4df) __C;
+  return (__m256d) __builtin_ia32_vfmaddpd256 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddps applied to the negation of __A, then __B
+   and __C (operands viewed as __v4sf).  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+  const __v4sf __op1 = -(__v4sf) __A;
+  const __v4sf __op2 = (__v4sf) __B;
+  const __v4sf __op3 = (__v4sf) __C;
+  return (__m128) __builtin_ia32_vfmaddps (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddps256 applied to the negation of __A, then
+   __B and __C (operands viewed as __v8sf).  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+  const __v8sf __op1 = -(__v8sf) __A;
+  const __v8sf __op2 = (__v8sf) __B;
+  const __v8sf __op3 = (__v8sf) __C;
+  return (__m256) __builtin_ia32_vfmaddps256 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddsd3 applied to __A, the negation of __B, and
+   __C (operands viewed as __v2df).  Unlike the packed variants, the sign
+   flip is applied to __B, not __A.
+   NOTE(review): presumably so that elements of __A the builtin passes
+   through stay unnegated; confirm against the builtin's definition.  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmadd_sd (__m128d __A, __m128d __B, __m128d __C)
+{
+  const __v2df __op1 = (__v2df) __A;
+  const __v2df __op2 = -(__v2df) __B;
+  const __v2df __op3 = (__v2df) __C;
+  return (__m128d) __builtin_ia32_vfmaddsd3 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddss3 applied to __A, the negation of __B, and
+   __C (operands viewed as __v4sf).  Unlike the packed variants, the sign
+   flip is applied to __B, not __A.
+   NOTE(review): presumably so that elements of __A the builtin passes
+   through stay unnegated; confirm against the builtin's definition.  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmadd_ss (__m128 __A, __m128 __B, __m128 __C)
+{
+  const __v4sf __op1 = (__v4sf) __A;
+  const __v4sf __op2 = -(__v4sf) __B;
+  const __v4sf __op3 = (__v4sf) __C;
+  return (__m128) __builtin_ia32_vfmaddss3 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddpd applied to -__A, __B and -__C (operands
+   viewed as __v2df).  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+  const __v2df __op1 = -(__v2df) __A;
+  const __v2df __op2 = (__v2df) __B;
+  const __v2df __op3 = -(__v2df) __C;
+  return (__m128d) __builtin_ia32_vfmaddpd (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddpd256 applied to -__A, __B and -__C
+   (operands viewed as __v4df).  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+  const __v4df __op1 = -(__v4df) __A;
+  const __v4df __op2 = (__v4df) __B;
+  const __v4df __op3 = -(__v4df) __C;
+  return (__m256d) __builtin_ia32_vfmaddpd256 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddps applied to -__A, __B and -__C (operands
+   viewed as __v4sf).  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+  const __v4sf __op1 = -(__v4sf) __A;
+  const __v4sf __op2 = (__v4sf) __B;
+  const __v4sf __op3 = -(__v4sf) __C;
+  return (__m128) __builtin_ia32_vfmaddps (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddps256 applied to -__A, __B and -__C
+   (operands viewed as __v8sf).  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+  const __v8sf __op1 = -(__v8sf) __A;
+  const __v8sf __op2 = (__v8sf) __B;
+  const __v8sf __op3 = -(__v8sf) __C;
+  return (__m256) __builtin_ia32_vfmaddps256 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddsd3 applied to __A, -__B and -__C (operands
+   viewed as __v2df).  Unlike the packed variants, the first sign flip is
+   applied to __B, not __A.
+   NOTE(review): presumably so that elements of __A the builtin passes
+   through stay unnegated; confirm against the builtin's definition.  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmsub_sd (__m128d __A, __m128d __B, __m128d __C)
+{
+  const __v2df __op1 = (__v2df) __A;
+  const __v2df __op2 = -(__v2df) __B;
+  const __v2df __op3 = -(__v2df) __C;
+  return (__m128d) __builtin_ia32_vfmaddsd3 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddss3 applied to __A, -__B and -__C (operands
+   viewed as __v4sf).  Unlike the packed variants, the first sign flip is
+   applied to __B, not __A.
+   NOTE(review): presumably so that elements of __A the builtin passes
+   through stay unnegated; confirm against the builtin's definition.  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmsub_ss (__m128 __A, __m128 __B, __m128 __C)
+{
+  const __v4sf __op1 = (__v4sf) __A;
+  const __v4sf __op2 = -(__v4sf) __B;
+  const __v4sf __op3 = -(__v4sf) __C;
+  return (__m128) __builtin_ia32_vfmaddss3 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddsubpd applied to __A, __B and __C, each
+   viewed as __v2df and passed through unchanged.  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+  const __v2df __op1 = (__v2df) __A;
+  const __v2df __op2 = (__v2df) __B;
+  const __v2df __op3 = (__v2df) __C;
+  return (__m128d) __builtin_ia32_vfmaddsubpd (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddsubpd256 applied to __A, __B and __C, each
+   viewed as __v4df and passed through unchanged.  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+  const __v4df __op1 = (__v4df) __A;
+  const __v4df __op2 = (__v4df) __B;
+  const __v4df __op3 = (__v4df) __C;
+  return (__m256d) __builtin_ia32_vfmaddsubpd256 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddsubps applied to __A, __B and __C, each
+   viewed as __v4sf and passed through unchanged.  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+  const __v4sf __op1 = (__v4sf) __A;
+  const __v4sf __op2 = (__v4sf) __B;
+  const __v4sf __op3 = (__v4sf) __C;
+  return (__m128) __builtin_ia32_vfmaddsubps (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddsubps256 applied to __A, __B and __C, each
+   viewed as __v8sf and passed through unchanged.  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+  const __v8sf __op1 = (__v8sf) __A;
+  const __v8sf __op2 = (__v8sf) __B;
+  const __v8sf __op3 = (__v8sf) __C;
+  return (__m256) __builtin_ia32_vfmaddsubps256 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddsubpd applied to __A, __B and the negation
+   of __C (operands viewed as __v2df).  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+  const __v2df __op1 = (__v2df) __A;
+  const __v2df __op2 = (__v2df) __B;
+  const __v2df __op3 = -(__v2df) __C;
+  return (__m128d) __builtin_ia32_vfmaddsubpd (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddsubpd256 applied to __A, __B and the
+   negation of __C (operands viewed as __v4df).  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+  const __v4df __op1 = (__v4df) __A;
+  const __v4df __op2 = (__v4df) __B;
+  const __v4df __op3 = -(__v4df) __C;
+  return (__m256d) __builtin_ia32_vfmaddsubpd256 (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddsubps applied to __A, __B and the negation
+   of __C (operands viewed as __v4sf).  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+  const __v4sf __op1 = (__v4sf) __A;
+  const __v4sf __op2 = (__v4sf) __B;
+  const __v4sf __op3 = -(__v4sf) __C;
+  return (__m128) __builtin_ia32_vfmaddsubps (__op1, __op2, __op3);
+}
+
+/* Returns __builtin_ia32_vfmaddsubps256 applied to __A, __B and the
+   negation of __C (operands viewed as __v8sf).  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+  const __v8sf __op1 = (__v8sf) __A;
+  const __v8sf __op2 = (__v8sf) __B;
+  const __v8sf __op3 = -(__v8sf) __C;
+  return (__m256) __builtin_ia32_vfmaddsubps256 (__op1, __op2, __op3);
+}
+
+#ifdef __DISABLE_FMA__
+#undef __DISABLE_FMA__
+#pragma GCC pop_options
+#endif /* __DISABLE_FMA__ */
+
+#endif
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/fxsrintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/fxsrintrin.h
new file mode 100644
index 0000000..98e73ee
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/fxsrintrin.h
@@ -0,0 +1,73 @@
+/* Copyright (C) 2012-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* #if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED */
+/* # error "Never use <fxsrintrin.h> directly; include <x86intrin.h> instead." */
+/* #endif */
+
+#ifndef _FXSRINTRIN_H_INCLUDED
+#define _FXSRINTRIN_H_INCLUDED
+
+#ifndef __FXSR__
+#pragma GCC push_options
+#pragma GCC target("fxsr")
+#define __DISABLE_FXSR__
+#endif /* __FXSR__ */
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_fxsave (void *__P) /* Hands __P to __builtin_ia32_fxsave; yields no value.  NOTE(review): the layout/alignment required of *__P is not visible here -- see the FXSAVE description in the Intel SDM.  */
+{
+ __builtin_ia32_fxsave (__P); /* plain call: ISO C forbids `return expr;' in a void function */
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_fxrstor (void *__P) /* Hands __P to __builtin_ia32_fxrstor; yields no value.  */
+{
+ __builtin_ia32_fxrstor (__P); /* plain call: ISO C forbids `return expr;' in a void function */
+}
+
+#ifdef __x86_64__
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_fxsave64 (void *__P) /* x86_64 only: hands __P to __builtin_ia32_fxsave64; yields no value.  */
+{
+ __builtin_ia32_fxsave64 (__P); /* plain call: ISO C forbids `return expr;' in a void function */
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_fxrstor64 (void *__P) /* x86_64 only: hands __P to __builtin_ia32_fxrstor64; yields no value.  */
+{
+ __builtin_ia32_fxrstor64 (__P); /* plain call: ISO C forbids `return expr;' in a void function */
+}
+#endif
+
+#ifdef __DISABLE_FXSR__
+#undef __DISABLE_FXSR__
+#pragma GCC pop_options
+#endif /* __DISABLE_FXSR__ */
+
+
+#endif /* _FXSRINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/ia32intrin.h b/lib/gcc/x86_64-linux-android/4.9/include/ia32intrin.h
new file mode 100644
index 0000000..5e7c893
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/ia32intrin.h
@@ -0,0 +1,293 @@
+/* Copyright (C) 2009-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _X86INTRIN_H_INCLUDED
+# error "Never use <ia32intrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+/* 32bit bsf: computed with __builtin_ctz.  NOTE(review): GCC documents __builtin_ctz as undefined for 0 -- confirm callers never pass 0.  */
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__bsfd (int __X)
+{
+ return __builtin_ctz (__X);
+}
+
+/* 32bit bsr: forwards __X to __builtin_ia32_bsrsi and returns its int result.  */
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__bsrd (int __X)
+{
+ return __builtin_ia32_bsrsi (__X);
+}
+
+/* 32bit bswap: returns __builtin_bswap32 (__X) as int.  */
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__bswapd (int __X)
+{
+ return __builtin_bswap32 (__X);
+}
+
+#ifndef __SSE4_2__
+#pragma GCC push_options
+#pragma GCC target("sse4.2")
+#define __DISABLE_SSE4_2__
+#endif /* __SSE4_2__ */
+
+/* 32bit accumulate CRC32 (polynomial 0x11EDC6F41) value: passes the running value __C and the 8-bit datum __V to __builtin_ia32_crc32qi.  */
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__crc32b (unsigned int __C, unsigned char __V)
+{
+ return __builtin_ia32_crc32qi (__C, __V);
+}
+
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__crc32w (unsigned int __C, unsigned short __V) /* 16-bit datum variant: passes __C and __V to __builtin_ia32_crc32hi.  */
+{
+ return __builtin_ia32_crc32hi (__C, __V);
+}
+
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__crc32d (unsigned int __C, unsigned int __V) /* 32-bit datum variant: passes __C and __V to __builtin_ia32_crc32si.  */
+{
+ return __builtin_ia32_crc32si (__C, __V);
+}
+
+#ifdef __DISABLE_SSE4_2__
+#undef __DISABLE_SSE4_2__
+#pragma GCC pop_options
+#endif /* __DISABLE_SSE4_2__ */
+
+/* 32bit popcnt: returns __builtin_popcount (__X) as int.  */
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__popcntd (unsigned int __X)
+{
+ return __builtin_popcount (__X);
+}
+
+/* rdpmc: passes __S to __builtin_ia32_rdpmc and returns the result as unsigned long long.  */
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__rdpmc (int __S)
+{
+ return __builtin_ia32_rdpmc (__S);
+}
+
+/* rdtsc: returns __builtin_ia32_rdtsc () as unsigned long long; takes no arguments.  */
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__rdtsc (void)
+{
+ return __builtin_ia32_rdtsc ();
+}
+
+/* rdtscp: returns __builtin_ia32_rdtscp (__A); __A is handed to the builtin unchanged (presumably it receives the auxiliary word -- confirm against the SDM).  */
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__rdtscp (unsigned int *__A)
+{
+ return __builtin_ia32_rdtscp (__A);
+}
+
+/* 8bit rol: delegates to __builtin_ia32_rolqi (__X, __C).  */
+extern __inline unsigned char
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__rolb (unsigned char __X, int __C)
+{
+ return __builtin_ia32_rolqi (__X, __C);
+}
+
+/* 16bit rol: delegates to __builtin_ia32_rolhi (__X, __C).  */
+extern __inline unsigned short
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__rolw (unsigned short __X, int __C)
+{
+ return __builtin_ia32_rolhi (__X, __C);
+}
+
+/* 32bit rol */
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__rold (unsigned int __X, int __C)
+{
+ return (__X << __C) | (__X >> (32 - __C));
+}
+
+/* 8bit ror: delegates to __builtin_ia32_rorqi (__X, __C).  */
+extern __inline unsigned char
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__rorb (unsigned char __X, int __C)
+{
+ return __builtin_ia32_rorqi (__X, __C);
+}
+
+/* 16bit ror: delegates to __builtin_ia32_rorhi (__X, __C).  */
+extern __inline unsigned short
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__rorw (unsigned short __X, int __C)
+{
+ return __builtin_ia32_rorhi (__X, __C);
+}
+
+/* 32bit ror */
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__rord (unsigned int __X, int __C)
+{
+ return (__X >> __C) | (__X << (32 - __C));
+}
+
+/* Pause: invokes __builtin_ia32_pause (); takes no arguments and returns nothing.  */
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__pause (void)
+{
+ __builtin_ia32_pause ();
+}
+
+#ifdef __x86_64__
+/* 64bit bsf: computed with __builtin_ctzll.  NOTE(review): GCC documents __builtin_ctzll as undefined for 0 -- confirm callers never pass 0.  */
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__bsfq (long long __X)
+{
+ return __builtin_ctzll (__X);
+}
+
+/* 64bit bsr: forwards __X to __builtin_ia32_bsrdi and returns its result as int.  */
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__bsrq (long long __X)
+{
+ return __builtin_ia32_bsrdi (__X);
+}
+
+/* 64bit bswap: returns __builtin_bswap64 (__X) as long long.  */
+extern __inline long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__bswapq (long long __X)
+{
+ return __builtin_bswap64 (__X);
+}
+
+#ifndef __SSE4_2__
+#pragma GCC push_options
+#pragma GCC target("sse4.2")
+#define __DISABLE_SSE4_2__
+#endif /* __SSE4_2__ */
+
+/* 64bit accumulate CRC32 (polynomial 0x11EDC6F41) value: passes the running value __C and the 64-bit datum __V to __builtin_ia32_crc32di.  */
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__crc32q (unsigned long long __C, unsigned long long __V)
+{
+ return __builtin_ia32_crc32di (__C, __V);
+}
+
+#ifdef __DISABLE_SSE4_2__
+#undef __DISABLE_SSE4_2__
+#pragma GCC pop_options
+#endif /* __DISABLE_SSE4_2__ */
+
+/* 64bit popcnt: returns __builtin_popcountll (__X), converted to the declared long long return type.  */
+extern __inline long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__popcntq (unsigned long long __X)
+{
+ return __builtin_popcountll (__X);
+}
+
+/* 64bit rol */
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__rolq (unsigned long long __X, int __C)
+{
+ return (__X << __C) | (__X >> (64 - __C));
+}
+
+/* 64bit ror */
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__rorq (unsigned long long __X, int __C)
+{
+ return (__X >> __C) | (__X << (64 - __C));
+}
+
+/* Read flags register (x86_64 variant): returns __builtin_ia32_readeflags_u64 () as unsigned long long.  */
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__readeflags (void)
+{
+ return __builtin_ia32_readeflags_u64 ();
+}
+
+/* Write flags register (x86_64 variant): passes __X to __builtin_ia32_writeeflags_u64.  The parameter uses a reserved (__-prefixed) name so a user macro called X cannot break this header.  */
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__writeeflags (unsigned long long __X)
+{
+ __builtin_ia32_writeeflags_u64 (__X);
+}
+
+#define _bswap64(a) __bswapq(a) /* x86_64 only: alias of __bswapq */
+#define _popcnt64(a) __popcntq(a) /* x86_64 only: alias of __popcntq */
+#define _lrotl(a,b) __rolq((a), (b)) /* x86_64 build: _lrotl maps to the 64-bit rotate __rolq */
+#define _lrotr(a,b) __rorq((a), (b)) /* x86_64 build: _lrotr maps to the 64-bit rotate __rorq */
+#else
+#define _lrotl(a,b) __rold((a), (b)) /* non-x86_64 build: _lrotl maps to the 32-bit rotate __rold */
+#define _lrotr(a,b) __rord((a), (b)) /* non-x86_64 build: _lrotr maps to the 32-bit rotate __rord */
+
+/* Read flags register (non-x86_64 variant): returns __builtin_ia32_readeflags_u32 () as unsigned int.  */
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__readeflags (void)
+{
+ return __builtin_ia32_readeflags_u32 ();
+}
+
+/* Write flags register (non-x86_64 variant): passes __X to __builtin_ia32_writeeflags_u32.  The parameter uses a reserved (__-prefixed) name so a user macro called X cannot break this header.  */
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__writeeflags (unsigned int __X)
+{
+ __builtin_ia32_writeeflags_u32 (__X);
+}
+
+#endif
+
+#define _bit_scan_forward(a) __bsfd(a) /* alias of the 32-bit __bsfd */
+#define _bit_scan_reverse(a) __bsrd(a) /* alias of the 32-bit __bsrd */
+#define _bswap(a) __bswapd(a) /* alias of the 32-bit __bswapd */
+#define _popcnt32(a) __popcntd(a) /* alias of the 32-bit __popcntd */
+#define _rdpmc(a) __rdpmc(a) /* alias of __rdpmc */
+#define _rdtsc() __rdtsc() /* alias of __rdtsc */
+#define _rdtscp(a) __rdtscp(a) /* alias of __rdtscp */
+#define _rotwl(a,b) __rolw((a), (b)) /* 16-bit rotate left via __rolw */
+#define _rotwr(a,b) __rorw((a), (b)) /* 16-bit rotate right via __rorw */
+#define _rotl(a,b) __rold((a), (b)) /* 32-bit rotate left via __rold */
+#define _rotr(a,b) __rord((a), (b)) /* 32-bit rotate right via __rord */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/immintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/immintrin.h
new file mode 100644
index 0000000..73b4859
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/immintrin.h
@@ -0,0 +1,177 @@
+/* Copyright (C) 2008-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#define _IMMINTRIN_H_INCLUDED
+
+#include <mmintrin.h>
+
+#include <xmmintrin.h>
+
+#include <emmintrin.h>
+
+#include <pmmintrin.h>
+
+#include <tmmintrin.h>
+
+#include <smmintrin.h>
+
+#include <wmmintrin.h>
+
+#include <avxintrin.h>
+
+#include <avx2intrin.h>
+
+#include <avx512fintrin.h>
+
+#include <avx512erintrin.h>
+
+#include <avx512pfintrin.h>
+
+#include <avx512cdintrin.h>
+
+#include <shaintrin.h>
+
+#include <lzcntintrin.h>
+
+#include <bmiintrin.h>
+
+#include <bmi2intrin.h>
+
+#include <fmaintrin.h>
+
+#include <f16cintrin.h>
+
+#include <rtmintrin.h>
+
+#include <xtestintrin.h>
+
+#ifndef __RDRND__
+#pragma GCC push_options
+#pragma GCC target("rdrnd")
+#define __DISABLE_RDRND__
+#endif /* __RDRND__ */
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_rdrand16_step (unsigned short *__P) /* Returns the int result of __builtin_ia32_rdrand16_step (__P).  NOTE(review): presumably *__P receives the random value and the result is a success flag -- confirm.  */
+{
+ return __builtin_ia32_rdrand16_step (__P);
+}
+
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_rdrand32_step (unsigned int *__P) /* 32-bit variant: returns the int result of __builtin_ia32_rdrand32_step (__P).  */
+{
+ return __builtin_ia32_rdrand32_step (__P);
+}
+#ifdef __DISABLE_RDRND__
+#undef __DISABLE_RDRND__
+#pragma GCC pop_options
+#endif /* __DISABLE_RDRND__ */
+
+#ifdef __x86_64__
+
+#ifndef __FSGSBASE__
+#pragma GCC push_options
+#pragma GCC target("fsgsbase")
+#define __DISABLE_FSGSBASE__
+#endif /* __FSGSBASE__ */
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_readfsbase_u32 (void) /* x86_64 only: returns __builtin_ia32_rdfsbase32 ().  */
+{
+ return __builtin_ia32_rdfsbase32 ();
+}
+
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_readfsbase_u64 (void) /* x86_64 only: returns __builtin_ia32_rdfsbase64 ().  */
+{
+ return __builtin_ia32_rdfsbase64 ();
+}
+
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_readgsbase_u32 (void) /* x86_64 only: returns __builtin_ia32_rdgsbase32 ().  */
+{
+ return __builtin_ia32_rdgsbase32 ();
+}
+
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_readgsbase_u64 (void) /* x86_64 only: returns __builtin_ia32_rdgsbase64 ().  */
+{
+ return __builtin_ia32_rdgsbase64 ();
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_writefsbase_u32 (unsigned int __B) /* x86_64 only: passes __B to __builtin_ia32_wrfsbase32; returns nothing.  */
+{
+ __builtin_ia32_wrfsbase32 (__B);
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_writefsbase_u64 (unsigned long long __B) /* x86_64 only: passes __B to __builtin_ia32_wrfsbase64; returns nothing.  */
+{
+ __builtin_ia32_wrfsbase64 (__B);
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_writegsbase_u32 (unsigned int __B) /* x86_64 only: passes __B to __builtin_ia32_wrgsbase32; returns nothing.  */
+{
+ __builtin_ia32_wrgsbase32 (__B);
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_writegsbase_u64 (unsigned long long __B) /* x86_64 only: passes __B to __builtin_ia32_wrgsbase64; returns nothing.  */
+{
+ __builtin_ia32_wrgsbase64 (__B);
+}
+#ifdef __DISABLE_FSGSBASE__
+#undef __DISABLE_FSGSBASE__
+#pragma GCC pop_options
+#endif /* __DISABLE_FSGSBASE__ */
+
+#ifndef __RDRND__
+#pragma GCC push_options
+#pragma GCC target("rdrnd")
+#define __DISABLE_RDRND__
+#endif /* __RDRND__ */
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_rdrand64_step (unsigned long long *__P) /* x86_64 only, 64-bit variant: returns the int result of __builtin_ia32_rdrand64_step (__P).  */
+{
+ return __builtin_ia32_rdrand64_step (__P);
+}
+#ifdef __DISABLE_RDRND__
+#undef __DISABLE_RDRND__
+#pragma GCC pop_options
+#endif /* __DISABLE_RDRND__ */
+
+#endif /* __x86_64__ */
+
+#endif /* _IMMINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/iso646.h b/lib/gcc/x86_64-linux-android/4.9/include/iso646.h
new file mode 100644
index 0000000..89bc8f4
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/iso646.h
@@ -0,0 +1,45 @@
+/* Copyright (C) 1997-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/*
+ * ISO C Standard: 7.9 Alternative spellings <iso646.h>
+ */
+
+#ifndef _ISO646_H
+#define _ISO646_H
+
+#ifndef __cplusplus /* C only: in C++ these spellings are built-in tokens and must not be macros */
+#define and && /* logical AND */
+#define and_eq &= /* bitwise AND assignment */
+#define bitand & /* bitwise AND */
+#define bitor | /* bitwise OR */
+#define compl ~ /* bitwise complement */
+#define not ! /* logical NOT */
+#define not_eq != /* inequality */
+#define or || /* logical OR */
+#define or_eq |= /* bitwise OR assignment */
+#define xor ^ /* bitwise exclusive OR */
+#define xor_eq ^= /* bitwise exclusive OR assignment */
+#endif
+
+#endif
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/lwpintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/lwpintrin.h
new file mode 100644
index 0000000..1cd046a
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/lwpintrin.h
@@ -0,0 +1,105 @@
+/* Copyright (C) 2007-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _X86INTRIN_H_INCLUDED
+# error "Never use <lwpintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef _LWPINTRIN_H_INCLUDED
+#define _LWPINTRIN_H_INCLUDED
+
+#ifndef __LWP__
+#pragma GCC push_options
+#pragma GCC target("lwp")
+#define __DISABLE_LWP__
+#endif /* __LWP__ */
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__llwpcb (void *__pcbAddress) /* Passes __pcbAddress to __builtin_ia32_llwpcb; the parameter uses a reserved name so user macros cannot clash with it.  */
+{
+ __builtin_ia32_llwpcb (__pcbAddress);
+}
+
+extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__slwpcb (void) /* Returns the pointer produced by __builtin_ia32_slwpcb (); takes no arguments.  */
+{
+ return __builtin_ia32_slwpcb ();
+}
+
+#ifdef __OPTIMIZE__ /* inline-function form when optimizing, macro form otherwise (presumably so constant operands still reach the builtin -- confirm) */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lwpval32 (unsigned int __data2, unsigned int __data1, unsigned int __flags) /* forwards all three operands to __builtin_ia32_lwpval32; reserved parameter names avoid user-macro clashes */
+{
+ __builtin_ia32_lwpval32 (__data2, __data1, __flags);
+}
+
+#ifdef __x86_64__
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lwpval64 (unsigned long long __data2, unsigned int __data1, unsigned int __flags) /* variant with a 64-bit first operand; forwards to __builtin_ia32_lwpval64 */
+{
+ __builtin_ia32_lwpval64 (__data2, __data1, __flags);
+}
+#endif
+#else
+#define __lwpval32(D2, D1, F) \
+ (__builtin_ia32_lwpval32 ((unsigned int) (D2), (unsigned int) (D1), \
+ (unsigned int) (F)))
+#ifdef __x86_64__
+#define __lwpval64(D2, D1, F) \
+ (__builtin_ia32_lwpval64 ((unsigned long long) (D2), (unsigned int) (D1), \
+ (unsigned int) (F)))
+#endif
+#endif
+
+
+#ifdef __OPTIMIZE__ /* inline-function form when optimizing, macro form otherwise (presumably so constant operands still reach the builtin -- confirm) */
+extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lwpins32 (unsigned int __data2, unsigned int __data1, unsigned int __flags) /* returns the unsigned char result of __builtin_ia32_lwpins32; reserved parameter names avoid user-macro clashes */
+{
+ return __builtin_ia32_lwpins32 (__data2, __data1, __flags);
+}
+
+#ifdef __x86_64__
+extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lwpins64 (unsigned long long __data2, unsigned int __data1, unsigned int __flags) /* variant with a 64-bit first operand; forwards to __builtin_ia32_lwpins64 */
+{
+ return __builtin_ia32_lwpins64 (__data2, __data1, __flags);
+}
+#endif
+#else
+#define __lwpins32(D2, D1, F) \
+ (__builtin_ia32_lwpins32 ((unsigned int) (D2), (unsigned int) (D1), \
+ (unsigned int) (F)))
+#ifdef __x86_64__
+#define __lwpins64(D2, D1, F) \
+ (__builtin_ia32_lwpins64 ((unsigned long long) (D2), (unsigned int) (D1), \
+ (unsigned int) (F)))
+#endif
+#endif
+
+#ifdef __DISABLE_LWP__
+#undef __DISABLE_LWP__
+#pragma GCC pop_options
+#endif /* __DISABLE_LWP__ */
+
+#endif /* _LWPINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/lzcntintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/lzcntintrin.h
new file mode 100644
index 0000000..b680a35
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/lzcntintrin.h
@@ -0,0 +1,75 @@
+/* Copyright (C) 2009-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED
+# error "Never use <lzcntintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+
+#ifndef _LZCNTINTRIN_H_INCLUDED
+#define _LZCNTINTRIN_H_INCLUDED
+
+#ifndef __LZCNT__
+#pragma GCC push_options
+#pragma GCC target("lzcnt")
+#define __DISABLE_LZCNT__
+#endif /* __LZCNT__ */
+
+extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lzcnt16 (unsigned short __X) /* 16-bit form: returns __builtin_clzs (__X).  */
+{
+ return __builtin_clzs (__X);
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lzcnt32 (unsigned int __X) /* 32-bit form: returns __builtin_clz (__X); same body as _lzcnt_u32.  */
+{
+ return __builtin_clz (__X);
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_lzcnt_u32 (unsigned int __X) /* Alternate spelling of __lzcnt32: returns __builtin_clz (__X).  */
+{
+ return __builtin_clz (__X);
+}
+
+#ifdef __x86_64__
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lzcnt64 (unsigned long long __X) /* x86_64 only: returns __builtin_clzll (__X); same body as _lzcnt_u64.  */
+{
+ return __builtin_clzll (__X);
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_lzcnt_u64 (unsigned long long __X) /* x86_64 only: alternate spelling of __lzcnt64; returns __builtin_clzll (__X).  */
+{
+ return __builtin_clzll (__X);
+}
+#endif
+
+#ifdef __DISABLE_LZCNT__
+#undef __DISABLE_LZCNT__
+#pragma GCC pop_options
+#endif /* __DISABLE_LZCNT__ */
+
+#endif /* _LZCNTINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/mm3dnow.h b/lib/gcc/x86_64-linux-android/4.9/include/mm3dnow.h
new file mode 100644
index 0000000..bf847f9
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/mm3dnow.h
@@ -0,0 +1,218 @@
+/* Copyright (C) 2004-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the mm3dnow.h (of supposedly AMD origin) included with
+ MSVC 7.1. */
+
+#ifndef _MM3DNOW_H_INCLUDED
+#define _MM3DNOW_H_INCLUDED
+
+#include <mmintrin.h>
+#include <prfchwintrin.h>
+
+#ifndef __3dNOW__
+#pragma GCC push_options
+#pragma GCC target("3dnow")
+#define __DISABLE_3dNOW__
+#endif /* __3dNOW__ */
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_femms (void) /* FEMMS wrapper: calls __builtin_ia32_femms (); no arguments, no result.  */
+{
+ __builtin_ia32_femms();
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pavgusb (__m64 __A, __m64 __B) /* PAVGUSB wrapper: operands viewed as __v8qi, result of __builtin_ia32_pavgusb returned as __m64.  */
+{
+ return (__m64)__builtin_ia32_pavgusb ((__v8qi)__A, (__v8qi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pf2id (__m64 __A) /* PF2ID wrapper: operand viewed as __v2sf, result of __builtin_ia32_pf2id returned as __m64.  */
+{
+ return (__m64)__builtin_ia32_pf2id ((__v2sf)__A);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfacc (__m64 __A, __m64 __B) /* PFACC wrapper: operands viewed as __v2sf, result of __builtin_ia32_pfacc returned as __m64.  */
+{
+ return (__m64)__builtin_ia32_pfacc ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfadd (__m64 __A, __m64 __B) /* PFADD wrapper: operands viewed as __v2sf, result of __builtin_ia32_pfadd returned as __m64.  */
+{
+ return (__m64)__builtin_ia32_pfadd ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfcmpeq (__m64 __A, __m64 __B) /* PFCMPEQ wrapper: operands viewed as __v2sf, result of __builtin_ia32_pfcmpeq returned as __m64.  */
+{
+ return (__m64)__builtin_ia32_pfcmpeq ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfcmpge (__m64 __A, __m64 __B) /* PFCMPGE wrapper: operands viewed as __v2sf, result of __builtin_ia32_pfcmpge returned as __m64.  */
+{
+ return (__m64)__builtin_ia32_pfcmpge ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfcmpgt (__m64 __A, __m64 __B) /* PFCMPGT wrapper: operands viewed as __v2sf, result of __builtin_ia32_pfcmpgt returned as __m64.  */
+{
+ return (__m64)__builtin_ia32_pfcmpgt ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfmax (__m64 __A, __m64 __B) /* PFMAX wrapper: operands viewed as __v2sf, result of __builtin_ia32_pfmax returned as __m64.  */
+{
+ return (__m64)__builtin_ia32_pfmax ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfmin (__m64 __A, __m64 __B) /* PFMIN wrapper: operands viewed as __v2sf, result of __builtin_ia32_pfmin returned as __m64.  */
+{
+ return (__m64)__builtin_ia32_pfmin ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfmul (__m64 __A, __m64 __B) /* PFMUL wrapper: operands viewed as __v2sf, result of __builtin_ia32_pfmul returned as __m64.  */
+{
+ return (__m64)__builtin_ia32_pfmul ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfrcp (__m64 __A) /* PFRCP wrapper: operand viewed as __v2sf, result of __builtin_ia32_pfrcp returned as __m64.  */
+{
+ return (__m64)__builtin_ia32_pfrcp ((__v2sf)__A);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfrcpit1 (__m64 __A, __m64 __B) /* PFRCPIT1 wrapper: operands viewed as __v2sf, result of __builtin_ia32_pfrcpit1 returned as __m64.  */
+{
+ return (__m64)__builtin_ia32_pfrcpit1 ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfrcpit2 (__m64 __A, __m64 __B) /* PFRCPIT2 wrapper: operands viewed as __v2sf, result of __builtin_ia32_pfrcpit2 returned as __m64.  */
+{
+ return (__m64)__builtin_ia32_pfrcpit2 ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfrsqrt (__m64 __A) /* PFRSQRT wrapper: operand viewed as __v2sf, result of __builtin_ia32_pfrsqrt returned as __m64.  */
+{
+ return (__m64)__builtin_ia32_pfrsqrt ((__v2sf)__A);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfrsqit1 (__m64 __A, __m64 __B) /* PFRSQIT1 wrapper: operands viewed as __v2sf, result of __builtin_ia32_pfrsqit1 returned as __m64.  */
+{
+ return (__m64)__builtin_ia32_pfrsqit1 ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfsub (__m64 __A, __m64 __B) /* PFSUB wrapper: operands viewed as __v2sf, result of __builtin_ia32_pfsub returned as __m64.  */
+{
+ return (__m64)__builtin_ia32_pfsub ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfsubr (__m64 __A, __m64 __B) /* PFSUBR wrapper: operands viewed as __v2sf, result of __builtin_ia32_pfsubr returned as __m64.  */
+{
+ return (__m64)__builtin_ia32_pfsubr ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pi2fd (__m64 __A) /* PI2FD wrapper: operand viewed as __v2si (integer lanes), result of __builtin_ia32_pi2fd returned as __m64.  */
+{
+ return (__m64)__builtin_ia32_pi2fd ((__v2si)__A);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmulhrw (__m64 __A, __m64 __B) /* PMULHRW wrapper: operands viewed as __v4hi, result of __builtin_ia32_pmulhrw returned as __m64.  */
+{
+ return (__m64)__builtin_ia32_pmulhrw ((__v4hi)__A, (__v4hi)__B);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_prefetch (void *__P) /* Uses the generic __builtin_prefetch on __P with rw argument 0 and locality argument 3; returns nothing.  */
+{
+ __builtin_prefetch (__P, 0, 3 /* _MM_HINT_T0 */);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_from_float (float __A) /* Builds the __v2sf { __A, 0.0f } and reinterprets it as __m64: __A in element 0, zero in element 1.  */
+{
+ return __extension__ (__m64)(__v2sf){ __A, 0.0f };
+}
+
+extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_to_float (__m64 __A) /* Returns element 0 of __A viewed as __v2sf (the inverse of _m_from_float for that element).  */
+{
+ union { __v2sf __v; float __a[2]; } __tmp; /* reserved member names: plain `v'/`a' could be clobbered by user macros */
+ __tmp.__v = (__v2sf)__A;
+ return __tmp.__a[0];
+}
+
+#ifdef __3dNOW_A__
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pf2iw (__m64 __A) /* Only with __3dNOW_A__.  PF2IW wrapper: operand viewed as __v2sf, result of __builtin_ia32_pf2iw returned as __m64.  */
+{
+ return (__m64)__builtin_ia32_pf2iw ((__v2sf)__A);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfnacc (__m64 __A, __m64 __B) /* Only with __3dNOW_A__.  PFNACC wrapper: operands viewed as __v2sf, result of __builtin_ia32_pfnacc returned as __m64.  */
+{
+ return (__m64)__builtin_ia32_pfnacc ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfpnacc (__m64 __A, __m64 __B) /* Only with __3dNOW_A__.  PFPNACC wrapper: operands viewed as __v2sf, result of __builtin_ia32_pfpnacc returned as __m64.  */
+{
+ return (__m64)__builtin_ia32_pfpnacc ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pi2fw (__m64 __A) /* Only with __3dNOW_A__.  PI2FW wrapper: operand viewed as __v2si, result of __builtin_ia32_pi2fw returned as __m64.  */
+{
+ return (__m64)__builtin_ia32_pi2fw ((__v2si)__A);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pswapd (__m64 __A) /* Only with __3dNOW_A__.  PSWAPD wrapper: operand viewed as __v2sf, result of __builtin_ia32_pswapdsf returned as __m64.  */
+{
+ return (__m64)__builtin_ia32_pswapdsf ((__v2sf)__A);
+}
+
+#endif /* __3dNOW_A__ */
+
+#ifdef __DISABLE_3dNOW__
+#undef __DISABLE_3dNOW__
+#pragma GCC pop_options
+#endif /* __DISABLE_3dNOW__ */
+
+#endif /* _MM3DNOW_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/mm_malloc.h b/lib/gcc/x86_64-linux-android/4.9/include/mm_malloc.h
new file mode 100644
index 0000000..67b88a8
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/mm_malloc.h
@@ -0,0 +1,63 @@
+/* Copyright (C) 2004-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _MM_MALLOC_H_INCLUDED
+#define _MM_MALLOC_H_INCLUDED
+
+#include <stdlib.h>
+
+#if !defined(__ANDROID__) || defined(HAVE_POSIX_MEMALIGN)
+/* We can't depend on <stdlib.h> since the prototype of posix_memalign
+ may not be visible. */
+#ifndef __cplusplus
+extern int posix_memalign (void **, size_t, size_t);
+#else
+extern "C" int posix_memalign (void **, size_t, size_t) throw ();
+#endif
+#endif
+
+static __inline void *
+_mm_malloc (size_t size, size_t alignment) /* Allocates size bytes aligned to alignment; returns NULL when posix_memalign fails.  Pair with _mm_free.  */
+{
+ void *ptr;
+ if (alignment == 1)
+ return malloc (size); /* no alignment requirement: plain malloc suffices */
+ if (alignment == 2 || (sizeof (void *) == 8 && alignment == 4))
+ alignment = sizeof (void *); /* raise alignments below pointer size to sizeof (void *) before calling the aligned allocator */
+#if !defined(__ANDROID__) || defined(HAVE_POSIX_MEMALIGN)
+ if (posix_memalign (&ptr, alignment, size) == 0)
+ return ptr;
+ else
+ return NULL; /* posix_memalign reported an error */
+#else
+ return memalign(alignment, size); /* Android without HAVE_POSIX_MEMALIGN.  NOTE(review): this header declares no memalign prototype -- confirm <stdlib.h> supplies one on the target */
+#endif
+}
+
+static __inline void
+_mm_free (void * ptr) /* Releases a block obtained from _mm_malloc by handing ptr to free.  */
+{
+ free (ptr);
+}
+
+#endif /* _MM_MALLOC_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/mmintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/mmintrin.h
new file mode 100644
index 0000000..b351200
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/mmintrin.h
@@ -0,0 +1,942 @@
+/* Copyright (C) 2002-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 9.0. */
+
+#ifndef _MMINTRIN_H_INCLUDED
+#define _MMINTRIN_H_INCLUDED
+
+#ifndef __MMX__
+#pragma GCC push_options
+#pragma GCC target("mmx")
+#define __DISABLE_MMX__
+#endif /* __MMX__ */
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
+
+/* Internal data types for implementing the intrinsics. */
+typedef int __v2si __attribute__ ((__vector_size__ (8)));
+typedef short __v4hi __attribute__ ((__vector_size__ (8)));
+typedef char __v8qi __attribute__ ((__vector_size__ (8)));
+typedef long long __v1di __attribute__ ((__vector_size__ (8)));
+typedef float __v2sf __attribute__ ((__vector_size__ (8)));
+
+/* Empty the multimedia state. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_empty (void)
+{
+ __builtin_ia32_emms ();
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_empty (void)
+{
+ _mm_empty ();
+}
+
+/* Convert I to a __m64 object. The integer is zero-extended to 64-bits. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi32_si64 (int __i)
+{
+ return (__m64) __builtin_ia32_vec_init_v2si (__i, 0);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_from_int (int __i)
+{
+ return _mm_cvtsi32_si64 (__i);
+}
+
+#ifdef __x86_64__
+/* Convert I to a __m64 object. */
+
+/* Intel intrinsic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_from_int64 (long long __i)
+{
+ return (__m64) __i;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64_m64 (long long __i)
+{
+ return (__m64) __i;
+}
+
+/* Microsoft intrinsic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64x_si64 (long long __i)
+{
+ return (__m64) __i;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_pi64x (long long __i)
+{
+ return (__m64) __i;
+}
+#endif
+
+/* Convert the lower 32 bits of the __m64 object into an integer. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64_si32 (__m64 __i)
+{
+ return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_to_int (__m64 __i)
+{
+ return _mm_cvtsi64_si32 (__i);
+}
+
+#ifdef __x86_64__
+/* Convert the __m64 object to a 64bit integer. */
+
+/* Intel intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_to_int64 (__m64 __i)
+{
+ return (long long)__i;
+}
+
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtm64_si64 (__m64 __i)
+{
+ return (long long)__i;
+}
+
+/* Microsoft intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64_si64x (__m64 __i)
+{
+ return (long long)__i;
+}
+#endif
+
+/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
+ the result, and the four 16-bit values from M2 into the upper four 8-bit
+ values of the result, all with signed saturation. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_packs_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_packsswb (__m64 __m1, __m64 __m2)
+{
+ return _mm_packs_pi16 (__m1, __m2);
+}
+
+/* Pack the two 32-bit values from M1 into the lower two 16-bit values of
+ the result, and the two 32-bit values from M2 into the upper two 16-bit
+ values of the result, all with signed saturation. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_packs_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_packssdw (__m64 __m1, __m64 __m2)
+{
+ return _mm_packs_pi32 (__m1, __m2);
+}
+
+/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
+ the result, and the four 16-bit values from M2 into the upper four 8-bit
+ values of the result, all with unsigned saturation. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_packs_pu16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_packuswb (__m64 __m1, __m64 __m2)
+{
+ return _mm_packs_pu16 (__m1, __m2);
+}
+
+/* Interleave the four 8-bit values from the high half of M1 with the four
+ 8-bit values from the high half of M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_punpckhbw (__m64 __m1, __m64 __m2)
+{
+ return _mm_unpackhi_pi8 (__m1, __m2);
+}
+
+/* Interleave the two 16-bit values from the high half of M1 with the two
+ 16-bit values from the high half of M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_punpckhwd (__m64 __m1, __m64 __m2)
+{
+ return _mm_unpackhi_pi16 (__m1, __m2);
+}
+
+/* Interleave the 32-bit value from the high half of M1 with the 32-bit
+ value from the high half of M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_punpckhdq (__m64 __m1, __m64 __m2)
+{
+ return _mm_unpackhi_pi32 (__m1, __m2);
+}
+
+/* Interleave the four 8-bit values from the low half of M1 with the four
+ 8-bit values from the low half of M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_punpcklbw (__m64 __m1, __m64 __m2)
+{
+ return _mm_unpacklo_pi8 (__m1, __m2);
+}
+
+/* Interleave the two 16-bit values from the low half of M1 with the two
+ 16-bit values from the low half of M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_punpcklwd (__m64 __m1, __m64 __m2)
+{
+ return _mm_unpacklo_pi16 (__m1, __m2);
+}
+
+/* Interleave the 32-bit value from the low half of M1 with the 32-bit
+ value from the low half of M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_punpckldq (__m64 __m1, __m64 __m2)
+{
+ return _mm_unpacklo_pi32 (__m1, __m2);
+}
+
+/* Add the 8-bit values in M1 to the 8-bit values in M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_paddb (__m64 __m1, __m64 __m2)
+{
+ return _mm_add_pi8 (__m1, __m2);
+}
+
+/* Add the 16-bit values in M1 to the 16-bit values in M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_paddw (__m64 __m1, __m64 __m2)
+{
+ return _mm_add_pi16 (__m1, __m2);
+}
+
+/* Add the 32-bit values in M1 to the 32-bit values in M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_paddd (__m64 __m1, __m64 __m2)
+{
+ return _mm_add_pi32 (__m1, __m2);
+}
+
+/* Add the 64-bit value in M1 to the 64-bit value in M2. */
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_SSE2__
+#endif /* __SSE2__ */
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_si64 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2);
+}
+#ifdef __DISABLE_SSE2__
+#undef __DISABLE_SSE2__
+#pragma GCC pop_options
+#endif /* __DISABLE_SSE2__ */
+
+/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
+ saturated arithmetic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_adds_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_paddsb (__m64 __m1, __m64 __m2)
+{
+ return _mm_adds_pi8 (__m1, __m2);
+}
+
+/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
+ saturated arithmetic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_adds_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_paddsw (__m64 __m1, __m64 __m2)
+{
+ return _mm_adds_pi16 (__m1, __m2);
+}
+
+/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
+ saturated arithmetic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_adds_pu8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_paddusb (__m64 __m1, __m64 __m2)
+{
+ return _mm_adds_pu8 (__m1, __m2);
+}
+
+/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
+ saturated arithmetic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_adds_pu16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_paddusw (__m64 __m1, __m64 __m2)
+{
+ return _mm_adds_pu16 (__m1, __m2);
+}
+
+/* Subtract the 8-bit values in M2 from the 8-bit values in M1. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psubb (__m64 __m1, __m64 __m2)
+{
+ return _mm_sub_pi8 (__m1, __m2);
+}
+
+/* Subtract the 16-bit values in M2 from the 16-bit values in M1. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psubw (__m64 __m1, __m64 __m2)
+{
+ return _mm_sub_pi16 (__m1, __m2);
+}
+
+/* Subtract the 32-bit values in M2 from the 32-bit values in M1. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psubd (__m64 __m1, __m64 __m2)
+{
+ return _mm_sub_pi32 (__m1, __m2);
+}
+
+/* Subtract the 64-bit value in M2 from the 64-bit value in M1. */
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_SSE2__
+#endif /* __SSE2__ */
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_si64 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2);
+}
+#ifdef __DISABLE_SSE2__
+#undef __DISABLE_SSE2__
+#pragma GCC pop_options
+#endif /* __DISABLE_SSE2__ */
+
+/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
+ saturating arithmetic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_subs_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psubsb (__m64 __m1, __m64 __m2)
+{
+ return _mm_subs_pi8 (__m1, __m2);
+}
+
+/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
+ signed saturating arithmetic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_subs_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psubsw (__m64 __m1, __m64 __m2)
+{
+ return _mm_subs_pi16 (__m1, __m2);
+}
+
+/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
+ unsigned saturating arithmetic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_subs_pu8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psubusb (__m64 __m1, __m64 __m2)
+{
+ return _mm_subs_pu8 (__m1, __m2);
+}
+
+/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
+ unsigned saturating arithmetic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_subs_pu16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psubusw (__m64 __m1, __m64 __m2)
+{
+ return _mm_subs_pu16 (__m1, __m2);
+}
+
+/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
+ four 32-bit intermediate results, which are then summed by pairs to
+ produce two 32-bit results. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_madd_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmaddwd (__m64 __m1, __m64 __m2)
+{
+ return _mm_madd_pi16 (__m1, __m2);
+}
+
+/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
+ M2 and produce the high 16 bits of the 32-bit results. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmulhw (__m64 __m1, __m64 __m2)
+{
+ return _mm_mulhi_pi16 (__m1, __m2);
+}
+
+/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
+ the low 16 bits of the results. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mullo_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmullw (__m64 __m1, __m64 __m2)
+{
+ return _mm_mullo_pi16 (__m1, __m2);
+}
+
+/* Shift four 16-bit values in M left by COUNT. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sll_pi16 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (__v4hi)__count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psllw (__m64 __m, __m64 __count)
+{
+ return _mm_sll_pi16 (__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_slli_pi16 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psllwi (__m64 __m, int __count)
+{
+ return _mm_slli_pi16 (__m, __count);
+}
+
+/* Shift two 32-bit values in M left by COUNT. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sll_pi32 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_ia32_pslld ((__v2si)__m, (__v2si)__count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pslld (__m64 __m, __m64 __count)
+{
+ return _mm_sll_pi32 (__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_slli_pi32 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pslldi (__m64 __m, int __count)
+{
+ return _mm_slli_pi32 (__m, __count);
+}
+
+/* Shift the 64-bit value in M left by COUNT. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sll_si64 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_ia32_psllq ((__v1di)__m, (__v1di)__count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psllq (__m64 __m, __m64 __count)
+{
+ return _mm_sll_si64 (__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_slli_si64 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_ia32_psllqi ((__v1di)__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psllqi (__m64 __m, int __count)
+{
+ return _mm_slli_si64 (__m, __count);
+}
+
+/* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sra_pi16 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (__v4hi)__count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psraw (__m64 __m, __m64 __count)
+{
+ return _mm_sra_pi16 (__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srai_pi16 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psrawi (__m64 __m, int __count)
+{
+ return _mm_srai_pi16 (__m, __count);
+}
+
+/* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sra_pi32 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_ia32_psrad ((__v2si)__m, (__v2si)__count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psrad (__m64 __m, __m64 __count)
+{
+ return _mm_sra_pi32 (__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srai_pi32 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psradi (__m64 __m, int __count)
+{
+ return _mm_srai_pi32 (__m, __count);
+}
+
+/* Shift four 16-bit values in M right by COUNT; shift in zeros. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srl_pi16 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (__v4hi)__count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psrlw (__m64 __m, __m64 __count)
+{
+ return _mm_srl_pi16 (__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srli_pi16 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psrlwi (__m64 __m, int __count)
+{
+ return _mm_srli_pi16 (__m, __count);
+}
+
+/* Shift two 32-bit values in M right by COUNT; shift in zeros. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srl_pi32 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_ia32_psrld ((__v2si)__m, (__v2si)__count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psrld (__m64 __m, __m64 __count)
+{
+ return _mm_srl_pi32 (__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srli_pi32 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psrldi (__m64 __m, int __count)
+{
+ return _mm_srli_pi32 (__m, __count);
+}
+
+/* Shift the 64-bit value in M right by COUNT; shift in zeros. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srl_si64 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_ia32_psrlq ((__v1di)__m, (__v1di)__count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psrlq (__m64 __m, __m64 __count)
+{
+ return _mm_srl_si64 (__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srli_si64 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_ia32_psrlqi ((__v1di)__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psrlqi (__m64 __m, int __count)
+{
+ return _mm_srli_si64 (__m, __count);
+}
+
+/* Bit-wise AND the 64-bit values in M1 and M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_and_si64 (__m64 __m1, __m64 __m2)
+{
+ return __builtin_ia32_pand (__m1, __m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pand (__m64 __m1, __m64 __m2)
+{
+ return _mm_and_si64 (__m1, __m2);
+}
+
+/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
+ 64-bit value in M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_andnot_si64 (__m64 __m1, __m64 __m2)
+{
+ return __builtin_ia32_pandn (__m1, __m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pandn (__m64 __m1, __m64 __m2)
+{
+ return _mm_andnot_si64 (__m1, __m2);
+}
+
+/* Bit-wise inclusive OR the 64-bit values in M1 and M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_or_si64 (__m64 __m1, __m64 __m2)
+{
+ return __builtin_ia32_por (__m1, __m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_por (__m64 __m1, __m64 __m2)
+{
+ return _mm_or_si64 (__m1, __m2);
+}
+
+/* Bit-wise exclusive OR the 64-bit values in M1 and M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_xor_si64 (__m64 __m1, __m64 __m2)
+{
+ return __builtin_ia32_pxor (__m1, __m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pxor (__m64 __m1, __m64 __m2)
+{
+ return _mm_xor_si64 (__m1, __m2);
+}
+
+/* Compare eight 8-bit values. The result of the comparison is 0xFF if the
+ test is true and zero if false. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pcmpeqb (__m64 __m1, __m64 __m2)
+{
+ return _mm_cmpeq_pi8 (__m1, __m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pcmpgtb (__m64 __m1, __m64 __m2)
+{
+ return _mm_cmpgt_pi8 (__m1, __m2);
+}
+
+/* Compare four 16-bit values. The result of the comparison is 0xFFFF if
+ the test is true and zero if false. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pcmpeqw (__m64 __m1, __m64 __m2)
+{
+ return _mm_cmpeq_pi16 (__m1, __m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pcmpgtw (__m64 __m1, __m64 __m2)
+{
+ return _mm_cmpgt_pi16 (__m1, __m2);
+}
+
+/* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if
+ the test is true and zero if false. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pcmpeqd (__m64 __m1, __m64 __m2)
+{
+ return _mm_cmpeq_pi32 (__m1, __m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pcmpgtd (__m64 __m1, __m64 __m2)
+{
+ return _mm_cmpgt_pi32 (__m1, __m2);
+}
+
+/* Creates a 64-bit zero. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setzero_si64 (void)
+{
+ return (__m64)0LL;
+}
+
+/* Creates a vector of two 32-bit values; I0 is least significant. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_pi32 (int __i1, int __i0)
+{
+ return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
+}
+
+/* Creates a vector of four 16-bit values; W0 is least significant. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
+{
+ return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3);
+}
+
+/* Creates a vector of eight 8-bit values; B0 is least significant. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
+ char __b3, char __b2, char __b1, char __b0)
+{
+ return (__m64) __builtin_ia32_vec_init_v8qi (__b0, __b1, __b2, __b3,
+ __b4, __b5, __b6, __b7);
+}
+
+/* Similar, but with the arguments in reverse order. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_pi32 (int __i0, int __i1)
+{
+ return _mm_set_pi32 (__i1, __i0);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3)
+{
+ return _mm_set_pi16 (__w3, __w2, __w1, __w0);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
+ char __b4, char __b5, char __b6, char __b7)
+{
+ return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
+}
+
+/* Creates a vector of two 32-bit values, both elements containing I. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_pi32 (int __i)
+{
+ return _mm_set_pi32 (__i, __i);
+}
+
+/* Creates a vector of four 16-bit values, all elements containing W. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_pi16 (short __w)
+{
+ return _mm_set_pi16 (__w, __w, __w, __w);
+}
+
+/* Creates a vector of eight 8-bit values, all elements containing B. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_pi8 (char __b)
+{
+ return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b);
+}
+#ifdef __DISABLE_MMX__
+#undef __DISABLE_MMX__
+#pragma GCC pop_options
+#endif /* __DISABLE_MMX__ */
+
+#endif /* _MMINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/nmmintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/nmmintrin.h
new file mode 100644
index 0000000..9fc7107
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/nmmintrin.h
@@ -0,0 +1,33 @@
+/* Copyright (C) 2007-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 10.0. */
+
+#ifndef _NMMINTRIN_H_INCLUDED
+#define _NMMINTRIN_H_INCLUDED
+
+/* We just include the SSE4.1 header file.  */
+#include <smmintrin.h>
+
+#endif /* _NMMINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/omp.h b/lib/gcc/x86_64-linux-android/4.9/include/omp.h
new file mode 100644
index 0000000..b1824b5
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/omp.h
@@ -0,0 +1,127 @@
+/* Copyright (C) 2005-2014 Free Software Foundation, Inc.
+ Contributed by Richard Henderson <rth@redhat.com>.
+
+ This file is part of the GNU OpenMP Library (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _OMP_H
+#define _OMP_H 1
+
+#ifndef _LIBGOMP_OMP_LOCK_DEFINED
+#define _LIBGOMP_OMP_LOCK_DEFINED 1
+/* These two structures get edited by the libgomp build process to
+ reflect the shape of the two types. Their internals are private
+ to the library.
+
+ As declared here, omp_lock_t wraps a 4-byte array and
+ omp_nest_lock_t a 12-byte array, each aligned to 4 bytes; only the
+ size and alignment are meaningful to code outside libgomp.
+
+ NOTE(review): a 12-byte, 4-byte-aligned nest lock looks like a
+ 32-bit layout -- confirm that it matches the libgomp actually built
+ for this x86_64 target. */
+
+typedef struct
+{
+ unsigned char _x[4]
+ __attribute__((__aligned__(4)));
+} omp_lock_t;
+
+typedef struct
+{
+ unsigned char _x[12]
+ __attribute__((__aligned__(4)));
+} omp_nest_lock_t;
+#endif
+
+/* Schedule kinds taken by omp_set_schedule and written back by
+   omp_get_schedule.  Every enumerator has an explicit value.  */
+enum omp_sched_t
+{
+  omp_sched_static  = 1,
+  omp_sched_dynamic = 2,
+  omp_sched_guided  = 3,
+  omp_sched_auto    = 4
+};
+typedef enum omp_sched_t omp_sched_t;
+
+/* Values returned by omp_get_proc_bind.  Every enumerator has an
+   explicit value, starting at 0 for omp_proc_bind_false.  */
+enum omp_proc_bind_t
+{
+  omp_proc_bind_false  = 0,
+  omp_proc_bind_true   = 1,
+  omp_proc_bind_master = 2,
+  omp_proc_bind_close  = 3,
+  omp_proc_bind_spread = 4
+};
+typedef enum omp_proc_bind_t omp_proc_bind_t;
+
+#ifdef __cplusplus
+extern "C" {
+# define __GOMP_NOTHROW throw ()
+#else
+# define __GOMP_NOTHROW __attribute__((__nothrow__))
+#endif
+
+/* Thread-count, thread-number and processor-count queries.  */
+extern void omp_set_num_threads (int __num_threads) __GOMP_NOTHROW;
+extern int omp_get_num_threads (void) __GOMP_NOTHROW;
+extern int omp_get_max_threads (void) __GOMP_NOTHROW;
+extern int omp_get_thread_num (void) __GOMP_NOTHROW;
+extern int omp_get_num_procs (void) __GOMP_NOTHROW;
+
+extern int omp_in_parallel (void) __GOMP_NOTHROW;
+
+/* "dynamic" and "nested" settings: one setter and one getter each.  */
+extern void omp_set_dynamic (int __dynamic_threads) __GOMP_NOTHROW;
+extern int omp_get_dynamic (void) __GOMP_NOTHROW;
+
+extern void omp_set_nested (int __nested) __GOMP_NOTHROW;
+extern int omp_get_nested (void) __GOMP_NOTHROW;
+
+/* Routines operating on an omp_lock_t.  */
+extern void omp_init_lock (omp_lock_t *__lock) __GOMP_NOTHROW;
+extern void omp_destroy_lock (omp_lock_t *__lock) __GOMP_NOTHROW;
+extern void omp_set_lock (omp_lock_t *__lock) __GOMP_NOTHROW;
+extern void omp_unset_lock (omp_lock_t *__lock) __GOMP_NOTHROW;
+extern int omp_test_lock (omp_lock_t *__lock) __GOMP_NOTHROW;
+
+/* The same set of routines for an omp_nest_lock_t.  */
+extern void omp_init_nest_lock (omp_nest_lock_t *__lock) __GOMP_NOTHROW;
+extern void omp_destroy_nest_lock (omp_nest_lock_t *__lock) __GOMP_NOTHROW;
+extern void omp_set_nest_lock (omp_nest_lock_t *__lock) __GOMP_NOTHROW;
+extern void omp_unset_nest_lock (omp_nest_lock_t *__lock) __GOMP_NOTHROW;
+extern int omp_test_nest_lock (omp_nest_lock_t *__lock) __GOMP_NOTHROW;
+
+/* Timing routines; both return a double.  */
+extern double omp_get_wtime (void) __GOMP_NOTHROW;
+extern double omp_get_wtick (void) __GOMP_NOTHROW;
+
+/* Schedule, thread-limit and nesting-level routines.  */
+extern void omp_set_schedule (omp_sched_t __kind, int __modifier)
+  __GOMP_NOTHROW;
+extern void omp_get_schedule (omp_sched_t *__kind, int *__modifier)
+  __GOMP_NOTHROW;
+extern int omp_get_thread_limit (void) __GOMP_NOTHROW;
+extern void omp_set_max_active_levels (int __max_levels) __GOMP_NOTHROW;
+extern int omp_get_max_active_levels (void) __GOMP_NOTHROW;
+extern int omp_get_level (void) __GOMP_NOTHROW;
+extern int omp_get_ancestor_thread_num (int __level) __GOMP_NOTHROW;
+extern int omp_get_team_size (int __level) __GOMP_NOTHROW;
+extern int omp_get_active_level (void) __GOMP_NOTHROW;
+
+extern int omp_in_final (void) __GOMP_NOTHROW;
+
+extern int omp_get_cancellation (void) __GOMP_NOTHROW;
+extern omp_proc_bind_t omp_get_proc_bind (void) __GOMP_NOTHROW;
+
+/* Device and team routines.  */
+extern void omp_set_default_device (int __device_num) __GOMP_NOTHROW;
+extern int omp_get_default_device (void) __GOMP_NOTHROW;
+extern int omp_get_num_devices (void) __GOMP_NOTHROW;
+extern int omp_get_num_teams (void) __GOMP_NOTHROW;
+extern int omp_get_team_num (void) __GOMP_NOTHROW;
+
+extern int omp_is_initial_device (void) __GOMP_NOTHROW;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _OMP_H */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/pmmintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/pmmintrin.h
new file mode 100644
index 0000000..6a79500
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/pmmintrin.h
@@ -0,0 +1,132 @@
+/* Copyright (C) 2003-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 9.0. */
+
+#ifndef _PMMINTRIN_H_INCLUDED
+#define _PMMINTRIN_H_INCLUDED
+
+/* We need definitions from the SSE2 and SSE header files.  */
+#include <emmintrin.h>
+
+#ifndef __SSE3__
+#pragma GCC push_options
+#pragma GCC target("sse3")
+#define __DISABLE_SSE3__
+#endif /* __SSE3__ */
+
+/* Additional bits in the MXCSR.
+
+ _MM_DENORMALS_ZERO_MASK (0x0040) selects the single control bit these
+ macros manage; _MM_DENORMALS_ZERO_ON and _MM_DENORMALS_ZERO_OFF are
+ the two values that bit can take.
+
+ _MM_SET_DENORMALS_ZERO_MODE (mode) reads the register with
+ _mm_getcsr, clears the masked bit, ORs in MODE and writes the result
+ back with _mm_setcsr. _MM_GET_DENORMALS_ZERO_MODE () returns the
+ value read by _mm_getcsr ANDed with the mask. */
+#define _MM_DENORMALS_ZERO_MASK 0x0040
+#define _MM_DENORMALS_ZERO_ON 0x0040
+#define _MM_DENORMALS_ZERO_OFF 0x0000
+
+#define _MM_SET_DENORMALS_ZERO_MODE(mode) \
+ _mm_setcsr ((_mm_getcsr () & ~_MM_DENORMALS_ZERO_MASK) | (mode))
+#define _MM_GET_DENORMALS_ZERO_MODE() \
+ (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
+
+/* Wraps __builtin_ia32_addsubps: both operands are passed as __v4sf
+   and the result is handed back as __m128.  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_addsub_ps (__m128 __X, __m128 __Y)
+{
+  const __v4sf __lhs = (__v4sf) __X;
+  const __v4sf __rhs = (__v4sf) __Y;
+
+  return (__m128) __builtin_ia32_addsubps (__lhs, __rhs);
+}
+
+/* Wraps __builtin_ia32_haddps: both operands are passed as __v4sf and
+   the result is handed back as __m128.  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hadd_ps (__m128 __X, __m128 __Y)
+{
+  const __v4sf __lhs = (__v4sf) __X;
+  const __v4sf __rhs = (__v4sf) __Y;
+
+  return (__m128) __builtin_ia32_haddps (__lhs, __rhs);
+}
+
+/* Wraps __builtin_ia32_hsubps: both operands are passed as __v4sf and
+   the result is handed back as __m128.  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsub_ps (__m128 __X, __m128 __Y)
+{
+  const __v4sf __lhs = (__v4sf) __X;
+  const __v4sf __rhs = (__v4sf) __Y;
+
+  return (__m128) __builtin_ia32_hsubps (__lhs, __rhs);
+}
+
+/* Wraps __builtin_ia32_movshdup: __X is passed as __v4sf and the
+   result is handed back as __m128.  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movehdup_ps (__m128 __X)
+{
+  const __v4sf __src = (__v4sf) __X;
+
+  return (__m128) __builtin_ia32_movshdup (__src);
+}
+
+/* Wraps __builtin_ia32_movsldup: __X is passed as __v4sf and the
+   result is handed back as __m128.  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_moveldup_ps (__m128 __X)
+{
+  const __v4sf __src = (__v4sf) __X;
+
+  return (__m128) __builtin_ia32_movsldup (__src);
+}
+
+/* Wraps __builtin_ia32_addsubpd: both operands are passed as __v2df
+   and the result is handed back as __m128d.  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_addsub_pd (__m128d __X, __m128d __Y)
+{
+  const __v2df __lhs = (__v2df) __X;
+  const __v2df __rhs = (__v2df) __Y;
+
+  return (__m128d) __builtin_ia32_addsubpd (__lhs, __rhs);
+}
+
+/* Wraps __builtin_ia32_haddpd: both operands are passed as __v2df and
+   the result is handed back as __m128d.  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hadd_pd (__m128d __X, __m128d __Y)
+{
+  const __v2df __lhs = (__v2df) __X;
+  const __v2df __rhs = (__v2df) __Y;
+
+  return (__m128d) __builtin_ia32_haddpd (__lhs, __rhs);
+}
+
+/* Wraps __builtin_ia32_hsubpd: both operands are passed as __v2df and
+   the result is handed back as __m128d.  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsub_pd (__m128d __X, __m128d __Y)
+{
+  const __v2df __lhs = (__v2df) __X;
+  const __v2df __rhs = (__v2df) __Y;
+
+  return (__m128d) __builtin_ia32_hsubpd (__lhs, __rhs);
+}
+
+/* Forwards the pointer __P unchanged to _mm_load1_pd and returns its
+   result.  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loaddup_pd (double const *__P)
+{
+  const __m128d __dup = _mm_load1_pd (__P);
+
+  return __dup;
+}
+
+/* Shuffles __X with itself through _mm_shuffle_pd, using the selector
+   _MM_SHUFFLE2 (0,0).  The selector stays a literal in the call.  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movedup_pd (__m128d __X)
+{
+  const __m128d __dup = _mm_shuffle_pd (__X, __X, _MM_SHUFFLE2 (0,0));
+
+  return __dup;
+}
+
+/* Wraps __builtin_ia32_lddqu: __P is passed as a char const pointer
+   and the loaded value is returned as __m128i.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_lddqu_si128 (__m128i const *__P)
+{
+  char const *__src = (char const *) __P;
+
+  return (__m128i) __builtin_ia32_lddqu (__src);
+}
+
+/* Passes the address __P and the two unsigned words __E and __H, in
+   that order, to __builtin_ia32_monitor.  Returns nothing.  */
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_monitor (void const * __P, unsigned int __E, unsigned int __H)
+{
+  __builtin_ia32_monitor (__P, __E, __H);
+}
+
+/* Passes the two unsigned words __E and __H, in that order, to
+   __builtin_ia32_mwait.  Returns nothing.  */
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mwait (unsigned int __E, unsigned int __H)
+{
+  __builtin_ia32_mwait (__E, __H);
+}
+
+#ifdef __DISABLE_SSE3__
+#undef __DISABLE_SSE3__
+#pragma GCC pop_options
+#endif /* __DISABLE_SSE3__ */
+
+#endif /* _PMMINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/popcntintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/popcntintrin.h
new file mode 100644
index 0000000..41845d8
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/popcntintrin.h
@@ -0,0 +1,53 @@
+/* Copyright (C) 2009-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _POPCNTINTRIN_H_INCLUDED
+#define _POPCNTINTRIN_H_INCLUDED
+
+#ifndef __POPCNT__
+#pragma GCC push_options
+#pragma GCC target("popcnt")
+#define __DISABLE_POPCNT__
+#endif /* __POPCNT__ */
+
+/* Returns how many bits of __X are set to 1, as computed by
+   __builtin_popcount.  */
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_popcnt_u32 (unsigned int __X)
+{
+  const int __ones = __builtin_popcount (__X);
+
+  return __ones;
+}
+
+#ifdef __x86_64__
+/* 64-bit variant, only declared for x86-64: returns the result of
+   __builtin_popcountll on __X, widened to long long.  */
+extern __inline long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_popcnt_u64 (unsigned long long __X)
+{
+  const long long __ones = __builtin_popcountll (__X);
+
+  return __ones;
+}
+#endif
+
+#ifdef __DISABLE_POPCNT__
+#undef __DISABLE_POPCNT__
+#pragma GCC pop_options
+#endif /* __DISABLE_POPCNT__ */
+
+#endif /* _POPCNTINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/prfchwintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/prfchwintrin.h
new file mode 100644
index 0000000..b2f5772
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/prfchwintrin.h
@@ -0,0 +1,37 @@
+/* Copyright (C) 2012-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _X86INTRIN_H_INCLUDED && !defined _MM3DNOW_H_INCLUDED
+# error "Never use <prfchwintrin.h> directly; include <x86intrin.h> or <mm3dnow.h> instead."
+#endif
+
+#ifndef _PRFCHWINTRIN_H_INCLUDED
+#define _PRFCHWINTRIN_H_INCLUDED
+
+/* Issues __builtin_prefetch for the address __P.  Returns nothing.  */
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_prefetchw (void *__P)
+{
+  /* The second argument, 1, asks for a prefetch in preparation for a
+     write; the third, 3, is the locality hint (_MM_HINT_T0).  Both
+     stay literal constants in the call.  */
+  __builtin_prefetch (__P, 1, 3);
+}
+
+#endif /* _PRFCHWINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/rdseedintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/rdseedintrin.h
new file mode 100644
index 0000000..0ab18e5
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/rdseedintrin.h
@@ -0,0 +1,66 @@
+/* Copyright (C) 2012-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _X86INTRIN_H_INCLUDED
+# error "Never use <rdseedintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef _RDSEEDINTRIN_H_INCLUDED
+#define _RDSEEDINTRIN_H_INCLUDED
+
+#ifndef __RDSEED__
+#pragma GCC push_options
+#pragma GCC target("rdseed")
+#define __DISABLE_RDSEED__
+#endif /* __RDSEED__ */
+
+
+/* Calls __builtin_ia32_rdseed_hi_step with the unsigned short pointer
+   __p and returns the builtin's int result.  The parameter carries a
+   reserved (double-underscore) name so that a user macro named `p'
+   cannot rewrite this system header.  */
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_rdseed16_step (unsigned short *__p)
+{
+  return __builtin_ia32_rdseed_hi_step (__p);
+}
+
+/* Calls __builtin_ia32_rdseed_si_step with the unsigned int pointer
+   __p and returns the builtin's int result.  The parameter carries a
+   reserved (double-underscore) name so that a user macro named `p'
+   cannot rewrite this system header.  */
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_rdseed32_step (unsigned int *__p)
+{
+  return __builtin_ia32_rdseed_si_step (__p);
+}
+
+#ifdef __x86_64__
+/* 64-bit variant, only declared for x86-64.  Calls
+   __builtin_ia32_rdseed_di_step with the unsigned long long pointer
+   __p and returns the builtin's int result.  The parameter carries a
+   reserved (double-underscore) name so that a user macro named `p'
+   cannot rewrite this system header.  */
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_rdseed64_step (unsigned long long *__p)
+{
+  return __builtin_ia32_rdseed_di_step (__p);
+}
+#endif
+
+#ifdef __DISABLE_RDSEED__
+#undef __DISABLE_RDSEED__
+#pragma GCC pop_options
+#endif /* __DISABLE_RDSEED__ */
+
+#endif /* _RDSEEDINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/rtmintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/rtmintrin.h
new file mode 100644
index 0000000..ac40d22
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/rtmintrin.h
@@ -0,0 +1,84 @@
+/* Copyright (C) 2012-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+# error "Never use <rtmintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _RTMINTRIN_H_INCLUDED
+#define _RTMINTRIN_H_INCLUDED
+
+#ifndef __RTM__
+#pragma GCC push_options
+#pragma GCC target("rtm")
+#define __DISABLE_RTM__
+#endif /* __RTM__ */
+
+#define _XBEGIN_STARTED (~0u) /* All bits set; what _xbegin returns on success. */
+#define _XABORT_EXPLICIT (1 << 0) /* Status mask: bit 0. */
+#define _XABORT_RETRY (1 << 1) /* Status mask: bit 1. */
+#define _XABORT_CONFLICT (1 << 2) /* Status mask: bit 2. */
+#define _XABORT_CAPACITY (1 << 3) /* Status mask: bit 3. */
+#define _XABORT_DEBUG (1 << 4) /* Status mask: bit 4. */
+#define _XABORT_NESTED (1 << 5) /* Status mask: bit 5. */
+#define _XABORT_CODE(x) (((x) >> 24) & 0xFF) /* Extracts bits 31..24 of X. */
+
+/* Start an RTM code region. Return _XBEGIN_STARTED on success and the
+ abort condition otherwise.
+
+ The value is whatever __builtin_ia32_xbegin returns, passed through
+ unchanged. Presumably callers compare it with _XBEGIN_STARTED and
+ decode any other value with the _XABORT_* masks and _XABORT_CODE --
+ confirm against the RTM documentation. */
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_xbegin (void)
+{
+ return __builtin_ia32_xbegin ();
+}
+
+/* Specify the end of an RTM code region. If it corresponds to the
+ outermost transaction, then attempts the transaction commit. If the
+ commit fails, then control is transferred to the outermost transaction
+ fallback handler.
+
+ Thin wrapper: takes no arguments, calls __builtin_ia32_xend and
+ returns nothing. */
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_xend (void)
+{
+ __builtin_ia32_xend ();
+}
+
+/* Force an RTM abort condition.  The control is transferred to the
+   outermost transaction fallback handler with the abort condition IMM.
+
+   When optimizing, _xabort is an always-inline function forwarding
+   __imm to __builtin_ia32_xabort; otherwise it is a macro that pastes
+   its argument directly into the builtin call.  The function parameter
+   uses a reserved (double-underscore) name so that a user macro named
+   `imm' cannot rewrite this system header.  */
+#ifdef __OPTIMIZE__
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_xabort (const unsigned int __imm)
+{
+  __builtin_ia32_xabort (__imm);
+}
+#else
+#define _xabort(N) __builtin_ia32_xabort (N)
+#endif /* __OPTIMIZE__ */
+
+#ifdef __DISABLE_RTM__
+#undef __DISABLE_RTM__
+#pragma GCC pop_options
+#endif /* __DISABLE_RTM__ */
+
+#endif /* _RTMINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/shaintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/shaintrin.h
new file mode 100644
index 0000000..d8a3da3
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/shaintrin.h
@@ -0,0 +1,98 @@
+/* Copyright (C) 2013-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <shaintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _SHAINTRIN_H_INCLUDED
+#define _SHAINTRIN_H_INCLUDED
+
+#ifndef __SHA__
+#pragma GCC push_options
+#pragma GCC target("sha")
+#define __DISABLE_SHA__
+#endif /* __SHA__ */
+
+/* Wraps __builtin_ia32_sha1msg1: both operands are passed as __v4si
+   and the result is handed back as __m128i.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha1msg1_epu32 (__m128i __A, __m128i __B)
+{
+  const __v4si __lhs = (__v4si) __A;
+  const __v4si __rhs = (__v4si) __B;
+
+  return (__m128i) __builtin_ia32_sha1msg1 (__lhs, __rhs);
+}
+
+/* Wraps __builtin_ia32_sha1msg2: both operands are passed as __v4si
+   and the result is handed back as __m128i.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha1msg2_epu32 (__m128i __A, __m128i __B)
+{
+  const __v4si __lhs = (__v4si) __A;
+  const __v4si __rhs = (__v4si) __B;
+
+  return (__m128i) __builtin_ia32_sha1msg2 (__lhs, __rhs);
+}
+
+/* Wraps __builtin_ia32_sha1nexte: both operands are passed as __v4si
+   and the result is handed back as __m128i.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha1nexte_epu32 (__m128i __A, __m128i __B)
+{
+  const __v4si __lhs = (__v4si) __A;
+  const __v4si __rhs = (__v4si) __B;
+
+  return (__m128i) __builtin_ia32_sha1nexte (__lhs, __rhs);
+}
+
+/* Wraps __builtin_ia32_sha1rnds4: __A and __B are passed as __v4si
+   together with the int __I, and the result is returned as __m128i.
+
+   When optimizing this is an always-inline function; otherwise it is a
+   macro that places the arguments directly in the builtin call.  Every
+   macro argument is parenthesized so that the casts apply to the whole
+   argument expression (for example a conditional expression) rather
+   than to its first operand only.  */
+#ifdef __OPTIMIZE__
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha1rnds4_epu32 (__m128i __A, __m128i __B, const int __I)
+{
+  return (__m128i) __builtin_ia32_sha1rnds4 ((__v4si) __A, (__v4si) __B, __I);
+}
+#else
+#define _mm_sha1rnds4_epu32(A, B, I) \
+  ((__m128i) __builtin_ia32_sha1rnds4 ((__v4si)(__m128i)(A), \
+                                       (__v4si)(__m128i)(B), (int)(I)))
+#endif
+
+/* Wraps __builtin_ia32_sha256msg1: both operands are passed as __v4si
+   and the result is handed back as __m128i.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha256msg1_epu32 (__m128i __A, __m128i __B)
+{
+  const __v4si __lhs = (__v4si) __A;
+  const __v4si __rhs = (__v4si) __B;
+
+  return (__m128i) __builtin_ia32_sha256msg1 (__lhs, __rhs);
+}
+
+/* Wraps __builtin_ia32_sha256msg2: both operands are passed as __v4si
+   and the result is handed back as __m128i.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha256msg2_epu32 (__m128i __A, __m128i __B)
+{
+  const __v4si __lhs = (__v4si) __A;
+  const __v4si __rhs = (__v4si) __B;
+
+  return (__m128i) __builtin_ia32_sha256msg2 (__lhs, __rhs);
+}
+
+/* Wraps __builtin_ia32_sha256rnds2: the three operands are passed as
+   __v4si, in argument order, and the result is handed back as
+   __m128i.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha256rnds2_epu32 (__m128i __A, __m128i __B, __m128i __C)
+{
+  const __v4si __first = (__v4si) __A;
+  const __v4si __second = (__v4si) __B;
+  const __v4si __third = (__v4si) __C;
+
+  return (__m128i) __builtin_ia32_sha256rnds2 (__first, __second, __third);
+}
+
+#ifdef __DISABLE_SHA__
+#undef __DISABLE_SHA__
+#pragma GCC pop_options
+#endif /* __DISABLE_SHA__ */
+
+#endif /* _SHAINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/smmintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/smmintrin.h
new file mode 100644
index 0000000..886ace4
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/smmintrin.h
@@ -0,0 +1,862 @@
+/* Copyright (C) 2007-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 10.0. */
+
+#ifndef _SMMINTRIN_H_INCLUDED
+#define _SMMINTRIN_H_INCLUDED
+
+/* We need definitions from the SSSE3, SSE3, SSE2 and SSE header
+ files. */
+#include <tmmintrin.h>
+
+#ifndef __SSE4_1__
+#pragma GCC push_options
+#pragma GCC target("sse4.1")
+#define __DISABLE_SSE4_1__
+#endif /* __SSE4_1__ */
+
+/* Rounding mode macros.
+
+ Values 0x00-0x03 name a fixed rounding direction and
+ _MM_FROUND_CUR_DIRECTION is 0x04 -- presumably "use the current MXCSR
+ rounding mode"; confirm against the Intel documentation.
+ _MM_FROUND_NO_EXC is bit 0x08 and _MM_FROUND_RAISE_EXC is 0.
+
+ The composite modes OR one direction value with one exception value;
+ only _MM_FROUND_NEARBYINT uses _MM_FROUND_NO_EXC. */
+#define _MM_FROUND_TO_NEAREST_INT 0x00
+#define _MM_FROUND_TO_NEG_INF 0x01
+#define _MM_FROUND_TO_POS_INF 0x02
+#define _MM_FROUND_TO_ZERO 0x03
+#define _MM_FROUND_CUR_DIRECTION 0x04
+
+#define _MM_FROUND_RAISE_EXC 0x00
+#define _MM_FROUND_NO_EXC 0x08
+
+#define _MM_FROUND_NINT \
+ (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
+#define _MM_FROUND_FLOOR \
+ (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)
+#define _MM_FROUND_CEIL \
+ (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)
+#define _MM_FROUND_TRUNC \
+ (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)
+#define _MM_FROUND_RINT \
+ (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
+#define _MM_FROUND_NEARBYINT \
+ (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)
+
+/* Test Instruction */
+/* 128-bit packed integer bitwise test.  Yields 1 when
+   (__V & __M) == 0.  Both operands reach __builtin_ia32_ptestz128 as
+   __v2di.  */
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testz_si128 (__m128i __M, __m128i __V)
+{
+  const __v2di __mask = (__v2di) __M;
+  const __v2di __value = (__v2di) __V;
+
+  return __builtin_ia32_ptestz128 (__mask, __value);
+}
+
+/* 128-bit packed integer bitwise test.  Yields 1 when
+   (__V & ~__M) == 0.  Both operands reach __builtin_ia32_ptestc128 as
+   __v2di.  */
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testc_si128 (__m128i __M, __m128i __V)
+{
+  const __v2di __mask = (__v2di) __M;
+  const __v2di __value = (__v2di) __V;
+
+  return __builtin_ia32_ptestc128 (__mask, __value);
+}
+
+/* 128-bit packed integer bitwise test.  Yields 1 when
+   (__V & __M) != 0 && (__V & ~__M) != 0.  Both operands reach
+   __builtin_ia32_ptestnzc128 as __v2di.  */
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testnzc_si128 (__m128i __M, __m128i __V)
+{
+  const __v2di __mask = (__v2di) __M;
+  const __v2di __value = (__v2di) __V;
+
+  return __builtin_ia32_ptestnzc128 (__mask, __value);
+}
+
+/* Macros for packed integer 128-bit comparison intrinsics.
+
+ _mm_test_all_zeros and _mm_test_mix_ones_zeros forward (M, V)
+ unchanged to _mm_testz_si128 and _mm_testnzc_si128 respectively.
+ _mm_test_all_ones calls _mm_testc_si128 with V and
+ _mm_cmpeq_epi32 (V, V); note that V is expanded three times, so it
+ should be free of side effects. */
+#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V))
+
+#define _mm_test_all_ones(V) \
+ _mm_testc_si128 ((V), _mm_cmpeq_epi32 ((V), (V)))
+
+#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128 ((M), (V))
+
+/* Packed/scalar double precision floating point rounding. */
+
+#ifdef __OPTIMIZE__
+/* Wraps __builtin_ia32_roundpd: __V is passed as __v2df and the
+   rounding-mode immediate __M is forwarded untouched.  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_round_pd (__m128d __V, const int __M)
+{
+  const __v2df __src = (__v2df) __V;
+
+  return (__m128d) __builtin_ia32_roundpd (__src, __M);
+}
+
+/* Wraps __builtin_ia32_roundsd: __D and __V are passed as __v2df, in
+   that order, and the immediate __M is forwarded untouched.  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_round_sd (__m128d __D, __m128d __V, const int __M)
+{
+  const __v2df __dst = (__v2df) __D;
+  const __v2df __src = (__v2df) __V;
+
+  return (__m128d) __builtin_ia32_roundsd (__dst, __src, __M);
+}
+#else
+/* Macro forms used when not optimizing; the arguments are placed
+   directly in the builtin call.  */
+#define _mm_round_pd(V, M) \
+  ((__m128d) __builtin_ia32_roundpd ((__v2df)(__m128d)(V), (int)(M)))
+
+#define _mm_round_sd(D, V, M) \
+  ((__m128d) __builtin_ia32_roundsd ((__v2df)(__m128d)(D), \
+                                     (__v2df)(__m128d)(V), (int)(M)))
+#endif
+
+/* Packed/scalar single precision floating point rounding. */
+
+#ifdef __OPTIMIZE__
+/* Wraps __builtin_ia32_roundps: __V is passed as __v4sf and the
+   rounding-mode immediate __M is forwarded untouched.  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_round_ps (__m128 __V, const int __M)
+{
+  const __v4sf __src = (__v4sf) __V;
+
+  return (__m128) __builtin_ia32_roundps (__src, __M);
+}
+
+/* Wraps __builtin_ia32_roundss: __D and __V are passed as __v4sf, in
+   that order, and the immediate __M is forwarded untouched.  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_round_ss (__m128 __D, __m128 __V, const int __M)
+{
+  const __v4sf __dst = (__v4sf) __D;
+  const __v4sf __src = (__v4sf) __V;
+
+  return (__m128) __builtin_ia32_roundss (__dst, __src, __M);
+}
+#else
+/* Macro forms used when not optimizing; the arguments are placed
+   directly in the builtin call.  */
+#define _mm_round_ps(V, M) \
+  ((__m128) __builtin_ia32_roundps ((__v4sf)(__m128)(V), (int)(M)))
+
+#define _mm_round_ss(D, V, M) \
+  ((__m128) __builtin_ia32_roundss ((__v4sf)(__m128)(D), \
+                                    (__v4sf)(__m128)(V), (int)(M)))
+#endif
+
+/* Macros for ceil/floor intrinsics. Each one forwards its operands to
+ the matching _mm_round_pd, _mm_round_sd, _mm_round_ps or _mm_round_ss
+ with _MM_FROUND_CEIL or _MM_FROUND_FLOOR as the rounding mode. */
+#define _mm_ceil_pd(V) _mm_round_pd ((V), _MM_FROUND_CEIL)
+#define _mm_ceil_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_CEIL)
+
+#define _mm_floor_pd(V) _mm_round_pd((V), _MM_FROUND_FLOOR)
+#define _mm_floor_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_FLOOR)
+
+#define _mm_ceil_ps(V) _mm_round_ps ((V), _MM_FROUND_CEIL)
+#define _mm_ceil_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_CEIL)
+
+#define _mm_floor_ps(V) _mm_round_ps ((V), _MM_FROUND_FLOOR)
+#define _mm_floor_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_FLOOR)
+
+/* SSE4.1 */
+
+/* Integer blend instructions - select data from 2 sources using
+ constant/variable mask. */
+
+#ifdef __OPTIMIZE__
+/* Constant-mask blend: wraps __builtin_ia32_pblendw128 with __X and
+   __Y passed as __v8hi and the immediate __M forwarded untouched.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_blend_epi16 (__m128i __X, __m128i __Y, const int __M)
+{
+  const __v8hi __first = (__v8hi) __X;
+  const __v8hi __second = (__v8hi) __Y;
+
+  return (__m128i) __builtin_ia32_pblendw128 (__first, __second, __M);
+}
+#else
+/* Macro form used when not optimizing.  */
+#define _mm_blend_epi16(X, Y, M) \
+  ((__m128i) __builtin_ia32_pblendw128 ((__v8hi)(__m128i)(X), \
+                                        (__v8hi)(__m128i)(Y), (int)(M)))
+#endif
+
+/* Variable-mask blend: wraps __builtin_ia32_pblendvb128 with __X, __Y
+   and the mask __M all passed as __v16qi.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_blendv_epi8 (__m128i __X, __m128i __Y, __m128i __M)
+{
+  const __v16qi __first = (__v16qi) __X;
+  const __v16qi __second = (__v16qi) __Y;
+  const __v16qi __mask = (__v16qi) __M;
+
+  return (__m128i) __builtin_ia32_pblendvb128 (__first, __second, __mask);
+}
+
+/* Single precision floating point blend instructions - select data
+ from 2 sources using constant/variable mask. */
+
+#ifdef __OPTIMIZE__
+/* Constant-mask blend: wraps __builtin_ia32_blendps with __X and __Y
+   passed as __v4sf and the immediate __M forwarded untouched.  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_blend_ps (__m128 __X, __m128 __Y, const int __M)
+{
+  const __v4sf __first = (__v4sf) __X;
+  const __v4sf __second = (__v4sf) __Y;
+
+  return (__m128) __builtin_ia32_blendps (__first, __second, __M);
+}
+#else
+/* Macro form used when not optimizing.  */
+#define _mm_blend_ps(X, Y, M) \
+  ((__m128) __builtin_ia32_blendps ((__v4sf)(__m128)(X), \
+                                    (__v4sf)(__m128)(Y), (int)(M)))
+#endif
+
+/* Variable-mask blend: wraps __builtin_ia32_blendvps with __X, __Y and
+   the mask __M all passed as __v4sf.  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_blendv_ps (__m128 __X, __m128 __Y, __m128 __M)
+{
+  const __v4sf __first = (__v4sf) __X;
+  const __v4sf __second = (__v4sf) __Y;
+  const __v4sf __mask = (__v4sf) __M;
+
+  return (__m128) __builtin_ia32_blendvps (__first, __second, __mask);
+}
+
+/* Double precision floating point blend instructions - select data
+ from 2 sources using constant/variable mask. */
+
+#ifdef __OPTIMIZE__
+/* Constant-mask blend: wraps __builtin_ia32_blendpd with __X and __Y
+   passed as __v2df and the immediate __M forwarded untouched.  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_blend_pd (__m128d __X, __m128d __Y, const int __M)
+{
+  const __v2df __first = (__v2df) __X;
+  const __v2df __second = (__v2df) __Y;
+
+  return (__m128d) __builtin_ia32_blendpd (__first, __second, __M);
+}
+#else
+/* Macro form used when not optimizing.  */
+#define _mm_blend_pd(X, Y, M) \
+  ((__m128d) __builtin_ia32_blendpd ((__v2df)(__m128d)(X), \
+                                     (__v2df)(__m128d)(Y), (int)(M)))
+#endif
+
+/* Variable-mask blend: wraps __builtin_ia32_blendvpd with __X, __Y and
+   the mask __M all passed as __v2df.  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_blendv_pd (__m128d __X, __m128d __Y, __m128d __M)
+{
+  const __v2df __first = (__v2df) __X;
+  const __v2df __second = (__v2df) __Y;
+  const __v2df __mask = (__v2df) __M;
+
+  return (__m128d) __builtin_ia32_blendvpd (__first, __second, __mask);
+}
+
+/* Dot product instructions with mask-defined summing and zeroing parts
+ of result. */
+
+#ifdef __OPTIMIZE__
+/* Wraps __builtin_ia32_dpps: __X and __Y are passed as __v4sf and the
+   mask immediate __M is forwarded untouched.  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_dp_ps (__m128 __X, __m128 __Y, const int __M)
+{
+  const __v4sf __lhs = (__v4sf) __X;
+  const __v4sf __rhs = (__v4sf) __Y;
+
+  return (__m128) __builtin_ia32_dpps (__lhs, __rhs, __M);
+}
+
+/* Wraps __builtin_ia32_dppd: __X and __Y are passed as __v2df and the
+   mask immediate __M is forwarded untouched.  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_dp_pd (__m128d __X, __m128d __Y, const int __M)
+{
+  const __v2df __lhs = (__v2df) __X;
+  const __v2df __rhs = (__v2df) __Y;
+
+  return (__m128d) __builtin_ia32_dppd (__lhs, __rhs, __M);
+}
+#else
+/* Macro forms used when not optimizing; the arguments are placed
+   directly in the builtin call.  */
+#define _mm_dp_ps(X, Y, M) \
+  ((__m128) __builtin_ia32_dpps ((__v4sf)(__m128)(X), \
+                                 (__v4sf)(__m128)(Y), (int)(M)))
+
+#define _mm_dp_pd(X, Y, M) \
+  ((__m128d) __builtin_ia32_dppd ((__v2df)(__m128d)(X), \
+                                  (__v2df)(__m128d)(Y), (int)(M)))
+#endif
+
+/* Packed 64-bit integer comparison that zeroes or fills with ones the
+   corresponding parts of the result.  Wraps __builtin_ia32_pcmpeqq
+   with both operands passed as __v2di.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_epi64 (__m128i __X, __m128i __Y)
+{
+  const __v2di __lhs = (__v2di) __X;
+  const __v2di __rhs = (__v2di) __Y;
+
+  return (__m128i) __builtin_ia32_pcmpeqq (__lhs, __rhs);
+}
+
+/* Min/max packed integer instructions. */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epi8 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pminsb128 ((__v16qi)__X, (__v16qi)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epi8 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi)__X, (__v16qi)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epu16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pminuw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epu16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pminsd128 ((__v4si)__X, (__v4si)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si)__X, (__v4si)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epu32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pminud128 ((__v4si)__X, (__v4si)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epu32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmaxud128 ((__v4si)__X, (__v4si)__Y);
+}
+
+/* Packed integer 32-bit multiplication with truncation of upper
+ halves of results. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mullo_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmulld128 ((__v4si)__X, (__v4si)__Y);
+}
+
+/* Packed integer 32-bit multiplication of 2 pairs of operands
+ with two 64-bit results. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__X, (__v4si)__Y);
+}
+
+/* Insert single precision float into packed single precision array
+ element selected by index N. The bits [7-6] of N define S
+ index, the bits [5-4] define D index, and bits [3-0] define
+ zeroing mask for D. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_ps (__m128 __D, __m128 __S, const int __N)
+{
+ return (__m128) __builtin_ia32_insertps128 ((__v4sf)__D,
+ (__v4sf)__S,
+ __N);
+}
+#else
+#define _mm_insert_ps(D, S, N) \
+ ((__m128) __builtin_ia32_insertps128 ((__v4sf)(__m128)(D), \
+ (__v4sf)(__m128)(S), (int)(N)))
+#endif
+
+/* Helper macro to create the N value for _mm_insert_ps. */
+#define _MM_MK_INSERTPS_NDX(S, D, M) (((S) << 6) | ((D) << 4) | (M))
+
+/* Extract binary representation of single precision float from packed
+ single precision array element of X selected by index N. */
+
+/* _mm_extract_ps yields, as an int, the bit pattern of the float that
+   the vec_ext builtin returns for element N of X.  The float is
+   written to one member of a union and read back through the other,
+   so no float-to-int value conversion takes place.  When __OPTIMIZE__
+   is defined this is an always-inline function; otherwise it is a
+   macro built on a GNU statement expression that passes (int)(N)
+   straight to the builtin.  */
+#ifdef __OPTIMIZE__
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extract_ps (__m128 __X, const int __N)
+{
+ union { int i; float f; } __tmp;
+ /* Store as float, read back as int: reinterprets the bits.  */
+ __tmp.f = __builtin_ia32_vec_ext_v4sf ((__v4sf)__X, __N);
+ return __tmp.i;
+}
+#else
+#define _mm_extract_ps(X, N) \
+ (__extension__ \
+ ({ \
+ union { int i; float f; } __tmp; \
+ __tmp.f = __builtin_ia32_vec_ext_v4sf ((__v4sf)(__m128)(X), (int)(N)); \
+ __tmp.i; \
+ }))
+#endif
+
+/* Assign to D the element of the packed single precision vector S
+   selected by index N, as returned by the vec_ext builtin.  The macro
+   expands to a brace-enclosed compound statement, not an expression,
+   so it yields no value; following it with `;' inside an unbraced
+   if/else adds an empty statement that detaches the else.  */
+#define _MM_EXTRACT_FLOAT(D, S, N) \
+ { (D) = __builtin_ia32_vec_ext_v4sf ((__v4sf)(S), (N)); }
+
+/* Extract specified single precision float element into the lower
+ part of __m128. */
+#define _MM_PICK_OUT_PS(X, N) \
+ _mm_insert_ps (_mm_setzero_ps (), (X), \
+ _MM_MK_INSERTPS_NDX ((N), 0, 0x0e))
+
+/* Insert integer, S, into packed integer array element of D
+ selected by index N. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_epi8 (__m128i __D, int __S, const int __N)
+{
+ return (__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)__D,
+ __S, __N);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_epi32 (__m128i __D, int __S, const int __N)
+{
+ return (__m128i) __builtin_ia32_vec_set_v4si ((__v4si)__D,
+ __S, __N);
+}
+
+#ifdef __x86_64__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_epi64 (__m128i __D, long long __S, const int __N)
+{
+ return (__m128i) __builtin_ia32_vec_set_v2di ((__v2di)__D,
+ __S, __N);
+}
+#endif
+#else
+#define _mm_insert_epi8(D, S, N) \
+ ((__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)(__m128i)(D), \
+ (int)(S), (int)(N)))
+
+#define _mm_insert_epi32(D, S, N) \
+ ((__m128i) __builtin_ia32_vec_set_v4si ((__v4si)(__m128i)(D), \
+ (int)(S), (int)(N)))
+
+#ifdef __x86_64__
+#define _mm_insert_epi64(D, S, N) \
+ ((__m128i) __builtin_ia32_vec_set_v2di ((__v2di)(__m128i)(D), \
+ (long long)(S), (int)(N)))
+#endif
+#endif
+
+/* Extract integer from packed integer array element of X selected by
+ index N. */
+
+#ifdef __OPTIMIZE__
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extract_epi8 (__m128i __X, const int __N)
+{
+ return (unsigned char) __builtin_ia32_vec_ext_v16qi ((__v16qi)__X, __N);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extract_epi32 (__m128i __X, const int __N)
+{
+ return __builtin_ia32_vec_ext_v4si ((__v4si)__X, __N);
+}
+
+#ifdef __x86_64__
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extract_epi64 (__m128i __X, const int __N)
+{
+ return __builtin_ia32_vec_ext_v2di ((__v2di)__X, __N);
+}
+#endif
+#else
+#define _mm_extract_epi8(X, N) \
+ ((int) (unsigned char) __builtin_ia32_vec_ext_v16qi ((__v16qi)(__m128i)(X), (int)(N)))
+#define _mm_extract_epi32(X, N) \
+ ((int) __builtin_ia32_vec_ext_v4si ((__v4si)(__m128i)(X), (int)(N)))
+
+#ifdef __x86_64__
+#define _mm_extract_epi64(X, N) \
+ ((long long) __builtin_ia32_vec_ext_v2di ((__v2di)(__m128i)(X), (int)(N)))
+#endif
+#endif
+
+/* Return horizontal packed word minimum and its index in bits [15:0]
+ and bits [18:16] respectively. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_minpos_epu16 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_phminposuw128 ((__v8hi)__X);
+}
+
+/* Packed integer sign-extension. */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi8_epi32 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovsxbd128 ((__v16qi)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi16_epi32 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovsxwd128 ((__v8hi)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi8_epi64 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovsxbq128 ((__v16qi)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi32_epi64 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovsxdq128 ((__v4si)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi16_epi64 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovsxwq128 ((__v8hi)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi8_epi16 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovsxbw128 ((__v16qi)__X);
+}
+
+/* Packed integer zero-extension. */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu8_epi32 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovzxbd128 ((__v16qi)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu16_epi32 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovzxwd128 ((__v8hi)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu8_epi64 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovzxbq128 ((__v16qi)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu32_epi64 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovzxdq128 ((__v4si)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu16_epi64 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovzxwq128 ((__v8hi)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu8_epi16 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovzxbw128 ((__v16qi)__X);
+}
+
+/* Pack 8 double words from 2 operands into 8 words of result with
+ unsigned saturation. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_packus_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_packusdw128 ((__v4si)__X, (__v4si)__Y);
+}
+
+/* Sum absolute 8-bit integer difference of adjacent groups of 4
+ byte integers in the first 2 operands. Starting offsets within
+ operands are determined by the 3rd mask operand. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mpsadbw_epu8 (__m128i __X, __m128i __Y, const int __M)
+{
+ return (__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)__X,
+ (__v16qi)__Y, __M);
+}
+#else
+#define _mm_mpsadbw_epu8(X, Y, M) \
+ ((__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+#endif
+
+/* Load double quadword using non-temporal aligned hint. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_load_si128 (__m128i *__X)
+{
+ return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __X);
+}
+
+#ifndef __SSE4_2__
+#pragma GCC push_options
+#pragma GCC target("sse4.2")
+#define __DISABLE_SSE4_2__
+#endif /* __SSE4_2__ */
+
+/* These macros specify the source data format. */
+#define _SIDD_UBYTE_OPS 0x00
+#define _SIDD_UWORD_OPS 0x01
+#define _SIDD_SBYTE_OPS 0x02
+#define _SIDD_SWORD_OPS 0x03
+
+/* These macros specify the comparison operation. */
+#define _SIDD_CMP_EQUAL_ANY 0x00
+#define _SIDD_CMP_RANGES 0x04
+#define _SIDD_CMP_EQUAL_EACH 0x08
+#define _SIDD_CMP_EQUAL_ORDERED 0x0c
+
+/* These macros specify the polarity. */
+#define _SIDD_POSITIVE_POLARITY 0x00
+#define _SIDD_NEGATIVE_POLARITY 0x10
+#define _SIDD_MASKED_POSITIVE_POLARITY 0x20
+#define _SIDD_MASKED_NEGATIVE_POLARITY 0x30
+
+/* These macros specify the output selection in _mm_cmpXstri (). */
+#define _SIDD_LEAST_SIGNIFICANT 0x00
+#define _SIDD_MOST_SIGNIFICANT 0x40
+
+/* These macros specify the output selection in _mm_cmpXstrm (). */
+#define _SIDD_BIT_MASK 0x00
+#define _SIDD_UNIT_MASK 0x40
+
+/* Intrinsics for text/string processing. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpistrm (__m128i __X, __m128i __Y, const int __M)
+{
+ return (__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpistri (__m128i __X, __m128i __Y, const int __M)
+{
+ return __builtin_ia32_pcmpistri128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ __M);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpestrm (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
+{
+ return (__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)__X, __LX,
+ (__v16qi)__Y, __LY,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpestri (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
+{
+ return __builtin_ia32_pcmpestri128 ((__v16qi)__X, __LX,
+ (__v16qi)__Y, __LY,
+ __M);
+}
+#else
+#define _mm_cmpistrm(X, Y, M) \
+ ((__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+#define _mm_cmpistri(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistri128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+
+#define _mm_cmpestrm(X, LX, Y, LY, M) \
+ ((__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)(__m128i)(X), \
+ (int)(LX), (__v16qi)(__m128i)(Y), \
+ (int)(LY), (int)(M)))
+#define _mm_cmpestri(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestri128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
+#endif
+
+/* Intrinsics for text/string processing and reading values of
+ EFlags. */
+
+#ifdef __OPTIMIZE__
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpistra (__m128i __X, __m128i __Y, const int __M)
+{
+ return __builtin_ia32_pcmpistria128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpistrc (__m128i __X, __m128i __Y, const int __M)
+{
+ return __builtin_ia32_pcmpistric128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpistro (__m128i __X, __m128i __Y, const int __M)
+{
+ return __builtin_ia32_pcmpistrio128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpistrs (__m128i __X, __m128i __Y, const int __M)
+{
+ return __builtin_ia32_pcmpistris128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpistrz (__m128i __X, __m128i __Y, const int __M)
+{
+ return __builtin_ia32_pcmpistriz128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpestra (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
+{
+ return __builtin_ia32_pcmpestria128 ((__v16qi)__X, __LX,
+ (__v16qi)__Y, __LY,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpestrc (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
+{
+ return __builtin_ia32_pcmpestric128 ((__v16qi)__X, __LX,
+ (__v16qi)__Y, __LY,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpestro (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
+{
+ return __builtin_ia32_pcmpestrio128 ((__v16qi)__X, __LX,
+ (__v16qi)__Y, __LY,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpestrs (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
+{
+ return __builtin_ia32_pcmpestris128 ((__v16qi)__X, __LX,
+ (__v16qi)__Y, __LY,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpestrz (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
+{
+ return __builtin_ia32_pcmpestriz128 ((__v16qi)__X, __LX,
+ (__v16qi)__Y, __LY,
+ __M);
+}
+#else
+#define _mm_cmpistra(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistria128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+#define _mm_cmpistrc(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistric128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+#define _mm_cmpistro(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistrio128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+#define _mm_cmpistrs(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistris128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+#define _mm_cmpistrz(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistriz128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+
+#define _mm_cmpestra(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestria128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
+#define _mm_cmpestrc(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestric128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
+#define _mm_cmpestro(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestrio128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
+#define _mm_cmpestrs(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestris128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
+#define _mm_cmpestrz(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestriz128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
+#endif
+
+/* Packed integer 64-bit comparison, zeroing or filling with ones
+ corresponding parts of result. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_epi64 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pcmpgtq ((__v2di)__X, (__v2di)__Y);
+}
+
+#ifdef __DISABLE_SSE4_2__
+#undef __DISABLE_SSE4_2__
+#pragma GCC pop_options
+#endif /* __DISABLE_SSE4_2__ */
+
+#ifdef __DISABLE_SSE4_1__
+#undef __DISABLE_SSE4_1__
+#pragma GCC pop_options
+#endif /* __DISABLE_SSE4_1__ */
+
+#include <popcntintrin.h>
+
+#ifndef __SSE4_1__
+#pragma GCC push_options
+#pragma GCC target("sse4.1")
+#define __DISABLE_SSE4_1__
+#endif /* __SSE4_1__ */
+
+#ifndef __SSE4_2__
+#pragma GCC push_options
+#pragma GCC target("sse4.2")
+#define __DISABLE_SSE4_2__
+#endif /* __SSE4_2__ */
+
+/* Accumulate CRC32 (polynomial 0x11EDC6F41) value. */
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_crc32_u8 (unsigned int __C, unsigned char __V)
+{
+ return __builtin_ia32_crc32qi (__C, __V);
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_crc32_u16 (unsigned int __C, unsigned short __V)
+{
+ return __builtin_ia32_crc32hi (__C, __V);
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_crc32_u32 (unsigned int __C, unsigned int __V)
+{
+ return __builtin_ia32_crc32si (__C, __V);
+}
+
+#ifdef __x86_64__
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_crc32_u64 (unsigned long long __C, unsigned long long __V)
+{
+ return __builtin_ia32_crc32di (__C, __V);
+}
+#endif
+
+#ifdef __DISABLE_SSE4_2__
+#undef __DISABLE_SSE4_2__
+#pragma GCC pop_options
+#endif /* __DISABLE_SSE4_2__ */
+
+#ifdef __DISABLE_SSE4_1__
+#undef __DISABLE_SSE4_1__
+#pragma GCC pop_options
+#endif /* __DISABLE_SSE4_1__ */
+
+#endif /* _SMMINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/stdalign.h b/lib/gcc/x86_64-linux-android/4.9/include/stdalign.h
new file mode 100644
index 0000000..ee2d81f
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/stdalign.h
@@ -0,0 +1,39 @@
+/* Copyright (C) 2011-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* ISO C11 Standard: 7.15 Alignment <stdalign.h>. */
+
+#ifndef _STDALIGN_H
+#define _STDALIGN_H
+
+#ifndef __cplusplus
+
+#define alignas _Alignas
+#define alignof _Alignof
+
+#define __alignas_is_defined 1
+#define __alignof_is_defined 1
+
+#endif
+
+#endif /* stdalign.h */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/stdarg.h b/lib/gcc/x86_64-linux-android/4.9/include/stdarg.h
new file mode 100644
index 0000000..1d4418b
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/stdarg.h
@@ -0,0 +1,126 @@
+/* Copyright (C) 1989-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/*
+ * ISO C Standard: 7.15 Variable arguments <stdarg.h>
+ */
+
+#ifndef _STDARG_H
+#ifndef _ANSI_STDARG_H_
+#ifndef __need___va_list
+#define _STDARG_H
+#define _ANSI_STDARG_H_
+#endif /* not __need___va_list */
+#undef __need___va_list
+
+/* Define __gnuc_va_list. */
+
+#ifndef __GNUC_VA_LIST
+#define __GNUC_VA_LIST
+typedef __builtin_va_list __gnuc_va_list;
+#endif
+
+/* Define the standard macros for the user,
+ if this invocation was from the user program. */
+#ifdef _STDARG_H
+
+/* Map the standard variable-argument macros onto the compiler
+   builtins of the same name.  */
+#define va_start(v,l) __builtin_va_start(v,l)
+#define va_end(v) __builtin_va_end(v)
+#define va_arg(v,l) __builtin_va_arg(v,l)
+/* va_copy is withheld only when __STRICT_ANSI__ is defined,
+   __STDC_VERSION__ is below 199900L (or undefined) and
+   __GXX_EXPERIMENTAL_CXX0X__ is not defined.  */
+#if !defined(__STRICT_ANSI__) || __STDC_VERSION__ + 0 >= 199900L || defined(__GXX_EXPERIMENTAL_CXX0X__)
+#define va_copy(d,s) __builtin_va_copy(d,s)
+#endif
+/* __va_copy is defined unconditionally and expands to the same
+   builtin as va_copy.  */
+#define __va_copy(d,s) __builtin_va_copy(d,s)
+
+/* Define va_list, if desired, from __gnuc_va_list. */
+/* We deliberately do not define va_list when called from
+ stdio.h, because ANSI C says that stdio.h is not supposed to define
+ va_list. stdio.h needs to have access to that data type,
+ but must not use that name. It should use the name __gnuc_va_list,
+ which is safe because it is reserved for the implementation. */
+
+#ifdef _BSD_VA_LIST
+#undef _BSD_VA_LIST
+#endif
+
+#if defined(__svr4__) || (defined(_SCO_DS) && !defined(__VA_LIST))
+/* SVR4.2 uses _VA_LIST for an internal alias for va_list,
+ so we must avoid testing it and setting it here.
+ SVR4 uses _VA_LIST as a flag in stdarg.h, but we should
+ have no conflict with that. */
+#ifndef _VA_LIST_
+#define _VA_LIST_
+#ifdef __i860__
+#ifndef _VA_LIST
+#define _VA_LIST va_list
+#endif
+#endif /* __i860__ */
+typedef __gnuc_va_list va_list;
+#ifdef _SCO_DS
+#define __VA_LIST
+#endif
+#endif /* _VA_LIST_ */
+#else /* not __svr4__ || _SCO_DS */
+
+/* The macro _VA_LIST_ is the same thing used by this file in Ultrix.
+ But on BSD NET2 we must not test or define or undef it.
+ (Note that the comments in NET 2's ansi.h
+ are incorrect for _VA_LIST_--see stdio.h!) */
+#if !defined (_VA_LIST_) || defined (__BSD_NET2__) || defined (____386BSD____) || defined (__bsdi__) || defined (__sequent__) || defined (__FreeBSD__) || defined(WINNT)
+/* The macro _VA_LIST_DEFINED is used in Windows NT 3.5 */
+#ifndef _VA_LIST_DEFINED
+/* The macro _VA_LIST is used in SCO Unix 3.2. */
+#ifndef _VA_LIST
+/* The macro _VA_LIST_T_H is used in the Bull dpx2 */
+#ifndef _VA_LIST_T_H
+/* The macro __va_list__ is used by BeOS. */
+#ifndef __va_list__
+typedef __gnuc_va_list va_list;
+#endif /* not __va_list__ */
+#endif /* not _VA_LIST_T_H */
+#endif /* not _VA_LIST */
+#endif /* not _VA_LIST_DEFINED */
+#if !(defined (__BSD_NET2__) || defined (____386BSD____) || defined (__bsdi__) || defined (__sequent__) || defined (__FreeBSD__))
+#define _VA_LIST_
+#endif
+#ifndef _VA_LIST
+#define _VA_LIST
+#endif
+#ifndef _VA_LIST_DEFINED
+#define _VA_LIST_DEFINED
+#endif
+#ifndef _VA_LIST_T_H
+#define _VA_LIST_T_H
+#endif
+#ifndef __va_list__
+#define __va_list__
+#endif
+
+#endif /* not _VA_LIST_, except on certain systems */
+
+#endif /* not __svr4__ */
+
+#endif /* _STDARG_H */
+
+#endif /* not _ANSI_STDARG_H_ */
+#endif /* not _STDARG_H */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/stdatomic.h b/lib/gcc/x86_64-linux-android/4.9/include/stdatomic.h
new file mode 100644
index 0000000..108259b
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/stdatomic.h
@@ -0,0 +1,252 @@
+/* Copyright (C) 2013-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* ISO C11 Standard: 7.17 Atomics <stdatomic.h>. */
+
+#ifndef _STDATOMIC_H
+#define _STDATOMIC_H
+
+typedef enum
+ {
+ memory_order_relaxed = __ATOMIC_RELAXED,
+ memory_order_consume = __ATOMIC_CONSUME,
+ memory_order_acquire = __ATOMIC_ACQUIRE,
+ memory_order_release = __ATOMIC_RELEASE,
+ memory_order_acq_rel = __ATOMIC_ACQ_REL,
+ memory_order_seq_cst = __ATOMIC_SEQ_CST
+ } memory_order;
+
+
+typedef _Atomic _Bool atomic_bool;
+typedef _Atomic char atomic_char;
+typedef _Atomic signed char atomic_schar;
+typedef _Atomic unsigned char atomic_uchar;
+typedef _Atomic short atomic_short;
+typedef _Atomic unsigned short atomic_ushort;
+typedef _Atomic int atomic_int;
+typedef _Atomic unsigned int atomic_uint;
+typedef _Atomic long atomic_long;
+typedef _Atomic unsigned long atomic_ulong;
+typedef _Atomic long long atomic_llong;
+typedef _Atomic unsigned long long atomic_ullong;
+typedef _Atomic __CHAR16_TYPE__ atomic_char16_t;
+typedef _Atomic __CHAR32_TYPE__ atomic_char32_t;
+typedef _Atomic __WCHAR_TYPE__ atomic_wchar_t;
+typedef _Atomic __INT_LEAST8_TYPE__ atomic_int_least8_t;
+typedef _Atomic __UINT_LEAST8_TYPE__ atomic_uint_least8_t;
+typedef _Atomic __INT_LEAST16_TYPE__ atomic_int_least16_t;
+typedef _Atomic __UINT_LEAST16_TYPE__ atomic_uint_least16_t;
+typedef _Atomic __INT_LEAST32_TYPE__ atomic_int_least32_t;
+typedef _Atomic __UINT_LEAST32_TYPE__ atomic_uint_least32_t;
+typedef _Atomic __INT_LEAST64_TYPE__ atomic_int_least64_t;
+typedef _Atomic __UINT_LEAST64_TYPE__ atomic_uint_least64_t;
+typedef _Atomic __INT_FAST8_TYPE__ atomic_int_fast8_t;
+typedef _Atomic __UINT_FAST8_TYPE__ atomic_uint_fast8_t;
+typedef _Atomic __INT_FAST16_TYPE__ atomic_int_fast16_t;
+typedef _Atomic __UINT_FAST16_TYPE__ atomic_uint_fast16_t;
+typedef _Atomic __INT_FAST32_TYPE__ atomic_int_fast32_t;
+typedef _Atomic __UINT_FAST32_TYPE__ atomic_uint_fast32_t;
+typedef _Atomic __INT_FAST64_TYPE__ atomic_int_fast64_t;
+typedef _Atomic __UINT_FAST64_TYPE__ atomic_uint_fast64_t;
+typedef _Atomic __INTPTR_TYPE__ atomic_intptr_t;
+typedef _Atomic __UINTPTR_TYPE__ atomic_uintptr_t;
+typedef _Atomic __SIZE_TYPE__ atomic_size_t;
+typedef _Atomic __PTRDIFF_TYPE__ atomic_ptrdiff_t;
+typedef _Atomic __INTMAX_TYPE__ atomic_intmax_t;
+typedef _Atomic __UINTMAX_TYPE__ atomic_uintmax_t;
+
+
+#define ATOMIC_VAR_INIT(VALUE) (VALUE)
+/* Initialize the atomic object PTR points to with VAL.  Initialization
+   does not need to be ordered against other operations, so a relaxed
+   store is used; a plain assignment through *(PTR) would be a
+   sequentially consistent store on an _Atomic lvalue.  The macro is
+   still a single statement and yields no value.  */
+#define atomic_init(PTR, VAL) \
+ do \
+ { \
+ atomic_store_explicit (PTR, VAL, __ATOMIC_RELAXED); \
+ } \
+ while (0)
+
+#define kill_dependency(Y) \
+ __extension__ \
+ ({ \
+ __auto_type __kill_dependency_tmp = (Y); \
+ __kill_dependency_tmp; \
+ })
+
+#define atomic_thread_fence(MO) __atomic_thread_fence (MO)
+#define atomic_signal_fence(MO) __atomic_signal_fence (MO)
+#define atomic_is_lock_free(OBJ) __atomic_is_lock_free (sizeof (*(OBJ)), (OBJ))
+
+#define __atomic_type_lock_free(T) \
+ (__atomic_always_lock_free (sizeof (T), (void *) 0) \
+ ? 2 \
+ : (__atomic_is_lock_free (sizeof (T), (void *) 0) ? 1 : 0))
+/* Lock-free property of each atomic type: 0 (never), 1 (sometimes) or
+   2 (always lock-free).  C11 7.17.5 requires these macros to be
+   usable in #if preprocessing directives, so they expand to the
+   compiler's predefined __GCC_ATOMIC_*_LOCK_FREE integer constants
+   rather than to __atomic_type_lock_free (TYPE), whose sizeof and
+   builtin calls the preprocessor cannot evaluate.  */
+#define ATOMIC_BOOL_LOCK_FREE __GCC_ATOMIC_BOOL_LOCK_FREE
+#define ATOMIC_CHAR_LOCK_FREE __GCC_ATOMIC_CHAR_LOCK_FREE
+#define ATOMIC_CHAR16_T_LOCK_FREE __GCC_ATOMIC_CHAR16_T_LOCK_FREE
+#define ATOMIC_CHAR32_T_LOCK_FREE __GCC_ATOMIC_CHAR32_T_LOCK_FREE
+#define ATOMIC_WCHAR_T_LOCK_FREE __GCC_ATOMIC_WCHAR_T_LOCK_FREE
+#define ATOMIC_SHORT_LOCK_FREE __GCC_ATOMIC_SHORT_LOCK_FREE
+#define ATOMIC_INT_LOCK_FREE __GCC_ATOMIC_INT_LOCK_FREE
+#define ATOMIC_LONG_LOCK_FREE __GCC_ATOMIC_LONG_LOCK_FREE
+#define ATOMIC_LLONG_LOCK_FREE __GCC_ATOMIC_LLONG_LOCK_FREE
+#define ATOMIC_POINTER_LOCK_FREE __GCC_ATOMIC_POINTER_LOCK_FREE
+
+
+/* Note that these macros require __typeof__ and __auto_type to remove
+ _Atomic qualifiers (and const qualifiers, if those are valid on
+ macro operands).
+
+ Also note that the header file uses the generic form of __atomic
+ builtins, which requires the address to be taken of the value
+ parameter, and then we pass that value on. This allows the macros
+ to work for any type, and the compiler is smart enough to convert
+ these to lock-free _N variants if possible, and throw away the
+ temps. */
+
+#define atomic_store_explicit(PTR, VAL, MO) \
+ __extension__ \
+ ({ \
+ __auto_type __atomic_store_ptr = (PTR); \
+ __typeof__ (*__atomic_store_ptr) __atomic_store_tmp = (VAL); \
+ __atomic_store (__atomic_store_ptr, &__atomic_store_tmp, (MO)); \
+ })
+
+#define atomic_store(PTR, VAL) \
+ atomic_store_explicit (PTR, VAL, __ATOMIC_SEQ_CST)
+
+
+/* Load the object PTR points to with memory order MO.  PTR is
+   evaluated once into a local; the generic __atomic_load builtin
+   fills a temporary declared with the pointed-to type, and that
+   temporary is the value of the statement expression.  */
+#define atomic_load_explicit(PTR, MO) \
+ __extension__ \
+ ({ \
+ __auto_type __atomic_load_src = (PTR); \
+ __typeof__ (*__atomic_load_src) __atomic_load_result; \
+ __atomic_load (__atomic_load_src, &__atomic_load_result, (MO)); \
+ __atomic_load_result; \
+ })
+
+#define atomic_load(PTR) atomic_load_explicit (PTR, __ATOMIC_SEQ_CST)
+
+
+#define atomic_exchange_explicit(PTR, VAL, MO) \
+ __extension__ \
+ ({ \
+ __auto_type __atomic_exchange_ptr = (PTR); \
+ __typeof__ (*__atomic_exchange_ptr) __atomic_exchange_val = (VAL); \
+ __typeof__ (*__atomic_exchange_ptr) __atomic_exchange_tmp; \
+ __atomic_exchange (__atomic_exchange_ptr, &__atomic_exchange_val, \
+ &__atomic_exchange_tmp, (MO)); \
+ __atomic_exchange_tmp; \
+ })
+
+#define atomic_exchange(PTR, VAL) \
+ atomic_exchange_explicit (PTR, VAL, __ATOMIC_SEQ_CST)
+
+
+/* Strong compare-and-exchange on the object PTR points to.  PTR is
+   evaluated once into a local.  VAL is passed to the generic builtin
+   unchanged (it is the pointer to the expected value); DES is first
+   copied into a temporary declared with the pointed-to type because
+   the builtin takes the desired value by address.  The literal 0 is
+   the builtin's `weak' argument.  SUC and FAIL are forwarded as the
+   memory orders for the success and failure cases.  The value of the
+   statement expression is the builtin's result.  */
+#define atomic_compare_exchange_strong_explicit(PTR, VAL, DES, SUC, FAIL) \
+ __extension__ \
+ ({ \
+ __auto_type __atomic_compare_exchange_ptr = (PTR); \
+ __typeof__ (*__atomic_compare_exchange_ptr) __atomic_compare_exchange_tmp \
+ = (DES); \
+ __atomic_compare_exchange (__atomic_compare_exchange_ptr, (VAL), \
+ &__atomic_compare_exchange_tmp, 0, \
+ (SUC), (FAIL)); \
+ })
+
+#define atomic_compare_exchange_strong(PTR, VAL, DES) \
+ atomic_compare_exchange_strong_explicit (PTR, VAL, DES, __ATOMIC_SEQ_CST, \
+ __ATOMIC_SEQ_CST)
+
+/* Weak compare-and-exchange on the object PTR points to.  PTR is
+   evaluated once into a local.  VAL is passed to the generic builtin
+   unchanged (it is the pointer to the expected value); DES is first
+   copied into a temporary declared with the pointed-to type because
+   the builtin takes the desired value by address.  The literal 1 is
+   the builtin's `weak' argument.  SUC and FAIL are forwarded as the
+   memory orders for the success and failure cases.  The value of the
+   statement expression is the builtin's result.  */
+#define atomic_compare_exchange_weak_explicit(PTR, VAL, DES, SUC, FAIL) \
+ __extension__ \
+ ({ \
+ __auto_type __atomic_compare_exchange_ptr = (PTR); \
+ __typeof__ (*__atomic_compare_exchange_ptr) __atomic_compare_exchange_tmp \
+ = (DES); \
+ __atomic_compare_exchange (__atomic_compare_exchange_ptr, (VAL), \
+ &__atomic_compare_exchange_tmp, 1, \
+ (SUC), (FAIL)); \
+ })
+
+#define atomic_compare_exchange_weak(PTR, VAL, DES) \
+ atomic_compare_exchange_weak_explicit (PTR, VAL, DES, __ATOMIC_SEQ_CST, \
+ __ATOMIC_SEQ_CST)
+
+
+
+#define atomic_fetch_add(PTR, VAL) __atomic_fetch_add ((PTR), (VAL), \
+ __ATOMIC_SEQ_CST)
+#define atomic_fetch_add_explicit(PTR, VAL, MO) \
+ __atomic_fetch_add ((PTR), (VAL), (MO))
+
+#define atomic_fetch_sub(PTR, VAL) __atomic_fetch_sub ((PTR), (VAL), \
+ __ATOMIC_SEQ_CST)
+#define atomic_fetch_sub_explicit(PTR, VAL, MO) \
+ __atomic_fetch_sub ((PTR), (VAL), (MO))
+
+#define atomic_fetch_or(PTR, VAL) __atomic_fetch_or ((PTR), (VAL), \
+ __ATOMIC_SEQ_CST)
+#define atomic_fetch_or_explicit(PTR, VAL, MO) \
+ __atomic_fetch_or ((PTR), (VAL), (MO))
+
+#define atomic_fetch_xor(PTR, VAL) __atomic_fetch_xor ((PTR), (VAL), \
+ __ATOMIC_SEQ_CST)
+#define atomic_fetch_xor_explicit(PTR, VAL, MO) \
+ __atomic_fetch_xor ((PTR), (VAL), (MO))
+
+#define atomic_fetch_and(PTR, VAL) __atomic_fetch_and ((PTR), (VAL), \
+ __ATOMIC_SEQ_CST)
+#define atomic_fetch_and_explicit(PTR, VAL, MO) \
+ __atomic_fetch_and ((PTR), (VAL), (MO))
+
+
+/* atomic_flag is an _Atomic struct with a single member, __val.  The
+   member is a _Bool when __GCC_ATOMIC_TEST_AND_SET_TRUEVAL equals 1
+   and an unsigned char otherwise.
+   NOTE(review): presumably the unsigned char form exists for targets
+   whose test-and-set stores a "set" value other than 1 -- confirm
+   against the GCC manual.  */
+typedef _Atomic struct
+{
+#if __GCC_ATOMIC_TEST_AND_SET_TRUEVAL == 1
+ _Bool __val;
+#else
+ unsigned char __val;
+#endif
+} atomic_flag;
+
+#define ATOMIC_FLAG_INIT { 0 }
+
+
+#define atomic_flag_test_and_set(PTR) \
+ __atomic_test_and_set ((PTR), __ATOMIC_SEQ_CST)
+#define atomic_flag_test_and_set_explicit(PTR, MO) \
+ __atomic_test_and_set ((PTR), (MO))
+
+#define atomic_flag_clear(PTR) __atomic_clear ((PTR), __ATOMIC_SEQ_CST)
+#define atomic_flag_clear_explicit(PTR, MO) __atomic_clear ((PTR), (MO))
+
+#endif /* _STDATOMIC_H */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/stdbool.h b/lib/gcc/x86_64-linux-android/4.9/include/stdbool.h
new file mode 100644
index 0000000..f4e802f
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/stdbool.h
@@ -0,0 +1,50 @@
+/* Copyright (C) 1998-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/*
+ * ISO C Standard: 7.16 Boolean type and values <stdbool.h>
+ */
+
+#ifndef _STDBOOL_H
+#define _STDBOOL_H
+
+#ifndef __cplusplus
+/* C: bool expands to the _Bool keyword; true and false expand to the integer constants 1 and 0. */
+#define bool _Bool
+#define true 1
+#define false 0
+
+#else /* __cplusplus */
+
+/* Supporting <stdbool.h> in C++ is a GCC extension. _Bool maps to bool; the other names expand to themselves. */
+#define _Bool bool
+#define bool bool
+#define false false
+#define true true
+
+#endif /* __cplusplus */
+
+/* Signal that all the definitions are present. */
+#define __bool_true_false_are_defined 1
+
+#endif /* stdbool.h */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/stddef.h b/lib/gcc/x86_64-linux-android/4.9/include/stddef.h
new file mode 100644
index 0000000..cfa8df3
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/stddef.h
@@ -0,0 +1,439 @@
+/* Copyright (C) 1989-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/*
+ * ISO C Standard: 7.17 Common definitions <stddef.h>
+ */
+#if (!defined(_STDDEF_H) && !defined(_STDDEF_H_) && !defined(_ANSI_STDDEF_H) \
+ && !defined(__STDDEF_H__)) \
+ || defined(__need_wchar_t) || defined(__need_size_t) \
+ || defined(__need_ptrdiff_t) || defined(__need_NULL) \
+ || defined(__need_wint_t)
+
+/* Any one of these symbols __need_* means that GNU libc
+ wants us just to define one data type. So don't define
+ the symbols that indicate this file's entire job has been done. */
+#if (!defined(__need_wchar_t) && !defined(__need_size_t) \
+ && !defined(__need_ptrdiff_t) && !defined(__need_NULL) \
+ && !defined(__need_wint_t))
+#define _STDDEF_H
+#define _STDDEF_H_
+/* snaroff@next.com says the NeXT needs this. */
+#define _ANSI_STDDEF_H
+#endif
+
+#ifndef __sys_stdtypes_h
+/* This avoids lossage on SunOS but only if stdtypes.h comes first.
+ There's no way to win with the other order! Sun lossage. */
+
+/* On 4.3bsd-net2, make sure ansi.h is included, so we have
+ one less case to deal with in the following. */
+#if defined (__BSD_NET2__) || defined (____386BSD____) || (defined (__FreeBSD__) && (__FreeBSD__ < 5)) || defined(__NetBSD__)
+#include <machine/ansi.h>
+#endif
+/* On FreeBSD 5, machine/ansi.h does not exist anymore... */
+#if defined (__FreeBSD__) && (__FreeBSD__ >= 5)
+#include <sys/_types.h>
+#endif
+
+/* In 4.3bsd-net2, machine/ansi.h defines these symbols, which are
+ defined if the corresponding type is *not* defined.
+ FreeBSD-2.1 defines _MACHINE_ANSI_H_ instead of _ANSI_H_.
+ NetBSD defines _I386_ANSI_H_ and _X86_64_ANSI_H_ instead of _ANSI_H_ */
+#if defined(_ANSI_H_) || defined(_MACHINE_ANSI_H_) || defined(_X86_64_ANSI_H_) || defined(_I386_ANSI_H_)
+#if !defined(_SIZE_T_) && !defined(_BSD_SIZE_T_)
+#define _SIZE_T
+#endif
+#if !defined(_PTRDIFF_T_) && !defined(_BSD_PTRDIFF_T_)
+#define _PTRDIFF_T
+#endif
+/* On BSD/386 1.1, at least, machine/ansi.h defines _BSD_WCHAR_T_
+ instead of _WCHAR_T_. */
+#if !defined(_WCHAR_T_) && !defined(_BSD_WCHAR_T_)
+#ifndef _BSD_WCHAR_T_
+#define _WCHAR_T
+#endif
+#endif
+/* Undef _FOO_T_ if we are supposed to define foo_t. */
+#if defined (__need_ptrdiff_t) || defined (_STDDEF_H_)
+#undef _PTRDIFF_T_
+#undef _BSD_PTRDIFF_T_
+#endif
+#if defined (__need_size_t) || defined (_STDDEF_H_)
+#undef _SIZE_T_
+#undef _BSD_SIZE_T_
+#endif
+#if defined (__need_wchar_t) || defined (_STDDEF_H_)
+#undef _WCHAR_T_
+#undef _BSD_WCHAR_T_
+#endif
+#endif /* defined(_ANSI_H_) || defined(_MACHINE_ANSI_H_) || defined(_X86_64_ANSI_H_) || defined(_I386_ANSI_H_) */
+
+/* Sequent's header files use _PTRDIFF_T_ in some conflicting way.
+ Just ignore it. */
+#if defined (__sequent__) && defined (_PTRDIFF_T_)
+#undef _PTRDIFF_T_
+#endif
+
+/* On VxWorks, <type/vxTypesBase.h> may have defined macros like
+ _TYPE_size_t which will typedef size_t. fixincludes patched the
+ vxTypesBase.h so that this macro is only defined if _GCC_SIZE_T is
+ not defined, and so that defining this macro defines _GCC_SIZE_T.
+ If we find that the macros are still defined at this point, we must
+ invoke them so that the type is defined as expected. */
+#if defined (_TYPE_ptrdiff_t) && (defined (__need_ptrdiff_t) || defined (_STDDEF_H_))
+_TYPE_ptrdiff_t;
+#undef _TYPE_ptrdiff_t
+#endif
+#if defined (_TYPE_size_t) && (defined (__need_size_t) || defined (_STDDEF_H_))
+_TYPE_size_t;
+#undef _TYPE_size_t
+#endif
+#if defined (_TYPE_wchar_t) && (defined (__need_wchar_t) || defined (_STDDEF_H_))
+_TYPE_wchar_t;
+#undef _TYPE_wchar_t
+#endif
+
+/* In case nobody has defined these types, but we aren't running under
+ GCC 2.00, make sure that __PTRDIFF_TYPE__, __SIZE_TYPE__, and
+ __WCHAR_TYPE__ have reasonable values. This can happen if parts
+ of GCC that actually include gstddef.h, such as collect2, are
+ compiled by an older compiler. */
+
+/* Signed type of difference of two pointers. */
+
+/* Define this type if we are doing the whole job,
+ or if we want this type in particular. */
+#if defined (_STDDEF_H) || defined (__need_ptrdiff_t)
+#ifndef _PTRDIFF_T /* in case <sys/types.h> has defined it. */
+#ifndef _T_PTRDIFF_
+#ifndef _T_PTRDIFF
+#ifndef __PTRDIFF_T
+#ifndef _PTRDIFF_T_
+#ifndef _BSD_PTRDIFF_T_
+#ifndef ___int_ptrdiff_t_h
+#ifndef _GCC_PTRDIFF_T
+#define _PTRDIFF_T
+#define _T_PTRDIFF_
+#define _T_PTRDIFF
+#define __PTRDIFF_T
+#define _PTRDIFF_T_
+#define _BSD_PTRDIFF_T_
+#define ___int_ptrdiff_t_h
+#define _GCC_PTRDIFF_T
+#ifndef __PTRDIFF_TYPE__
+#define __PTRDIFF_TYPE__ long int
+#endif
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+#endif /* _GCC_PTRDIFF_T */
+#endif /* ___int_ptrdiff_t_h */
+#endif /* _BSD_PTRDIFF_T_ */
+#endif /* _PTRDIFF_T_ */
+#endif /* __PTRDIFF_T */
+#endif /* _T_PTRDIFF */
+#endif /* _T_PTRDIFF_ */
+#endif /* _PTRDIFF_T */
+
+/* If this symbol has done its job, get rid of it. */
+#undef __need_ptrdiff_t
+
+#endif /* _STDDEF_H or __need_ptrdiff_t. */
+
+/* Unsigned type of `sizeof' something. */
+
+/* Define this type if we are doing the whole job,
+ or if we want this type in particular. */
+#if defined (_STDDEF_H) || defined (__need_size_t)
+#ifndef __size_t__ /* BeOS */
+#ifndef __SIZE_T__ /* Cray Unicos/Mk */
+#ifndef _SIZE_T /* in case <sys/types.h> has defined it. */
+#ifndef _SYS_SIZE_T_H
+#ifndef _T_SIZE_
+#ifndef _T_SIZE
+#ifndef __SIZE_T
+#ifndef _SIZE_T_
+#ifndef _BSD_SIZE_T_
+#ifndef _SIZE_T_DEFINED_
+#ifndef _SIZE_T_DEFINED
+#ifndef _BSD_SIZE_T_DEFINED_ /* Darwin */
+#ifndef _SIZE_T_DECLARED /* FreeBSD 5 */
+#ifndef ___int_size_t_h
+#ifndef _GCC_SIZE_T
+#ifndef _SIZET_
+#ifndef __size_t
+#define __size_t__ /* BeOS */
+#define __SIZE_T__ /* Cray Unicos/Mk */
+#define _SIZE_T
+#define _SYS_SIZE_T_H
+#define _T_SIZE_
+#define _T_SIZE
+#define __SIZE_T
+#define _SIZE_T_
+#define _BSD_SIZE_T_
+#define _SIZE_T_DEFINED_
+#define _SIZE_T_DEFINED
+#define _BSD_SIZE_T_DEFINED_ /* Darwin */
+#define _SIZE_T_DECLARED /* FreeBSD 5 */
+#define ___int_size_t_h
+#define _GCC_SIZE_T
+#define _SIZET_
+#if (defined (__FreeBSD__) && (__FreeBSD__ >= 5)) \
+ || defined(__FreeBSD_kernel__)
+/* __size_t is a typedef on FreeBSD 5, must not trash it. */
+#elif defined (__VMS__)
+/* __size_t is also a typedef on VMS. */
+#else
+#define __size_t
+#endif
+#ifndef __SIZE_TYPE__
+#define __SIZE_TYPE__ long unsigned int
+#endif
+#if !(defined (__GNUG__) && defined (size_t))
+typedef __SIZE_TYPE__ size_t;
+#ifdef __BEOS__
+typedef long ssize_t;
+#endif /* __BEOS__ */
+#endif /* !(defined (__GNUG__) && defined (size_t)) */
+#endif /* __size_t */
+#endif /* _SIZET_ */
+#endif /* _GCC_SIZE_T */
+#endif /* ___int_size_t_h */
+#endif /* _SIZE_T_DECLARED */
+#endif /* _BSD_SIZE_T_DEFINED_ */
+#endif /* _SIZE_T_DEFINED */
+#endif /* _SIZE_T_DEFINED_ */
+#endif /* _BSD_SIZE_T_ */
+#endif /* _SIZE_T_ */
+#endif /* __SIZE_T */
+#endif /* _T_SIZE */
+#endif /* _T_SIZE_ */
+#endif /* _SYS_SIZE_T_H */
+#endif /* _SIZE_T */
+#endif /* __SIZE_T__ */
+#endif /* __size_t__ */
+#undef __need_size_t
+#endif /* _STDDEF_H or __need_size_t. */
+
+
+/* Wide character type.
+ Locale-writers should change this as necessary to
+ be big enough to hold unique values not between 0 and 127,
+ and not (wchar_t) -1, for each defined multibyte character. */
+
+/* Define this type if we are doing the whole job,
+ or if we want this type in particular. */
+#if defined (_STDDEF_H) || defined (__need_wchar_t)
+#ifndef __wchar_t__ /* BeOS */
+#ifndef __WCHAR_T__ /* Cray Unicos/Mk */
+#ifndef _WCHAR_T
+#ifndef _T_WCHAR_
+#ifndef _T_WCHAR
+#ifndef __WCHAR_T
+#ifndef _WCHAR_T_
+#ifndef _BSD_WCHAR_T_
+#ifndef _BSD_WCHAR_T_DEFINED_ /* Darwin */
+#ifndef _BSD_RUNE_T_DEFINED_ /* Darwin */
+#ifndef _WCHAR_T_DECLARED /* FreeBSD 5 */
+#ifndef _WCHAR_T_DEFINED_
+#ifndef _WCHAR_T_DEFINED
+#ifndef _WCHAR_T_H
+#ifndef ___int_wchar_t_h
+#ifndef __INT_WCHAR_T_H
+#ifndef _GCC_WCHAR_T
+#define __wchar_t__ /* BeOS */
+#define __WCHAR_T__ /* Cray Unicos/Mk */
+#define _WCHAR_T
+#define _T_WCHAR_
+#define _T_WCHAR
+#define __WCHAR_T
+#define _WCHAR_T_
+#define _BSD_WCHAR_T_
+#define _WCHAR_T_DEFINED_
+#define _WCHAR_T_DEFINED
+#define _WCHAR_T_H
+#define ___int_wchar_t_h
+#define __INT_WCHAR_T_H
+#define _GCC_WCHAR_T
+#define _WCHAR_T_DECLARED
+
+/* On BSD/386 1.1, at least, machine/ansi.h defines _BSD_WCHAR_T_
+ instead of _WCHAR_T_, and _BSD_RUNE_T_ (which, unlike the other
+ symbols in the _FOO_T_ family, stays defined even after its
+ corresponding type is defined). If we define wchar_t, then we
+ must undef _WCHAR_T_; for BSD/386 1.1 (and perhaps others), if
+ we undef _WCHAR_T_, then we must also define rune_t, since
+ headers like runetype.h assume that if machine/ansi.h is included,
+ and _BSD_WCHAR_T_ is not defined, then rune_t is available.
+ machine/ansi.h says, "Note that _WCHAR_T_ and _RUNE_T_ must be of
+ the same type." */
+#ifdef _BSD_WCHAR_T_
+#undef _BSD_WCHAR_T_
+#ifdef _BSD_RUNE_T_
+#if !defined (_ANSI_SOURCE) && !defined (_POSIX_SOURCE)
+typedef _BSD_RUNE_T_ rune_t;
+#define _BSD_WCHAR_T_DEFINED_
+#define _BSD_RUNE_T_DEFINED_ /* Darwin */
+#if defined (__FreeBSD__) && (__FreeBSD__ < 5)
+/* Why is this file so hard to maintain properly? In contrast to
+ the comment above regarding BSD/386 1.1, on FreeBSD for as long
+ as the symbol has existed, _BSD_RUNE_T_ must not stay defined or
+ redundant typedefs will occur when stdlib.h is included after this file. */
+#undef _BSD_RUNE_T_
+#endif /* __FreeBSD__ < 5 */
+#endif /* !_ANSI_SOURCE && !_POSIX_SOURCE */
+#endif /* _BSD_RUNE_T_ */
+#endif /* _BSD_WCHAR_T_ */
+/* FreeBSD 5 can't be handled well using "traditional" logic above
+ since it no longer defines _BSD_RUNE_T_ yet still desires to export
+ rune_t in some cases... */
+#if defined (__FreeBSD__) && (__FreeBSD__ >= 5)
+#if !defined (_ANSI_SOURCE) && !defined (_POSIX_SOURCE)
+#if __BSD_VISIBLE
+#ifndef _RUNE_T_DECLARED
+typedef __rune_t rune_t;
+#define _RUNE_T_DECLARED
+#endif /* _RUNE_T_DECLARED */
+#endif /* __BSD_VISIBLE */
+#endif /* !_ANSI_SOURCE && !_POSIX_SOURCE */
+#endif /* __FreeBSD__ >= 5 */
+
+#ifndef __WCHAR_TYPE__
+#define __WCHAR_TYPE__ int
+#endif /* __WCHAR_TYPE__ */
+#ifndef __cplusplus
+typedef __WCHAR_TYPE__ wchar_t;
+#endif /* __cplusplus */
+#endif /* _GCC_WCHAR_T */
+#endif /* __INT_WCHAR_T_H */
+#endif /* ___int_wchar_t_h */
+#endif /* _WCHAR_T_H */
+#endif /* _WCHAR_T_DEFINED */
+#endif /* _WCHAR_T_DEFINED_ */
+#endif /* _WCHAR_T_DECLARED */
+#endif /* _BSD_RUNE_T_DEFINED_ */
+#endif /* _BSD_WCHAR_T_DEFINED_ */
+#endif /* _BSD_WCHAR_T_ */
+#endif /* _WCHAR_T_ */
+#endif /* __WCHAR_T */
+#endif /* _T_WCHAR */
+#endif /* _T_WCHAR_ */
+#endif /* _WCHAR_T */
+#endif /* __WCHAR_T__ */
+#endif /* __wchar_t__ */
+#undef __need_wchar_t
+#endif /* _STDDEF_H or __need_wchar_t. */
+
+#if defined (__need_wint_t)
+#ifndef _WINT_T
+#define _WINT_T
+/* Fall back to unsigned int when __WINT_TYPE__ has not been defined already. */
+#ifndef __WINT_TYPE__
+#define __WINT_TYPE__ unsigned int
+#endif /* __WINT_TYPE__ */
+typedef __WINT_TYPE__ wint_t;
+#endif /* _WINT_T */
+#undef __need_wint_t
+#endif /* __need_wint_t */
+
+/* In 4.3bsd-net2, leave these undefined to indicate that size_t, etc.
+ are already defined. */
+/* BSD/OS 3.1 and FreeBSD [23].x require the MACHINE_ANSI_H check here. */
+/* NetBSD 5 requires the I386_ANSI_H and X86_64_ANSI_H checks here. */
+#if defined(_ANSI_H_) || defined(_MACHINE_ANSI_H_) || defined(_X86_64_ANSI_H_) || defined(_I386_ANSI_H_)
+/* The references to _GCC_PTRDIFF_T_, _GCC_SIZE_T_, and _GCC_WCHAR_T_
+ are probably typos and should be removed before 2.8 is released. */
+#ifdef _GCC_PTRDIFF_T_
+#undef _PTRDIFF_T_
+#undef _BSD_PTRDIFF_T_
+#endif
+#ifdef _GCC_SIZE_T_
+#undef _SIZE_T_
+#undef _BSD_SIZE_T_
+#endif
+#ifdef _GCC_WCHAR_T_
+#undef _WCHAR_T_
+#undef _BSD_WCHAR_T_
+#endif
+/* The following ones are the real ones. */
+#ifdef _GCC_PTRDIFF_T
+#undef _PTRDIFF_T_
+#undef _BSD_PTRDIFF_T_
+#endif
+#ifdef _GCC_SIZE_T
+#undef _SIZE_T_
+#undef _BSD_SIZE_T_
+#endif
+#ifdef _GCC_WCHAR_T
+#undef _WCHAR_T_
+#undef _BSD_WCHAR_T_
+#endif
+#endif /* _ANSI_H_ || _MACHINE_ANSI_H_ || _X86_64_ANSI_H_ || _I386_ANSI_H_ */
+
+#endif /* __sys_stdtypes_h */
+
+/* A null pointer constant. */
+
+#if defined (_STDDEF_H) || defined (__need_NULL)
+#undef NULL /* in case <stdio.h> has defined it. */
+#ifdef __GNUG__
+#define NULL __null
+#else /* !__GNUG__ */
+#ifndef __cplusplus
+#define NULL ((void *)0)
+#else /* __cplusplus */
+#define NULL 0
+#endif /* __cplusplus */
+#endif /* __GNUG__ */
+#endif /* _STDDEF_H or __need_NULL. */
+#undef __need_NULL
+
+#ifdef _STDDEF_H
+
+/* Offset of member MEMBER in a struct of type TYPE. */
+#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
+
+#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) \
+ || (defined(__cplusplus) && __cplusplus >= 201103L)
+#ifndef _GCC_MAX_ALIGN_T
+#define _GCC_MAX_ALIGN_T
+/* Type whose alignment is supported in every context and is at least
+ as great as that of any standard type not using alignment
+ specifiers. */
+typedef struct {
+ long long __max_align_ll __attribute__((__aligned__(__alignof__(long long))));
+ long double __max_align_ld __attribute__((__aligned__(__alignof__(long double))));
+} max_align_t;
+#endif /* _GCC_MAX_ALIGN_T */
+#endif /* C11 or C++11. */
+
+#if defined(__cplusplus) && __cplusplus >= 201103L
+#ifndef _GXX_NULLPTR_T
+#define _GXX_NULLPTR_T
+ typedef decltype(nullptr) nullptr_t;
+#endif /* _GXX_NULLPTR_T */
+#endif /* C++11. */
+
+#endif /* _STDDEF_H was defined this time */
+
+#endif /* !_STDDEF_H && !_STDDEF_H_ && !_ANSI_STDDEF_H && !__STDDEF_H__
+ || __need_XXX was not defined before */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/stdfix.h b/lib/gcc/x86_64-linux-android/4.9/include/stdfix.h
new file mode 100644
index 0000000..93e759a
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/stdfix.h
@@ -0,0 +1,204 @@
+/* Copyright (C) 2007-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* ISO/IEC JTC1 SC22 WG14 N1169
+ * Date: 2006-04-04
+ * ISO/IEC TR 18037
+ * Programming languages - C - Extensions to support embedded processors
+ */
+
+#ifndef _STDFIX_H
+#define _STDFIX_H
+
+/* 7.18a.1 Introduction. */
+
+#undef fract
+#undef accum
+#undef sat
+#define fract _Fract
+#define accum _Accum
+#define sat _Sat
+
+/* 7.18a.3 Precision macros. */
+
+#undef SFRACT_FBIT
+#undef SFRACT_MIN
+#undef SFRACT_MAX
+#undef SFRACT_EPSILON
+#define SFRACT_FBIT __SFRACT_FBIT__
+#define SFRACT_MIN __SFRACT_MIN__
+#define SFRACT_MAX __SFRACT_MAX__
+#define SFRACT_EPSILON __SFRACT_EPSILON__
+
+#undef USFRACT_FBIT
+#undef USFRACT_MIN
+#undef USFRACT_MAX
+#undef USFRACT_EPSILON
+#define USFRACT_FBIT __USFRACT_FBIT__
+#define USFRACT_MIN __USFRACT_MIN__ /* GCC extension. */
+#define USFRACT_MAX __USFRACT_MAX__
+#define USFRACT_EPSILON __USFRACT_EPSILON__
+
+#undef FRACT_FBIT
+#undef FRACT_MIN
+#undef FRACT_MAX
+#undef FRACT_EPSILON
+#define FRACT_FBIT __FRACT_FBIT__
+#define FRACT_MIN __FRACT_MIN__
+#define FRACT_MAX __FRACT_MAX__
+#define FRACT_EPSILON __FRACT_EPSILON__
+
+#undef UFRACT_FBIT
+#undef UFRACT_MIN
+#undef UFRACT_MAX
+#undef UFRACT_EPSILON
+#define UFRACT_FBIT __UFRACT_FBIT__
+#define UFRACT_MIN __UFRACT_MIN__ /* GCC extension. */
+#define UFRACT_MAX __UFRACT_MAX__
+#define UFRACT_EPSILON __UFRACT_EPSILON__
+
+#undef LFRACT_FBIT
+#undef LFRACT_MIN
+#undef LFRACT_MAX
+#undef LFRACT_EPSILON
+#define LFRACT_FBIT __LFRACT_FBIT__
+#define LFRACT_MIN __LFRACT_MIN__
+#define LFRACT_MAX __LFRACT_MAX__
+#define LFRACT_EPSILON __LFRACT_EPSILON__
+
+#undef ULFRACT_FBIT
+#undef ULFRACT_MIN
+#undef ULFRACT_MAX
+#undef ULFRACT_EPSILON
+#define ULFRACT_FBIT __ULFRACT_FBIT__
+#define ULFRACT_MIN __ULFRACT_MIN__ /* GCC extension. */
+#define ULFRACT_MAX __ULFRACT_MAX__
+#define ULFRACT_EPSILON __ULFRACT_EPSILON__
+
+#undef LLFRACT_FBIT
+#undef LLFRACT_MIN
+#undef LLFRACT_MAX
+#undef LLFRACT_EPSILON
+#define LLFRACT_FBIT __LLFRACT_FBIT__ /* GCC extension. */
+#define LLFRACT_MIN __LLFRACT_MIN__ /* GCC extension. */
+#define LLFRACT_MAX __LLFRACT_MAX__ /* GCC extension. */
+#define LLFRACT_EPSILON __LLFRACT_EPSILON__ /* GCC extension. */
+
+#undef ULLFRACT_FBIT
+#undef ULLFRACT_MIN
+#undef ULLFRACT_MAX
+#undef ULLFRACT_EPSILON
+#define ULLFRACT_FBIT __ULLFRACT_FBIT__ /* GCC extension. */
+#define ULLFRACT_MIN __ULLFRACT_MIN__ /* GCC extension. */
+#define ULLFRACT_MAX __ULLFRACT_MAX__ /* GCC extension. */
+#define ULLFRACT_EPSILON __ULLFRACT_EPSILON__ /* GCC extension. */
+
+#undef SACCUM_FBIT
+#undef SACCUM_IBIT
+#undef SACCUM_MIN
+#undef SACCUM_MAX
+#undef SACCUM_EPSILON
+#define SACCUM_FBIT __SACCUM_FBIT__
+#define SACCUM_IBIT __SACCUM_IBIT__
+#define SACCUM_MIN __SACCUM_MIN__
+#define SACCUM_MAX __SACCUM_MAX__
+#define SACCUM_EPSILON __SACCUM_EPSILON__
+
+#undef USACCUM_FBIT
+#undef USACCUM_IBIT
+#undef USACCUM_MIN
+#undef USACCUM_MAX
+#undef USACCUM_EPSILON
+#define USACCUM_FBIT __USACCUM_FBIT__
+#define USACCUM_IBIT __USACCUM_IBIT__
+#define USACCUM_MIN __USACCUM_MIN__ /* GCC extension. */
+#define USACCUM_MAX __USACCUM_MAX__
+#define USACCUM_EPSILON __USACCUM_EPSILON__
+
+#undef ACCUM_FBIT
+#undef ACCUM_IBIT
+#undef ACCUM_MIN
+#undef ACCUM_MAX
+#undef ACCUM_EPSILON
+#define ACCUM_FBIT __ACCUM_FBIT__
+#define ACCUM_IBIT __ACCUM_IBIT__
+#define ACCUM_MIN __ACCUM_MIN__
+#define ACCUM_MAX __ACCUM_MAX__
+#define ACCUM_EPSILON __ACCUM_EPSILON__
+
+#undef UACCUM_FBIT
+#undef UACCUM_IBIT
+#undef UACCUM_MIN
+#undef UACCUM_MAX
+#undef UACCUM_EPSILON
+#define UACCUM_FBIT __UACCUM_FBIT__
+#define UACCUM_IBIT __UACCUM_IBIT__
+#define UACCUM_MIN __UACCUM_MIN__ /* GCC extension. */
+#define UACCUM_MAX __UACCUM_MAX__
+#define UACCUM_EPSILON __UACCUM_EPSILON__
+
+#undef LACCUM_FBIT
+#undef LACCUM_IBIT
+#undef LACCUM_MIN
+#undef LACCUM_MAX
+#undef LACCUM_EPSILON
+#define LACCUM_FBIT __LACCUM_FBIT__
+#define LACCUM_IBIT __LACCUM_IBIT__
+#define LACCUM_MIN __LACCUM_MIN__
+#define LACCUM_MAX __LACCUM_MAX__
+#define LACCUM_EPSILON __LACCUM_EPSILON__
+
+#undef ULACCUM_FBIT
+#undef ULACCUM_IBIT
+#undef ULACCUM_MIN
+#undef ULACCUM_MAX
+#undef ULACCUM_EPSILON
+#define ULACCUM_FBIT __ULACCUM_FBIT__
+#define ULACCUM_IBIT __ULACCUM_IBIT__
+#define ULACCUM_MIN __ULACCUM_MIN__ /* GCC extension. */
+#define ULACCUM_MAX __ULACCUM_MAX__
+#define ULACCUM_EPSILON __ULACCUM_EPSILON__
+
+#undef LLACCUM_FBIT
+#undef LLACCUM_IBIT
+#undef LLACCUM_MIN
+#undef LLACCUM_MAX
+#undef LLACCUM_EPSILON
+#define LLACCUM_FBIT __LLACCUM_FBIT__ /* GCC extension. */
+#define LLACCUM_IBIT __LLACCUM_IBIT__ /* GCC extension. */
+#define LLACCUM_MIN __LLACCUM_MIN__ /* GCC extension. */
+#define LLACCUM_MAX __LLACCUM_MAX__ /* GCC extension. */
+#define LLACCUM_EPSILON __LLACCUM_EPSILON__ /* GCC extension. */
+
+#undef ULLACCUM_FBIT
+#undef ULLACCUM_IBIT
+#undef ULLACCUM_MIN
+#undef ULLACCUM_MAX
+#undef ULLACCUM_EPSILON
+#define ULLACCUM_FBIT __ULLACCUM_FBIT__ /* GCC extension. */
+#define ULLACCUM_IBIT __ULLACCUM_IBIT__ /* GCC extension. */
+#define ULLACCUM_MIN __ULLACCUM_MIN__ /* GCC extension. */
+#define ULLACCUM_MAX __ULLACCUM_MAX__ /* GCC extension. */
+#define ULLACCUM_EPSILON __ULLACCUM_EPSILON__ /* GCC extension. */
+
+#endif /* _STDFIX_H */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/stdint-gcc.h b/lib/gcc/x86_64-linux-android/4.9/include/stdint-gcc.h
new file mode 100644
index 0000000..1470cea
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/stdint-gcc.h
@@ -0,0 +1,263 @@
+/* Copyright (C) 2008-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/*
+ * ISO C Standard: 7.18 Integer types <stdint.h>
+ */
+
+#ifndef _GCC_STDINT_H
+#define _GCC_STDINT_H
+
+/* 7.18.1.1 Exact-width integer types */
+
+#ifdef __INT8_TYPE__
+typedef __INT8_TYPE__ int8_t;
+#endif
+#ifdef __INT16_TYPE__
+typedef __INT16_TYPE__ int16_t;
+#endif
+#ifdef __INT32_TYPE__
+typedef __INT32_TYPE__ int32_t;
+#endif
+#ifdef __INT64_TYPE__
+typedef __INT64_TYPE__ int64_t;
+#endif
+#ifdef __UINT8_TYPE__
+typedef __UINT8_TYPE__ uint8_t;
+#endif
+#ifdef __UINT16_TYPE__
+typedef __UINT16_TYPE__ uint16_t;
+#endif
+#ifdef __UINT32_TYPE__
+typedef __UINT32_TYPE__ uint32_t;
+#endif
+#ifdef __UINT64_TYPE__
+typedef __UINT64_TYPE__ uint64_t;
+#endif
+
+/* 7.18.1.2 Minimum-width integer types */
+
+typedef __INT_LEAST8_TYPE__ int_least8_t;
+typedef __INT_LEAST16_TYPE__ int_least16_t;
+typedef __INT_LEAST32_TYPE__ int_least32_t;
+typedef __INT_LEAST64_TYPE__ int_least64_t;
+typedef __UINT_LEAST8_TYPE__ uint_least8_t;
+typedef __UINT_LEAST16_TYPE__ uint_least16_t;
+typedef __UINT_LEAST32_TYPE__ uint_least32_t;
+typedef __UINT_LEAST64_TYPE__ uint_least64_t;
+
+/* 7.18.1.3 Fastest minimum-width integer types */
+
+typedef __INT_FAST8_TYPE__ int_fast8_t;
+typedef __INT_FAST16_TYPE__ int_fast16_t;
+typedef __INT_FAST32_TYPE__ int_fast32_t;
+typedef __INT_FAST64_TYPE__ int_fast64_t;
+typedef __UINT_FAST8_TYPE__ uint_fast8_t;
+typedef __UINT_FAST16_TYPE__ uint_fast16_t;
+typedef __UINT_FAST32_TYPE__ uint_fast32_t;
+typedef __UINT_FAST64_TYPE__ uint_fast64_t;
+
+/* 7.18.1.4 Integer types capable of holding object pointers */
+
+#ifdef __INTPTR_TYPE__
+typedef __INTPTR_TYPE__ intptr_t;
+#endif
+#ifdef __UINTPTR_TYPE__
+typedef __UINTPTR_TYPE__ uintptr_t;
+#endif
+
+/* 7.18.1.5 Greatest-width integer types */
+
+typedef __INTMAX_TYPE__ intmax_t;
+typedef __UINTMAX_TYPE__ uintmax_t;
+
+#if (!defined __cplusplus || __cplusplus >= 201103L \
+ || defined __STDC_LIMIT_MACROS)
+
+/* 7.18.2 Limits of specified-width integer types */
+
+#ifdef __INT8_MAX__
+# undef INT8_MAX
+# define INT8_MAX __INT8_MAX__
+# undef INT8_MIN
+# define INT8_MIN (-INT8_MAX - 1)
+#endif
+#ifdef __UINT8_MAX__
+# undef UINT8_MAX
+# define UINT8_MAX __UINT8_MAX__
+#endif
+#ifdef __INT16_MAX__
+# undef INT16_MAX
+# define INT16_MAX __INT16_MAX__
+# undef INT16_MIN
+# define INT16_MIN (-INT16_MAX - 1)
+#endif
+#ifdef __UINT16_MAX__
+# undef UINT16_MAX
+# define UINT16_MAX __UINT16_MAX__
+#endif
+#ifdef __INT32_MAX__
+# undef INT32_MAX
+# define INT32_MAX __INT32_MAX__
+# undef INT32_MIN
+# define INT32_MIN (-INT32_MAX - 1)
+#endif
+#ifdef __UINT32_MAX__
+# undef UINT32_MAX
+# define UINT32_MAX __UINT32_MAX__
+#endif
+#ifdef __INT64_MAX__
+# undef INT64_MAX
+# define INT64_MAX __INT64_MAX__
+# undef INT64_MIN
+# define INT64_MIN (-INT64_MAX - 1)
+#endif
+#ifdef __UINT64_MAX__
+# undef UINT64_MAX
+# define UINT64_MAX __UINT64_MAX__
+#endif
+
+#undef INT_LEAST8_MAX
+#define INT_LEAST8_MAX __INT_LEAST8_MAX__
+#undef INT_LEAST8_MIN
+#define INT_LEAST8_MIN (-INT_LEAST8_MAX - 1)
+#undef UINT_LEAST8_MAX
+#define UINT_LEAST8_MAX __UINT_LEAST8_MAX__
+#undef INT_LEAST16_MAX
+#define INT_LEAST16_MAX __INT_LEAST16_MAX__
+#undef INT_LEAST16_MIN
+#define INT_LEAST16_MIN (-INT_LEAST16_MAX - 1)
+#undef UINT_LEAST16_MAX
+#define UINT_LEAST16_MAX __UINT_LEAST16_MAX__
+#undef INT_LEAST32_MAX
+#define INT_LEAST32_MAX __INT_LEAST32_MAX__
+#undef INT_LEAST32_MIN
+#define INT_LEAST32_MIN (-INT_LEAST32_MAX - 1)
+#undef UINT_LEAST32_MAX
+#define UINT_LEAST32_MAX __UINT_LEAST32_MAX__
+#undef INT_LEAST64_MAX
+#define INT_LEAST64_MAX __INT_LEAST64_MAX__
+#undef INT_LEAST64_MIN
+#define INT_LEAST64_MIN (-INT_LEAST64_MAX - 1)
+#undef UINT_LEAST64_MAX
+#define UINT_LEAST64_MAX __UINT_LEAST64_MAX__
+
+#undef INT_FAST8_MAX
+#define INT_FAST8_MAX __INT_FAST8_MAX__
+#undef INT_FAST8_MIN
+#define INT_FAST8_MIN (-INT_FAST8_MAX - 1)
+#undef UINT_FAST8_MAX
+#define UINT_FAST8_MAX __UINT_FAST8_MAX__
+#undef INT_FAST16_MAX
+#define INT_FAST16_MAX __INT_FAST16_MAX__
+#undef INT_FAST16_MIN
+#define INT_FAST16_MIN (-INT_FAST16_MAX - 1)
+#undef UINT_FAST16_MAX
+#define UINT_FAST16_MAX __UINT_FAST16_MAX__
+#undef INT_FAST32_MAX
+#define INT_FAST32_MAX __INT_FAST32_MAX__
+#undef INT_FAST32_MIN
+#define INT_FAST32_MIN (-INT_FAST32_MAX - 1)
+#undef UINT_FAST32_MAX
+#define UINT_FAST32_MAX __UINT_FAST32_MAX__
+#undef INT_FAST64_MAX
+#define INT_FAST64_MAX __INT_FAST64_MAX__
+#undef INT_FAST64_MIN
+#define INT_FAST64_MIN (-INT_FAST64_MAX - 1)
+#undef UINT_FAST64_MAX
+#define UINT_FAST64_MAX __UINT_FAST64_MAX__
+
+#ifdef __INTPTR_MAX__
+# undef INTPTR_MAX
+# define INTPTR_MAX __INTPTR_MAX__
+# undef INTPTR_MIN
+# define INTPTR_MIN (-INTPTR_MAX - 1)
+#endif
+#ifdef __UINTPTR_MAX__
+# undef UINTPTR_MAX
+# define UINTPTR_MAX __UINTPTR_MAX__
+#endif
+
+#undef INTMAX_MAX
+#define INTMAX_MAX __INTMAX_MAX__
+#undef INTMAX_MIN
+#define INTMAX_MIN (-INTMAX_MAX - 1)
+#undef UINTMAX_MAX
+#define UINTMAX_MAX __UINTMAX_MAX__
+
+/* 7.18.3 Limits of other integer types */
+
+#undef PTRDIFF_MAX
+#define PTRDIFF_MAX __PTRDIFF_MAX__
+#undef PTRDIFF_MIN
+#define PTRDIFF_MIN (-PTRDIFF_MAX - 1)
+
+#undef SIG_ATOMIC_MAX
+#define SIG_ATOMIC_MAX __SIG_ATOMIC_MAX__
+#undef SIG_ATOMIC_MIN
+#define SIG_ATOMIC_MIN __SIG_ATOMIC_MIN__
+
+#undef SIZE_MAX
+#define SIZE_MAX __SIZE_MAX__
+
+#undef WCHAR_MAX
+#define WCHAR_MAX __WCHAR_MAX__
+#undef WCHAR_MIN
+#define WCHAR_MIN __WCHAR_MIN__
+
+#undef WINT_MAX
+#define WINT_MAX __WINT_MAX__
+#undef WINT_MIN
+#define WINT_MIN __WINT_MIN__
+
+#endif /* (!defined __cplusplus || __cplusplus >= 201103L
+ || defined __STDC_LIMIT_MACROS) */
+
+#if (!defined __cplusplus || __cplusplus >= 201103L \
+ || defined __STDC_CONSTANT_MACROS)
+/* 7.18.4 Macros for integer constants */
+#undef INT8_C
+#define INT8_C(c) __INT8_C(c)
+#undef INT16_C
+#define INT16_C(c) __INT16_C(c)
+#undef INT32_C
+#define INT32_C(c) __INT32_C(c)
+#undef INT64_C
+#define INT64_C(c) __INT64_C(c)
+#undef UINT8_C
+#define UINT8_C(c) __UINT8_C(c)
+#undef UINT16_C
+#define UINT16_C(c) __UINT16_C(c)
+#undef UINT32_C
+#define UINT32_C(c) __UINT32_C(c)
+#undef UINT64_C
+#define UINT64_C(c) __UINT64_C(c)
+#undef INTMAX_C
+#define INTMAX_C(c) __INTMAX_C(c)
+#undef UINTMAX_C
+#define UINTMAX_C(c) __UINTMAX_C(c)
+
+#endif /* (!defined __cplusplus || __cplusplus >= 201103L
+ || defined __STDC_CONSTANT_MACROS) */
+
+#endif /* _GCC_STDINT_H */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/stdint.h b/lib/gcc/x86_64-linux-android/4.9/include/stdint.h
new file mode 100644
index 0000000..83b6f70
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/stdint.h
@@ -0,0 +1,14 @@
+#ifndef _GCC_WRAP_STDINT_H
+#if __STDC_HOSTED__ /* Hosted: defer to the next <stdint.h> on the include path. */
+# if defined __cplusplus && __cplusplus >= 201103L /* C++11 or later: define both request macros first. */
+# undef __STDC_LIMIT_MACROS
+# define __STDC_LIMIT_MACROS
+# undef __STDC_CONSTANT_MACROS
+# define __STDC_CONSTANT_MACROS
+# endif /* C++11 */
+# include_next <stdint.h>
+#else /* !__STDC_HOSTED__: use GCC's own stdint-gcc.h. */
+# include "stdint-gcc.h"
+#endif /* __STDC_HOSTED__ */
+#define _GCC_WRAP_STDINT_H
+#endif /* _GCC_WRAP_STDINT_H */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/stdnoreturn.h b/lib/gcc/x86_64-linux-android/4.9/include/stdnoreturn.h
new file mode 100644
index 0000000..0134137
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/stdnoreturn.h
@@ -0,0 +1,35 @@
+/* Copyright (C) 2011-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* ISO C11: 7.23 _Noreturn <stdnoreturn.h>. */
+
+#ifndef _STDNORETURN_H
+#define _STDNORETURN_H
+
+#ifndef __cplusplus
+/* C only: noreturn expands to the _Noreturn keyword. */
+#define noreturn _Noreturn
+
+#endif /* !__cplusplus */
+
+#endif /* stdnoreturn.h */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/tbmintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/tbmintrin.h
new file mode 100644
index 0000000..871f532
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/tbmintrin.h
@@ -0,0 +1,180 @@
+/* Copyright (C) 2010-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _X86INTRIN_H_INCLUDED
+# error "Never use <tbmintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef _TBMINTRIN_H_INCLUDED
+#define _TBMINTRIN_H_INCLUDED
+
+/* If TBM is not already enabled, save the current options, enable the
+   "tbm" target for the definitions that follow, and record in
+   __DISABLE_TBM__ that the saved options must be restored later.  */
+#ifndef __TBM__
+#pragma GCC push_options
+#pragma GCC target("tbm")
+#define __DISABLE_TBM__
+#endif /* __TBM__ */
+
+/* 32-bit bextri: forwards __X and the control word __I to
+   __builtin_ia32_bextri_u32.  When optimizing, an always-inline
+   function is used; otherwise a macro hands the operands directly to
+   the builtin, casting both to unsigned int.
+   NOTE(review): presumably __I has to be a compile-time constant, as
+   the two-variant layout suggests -- confirm against the TBM docs.  */
+#ifdef __OPTIMIZE__
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__bextri_u32 (unsigned int __X, const unsigned int __I)
+{
+ return __builtin_ia32_bextri_u32 (__X, __I);
+}
+#else
+#define __bextri_u32(X, I) \
+ ((unsigned int)__builtin_ia32_bextri_u32 ((unsigned int)(X), \
+ (unsigned int)(I)))
+#endif /*__OPTIMIZE__ */
+
+/* Clear the trailing run of set bits of __X (every set bit below the
+   lowest clear bit); all other bits are returned unchanged.  An
+   all-ones input yields 0 because the increment wraps.  */
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blcfill_u32 (unsigned int __X)
+{
+  const unsigned int __succ = __X + 1;
+  return __succ & __X;
+}
+
+/* Return a word that is all ones except for a zero at the position of
+   the lowest clear bit of __X.  If __X has no clear bit the result is
+   all ones.  */
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blci_u32 (unsigned int __X)
+{
+  const unsigned int __succ = __X + 1;
+  return ~__succ | __X;
+}
+
+/* Isolate the lowest clear bit of __X: the result has a single set bit
+   at that position, or is 0 when __X is all ones.  */
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blcic_u32 (unsigned int __X)
+{
+  const unsigned int __succ = __X + 1;
+  return __succ & ~__X;
+}
+
+/* Build a mask covering bit 0 up to and including the lowest clear bit
+   of __X.  An all-ones input yields an all-ones mask.  */
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blcmsk_u32 (unsigned int __X)
+{
+  const unsigned int __succ = __X + 1;
+  return __succ ^ __X;
+}
+
+/* Set the lowest clear bit of __X and return the result; an all-ones
+   input is returned unchanged.  */
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blcs_u32 (unsigned int __X)
+{
+  const unsigned int __succ = __X + 1;
+  return __succ | __X;
+}
+
+/* Set every bit below the lowest set bit of __X.  A zero input yields
+   all ones because the decrement wraps.  */
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsfill_u32 (unsigned int __X)
+{
+  const unsigned int __pred = __X - 1;
+  return __pred | __X;
+}
+
+/* Return a word that is all ones except for a zero at the position of
+   the lowest set bit of __X.  A zero input yields all ones.  */
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsic_u32 (unsigned int __X)
+{
+  const unsigned int __pred = __X - 1;
+  return __pred | ~__X;
+}
+
+/* Inverse mask of the trailing ones: the result has zeros where __X has
+   its trailing run of set bits and ones everywhere else.  */
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__t1mskc_u32 (unsigned int __X)
+{
+  const unsigned int __succ = __X + 1;
+  return __succ | ~__X;
+}
+
+/* Mask of the trailing zeros: the result has ones exactly where __X has
+   its trailing run of clear bits.  A zero input yields all ones.  */
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__tzmsk_u32 (unsigned int __X)
+{
+  const unsigned int __pred = __X - 1;
+  return __pred & ~__X;
+}
+
+
+
+#ifdef __x86_64__
+/* 64-bit bextri: forwards __X and the control word __I to
+   __builtin_ia32_bextri_u64.  When optimizing, an always-inline
+   function is used; otherwise a macro hands the operands directly to
+   the builtin.  Note that the function takes __I as unsigned int while
+   the macro casts I to unsigned long long.
+   NOTE(review): presumably __I has to be a compile-time constant --
+   confirm against the TBM docs.  */
+#ifdef __OPTIMIZE__
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__bextri_u64 (unsigned long long __X, const unsigned int __I)
+{
+ return __builtin_ia32_bextri_u64 (__X, __I);
+}
+#else
+#define __bextri_u64(X, I) \
+ ((unsigned long long)__builtin_ia32_bextri_u64 ((unsigned long long)(X), \
+ (unsigned long long)(I)))
+#endif /*__OPTIMIZE__ */
+
+/* 64-bit form: clear the trailing run of set bits of __X (every set bit
+   below the lowest clear bit); an all-ones input yields 0.  */
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blcfill_u64 (unsigned long long __X)
+{
+  const unsigned long long __succ = __X + 1;
+  return __succ & __X;
+}
+
+/* 64-bit form: all ones except for a zero at the position of the lowest
+   clear bit of __X; all ones when __X has no clear bit.  */
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blci_u64 (unsigned long long __X)
+{
+  const unsigned long long __succ = __X + 1;
+  return ~__succ | __X;
+}
+
+/* 64-bit form: isolate the lowest clear bit of __X as a single set bit,
+   or return 0 when __X is all ones.  */
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blcic_u64 (unsigned long long __X)
+{
+  const unsigned long long __succ = __X + 1;
+  return __succ & ~__X;
+}
+
+/* 64-bit form: mask covering bit 0 up to and including the lowest clear
+   bit of __X; an all-ones input yields an all-ones mask.  */
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blcmsk_u64 (unsigned long long __X)
+{
+  const unsigned long long __succ = __X + 1;
+  return __succ ^ __X;
+}
+
+/* 64-bit form: set the lowest clear bit of __X; an all-ones input is
+   returned unchanged.  */
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blcs_u64 (unsigned long long __X)
+{
+  const unsigned long long __succ = __X + 1;
+  return __succ | __X;
+}
+
+/* 64-bit form: set every bit below the lowest set bit of __X; a zero
+   input yields all ones.  */
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsfill_u64 (unsigned long long __X)
+{
+  const unsigned long long __pred = __X - 1;
+  return __pred | __X;
+}
+
+/* 64-bit form: all ones except for a zero at the position of the lowest
+   set bit of __X; a zero input yields all ones.  */
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsic_u64 (unsigned long long __X)
+{
+  const unsigned long long __pred = __X - 1;
+  return __pred | ~__X;
+}
+
+/* 64-bit form: zeros where __X has its trailing run of set bits, ones
+   everywhere else.  */
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__t1mskc_u64 (unsigned long long __X)
+{
+  const unsigned long long __succ = __X + 1;
+  return __succ | ~__X;
+}
+
+/* 64-bit form: ones exactly where __X has its trailing run of clear
+   bits; a zero input yields all ones.  */
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__tzmsk_u64 (unsigned long long __X)
+{
+  const unsigned long long __pred = __X - 1;
+  return __pred & ~__X;
+}
+
+
+#endif /* __x86_64__ */
+
+/* Restore the saved target options, but only if this header was the one
+   that switched TBM on (signalled by __DISABLE_TBM__).  */
+#ifdef __DISABLE_TBM__
+#undef __DISABLE_TBM__
+#pragma GCC pop_options
+#endif /* __DISABLE_TBM__ */
+
+#endif /* _TBMINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/tmmintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/tmmintrin.h
new file mode 100644
index 0000000..89556d2
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/tmmintrin.h
@@ -0,0 +1,249 @@
+/* Copyright (C) 2006-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 9.1. */
+
+#ifndef _TMMINTRIN_H_INCLUDED
+#define _TMMINTRIN_H_INCLUDED
+
+/* We need definitions from the SSE3, SSE2 and SSE header files. */
+#include <pmmintrin.h>
+
+/* If SSSE3 is not already enabled, save the current options, enable the
+   "ssse3" target for the definitions that follow, and record in
+   __DISABLE_SSSE3__ that the saved options must be restored later.  */
+#ifndef __SSSE3__
+#pragma GCC push_options
+#pragma GCC target("ssse3")
+#define __DISABLE_SSSE3__
+#endif /* __SSSE3__ */
+
+/* Horizontal add of adjacent 16-bit elements (phaddw, 128-bit form).
+   __X and __Y are handed to the builtin as __v8hi vectors.  */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hadd_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phaddw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+/* Horizontal add of adjacent 32-bit elements (phaddd, 128-bit form).
+   __X and __Y are handed to the builtin as __v4si vectors.  */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hadd_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phaddd128 ((__v4si)__X, (__v4si)__Y);
+}
+
+/* Saturating horizontal add of adjacent signed 16-bit elements
+   (phaddsw, 128-bit form).  Operands are passed as __v8hi vectors.  */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hadds_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phaddsw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+/* Horizontal add of adjacent 16-bit elements (phaddw, 64-bit form).
+   __X and __Y are handed to the builtin as __v4hi vectors.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hadd_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phaddw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+/* Horizontal add of adjacent 32-bit elements (phaddd, 64-bit form).
+   __X and __Y are handed to the builtin as __v2si vectors.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hadd_pi32 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phaddd ((__v2si)__X, (__v2si)__Y);
+}
+
+/* Saturating horizontal add of adjacent signed 16-bit elements
+   (phaddsw, 64-bit form).  Operands are passed as __v4hi vectors.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hadds_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phaddsw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+/* Horizontal subtract of adjacent 16-bit elements (phsubw, 128-bit
+   form).  __X and __Y are handed to the builtin as __v8hi vectors.  */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsub_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phsubw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+/* Horizontal subtract of adjacent 32-bit elements (phsubd, 128-bit
+   form).  __X and __Y are handed to the builtin as __v4si vectors.  */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsub_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phsubd128 ((__v4si)__X, (__v4si)__Y);
+}
+
+/* Saturating horizontal subtract of adjacent signed 16-bit elements
+   (phsubsw, 128-bit form).  Operands are passed as __v8hi vectors.  */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsubs_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phsubsw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+/* Horizontal subtract of adjacent 16-bit elements (phsubw, 64-bit
+   form).  __X and __Y are handed to the builtin as __v4hi vectors.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsub_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phsubw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+/* Horizontal subtract of adjacent 32-bit elements (phsubd, 64-bit
+   form).  __X and __Y are handed to the builtin as __v2si vectors.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsub_pi32 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phsubd ((__v2si)__X, (__v2si)__Y);
+}
+
+/* Saturating horizontal subtract of adjacent signed 16-bit elements
+   (phsubsw, 64-bit form).  Operands are passed as __v4hi vectors.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsubs_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phsubsw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+/* Multiply unsigned bytes of __X by signed bytes of __Y and add
+   adjacent products with signed saturation into 16-bit results
+   (pmaddubsw, 128-bit form).  Operands are passed as __v16qi.  */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maddubs_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmaddubsw128 ((__v16qi)__X, (__v16qi)__Y);
+}
+
+/* Multiply unsigned bytes of __X by signed bytes of __Y and add
+   adjacent products with signed saturation into 16-bit results
+   (pmaddubsw, 64-bit form).  Operands are passed as __v8qi.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maddubs_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_pmaddubsw ((__v8qi)__X, (__v8qi)__Y);
+}
+
+/* Signed 16-bit multiply returning the rounded, scaled high part of
+   each product (pmulhrsw, 128-bit form).  Operands are passed as
+   __v8hi vectors.  */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulhrs_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmulhrsw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+/* Signed 16-bit multiply returning the rounded, scaled high part of
+   each product (pmulhrsw, 64-bit form).  Operands are passed as
+   __v4hi vectors.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulhrs_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_pmulhrsw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+/* Byte shuffle of __X under control of the bytes in __Y (pshufb,
+   128-bit form).  Both operands are passed as __v16qi vectors.  */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_epi8 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pshufb128 ((__v16qi)__X, (__v16qi)__Y);
+}
+
+/* Byte shuffle of __X under control of the bytes in __Y (pshufb,
+   64-bit form).  Both operands are passed as __v8qi vectors.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_pi8 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_pshufb ((__v8qi)__X, (__v8qi)__Y);
+}
+
+/* Apply the sign of each byte of __Y to the matching byte of __X:
+   negate, zero or keep it (psignb, 128-bit form).  Operands are
+   passed as __v16qi vectors.  */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sign_epi8 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psignb128 ((__v16qi)__X, (__v16qi)__Y);
+}
+
+/* Apply the sign of each 16-bit element of __Y to the matching element
+   of __X (psignw, 128-bit form).  Operands are passed as __v8hi.  */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sign_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psignw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+/* Apply the sign of each 32-bit element of __Y to the matching element
+   of __X (psignd, 128-bit form).  Operands are passed as __v4si.  */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sign_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psignd128 ((__v4si)__X, (__v4si)__Y);
+}
+
+/* Apply the sign of each byte of __Y to the matching byte of __X
+   (psignb, 64-bit form).  Operands are passed as __v8qi.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sign_pi8 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_psignb ((__v8qi)__X, (__v8qi)__Y);
+}
+
+/* Apply the sign of each 16-bit element of __Y to the matching element
+   of __X (psignw, 64-bit form).  Operands are passed as __v4hi.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sign_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_psignw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+/* Apply the sign of each 32-bit element of __Y to the matching element
+   of __X (psignd, 64-bit form).  Operands are passed as __v2si.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sign_pi32 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_psignd ((__v2si)__X, (__v2si)__Y);
+}
+
+/* _mm_alignr_epi8 / _mm_alignr_pi8: forward __X, __Y and a count to the
+   palignr builtins (128-bit and 64-bit forms).  The caller's count __N
+   is multiplied by 8 before it reaches the builtin, i.e. callers give
+   a byte count and the builtin receives a bit count.  When optimizing,
+   always-inline functions are used; otherwise equivalent macros pass
+   the operands to the builtin directly.
+   NOTE(review): presumably __N must be a compile-time constant, which
+   would explain the two variants -- confirm against the SSSE3 docs.  */
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_alignr_epi8(__m128i __X, __m128i __Y, const int __N)
+{
+ return (__m128i) __builtin_ia32_palignr128 ((__v2di)__X,
+ (__v2di)__Y, __N * 8);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_alignr_pi8(__m64 __X, __m64 __Y, const int __N)
+{
+ return (__m64) __builtin_ia32_palignr ((__v1di)__X,
+ (__v1di)__Y, __N * 8);
+}
+#else
+#define _mm_alignr_epi8(X, Y, N) \
+ ((__m128i) __builtin_ia32_palignr128 ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), \
+ (int)(N) * 8))
+#define _mm_alignr_pi8(X, Y, N) \
+ ((__m64) __builtin_ia32_palignr ((__v1di)(__m64)(X), \
+ (__v1di)(__m64)(Y), \
+ (int)(N) * 8))
+#endif
+
+/* Absolute value of each byte of __X (pabsb, 128-bit form); the operand
+   is passed as a __v16qi vector.  */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_abs_epi8 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pabsb128 ((__v16qi)__X);
+}
+
+/* Absolute value of each 16-bit element of __X (pabsw, 128-bit form);
+   the operand is passed as a __v8hi vector.  */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_abs_epi16 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pabsw128 ((__v8hi)__X);
+}
+
+/* Absolute value of each 32-bit element of __X (pabsd, 128-bit form);
+   the operand is passed as a __v4si vector.  */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_abs_epi32 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pabsd128 ((__v4si)__X);
+}
+
+/* Absolute value of each byte of __X (pabsb, 64-bit form); the operand
+   is passed as a __v8qi vector.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_abs_pi8 (__m64 __X)
+{
+ return (__m64) __builtin_ia32_pabsb ((__v8qi)__X);
+}
+
+/* Absolute value of each 16-bit element of __X (pabsw, 64-bit form);
+   the operand is passed as a __v4hi vector.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_abs_pi16 (__m64 __X)
+{
+ return (__m64) __builtin_ia32_pabsw ((__v4hi)__X);
+}
+
+/* Absolute value of each 32-bit element of __X (pabsd, 64-bit form);
+   the operand is passed as a __v2si vector.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_abs_pi32 (__m64 __X)
+{
+ return (__m64) __builtin_ia32_pabsd ((__v2si)__X);
+}
+
+/* Restore the saved target options, but only if this header was the one
+   that switched SSSE3 on (signalled by __DISABLE_SSSE3__).  */
+#ifdef __DISABLE_SSSE3__
+#undef __DISABLE_SSSE3__
+#pragma GCC pop_options
+#endif /* __DISABLE_SSSE3__ */
+
+#endif /* _TMMINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/unwind.h b/lib/gcc/x86_64-linux-android/4.9/include/unwind.h
new file mode 100644
index 0000000..d351fb9
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/unwind.h
@@ -0,0 +1,293 @@
+/* Exception handling and frame unwind runtime interface routines.
+ Copyright (C) 2001-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This is derived from the C++ ABI for IA-64. Places where we diverge
+ for cross-architecture compatibility are noted with "@@@". */
+
+#ifndef _UNWIND_H
+#define _UNWIND_H
+
+#if defined (__SEH__) && !defined (__USING_SJLJ_EXCEPTIONS__)
+/* Only for _GCC_specific_handler. */
+#include <windows.h>
+#endif
+
+#ifndef HIDE_EXPORTS
+#pragma GCC visibility push(default)
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Level 1: Base ABI */
+
+/* @@@ The IA-64 ABI uses uint64 throughout.  In most places this is
+   inefficient for 32-bit and smaller machines, so the integer types
+   below take their width from a machine mode instead.  */
+/* Unsigned and signed integers in the target's unwind-word mode.  */
+typedef unsigned _Unwind_Word __attribute__((__mode__(__unwind_word__)));
+typedef signed _Unwind_Sword __attribute__((__mode__(__unwind_word__)));
+/* Unsigned integer in pointer mode, except on IA-64 HP-UX where word
+   mode is used instead.  */
+#if defined(__ia64__) && defined(__hpux__)
+typedef unsigned _Unwind_Ptr __attribute__((__mode__(__word__)));
+#else
+typedef unsigned _Unwind_Ptr __attribute__((__mode__(__pointer__)));
+#endif
+/* Unsigned integer in pointer mode on every target.  */
+typedef unsigned _Unwind_Internal_Ptr __attribute__((__mode__(__pointer__)));
+
+/* @@@ The IA-64 ABI uses a 64-bit word to identify the producer and
+ consumer of an exception. We'll go along with this for now even on
+ 32-bit machines. We'll need to provide some other option for
+ 16-bit machines and for machines with > 8 bits per byte. */
+typedef unsigned _Unwind_Exception_Class __attribute__((__mode__(__DI__)));
+
+/* The unwind interface uses reason codes in several contexts to
+ identify the reasons for failures or other actions. */
+/* Every enumerator is given an explicit value (0 through 8).  This type
+   is the return type of the _Unwind_* entry points declared in this
+   header and of the personality, stop and trace callback types.  */
+typedef enum
+{
+ _URC_NO_REASON = 0,
+ _URC_FOREIGN_EXCEPTION_CAUGHT = 1,
+ _URC_FATAL_PHASE2_ERROR = 2,
+ _URC_FATAL_PHASE1_ERROR = 3,
+ _URC_NORMAL_STOP = 4,
+ _URC_END_OF_STACK = 5,
+ _URC_HANDLER_FOUND = 6,
+ _URC_INSTALL_CONTEXT = 7,
+ _URC_CONTINUE_UNWIND = 8
+} _Unwind_Reason_Code;
+
+
+/* The unwind interface uses a pointer to an exception header object
+ as its representation of an exception being thrown. In general, the
+ full representation of an exception object is language- and
+ implementation-specific, but it will be prefixed by a header
+ understood by the unwind interface. */
+
+struct _Unwind_Exception;
+
+typedef void (*_Unwind_Exception_Cleanup_Fn) (_Unwind_Reason_Code,
+ struct _Unwind_Exception *);
+
+/* Exception header understood by the unwind interface.  */
+struct _Unwind_Exception
+{
+ /* 64-bit (DImode) class value, followed by the cleanup callback that
+    takes a reason code and a pointer to this header.  */
+ _Unwind_Exception_Class exception_class;
+ _Unwind_Exception_Cleanup_Fn exception_cleanup;
+
+ /* Private unwinder words: six when SEH is used without SJLJ
+    exceptions, two named words otherwise.  */
+#if !defined (__USING_SJLJ_EXCEPTIONS__) && defined (__SEH__)
+ _Unwind_Word private_[6];
+#else
+ _Unwind_Word private_1;
+ _Unwind_Word private_2;
+#endif
+
+ /* @@@ The IA-64 ABI says that this structure must be double-word aligned.
+ Taking that literally does not make much sense generically. Instead we
+ provide the maximum alignment required by any type for the machine. */
+} __attribute__((__aligned__));
+
+
+/* The ACTIONS argument to the personality routine is a bitwise OR of one
+ or more of the following constants. */
+/* Bit-mask type for the ACTIONS argument.  Each _UA_* constant below is
+   a distinct power of two, so any combination can be OR-ed together
+   and tested bit by bit.  */
+typedef int _Unwind_Action;
+
+#define _UA_SEARCH_PHASE 1
+#define _UA_CLEANUP_PHASE 2
+#define _UA_HANDLER_FRAME 4
+#define _UA_FORCE_UNWIND 8
+#define _UA_END_OF_STACK 16
+
+/* The target can override this macro to define any back-end-specific
+ attributes required for the lowest-level stack frame. */
+#ifndef LIBGCC2_UNWIND_ATTRIBUTE
+#define LIBGCC2_UNWIND_ATTRIBUTE
+#endif
+
+/* This is an opaque type used to refer to a system-specific data
+ structure used by the system unwinder. This context is created and
+ destroyed by the system, and passed to the personality routine
+ during unwinding. */
+struct _Unwind_Context;
+
+/* Raise an exception, passing along the given exception object. */
+extern _Unwind_Reason_Code LIBGCC2_UNWIND_ATTRIBUTE
+_Unwind_RaiseException (struct _Unwind_Exception *);
+
+/* Raise an exception for forced unwinding. */
+
+typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)
+ (int, _Unwind_Action, _Unwind_Exception_Class,
+ struct _Unwind_Exception *, struct _Unwind_Context *, void *);
+
+extern _Unwind_Reason_Code LIBGCC2_UNWIND_ATTRIBUTE
+_Unwind_ForcedUnwind (struct _Unwind_Exception *, _Unwind_Stop_Fn, void *);
+
+/* Helper to invoke the exception_cleanup routine. */
+extern void _Unwind_DeleteException (struct _Unwind_Exception *);
+
+/* Resume propagation of an existing exception. This is used after
+ e.g. executing cleanup code, and not to implement rethrowing. */
+extern void LIBGCC2_UNWIND_ATTRIBUTE
+_Unwind_Resume (struct _Unwind_Exception *);
+
+/* @@@ Resume propagation of a FORCE_UNWIND exception, or to rethrow
+ a normal exception that was handled. */
+extern _Unwind_Reason_Code LIBGCC2_UNWIND_ATTRIBUTE
+_Unwind_Resume_or_Rethrow (struct _Unwind_Exception *);
+
+/* @@@ Use unwind data to perform a stack backtrace. The trace callback
+ is called for every stack frame in the call chain, but no cleanup
+ actions are performed. */
+typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)
+ (struct _Unwind_Context *, void *);
+
+extern _Unwind_Reason_Code LIBGCC2_UNWIND_ATTRIBUTE
+_Unwind_Backtrace (_Unwind_Trace_Fn, void *);
+
+/* These functions are used for communicating information about the unwind
+ context (i.e. the unwind descriptors and the user register state) between
+ the unwind library and the personality routine and landing pad. Only
+ selected registers may be manipulated. */
+
+extern _Unwind_Word _Unwind_GetGR (struct _Unwind_Context *, int);
+extern void _Unwind_SetGR (struct _Unwind_Context *, int, _Unwind_Word);
+
+extern _Unwind_Ptr _Unwind_GetIP (struct _Unwind_Context *);
+extern _Unwind_Ptr _Unwind_GetIPInfo (struct _Unwind_Context *, int *);
+extern void _Unwind_SetIP (struct _Unwind_Context *, _Unwind_Ptr);
+
+/* @@@ Retrieve the CFA of the given context. */
+extern _Unwind_Word _Unwind_GetCFA (struct _Unwind_Context *);
+
+extern void *_Unwind_GetLanguageSpecificData (struct _Unwind_Context *);
+
+extern _Unwind_Ptr _Unwind_GetRegionStart (struct _Unwind_Context *);
+
+
+/* The personality routine is the function in the C++ (or other language)
+ runtime library which serves as an interface between the system unwind
+ library and language-specific exception handling semantics. It is
+ specific to the code fragment described by an unwind info block, and
+ it is always referenced via the pointer in the unwind info block, and
+ hence it has no ABI-specified name.
+
+ Note that this implies that two different C++ implementations can
+ use different names, and have different contents in the language
+ specific data area. Moreover, the language specific data area
+ contains no version info, because the name of the function invoked
+ provides more effective versioning by detecting at link time the
+ lack of code to handle the different data format. */
+
+typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)
+ (int, _Unwind_Action, _Unwind_Exception_Class,
+ struct _Unwind_Exception *, struct _Unwind_Context *);
+
+/* @@@ The following alternate entry points are for setjmp/longjmp
+ based unwinding. */
+
+struct SjLj_Function_Context;
+extern void _Unwind_SjLj_Register (struct SjLj_Function_Context *);
+extern void _Unwind_SjLj_Unregister (struct SjLj_Function_Context *);
+
+extern _Unwind_Reason_Code LIBGCC2_UNWIND_ATTRIBUTE
+_Unwind_SjLj_RaiseException (struct _Unwind_Exception *);
+extern _Unwind_Reason_Code LIBGCC2_UNWIND_ATTRIBUTE
+_Unwind_SjLj_ForcedUnwind (struct _Unwind_Exception *, _Unwind_Stop_Fn, void *);
+extern void LIBGCC2_UNWIND_ATTRIBUTE
+_Unwind_SjLj_Resume (struct _Unwind_Exception *);
+extern _Unwind_Reason_Code LIBGCC2_UNWIND_ATTRIBUTE
+_Unwind_SjLj_Resume_or_Rethrow (struct _Unwind_Exception *);
+
+/* @@@ The following provide access to the base addresses for text
+ and data-relative addressing in the LDSA. In order to stay link
+ compatible with the standard ABI for IA-64, we inline these. */
+
+#ifdef __ia64__
+#include <stdlib.h>
+
+/* IA-64 only: return the data-relative base for context _C, which is
+   read from general register 1 through _Unwind_GetGR.  */
+static inline _Unwind_Ptr
+_Unwind_GetDataRelBase (struct _Unwind_Context *_C)
+{
+ /* The GP is stored in R1. */
+ return _Unwind_GetGR (_C, 1);
+}
+
+/* IA-64 only: this inline version unconditionally calls abort (); the
+   context argument is unused.  The `return 0' after the abort () call
+   gives the non-void function a return statement.  */
+static inline _Unwind_Ptr
+_Unwind_GetTextRelBase (struct _Unwind_Context *_C __attribute__ ((__unused__)))
+{
+ abort ();
+ return 0;
+}
+
+/* @@@ Retrieve the Backing Store Pointer of the given context. */
+extern _Unwind_Word _Unwind_GetBSP (struct _Unwind_Context *);
+#else
+extern _Unwind_Ptr _Unwind_GetDataRelBase (struct _Unwind_Context *);
+extern _Unwind_Ptr _Unwind_GetTextRelBase (struct _Unwind_Context *);
+#endif
+
+/* @@@ Given an address, return the entry point of the function that
+ contains it. */
+extern void * _Unwind_FindEnclosingFunction (void *pc);
+
+#ifndef __SIZEOF_LONG__
+ #error "__SIZEOF_LONG__ macro not defined"
+#endif
+
+#ifndef __SIZEOF_POINTER__
+ #error "__SIZEOF_POINTER__ macro not defined"
+#endif
+
+
+/* leb128 type numbers have a potentially unlimited size.
+ The purpose of the following definitions of _sleb128_t and _uleb128_t
+ is to have efficient data types large enough to hold the leb128 type
+ numbers used in the unwind code.
+ Mostly these types will simply be defined as long and unsigned long,
+ except when an unsigned long on the target machine is not
+ capable of storing a pointer. */
+
+/* Prefer long when it is at least as wide as a pointer; otherwise try
+   long long; if neither is wide enough, stop compilation.  */
+#if __SIZEOF_LONG__ >= __SIZEOF_POINTER__
+ typedef long _sleb128_t;
+ typedef unsigned long _uleb128_t;
+#elif __SIZEOF_LONG_LONG__ >= __SIZEOF_POINTER__
+ typedef long long _sleb128_t;
+ typedef unsigned long long _uleb128_t;
+#else
+# error "What type shall we use for _sleb128_t?"
+#endif
+
+#if defined (__SEH__) && !defined (__USING_SJLJ_EXCEPTIONS__)
+/* Handles the mapping from SEH to GCC interfaces. */
+EXCEPTION_DISPOSITION _GCC_specific_handler (PEXCEPTION_RECORD, void *,
+ PCONTEXT, PDISPATCHER_CONTEXT,
+ _Unwind_Personality_Fn);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef HIDE_EXPORTS
+#pragma GCC visibility pop
+#endif
+
+#endif /* unwind.h */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/varargs.h b/lib/gcc/x86_64-linux-android/4.9/include/varargs.h
new file mode 100644
index 0000000..4b9803e
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/varargs.h
@@ -0,0 +1,7 @@
+/* Stub header: including it always stops compilation with the two
+   diagnostics below, which direct users to <stdarg.h>.  */
+#ifndef _VARARGS_H
+#define _VARARGS_H
+
+#error "GCC no longer implements <varargs.h>."
+#error "Revise your code to use <stdarg.h>."
+
+#endif
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/wmmintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/wmmintrin.h
new file mode 100644
index 0000000..2002375
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/wmmintrin.h
@@ -0,0 +1,132 @@
+/* Copyright (C) 2008-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 10.1. */
+
+#ifndef _WMMINTRIN_H_INCLUDED
+#define _WMMINTRIN_H_INCLUDED
+
+/* We need definitions from the SSE2 header file. */
+#include <emmintrin.h>
+
+/* AES */
+
+/* If AES is not already enabled, save the current options, enable the
+   "aes" target for the definitions that follow, and record in
+   __DISABLE_AES__ that the saved options must be restored later.  */
+#ifndef __AES__
+#pragma GCC push_options
+#pragma GCC target("aes")
+#define __DISABLE_AES__
+#endif /* __AES__ */
+
+/* Performs one round of AES decryption on __X using __Y as the round
+   key.  Both operands are passed to the aesdec128 builtin as __v2di
+   vectors.  */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_aesdec_si128 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_aesdec128 ((__v2di)__X, (__v2di)__Y);
+}
+
+/* Performs the last round of AES decryption on __X using __Y as the
+   round key.  Both operands are passed to the aesdeclast128 builtin as
+   __v2di vectors.  */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_aesdeclast_si128 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_aesdeclast128 ((__v2di)__X,
+ (__v2di)__Y);
+}
+
+/* Performs one round of AES encryption on __X using __Y as the round
+   key.  Both operands are passed to the aesenc128 builtin as __v2di
+   vectors.  */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_aesenc_si128 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_aesenc128 ((__v2di)__X, (__v2di)__Y);
+}
+
+/* Performs the last round of AES encryption on __X using __Y as the
+   round key.  Both operands are passed to the aesenclast128 builtin as
+   __v2di vectors.  */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_aesenclast_si128 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_aesenclast128 ((__v2di)__X, (__v2di)__Y);
+}
+
+/* Performs the InverseMixColumn operation on __X and returns the
+   result.  The operand is passed to the aesimc128 builtin as a __v2di
+   vector.  */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_aesimc_si128 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_aesimc128 ((__v2di)__X);
+}
+
+/* Generates an __m128i round key from the AES cipher key __X and the
+   byte round constant __C.  The second parameter must be a compile
+   time constant.  When optimizing, an always-inline function forwards
+   to the aeskeygenassist128 builtin; otherwise a macro passes the
+   operands to the builtin directly.  */
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_aeskeygenassist_si128 (__m128i __X, const int __C)
+{
+ return (__m128i) __builtin_ia32_aeskeygenassist128 ((__v2di)__X, __C);
+}
+#else
+#define _mm_aeskeygenassist_si128(X, C) \
+ ((__m128i) __builtin_ia32_aeskeygenassist128 ((__v2di)(__m128i)(X), \
+ (int)(C)))
+#endif
+
+/* Restore the saved target options, but only if this header was the one
+   that switched AES on (signalled by __DISABLE_AES__).  */
+#ifdef __DISABLE_AES__
+#undef __DISABLE_AES__
+#pragma GCC pop_options
+#endif /* __DISABLE_AES__ */
+
+/* PCLMUL */
+
+/* If PCLMUL is not already enabled, save the current options, enable
+   the "pclmul" target for the definitions that follow, and record in
+   __DISABLE_PCLMUL__ that the saved options must be restored later.  */
+#ifndef __PCLMUL__
+#pragma GCC push_options
+#pragma GCC target("pclmul")
+#define __DISABLE_PCLMUL__
+#endif /* __PCLMUL__ */
+
+/* Performs carry-less integer multiplication of 64-bit halves of
+   128-bit input operands.  The third parameter indicates which 64-bit
+   halves of the input parameters __X and __Y should be used.  It must
+   be a compile time constant.  When optimizing, an always-inline
+   function forwards to the pclmulqdq128 builtin; otherwise a macro
+   passes the operands to the builtin directly.  */
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_clmulepi64_si128 (__m128i __X, __m128i __Y, const int __I)
+{
+ return (__m128i) __builtin_ia32_pclmulqdq128 ((__v2di)__X,
+ (__v2di)__Y, __I);
+}
+#else
+#define _mm_clmulepi64_si128(X, Y, I) \
+ ((__m128i) __builtin_ia32_pclmulqdq128 ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), (int)(I)))
+#endif
+
+/* Restore the saved target options, but only if this header was the one
+   that switched PCLMUL on (signalled by __DISABLE_PCLMUL__).  */
+#ifdef __DISABLE_PCLMUL__
+#undef __DISABLE_PCLMUL__
+#pragma GCC pop_options
+#endif /* __DISABLE_PCLMUL__ */
+
+#endif /* _WMMINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/x86intrin.h b/lib/gcc/x86_64-linux-android/4.9/include/x86intrin.h
new file mode 100644
index 0000000..80e9e6f
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/x86intrin.h
@@ -0,0 +1,78 @@
+/* Copyright (C) 2008-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _X86INTRIN_H_INCLUDED
+#define _X86INTRIN_H_INCLUDED
+
+#include <ia32intrin.h>
+
+#include <mmintrin.h>
+
+#include <xmmintrin.h>
+
+#include <emmintrin.h>
+
+#include <pmmintrin.h>
+
+#include <tmmintrin.h>
+
+#include <ammintrin.h>
+
+#include <smmintrin.h>
+
+#include <wmmintrin.h>
+
+/* For including AVX instructions */
+#include <immintrin.h>
+
+#include <mm3dnow.h>
+
+#include <fma4intrin.h>
+
+#include <xopintrin.h>
+
+#include <lwpintrin.h>
+
+#include <bmiintrin.h>
+
+#include <bmi2intrin.h>
+
+#include <tbmintrin.h>
+
+#include <lzcntintrin.h>
+
+#include <popcntintrin.h>
+
+#include <rdseedintrin.h>
+
+#include <prfchwintrin.h>
+
+#include <fxsrintrin.h>
+
+#include <xsaveintrin.h>
+
+#include <xsaveoptintrin.h>
+
+#include <adxintrin.h>
+
+#endif /* _X86INTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/xmmintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/xmmintrin.h
new file mode 100644
index 0000000..a3824e7
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/xmmintrin.h
@@ -0,0 +1,1265 @@
+/* Copyright (C) 2002-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 9.0. */
+
+#ifndef _XMMINTRIN_H_INCLUDED
+#define _XMMINTRIN_H_INCLUDED
+
+/* We need type definitions from the MMX header file. */
+#include <mmintrin.h>
+
+/* Get _mm_malloc () and _mm_free (). */
+#include <mm_malloc.h>
+
+/* Selector values for _mm_prefetch, listed in ascending numeric order.
+ The low two bits are shared by each _MM_HINT_Tn / _MM_HINT_ETn pair. */
+enum _mm_hint
+{
+ _MM_HINT_NTA = 0,
+ _MM_HINT_T2 = 1,
+ _MM_HINT_T1 = 2,
+ _MM_HINT_T0 = 3,
+ /* _MM_HINT_ETn is _MM_HINT_Tn with bit 2 (0x4) set. */
+ _MM_HINT_ET1 = 6,
+ _MM_HINT_ET0 = 7
+};
+
+/* Loads one cache line from address P to a location "closer" to the
+ processor. The selector I specifies the type of prefetch operation:
+ bit 2 of I is forwarded as the second argument of __builtin_prefetch
+ and the low two bits of I as its third argument. */
+#ifdef __OPTIMIZE__
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_prefetch (const void *__P, enum _mm_hint __I)
+{
+ __builtin_prefetch (__P, (__I & 0x4) >> 2, __I & 0x3);
+}
+#else
+/* Macro form used when __OPTIMIZE__ is not defined. Every use of I is
+ parenthesized so that an argument built from operators that bind more
+ loosely than '&' (for example '|', '^' or '?:') is masked as a whole,
+ exactly as the inline function above does. */
+#define _mm_prefetch(P, I) \
+ __builtin_prefetch ((P), (((I) & 0x4) >> 2), ((I) & 0x3))
+#endif
+
+/* When SSE is not already enabled (__SSE__ undefined), save the current
+ target options and switch on "sse" for the definitions that follow.
+ __DISABLE_SSE__ records that this was done.
+ NOTE(review): the matching pop_options is presumably near the end of
+ the header, outside this view -- confirm. */
+#ifndef __SSE__
+#pragma GCC push_options
+#pragma GCC target("sse")
+#define __DISABLE_SSE__
+#endif /* __SSE__ */
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. __m128 is therefore a
+ 16-byte vector of float that carries __may_alias__. */
+typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
+
+/* Internal data types for implementing the intrinsics. __v4sf is the same
+ 16-byte vector of float, declared without __may_alias__. */
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+/* Create a selector for use with the SHUFPS instruction. fp0 lands in
+ bits 1:0 of the result, fp1 in bits 3:2, fp2 in bits 5:4 and fp3 in
+ bits 7:6. The arguments are not masked, so a value above 3 spills into
+ the neighbouring field. */
+#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
+ (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
+
+/* Bits in the MXCSR. */
+/* Exception state bits (bits 0-5); _MM_EXCEPT_MASK is the OR of all six. */
+#define _MM_EXCEPT_MASK 0x003f
+#define _MM_EXCEPT_INVALID 0x0001
+#define _MM_EXCEPT_DENORM 0x0002
+#define _MM_EXCEPT_DIV_ZERO 0x0004
+#define _MM_EXCEPT_OVERFLOW 0x0008
+#define _MM_EXCEPT_UNDERFLOW 0x0010
+#define _MM_EXCEPT_INEXACT 0x0020
+
+/* Exception mask bits (bits 7-12). Each value is the matching
+ _MM_EXCEPT_* value shifted left by 7; _MM_MASK_MASK is the OR of all
+ six. */
+#define _MM_MASK_MASK 0x1f80
+#define _MM_MASK_INVALID 0x0080
+#define _MM_MASK_DENORM 0x0100
+#define _MM_MASK_DIV_ZERO 0x0200
+#define _MM_MASK_OVERFLOW 0x0400
+#define _MM_MASK_UNDERFLOW 0x0800
+#define _MM_MASK_INEXACT 0x1000
+
+/* Rounding mode field (bits 13-14, selected by _MM_ROUND_MASK). */
+#define _MM_ROUND_MASK 0x6000
+#define _MM_ROUND_NEAREST 0x0000
+#define _MM_ROUND_DOWN 0x2000
+#define _MM_ROUND_UP 0x4000
+#define _MM_ROUND_TOWARD_ZERO 0x6000
+
+/* Flush-to-zero bit (bit 15). */
+#define _MM_FLUSH_ZERO_MASK 0x8000
+#define _MM_FLUSH_ZERO_ON 0x8000
+#define _MM_FLUSH_ZERO_OFF 0x0000
+
+/* Create an undefined vector. The local is initialized from itself, so
+ no particular value is ever stored in it.
+ NOTE(review): the self-initialization is presumably there to avoid an
+ uninitialized-variable warning -- confirm. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_undefined_ps (void)
+{
+ __m128 __Y = __Y;
+ return __Y;
+}
+
+/* Return a vector whose four SPFP elements are all 0.0f. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setzero_ps (void)
+{
+ const __m128 __zeros = __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f };
+ return __zeros;
+}
+
+/* Perform the respective operation on the lower SPFP (single-precision
+ floating-point) values of A and B; the upper three SPFP values are
+ passed through from A. */
+
+/* Add the lower SPFP values of A and B (__builtin_ia32_addss); the upper
+ three SPFP values are passed through from A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_addss (__lhs, __rhs);
+}
+
+/* Subtract the lower SPFP value of B from that of A
+ (__builtin_ia32_subss); the upper three values are passed through
+ from A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_subss (__lhs, __rhs);
+}
+
+/* Multiply the lower SPFP values of A and B (__builtin_ia32_mulss); the
+ upper three values are passed through from A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_mulss (__lhs, __rhs);
+}
+
+/* Divide the lower SPFP value of A by that of B (__builtin_ia32_divss);
+ the upper three values are passed through from A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_divss (__lhs, __rhs);
+}
+
+/* Square root of the lower SPFP value of A (__builtin_ia32_sqrtss); the
+ upper three values are passed through from A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_ss (__m128 __A)
+{
+ __v4sf __src = (__v4sf) __A;
+ return (__m128) __builtin_ia32_sqrtss (__src);
+}
+
+/* Apply __builtin_ia32_rcpss (the RCPSS operation) to the lower SPFP
+ value of A; the upper three values are passed through from A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp_ss (__m128 __A)
+{
+ __v4sf __src = (__v4sf) __A;
+ return (__m128) __builtin_ia32_rcpss (__src);
+}
+
+/* Apply __builtin_ia32_rsqrtss (the RSQRTSS operation) to the lower SPFP
+ value of A; the upper three values are passed through from A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt_ss (__m128 __A)
+{
+ __v4sf __src = (__v4sf) __A;
+ return (__m128) __builtin_ia32_rsqrtss (__src);
+}
+
+/* Minimum of the lower SPFP values of A and B (__builtin_ia32_minss);
+ the upper three values are passed through from A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_minss (__lhs, __rhs);
+}
+
+/* Maximum of the lower SPFP values of A and B (__builtin_ia32_maxss);
+ the upper three values are passed through from A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_maxss (__lhs, __rhs);
+}
+
+/* Perform the respective operation on the four SPFP values in A and B. */
+
+/* Add the four SPFP values of A and B element by element
+ (__builtin_ia32_addps). */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_ps (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_addps (__lhs, __rhs);
+}
+
+/* Subtract the four SPFP values of B from those of A element by element
+ (__builtin_ia32_subps). */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_ps (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_subps (__lhs, __rhs);
+}
+
+/* Multiply the four SPFP values of A and B element by element
+ (__builtin_ia32_mulps). */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_ps (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_mulps (__lhs, __rhs);
+}
+
+/* Divide the four SPFP values of A by those of B element by element
+ (__builtin_ia32_divps). */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_ps (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_divps (__lhs, __rhs);
+}
+
+/* Square root of each of the four SPFP values in A
+ (__builtin_ia32_sqrtps). */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_ps (__m128 __A)
+{
+ __v4sf __src = (__v4sf) __A;
+ return (__m128) __builtin_ia32_sqrtps (__src);
+}
+
+/* Apply __builtin_ia32_rcpps (the RCPPS operation) to the four SPFP
+ values in A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp_ps (__m128 __A)
+{
+ __v4sf __src = (__v4sf) __A;
+ return (__m128) __builtin_ia32_rcpps (__src);
+}
+
+/* Apply __builtin_ia32_rsqrtps (the RSQRTPS operation) to the four SPFP
+ values in A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt_ps (__m128 __A)
+{
+ __v4sf __src = (__v4sf) __A;
+ return (__m128) __builtin_ia32_rsqrtps (__src);
+}
+
+/* Element-wise minimum of the four SPFP values in A and B
+ (__builtin_ia32_minps). */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_ps (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_minps (__lhs, __rhs);
+}
+
+/* Element-wise maximum of the four SPFP values in A and B
+ (__builtin_ia32_maxps). */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_ps (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_maxps (__lhs, __rhs);
+}
+
+/* Perform logical bit-wise operations on 128-bit values. */
+
+/* Bit-wise AND of the 128-bit values A and B (__builtin_ia32_andps). */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_and_ps (__m128 __A, __m128 __B)
+{
+ __m128 __bits = __builtin_ia32_andps (__A, __B);
+ return __bits;
+}
+
+/* Bit-wise and-not of the 128-bit values A and B via
+ __builtin_ia32_andnps.
+ NOTE(review): presumably (~A) & B, as for the ANDNPS instruction --
+ confirm. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_andnot_ps (__m128 __A, __m128 __B)
+{
+ __m128 __bits = __builtin_ia32_andnps (__A, __B);
+ return __bits;
+}
+
+/* Bit-wise OR of the 128-bit values A and B (__builtin_ia32_orps). */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_or_ps (__m128 __A, __m128 __B)
+{
+ __m128 __bits = __builtin_ia32_orps (__A, __B);
+ return __bits;
+}
+
+/* Bit-wise XOR of the 128-bit values A and B (__builtin_ia32_xorps). */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_xor_ps (__m128 __A, __m128 __B)
+{
+ __m128 __bits = __builtin_ia32_xorps (__A, __B);
+ return __bits;
+}
+
+/* Perform a comparison on the lower SPFP values of A and B. If the
+ comparison is true, place a mask of all ones in the result, otherwise a
+ mask of zeros. The upper three SPFP values are passed through from A. */
+
+/* Lower-element "equal" comparison (__builtin_ia32_cmpeqss): the low
+ element of the result is a mask of all ones when true, zeros otherwise;
+ the upper three values are passed through from A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_cmpeqss (__lhs, __rhs);
+}
+
+/* Lower-element "less than" comparison (__builtin_ia32_cmpltss): the low
+ element of the result is a mask of all ones when true, zeros otherwise;
+ the upper three values are passed through from A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_cmpltss (__lhs, __rhs);
+}
+
+/* Lower-element "less than or equal" comparison
+ (__builtin_ia32_cmpless): the low element of the result is a mask of
+ all ones when true, zeros otherwise; the upper three values are passed
+ through from A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_cmpless (__lhs, __rhs);
+}
+
+/* Lower-element "greater than" comparison. It is evaluated as B < A
+ with __builtin_ia32_cmpltss (operands swapped); that result is then
+ combined with A through __builtin_ia32_movss so that the upper three
+ values of the result come from A rather than from B. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __first = (__v4sf) __A;
+ __v4sf __swapped_cmp = (__v4sf) __builtin_ia32_cmpltss ((__v4sf) __B, __first);
+ return (__m128) __builtin_ia32_movss (__first, __swapped_cmp);
+}
+
+/* Lower-element "greater than or equal" comparison. It is evaluated as
+ B <= A with __builtin_ia32_cmpless (operands swapped); that result is
+ then combined with A through __builtin_ia32_movss so that the upper
+ three values of the result come from A rather than from B. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __first = (__v4sf) __A;
+ __v4sf __swapped_cmp = (__v4sf) __builtin_ia32_cmpless ((__v4sf) __B, __first);
+ return (__m128) __builtin_ia32_movss (__first, __swapped_cmp);
+}
+
+/* Lower-element "not equal" comparison (__builtin_ia32_cmpneqss): the
+ low element of the result is a mask of all ones when true, zeros
+ otherwise; the upper three values are passed through from A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_cmpneqss (__lhs, __rhs);
+}
+
+/* Lower-element "not less than" comparison (__builtin_ia32_cmpnltss):
+ the low element of the result is a mask of all ones when true, zeros
+ otherwise; the upper three values are passed through from A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnlt_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_cmpnltss (__lhs, __rhs);
+}
+
+/* Lower-element "not less than or equal" comparison
+ (__builtin_ia32_cmpnless): the low element of the result is a mask of
+ all ones when true, zeros otherwise; the upper three values are passed
+ through from A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnle_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_cmpnless (__lhs, __rhs);
+}
+
+/* Lower-element "not greater than" comparison. It is evaluated as
+ "B not less than A" with __builtin_ia32_cmpnltss (operands swapped);
+ that result is then combined with A through __builtin_ia32_movss so
+ that the upper three values of the result come from A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpngt_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __first = (__v4sf) __A;
+ __v4sf __swapped_cmp = (__v4sf) __builtin_ia32_cmpnltss ((__v4sf) __B, __first);
+ return (__m128) __builtin_ia32_movss (__first, __swapped_cmp);
+}
+
+/* Lower-element "not greater than or equal" comparison. It is evaluated
+ as "B not less than or equal to A" with __builtin_ia32_cmpnless
+ (operands swapped); that result is then combined with A through
+ __builtin_ia32_movss so that the upper three values of the result come
+ from A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnge_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __first = (__v4sf) __A;
+ __v4sf __swapped_cmp = (__v4sf) __builtin_ia32_cmpnless ((__v4sf) __B, __first);
+ return (__m128) __builtin_ia32_movss (__first, __swapped_cmp);
+}
+
+/* Lower-element "ordered" comparison (__builtin_ia32_cmpordss): the low
+ element of the result is a mask of all ones when true, zeros otherwise;
+ the upper three values are passed through from A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpord_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_cmpordss (__lhs, __rhs);
+}
+
+/* Lower-element "unordered" comparison (__builtin_ia32_cmpunordss): the
+ low element of the result is a mask of all ones when true, zeros
+ otherwise; the upper three values are passed through from A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpunord_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_cmpunordss (__lhs, __rhs);
+}
+
+/* Perform a comparison on the four SPFP values of A and B. For each
+ element, if the comparison is true, place a mask of all ones in the
+ result, otherwise a mask of zeros. */
+
+/* Element-wise "equal" comparison of A and B (__builtin_ia32_cmpeqps);
+ each result element is a mask of all ones when true, zeros otherwise. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_ps (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_cmpeqps (__lhs, __rhs);
+}
+
+/* Element-wise "less than" comparison of A and B
+ (__builtin_ia32_cmpltps); each result element is a mask of all ones
+ when true, zeros otherwise. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_ps (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_cmpltps (__lhs, __rhs);
+}
+
+/* Element-wise "less than or equal" comparison of A and B
+ (__builtin_ia32_cmpleps); each result element is a mask of all ones
+ when true, zeros otherwise. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_ps (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_cmpleps (__lhs, __rhs);
+}
+
+/* Element-wise "greater than" comparison of A and B
+ (__builtin_ia32_cmpgtps); each result element is a mask of all ones
+ when true, zeros otherwise. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_ps (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_cmpgtps (__lhs, __rhs);
+}
+
+/* Element-wise "greater than or equal" comparison of A and B
+ (__builtin_ia32_cmpgeps); each result element is a mask of all ones
+ when true, zeros otherwise. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_ps (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_cmpgeps (__lhs, __rhs);
+}
+
+/* Element-wise "not equal" comparison of A and B
+ (__builtin_ia32_cmpneqps); each result element is a mask of all ones
+ when true, zeros otherwise. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_ps (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_cmpneqps (__lhs, __rhs);
+}
+
+/* Element-wise "not less than" comparison of A and B
+ (__builtin_ia32_cmpnltps); each result element is a mask of all ones
+ when true, zeros otherwise. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnlt_ps (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_cmpnltps (__lhs, __rhs);
+}
+
+/* Element-wise "not less than or equal" comparison of A and B
+ (__builtin_ia32_cmpnleps); each result element is a mask of all ones
+ when true, zeros otherwise. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnle_ps (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_cmpnleps (__lhs, __rhs);
+}
+
+/* Element-wise "not greater than" comparison of A and B
+ (__builtin_ia32_cmpngtps); each result element is a mask of all ones
+ when true, zeros otherwise. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpngt_ps (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_cmpngtps (__lhs, __rhs);
+}
+
+/* Element-wise "not greater than or equal" comparison of A and B
+ (__builtin_ia32_cmpngeps); each result element is a mask of all ones
+ when true, zeros otherwise. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnge_ps (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_cmpngeps (__lhs, __rhs);
+}
+
+/* Element-wise "ordered" comparison of A and B
+ (__builtin_ia32_cmpordps); each result element is a mask of all ones
+ when true, zeros otherwise. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpord_ps (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_cmpordps (__lhs, __rhs);
+}
+
+/* Element-wise "unordered" comparison of A and B
+ (__builtin_ia32_cmpunordps); each result element is a mask of all ones
+ when true, zeros otherwise. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpunord_ps (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_cmpunordps (__lhs, __rhs);
+}
+
+/* Compare the lower SPFP values of A and B and return 1 if true
+ and 0 if false. */
+
+/* Compare the lower SPFP values of A and B for "equal" with
+ __builtin_ia32_comieq; returns 1 if true and 0 if false. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comieq_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return __builtin_ia32_comieq (__lhs, __rhs);
+}
+
+/* Compare the lower SPFP values of A and B for "less than" with
+ __builtin_ia32_comilt; returns 1 if true and 0 if false. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comilt_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return __builtin_ia32_comilt (__lhs, __rhs);
+}
+
+/* Compare the lower SPFP values of A and B for "less than or equal" with
+ __builtin_ia32_comile; returns 1 if true and 0 if false. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comile_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return __builtin_ia32_comile (__lhs, __rhs);
+}
+
+/* Compare the lower SPFP values of A and B for "greater than" with
+ __builtin_ia32_comigt; returns 1 if true and 0 if false. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comigt_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return __builtin_ia32_comigt (__lhs, __rhs);
+}
+
+/* Compare the lower SPFP values of A and B for "greater than or equal"
+ with __builtin_ia32_comige; returns 1 if true and 0 if false. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comige_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return __builtin_ia32_comige (__lhs, __rhs);
+}
+
+/* Compare the lower SPFP values of A and B for "not equal" with
+ __builtin_ia32_comineq; returns 1 if true and 0 if false. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comineq_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return __builtin_ia32_comineq (__lhs, __rhs);
+}
+
+/* Compare the lower SPFP values of A and B for "equal" with
+ __builtin_ia32_ucomieq; returns 1 if true and 0 if false. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomieq_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return __builtin_ia32_ucomieq (__lhs, __rhs);
+}
+
+/* Compare the lower SPFP values of A and B for "less than" with
+ __builtin_ia32_ucomilt; returns 1 if true and 0 if false. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomilt_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return __builtin_ia32_ucomilt (__lhs, __rhs);
+}
+
+/* Compare the lower SPFP values of A and B for "less than or equal" with
+ __builtin_ia32_ucomile; returns 1 if true and 0 if false. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomile_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return __builtin_ia32_ucomile (__lhs, __rhs);
+}
+
+/* Compare the lower SPFP values of A and B for "greater than" with
+ __builtin_ia32_ucomigt; returns 1 if true and 0 if false. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomigt_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return __builtin_ia32_ucomigt (__lhs, __rhs);
+}
+
+/* Compare the lower SPFP values of A and B for "greater than or equal"
+ with __builtin_ia32_ucomige; returns 1 if true and 0 if false. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomige_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return __builtin_ia32_ucomige (__lhs, __rhs);
+}
+
+/* Compare the lower SPFP values of A and B for "not equal" with
+ __builtin_ia32_ucomineq; returns 1 if true and 0 if false. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomineq_ss (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return __builtin_ia32_ucomineq (__lhs, __rhs);
+}
+
+/* Convert the lower SPFP value of A to a 32-bit integer, rounding
+ according to the current rounding mode (__builtin_ia32_cvtss2si). */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_si32 (__m128 __A)
+{
+ __v4sf __src = (__v4sf) __A;
+ return __builtin_ia32_cvtss2si (__src);
+}
+
+/* Alternate name for _mm_cvtss_si32: convert the lower SPFP value of A
+ to a 32-bit integer with the same builtin that function uses. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_ss2si (__m128 __A)
+{
+ return __builtin_ia32_cvtss2si ((__v4sf) __A);
+}
+
+#ifdef __x86_64__
+/* Convert the lower SPFP value to a 64-bit integer according to the
+ current rounding mode. */
+
+/* Intel intrinsic. Converts the lower SPFP value of A to a long long
+ through __builtin_ia32_cvtss2si64. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_si64 (__m128 __A)
+{
+ __v4sf __src = (__v4sf) __A;
+ return __builtin_ia32_cvtss2si64 (__src);
+}
+
+/* Microsoft intrinsic. Same operation as _mm_cvtss_si64: converts the
+ lower SPFP value of A to a long long through
+ __builtin_ia32_cvtss2si64. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_si64x (__m128 __A)
+{
+ __v4sf __src = (__v4sf) __A;
+ return __builtin_ia32_cvtss2si64 (__src);
+}
+#endif
+
+/* Convert the two lower SPFP values of A to 32-bit integers according to
+ the current rounding mode and return them packed in an __m64
+ (__builtin_ia32_cvtps2pi). */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_pi32 (__m128 __A)
+{
+ __v4sf __src = (__v4sf) __A;
+ return (__m64) __builtin_ia32_cvtps2pi (__src);
+}
+
+/* Alternate name for _mm_cvtps_pi32: convert the two lower SPFP values
+ of A to packed 32-bit integers with the same builtin. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_ps2pi (__m128 __A)
+{
+ return (__m64) __builtin_ia32_cvtps2pi ((__v4sf) __A);
+}
+
+/* Truncate the lower SPFP value of A to a 32-bit integer
+ (__builtin_ia32_cvttss2si). */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_si32 (__m128 __A)
+{
+ __v4sf __src = (__v4sf) __A;
+ return __builtin_ia32_cvttss2si (__src);
+}
+
+/* Alternate name for _mm_cvttss_si32: truncate the lower SPFP value of A
+ to a 32-bit integer with the same builtin. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_ss2si (__m128 __A)
+{
+ return __builtin_ia32_cvttss2si ((__v4sf) __A);
+}
+
+#ifdef __x86_64__
+/* Truncate the lower SPFP value to a 64-bit integer. */
+
+/* Intel intrinsic. Truncates the lower SPFP value of A to a long long
+ through __builtin_ia32_cvttss2si64. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_si64 (__m128 __A)
+{
+ __v4sf __src = (__v4sf) __A;
+ return __builtin_ia32_cvttss2si64 (__src);
+}
+
+/* Microsoft intrinsic. Same operation as _mm_cvttss_si64: truncates the
+ lower SPFP value of A to a long long through
+ __builtin_ia32_cvttss2si64. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_si64x (__m128 __A)
+{
+ __v4sf __src = (__v4sf) __A;
+ return __builtin_ia32_cvttss2si64 (__src);
+}
+#endif
+
+/* Truncate the two lower SPFP values of A to 32-bit integers and return
+ them packed in an __m64 (__builtin_ia32_cvttps2pi). */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttps_pi32 (__m128 __A)
+{
+ __v4sf __src = (__v4sf) __A;
+ return (__m64) __builtin_ia32_cvttps2pi (__src);
+}
+
+/* Alternate name for _mm_cvttps_pi32: truncate the two lower SPFP values
+ of A to packed 32-bit integers with the same builtin. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_ps2pi (__m128 __A)
+{
+ return (__m64) __builtin_ia32_cvttps2pi ((__v4sf) __A);
+}
+
+/* Convert the int B to a SPFP value and insert it as element zero of A
+ (__builtin_ia32_cvtsi2ss). */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi32_ss (__m128 __A, int __B)
+{
+ __v4sf __dst = (__v4sf) __A;
+ return (__m128) __builtin_ia32_cvtsi2ss (__dst, __B);
+}
+
+/* Alternate name for _mm_cvtsi32_ss: convert the int B to a SPFP value
+ and insert it as element zero of A with the same builtin. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_si2ss (__m128 __A, int __B)
+{
+ return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B);
+}
+
+#ifdef __x86_64__
+/* Convert B to a SPFP value and insert it as element zero in A. */
+
+/* Intel intrinsic. Converts the long long B to a SPFP value and inserts
+ it as element zero of A (__builtin_ia32_cvtsi642ss). */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64_ss (__m128 __A, long long __B)
+{
+ __v4sf __dst = (__v4sf) __A;
+ return (__m128) __builtin_ia32_cvtsi642ss (__dst, __B);
+}
+
+/* Microsoft intrinsic. Same operation as _mm_cvtsi64_ss: converts the
+ long long B to a SPFP value and inserts it as element zero of A
+ (__builtin_ia32_cvtsi642ss). */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64x_ss (__m128 __A, long long __B)
+{
+ __v4sf __dst = (__v4sf) __A;
+ return (__m128) __builtin_ia32_cvtsi642ss (__dst, __B);
+}
+#endif
+
+/* Convert the two 32-bit values in B to SPFP form and insert them as the
+ two lower elements of A (__builtin_ia32_cvtpi2ps). */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpi32_ps (__m128 __A, __m64 __B)
+{
+ __v4sf __dst = (__v4sf) __A;
+ __v2si __ints = (__v2si) __B;
+ return (__m128) __builtin_ia32_cvtpi2ps (__dst, __ints);
+}
+
+/* Alternate name for _mm_cvtpi32_ps: convert the two 32-bit values in B
+ to SPFP form and insert them as the two lower elements of A with the
+ same builtin. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_pi2ps (__m128 __A, __m64 __B)
+{
+ return (__m128) __builtin_ia32_cvtpi2ps ((__v4sf) __A, (__v2si) __B);
+}
+
+/* Convert the four signed 16-bit values in A to SPFP form. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpi16_ps (__m64 __A)
+{
+ __v4hi __words = (__v4hi) __A;
+
+ /* Comparing zero against each word yields the mask that supplies the
+ missing sign bits in the unpacks below, so the widened values stay
+ signed. */
+ __v4hi __neg_mask = __builtin_ia32_pcmpgtw ((__v4hi) 0LL, __words);
+
+ /* Widen the four words to two pairs of doublewords. */
+ __v2si __lo_pair = (__v2si) __builtin_ia32_punpcklwd (__words, __neg_mask);
+ __v2si __hi_pair = (__v2si) __builtin_ia32_punpckhwd (__words, __neg_mask);
+
+ /* Convert the doublewords to floating point two at a time; each
+ conversion fills the lower two elements of its result. */
+ __v4sf __lo_conv = __builtin_ia32_cvtpi2ps ((__v4sf) _mm_setzero_ps (), __lo_pair);
+ __v4sf __hi_conv = __builtin_ia32_cvtpi2ps (__lo_conv, __hi_pair);
+
+ /* The lower two elements of __hi_conv become the upper two of the
+ result. */
+ return (__m128) __builtin_ia32_movlhps (__lo_conv, __hi_conv);
+}
+
+/* Convert the four unsigned 16-bit values in A to SPFP form. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpu16_ps (__m64 __A)
+{
+ __v4hi __words = (__v4hi) __A;
+
+ /* Widen the four words to two pairs of doublewords, interleaving with
+ zero so the values stay unsigned. */
+ __v2si __lo_pair = (__v2si) __builtin_ia32_punpcklwd (__words, (__v4hi) 0LL);
+ __v2si __hi_pair = (__v2si) __builtin_ia32_punpckhwd (__words, (__v4hi) 0LL);
+
+ /* Convert the doublewords to floating point two at a time; each
+ conversion fills the lower two elements of its result. */
+ __v4sf __lo_conv = __builtin_ia32_cvtpi2ps ((__v4sf) _mm_setzero_ps (), __lo_pair);
+ __v4sf __hi_conv = __builtin_ia32_cvtpi2ps (__lo_conv, __hi_pair);
+
+ /* The lower two elements of __hi_conv become the upper two of the
+ result. */
+ return (__m128) __builtin_ia32_movlhps (__lo_conv, __hi_conv);
+}
+
+/* Convert the low four signed 8-bit values in A to SPFP form. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpi8_ps (__m64 __A)
+{
+ __v8qi __bytes = (__v8qi) __A;
+
+ /* Comparing zero against each byte yields the mask that supplies the
+ missing sign bits in the unpack below, so the widened values stay
+ signed. */
+ __v8qi __neg_mask = __builtin_ia32_pcmpgtb ((__v8qi) 0LL, __bytes);
+
+ /* Widen the four low bytes to words, then reuse the 16-bit path. */
+ __m64 __as_words = (__m64) __builtin_ia32_punpcklbw (__bytes, __neg_mask);
+ return _mm_cvtpi16_ps (__as_words);
+}
+
+/* Convert the low four unsigned 8-bit values in A to SPFP form: the four
+ low bytes are interleaved with zero to form words, which are then
+ handed to _mm_cvtpu16_ps. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpu8_ps(__m64 __A)
+{
+ __m64 __as_words
+ = (__m64) __builtin_ia32_punpcklbw ((__v8qi) __A, (__v8qi) 0LL);
+ return _mm_cvtpu16_ps (__as_words);
+}
+
+/* Convert the four signed 32-bit values in A and B to SPFP form. The
+ pair from A ends up in the two lower elements of the result and the
+ pair from B in the two upper elements. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpi32x2_ps(__m64 __A, __m64 __B)
+{
+ /* Each conversion fills the lower two elements of its result. */
+ __v4sf __from_a = __builtin_ia32_cvtpi2ps ((__v4sf) _mm_setzero_ps (), (__v2si) __A);
+ __v4sf __from_b = __builtin_ia32_cvtpi2ps (__from_a, (__v2si) __B);
+
+ /* Move the lower two elements of __from_b above those of __from_a. */
+ return (__m128) __builtin_ia32_movlhps (__from_a, __from_b);
+}
+
+/* Convert the four SPFP values in A to four signed 16-bit integers. The
+ values are converted to 32-bit integers two at a time and then packed
+ with __builtin_ia32_packssdw. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_pi16(__m128 __A)
+{
+ __v4sf __src = (__v4sf) __A;
+
+ /* Bring the upper two elements down to the lower two positions so the
+ two-element conversion can reach them. */
+ __v4sf __upper = __builtin_ia32_movhlps (__src, __src);
+
+ __v2si __lo_pair = __builtin_ia32_cvtps2pi (__src);
+ __v2si __hi_pair = __builtin_ia32_cvtps2pi (__upper);
+ return (__m64) __builtin_ia32_packssdw (__lo_pair, __hi_pair);
+}
+
+/* Convert the four SPFP values in A to four signed 8-bit integers: A is
+ first converted to four 16-bit integers by _mm_cvtps_pi16, which are
+ then packed together with a zero vector by __builtin_ia32_packsswb. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_pi8(__m128 __A)
+{
+ const __v4hi __no_words = (__v4hi) 0LL;
+ __v4hi __words = (__v4hi) _mm_cvtps_pi16 (__A);
+ return (__m64) __builtin_ia32_packsswb (__words, __no_words);
+}
+
+/* Selects four specific SPFP values from A and B based on MASK, using
+ __builtin_ia32_shufps. MASK is typically built with _MM_SHUFFLE.
+ An inline function is provided when __OPTIMIZE__ is defined and a macro
+ with the same operands otherwise.
+ NOTE(review): the macro fallback is presumably needed because the mask
+ must reach the builtin as a compile-time constant -- confirm. */
+#ifdef __OPTIMIZE__
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_ps (__m128 __A, __m128 __B, int const __mask)
+{
+ return (__m128) __builtin_ia32_shufps ((__v4sf)__A, (__v4sf)__B, __mask);
+}
+#else
+#define _mm_shuffle_ps(A, B, MASK) \
+ ((__m128) __builtin_ia32_shufps ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(MASK)))
+#endif
+
+/* Select and interleave the upper two SPFP values from A and B
+ (__builtin_ia32_unpckhps). */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_ps (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_unpckhps (__lhs, __rhs);
+}
+
+/* Select and interleave the lower two SPFP values from A and B
+ (__builtin_ia32_unpcklps). */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_ps (__m128 __A, __m128 __B)
+{
+ __v4sf __lhs = (__v4sf) __A;
+ __v4sf __rhs = (__v4sf) __B;
+ return (__m128) __builtin_ia32_unpcklps (__lhs, __rhs);
+}
+
+/* Set the upper two SPFP values from the 64 bits of data loaded from P;
+ the lower two values are passed through from A
+ (__builtin_ia32_loadhps). */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadh_pi (__m128 __A, __m64 const *__P)
+{
+ const __v2sf *__src = (const __v2sf *) __P;
+ return (__m128) __builtin_ia32_loadhps ((__v4sf) __A, __src);
+}
+
+/* Store the upper two SPFP values of A into the 64 bits at P
+ (__builtin_ia32_storehps). */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeh_pi (__m64 *__P, __m128 __A)
+{
+ __v2sf *__dst = (__v2sf *) __P;
+ __builtin_ia32_storehps (__dst, (__v4sf) __A);
+}
+
+/* Move the upper two values of B into the lower two values of A
+ (__builtin_ia32_movhlps). */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movehl_ps (__m128 __A, __m128 __B)
+{
+ __v4sf __dst = (__v4sf) __A;
+ __v4sf __src = (__v4sf) __B;
+ return (__m128) __builtin_ia32_movhlps (__dst, __src);
+}
+
+/* Move the lower two values of B into the upper two values of A
+ (__builtin_ia32_movlhps). */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movelh_ps (__m128 __A, __m128 __B)
+{
+ __v4sf __dst = (__v4sf) __A;
+ __v4sf __src = (__v4sf) __B;
+ return (__m128) __builtin_ia32_movlhps (__dst, __src);
+}
+
+/* Set the lower two SPFP values from the 64 bits of data loaded from P;
+ the upper two values are passed through from A
+ (__builtin_ia32_loadlps). */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadl_pi (__m128 __A, __m64 const *__P)
+{
+ const __v2sf *__src = (const __v2sf *) __P;
+ return (__m128) __builtin_ia32_loadlps ((__v4sf) __A, __src);
+}
+
+/* Store the lower two SPFP values of A into the 64 bits at P
+ (__builtin_ia32_storelps). */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storel_pi (__m64 *__P, __m128 __A)
+{
+ __v2sf *__dst = (__v2sf *) __P;
+ __builtin_ia32_storelps (__dst, (__v4sf) __A);
+}
+
+/* Create a 4-bit mask from the most significant bits of the four SPFP
+ values in A (__builtin_ia32_movmskps). */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movemask_ps (__m128 __A)
+{
+ __v4sf __src = (__v4sf) __A;
+ return __builtin_ia32_movmskps (__src);
+}
+
+/* Return the contents of the control register, as read by
+ __builtin_ia32_stmxcsr. */
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getcsr (void)
+{
+ unsigned int __csr = __builtin_ia32_stmxcsr ();
+ return __csr;
+}
+
+/* Read the exception bits from the control register: the current
+ _mm_getcsr value restricted to _MM_EXCEPT_MASK. */
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_GET_EXCEPTION_STATE (void)
+{
+ unsigned int __csr = _mm_getcsr ();
+ return __csr & _MM_EXCEPT_MASK;
+}
+
+/* Read the exception mask bits from the control register: the current
+ _mm_getcsr value restricted to _MM_MASK_MASK. */
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_GET_EXCEPTION_MASK (void)
+{
+ unsigned int __csr = _mm_getcsr ();
+ return __csr & _MM_MASK_MASK;
+}
+
+/* Read the rounding mode field from the control register: the current
+ _mm_getcsr value restricted to _MM_ROUND_MASK. */
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_GET_ROUNDING_MODE (void)
+{
+ unsigned int __csr = _mm_getcsr ();
+ return __csr & _MM_ROUND_MASK;
+}
+
+/* Read the flush-to-zero bit from the control register: the current
+ _mm_getcsr value restricted to _MM_FLUSH_ZERO_MASK. */
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_GET_FLUSH_ZERO_MODE (void)
+{
+ unsigned int __csr = _mm_getcsr ();
+ return __csr & _MM_FLUSH_ZERO_MASK;
+}
+
+/* Set the control register to I. The whole 32-bit value is handed to
+ __builtin_ia32_ldmxcsr unchanged; no bits are masked or checked
+ here. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setcsr (unsigned int __I)
+{
+ __builtin_ia32_ldmxcsr (__I);
+}
+
+/* Set the exception bits in the control register: the bits covered by
+ _MM_EXCEPT_MASK are cleared and MASK is OR-ed in. MASK itself is not
+ restricted to that field. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_SET_EXCEPTION_STATE(unsigned int __mask)
+{
+ unsigned int __kept = _mm_getcsr () & ~_MM_EXCEPT_MASK;
+ _mm_setcsr (__kept | __mask);
+}
+
+/* Set the exception mask bits in the control register: the bits covered
+ by _MM_MASK_MASK are cleared and MASK is OR-ed in. MASK itself is not
+ restricted to that field. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_SET_EXCEPTION_MASK (unsigned int __mask)
+{
+ unsigned int __kept = _mm_getcsr () & ~_MM_MASK_MASK;
+ _mm_setcsr (__kept | __mask);
+}
+
+/* Set the rounding mode in the control register: the bits covered by
+ _MM_ROUND_MASK are cleared and MODE is OR-ed in. MODE itself is not
+ restricted to that field. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_SET_ROUNDING_MODE (unsigned int __mode)
+{
+ unsigned int __kept = _mm_getcsr () & ~_MM_ROUND_MASK;
+ _mm_setcsr (__kept | __mode);
+}
+
+/* Set the flush-to-zero mode in the control register: the bit covered by
+ _MM_FLUSH_ZERO_MASK is cleared and MODE is OR-ed in. MODE itself is
+ not restricted to that bit. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_SET_FLUSH_ZERO_MODE (unsigned int __mode)
+{
+ unsigned int __kept = _mm_getcsr () & ~_MM_FLUSH_ZERO_MASK;
+ _mm_setcsr (__kept | __mode);
+}
+
+/* Build a vector whose element 0 is F and whose other three elements
+ are 0.0f. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_ss (float __F)
+{
+ __v4sf __elems = __extension__ (__v4sf){ __F, 0.0f, 0.0f, 0.0f };
+ return (__m128) __elems;
+}
+
+/* Build a vector with F in each of its four elements. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_ps (float __F)
+{
+ __v4sf __elems = __extension__ (__v4sf){ __F, __F, __F, __F };
+ return (__m128) __elems;
+}
+
+/* Alternate name for _mm_set1_ps: build a vector with F in each of its
+ four elements. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_ps1 (float __F)
+{
+ return __extension__ (__m128)(__v4sf){ __F, __F, __F, __F };
+}
+
+/* Build a vector whose element 0 is the float read from P and whose
+ other three elements are 0.0f. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_ss (float const *__P)
+{
+ float __low = *__P;
+ return __extension__ (__m128)(__v4sf){ __low, 0.0f, 0.0f, 0.0f };
+}
+
+/* Build a vector with the float read from P in each of its four
+ elements. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load1_ps (float const *__P)
+{
+ float __value = *__P;
+ return __extension__ (__m128)(__v4sf){ __value, __value, __value, __value };
+}
+
+/* Alternate name for _mm_load1_ps: build a vector with the float read
+ from P in each of its four elements. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_ps1 (float const *__P)
+{
+ float __value = *__P;
+ return __extension__ (__m128)(__v4sf){ __value, __value, __value, __value };
+}
+
+/* Load four SPFP values from P by dereferencing it as a whole vector.
+ The address must be 16-byte aligned. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_ps (float const *__P)
+{
+ const __v4sf *__src = (const __v4sf *) __P;
+ return (__m128) *__src;
+}
+
+/* Load four SPFP values from P with __builtin_ia32_loadups. The address
+ need not be 16-byte aligned. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadu_ps (float const *__P)
+{
+ __v4sf __loaded = __builtin_ia32_loadups (__P);
+ return (__m128) __loaded;
+}
+
+/* Load four SPFP values from P in reverse order: the vector is read as
+ a whole and then shuffled with the selector _MM_SHUFFLE (0,1,2,3).
+ The address must be aligned. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadr_ps (float const *__P)
+{
+ const __v4sf *__src = (const __v4sf *) __P;
+ __v4sf __loaded = *__src;
+ return (__m128) __builtin_ia32_shufps (__loaded, __loaded, _MM_SHUFFLE (0,1,2,3));
+}
+
+/* Create the vector [Z Y X W]: the last argument W becomes element 0 and
+ the first argument Z becomes element 3. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_ps (const float __Z, const float __Y, const float __X, const float __W)
+{
+ __v4sf __elems = __extension__ (__v4sf){ __W, __X, __Y, __Z };
+ return (__m128) __elems;
+}
+
+/* Create the vector [W X Y Z]: the first argument Z becomes element 0
+ and the last argument W becomes element 3. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_ps (float __Z, float __Y, float __X, float __W)
+{
+ __v4sf __elems = __extension__ (__v4sf){ __Z, __Y, __X, __W };
+ return (__m128) __elems;
+}
+
+/* Stores the lower SPFP value: element 0 of A is extracted and written to *P. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_ss (float *__P, __m128 __A)
+{
+ *__P = __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0);
+}
+
+extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_f32 (__m128 __A)
+{ /* Returns element 0 of A as a scalar float. */
+ return __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0);
+}
+
+/* Store four SPFP values. The address must be 16-byte aligned. The write goes through __m128 (a __may_alias__ type in GCC) rather than __v4sf. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_ps (float *__P, __m128 __A)
+{
+ *(__m128 *)__P = __A;
+}
+
+/* Store four SPFP values via __builtin_ia32_storeups. The address need not be 16-byte aligned. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeu_ps (float *__P, __m128 __A)
+{
+ __builtin_ia32_storeups (__P, (__v4sf)__A);
+}
+
+/* Store the lower SPFP value across four words. The shuffled vector is written with _mm_storeu_ps, i.e. the unaligned store. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store1_ps (float *__P, __m128 __A)
+{
+ __v4sf __va = (__v4sf)__A;
+ __v4sf __tmp = __builtin_ia32_shufps (__va, __va, _MM_SHUFFLE (0,0,0,0)); /* both shuffle sources are __va */
+ _mm_storeu_ps (__P, __tmp);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_ps1 (float *__P, __m128 __A)
+{ /* Alternate name: forwards both arguments unchanged to _mm_store1_ps. */
+ _mm_store1_ps (__P, __A);
+}
+
+/* Store four SPFP values in reverse order. The address must be aligned; the shuffled vector is written with _mm_store_ps. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storer_ps (float *__P, __m128 __A)
+{
+ __v4sf __va = (__v4sf)__A;
+ __v4sf __tmp = __builtin_ia32_shufps (__va, __va, _MM_SHUFFLE (0,1,2,3)); /* both shuffle sources are __va */
+ _mm_store_ps (__P, __tmp);
+}
+
+/* Sets the low SPFP value of A from the low value of B, using __builtin_ia32_movss on both operands viewed as __v4sf. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_move_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_movss ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* Extracts one of the four words of A. The selector N must be immediate. Inline functions under __OPTIMIZE__, equivalent macros otherwise. */
+#ifdef __OPTIMIZE__
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extract_pi16 (__m64 const __A, int const __N)
+{
+ return __builtin_ia32_vec_ext_v4hi ((__v4hi)__A, __N);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pextrw (__m64 const __A, int const __N)
+{ /* Alternate name: forwards to _mm_extract_pi16. */
+ return _mm_extract_pi16 (__A, __N);
+}
+#else
+#define _mm_extract_pi16(A, N) \
+ ((int) __builtin_ia32_vec_ext_v4hi ((__v4hi)(__m64)(A), (int)(N)))
+
+#define _m_pextrw(A, N) _mm_extract_pi16(A, N)
+#endif
+
+/* Inserts word D into one of four words of A. The selector N must be
+ immediate. Inline functions under __OPTIMIZE__, equivalent macros otherwise. */
+#ifdef __OPTIMIZE__
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_pi16 (__m64 const __A, int const __D, int const __N)
+{
+ return (__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)__A, __D, __N);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pinsrw (__m64 const __A, int const __D, int const __N)
+{ /* Alternate name: forwards to _mm_insert_pi16. */
+ return _mm_insert_pi16 (__A, __D, __N);
+}
+#else
+#define _mm_insert_pi16(A, D, N) \
+ ((__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)(__m64)(A), \
+ (int)(D), (int)(N)))
+
+#define _m_pinsrw(A, D, N) _mm_insert_pi16(A, D, N)
+#endif
+
+/* Compute the element-wise maximum of signed 16-bit values (A and B viewed as __v4hi, pmaxsw builtin). */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_pi16 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_pmaxsw ((__v4hi)__A, (__v4hi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmaxsw (__m64 __A, __m64 __B)
+{ /* Alternate name: forwards to _mm_max_pi16. */
+ return _mm_max_pi16 (__A, __B);
+}
+
+/* Compute the element-wise maximum of unsigned 8-bit values (A and B viewed as __v8qi, pmaxub builtin). */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_pu8 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_pmaxub ((__v8qi)__A, (__v8qi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmaxub (__m64 __A, __m64 __B)
+{ /* Alternate name: forwards to _mm_max_pu8. */
+ return _mm_max_pu8 (__A, __B);
+}
+
+/* Compute the element-wise minimum of signed 16-bit values (A and B viewed as __v4hi, pminsw builtin). */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_pi16 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_pminsw ((__v4hi)__A, (__v4hi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pminsw (__m64 __A, __m64 __B)
+{ /* Alternate name: forwards to _mm_min_pi16. */
+ return _mm_min_pi16 (__A, __B);
+}
+
+/* Compute the element-wise minimum of unsigned 8-bit values (A and B viewed as __v8qi, pminub builtin). */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_pu8 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_pminub ((__v8qi)__A, (__v8qi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pminub (__m64 __A, __m64 __B)
+{ /* Alternate name: forwards to _mm_min_pu8. */
+ return _mm_min_pu8 (__A, __B);
+}
+
+/* Create an 8-bit mask of the signs of 8-bit values (A viewed as __v8qi, pmovmskb builtin); returned as int. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movemask_pi8 (__m64 __A)
+{
+ return __builtin_ia32_pmovmskb ((__v8qi)__A);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmovmskb (__m64 __A)
+{ /* Alternate name: forwards to _mm_movemask_pi8. */
+ return _mm_movemask_pi8 (__A);
+}
+
+/* Multiply four unsigned 16-bit values in A by four unsigned 16-bit values
+ in B and produce the high 16 bits of the 32-bit results (pmulhuw builtin). */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulhi_pu16 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_pmulhuw ((__v4hi)__A, (__v4hi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmulhuw (__m64 __A, __m64 __B)
+{ /* Alternate name: forwards to _mm_mulhi_pu16. */
+ return _mm_mulhi_pu16 (__A, __B);
+}
+
+/* Return a combination of the four 16-bit values in A. The selector
+ must be an immediate. Inline functions under __OPTIMIZE__, equivalent macros otherwise. */
+#ifdef __OPTIMIZE__
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_pi16 (__m64 __A, int const __N)
+{
+ return (__m64) __builtin_ia32_pshufw ((__v4hi)__A, __N);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pshufw (__m64 __A, int const __N)
+{ /* Alternate name: forwards to _mm_shuffle_pi16. */
+ return _mm_shuffle_pi16 (__A, __N);
+}
+#else
+#define _mm_shuffle_pi16(A, N) \
+ ((__m64) __builtin_ia32_pshufw ((__v4hi)(__m64)(A), (int)(N)))
+
+#define _m_pshufw(A, N) _mm_shuffle_pi16 (A, N)
+#endif
+
+/* Conditionally store byte elements of A into P. The high bit of each
+ byte in the selector N determines whether the corresponding byte from
+ A is stored. A and N are viewed as __v8qi for the maskmovq builtin. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P)
+{
+ __builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_maskmovq (__m64 __A, __m64 __N, char *__P)
+{ /* Alternate name: forwards to _mm_maskmove_si64. */
+ _mm_maskmove_si64 (__A, __N, __P);
+}
+
+/* Compute the rounded averages of the unsigned 8-bit values in A and B (both viewed as __v8qi, pavgb builtin). */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_avg_pu8 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_pavgb ((__v8qi)__A, (__v8qi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pavgb (__m64 __A, __m64 __B)
+{ /* Alternate name: forwards to _mm_avg_pu8. */
+ return _mm_avg_pu8 (__A, __B);
+}
+
+/* Compute the rounded averages of the unsigned 16-bit values in A and B (both viewed as __v4hi, pavgw builtin). */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_avg_pu16 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_pavgw ((__v4hi)__A, (__v4hi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pavgw (__m64 __A, __m64 __B)
+{ /* Alternate name: forwards to _mm_avg_pu16. */
+ return _mm_avg_pu16 (__A, __B);
+}
+
+/* Compute the sum of the absolute differences of the unsigned 8-bit
+ values in A and B. Return the value in the lower 16-bit word; the
+ upper words are cleared. Implemented with the psadbw builtin. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sad_pu8 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_psadbw ((__v8qi)__A, (__v8qi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psadbw (__m64 __A, __m64 __B)
+{ /* Alternate name: forwards to _mm_sad_pu8. */
+ return _mm_sad_pu8 (__A, __B);
+}
+
+/* Stores the data in A to the address P without polluting the caches (movntq builtin; P and A are passed as unsigned long long). */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_pi (__m64 *__P, __m64 __A)
+{
+ __builtin_ia32_movntq ((unsigned long long *)__P, (unsigned long long)__A);
+}
+
+/* Stores the four SPFP values in A to P without polluting the caches (movntps builtin). The address must be 16-byte aligned. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_ps (float *__P, __m128 __A)
+{
+ __builtin_ia32_movntps (__P, (__v4sf)__A);
+}
+
+/* Store fence: guarantees that every preceding store is globally visible
+ before any subsequent store. Takes no arguments and returns nothing. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sfence (void)
+{
+ __builtin_ia32_sfence ();
+}
+
+/* The execution of the next instruction is delayed by an
+ implementation-specific amount of time. The instruction does not modify
+ the architectural state. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_pause (void)
+{
+ __builtin_ia32_pause ();
+}
+
+/* Transpose the 4x4 matrix composed of row[0-3] in place: each row argument is read once into a temporary, then assigned its transposed value. */
+#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
+do { \
+ __v4sf __r0 = (row0), __r1 = (row1), __r2 = (row2), __r3 = (row3); \
+ __v4sf __t0 = __builtin_ia32_unpcklps (__r0, __r1); \
+ __v4sf __t1 = __builtin_ia32_unpcklps (__r2, __r3); \
+ __v4sf __t2 = __builtin_ia32_unpckhps (__r0, __r1); \
+ __v4sf __t3 = __builtin_ia32_unpckhps (__r2, __r3); \
+ (row0) = __builtin_ia32_movlhps (__t0, __t1); \
+ (row1) = __builtin_ia32_movhlps (__t1, __t0); \
+ (row2) = __builtin_ia32_movlhps (__t2, __t3); \
+ (row3) = __builtin_ia32_movhlps (__t3, __t2); \
+} while (0)
+
+/* For backward source compatibility. */
+# include <emmintrin.h>
+
+#ifdef __DISABLE_SSE__
+#undef __DISABLE_SSE__
+#pragma GCC pop_options
+#endif /* __DISABLE_SSE__ */
+
+#endif /* _XMMINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/xopintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/xopintrin.h
new file mode 100644
index 0000000..cc82bc5
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/xopintrin.h
@@ -0,0 +1,844 @@
+/* Copyright (C) 2007-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _X86INTRIN_H_INCLUDED
+# error "Never use <xopintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef _XOPMMINTRIN_H_INCLUDED
+#define _XOPMMINTRIN_H_INCLUDED
+
+#include <fma4intrin.h>
+
+#ifndef __XOP__
+#pragma GCC push_options
+#pragma GCC target("xop")
+#define __DISABLE_XOP__
+#endif /* __XOP__ */
+
+/* Integer multiply/add instructions. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
+{ /* Casts A, B and C to __v8hi for the vpmacssww builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpmacssww ((__v8hi)__A,(__v8hi)__B, (__v8hi)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
+{ /* Casts A, B and C to __v8hi for the vpmacsww builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpmacsww ((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
+{ /* Casts A and B to __v8hi but C to __v4si for the vpmacsswd builtin. */
+ return (__m128i) __builtin_ia32_vpmacsswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
+{ /* Casts A and B to __v8hi but C to __v4si for the vpmacswd builtin. */
+ return (__m128i) __builtin_ia32_vpmacswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
+{ /* Casts A, B and C to __v4si for the vpmacssdd builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpmacssdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
+{ /* Casts A, B and C to __v4si for the vpmacsdd builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpmacsdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
+{ /* Casts A and B to __v4si but C to __v2di for the vpmacssdql builtin. */
+ return (__m128i) __builtin_ia32_vpmacssdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
+{ /* Casts A and B to __v4si but C to __v2di for the vpmacsdql builtin. */
+ return (__m128i) __builtin_ia32_vpmacsdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
+{ /* Casts A and B to __v4si but C to __v2di for the vpmacssdqh builtin. */
+ return (__m128i) __builtin_ia32_vpmacssdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
+{ /* Casts A and B to __v4si but C to __v2di for the vpmacsdqh builtin. */
+ return (__m128i) __builtin_ia32_vpmacsdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
+{ /* Casts A and B to __v8hi but C to __v4si for the vpmadcsswd builtin. */
+ return (__m128i) __builtin_ia32_vpmadcsswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
+{ /* Casts A and B to __v8hi but C to __v4si for the vpmadcswd builtin. */
+ return (__m128i) __builtin_ia32_vpmadcswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
+}
+
+/* Packed Integer Horizontal Add and Subtract */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddw_epi8(__m128i __A)
+{ /* Casts A to __v16qi for the vphaddbw builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vphaddbw ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddd_epi8(__m128i __A)
+{ /* Casts A to __v16qi for the vphaddbd builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vphaddbd ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epi8(__m128i __A)
+{ /* Casts A to __v16qi for the vphaddbq builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vphaddbq ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddd_epi16(__m128i __A)
+{ /* Casts A to __v8hi for the vphaddwd builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vphaddwd ((__v8hi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epi16(__m128i __A)
+{ /* Casts A to __v8hi for the vphaddwq builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vphaddwq ((__v8hi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epi32(__m128i __A)
+{ /* Casts A to __v4si for the vphadddq builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vphadddq ((__v4si)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddw_epu8(__m128i __A)
+{ /* Casts A to __v16qi for the vphaddubw builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vphaddubw ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddd_epu8(__m128i __A)
+{ /* Casts A to __v16qi for the vphaddubd builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vphaddubd ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epu8(__m128i __A)
+{ /* Casts A to __v16qi for the vphaddubq builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vphaddubq ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddd_epu16(__m128i __A)
+{ /* Casts A to __v8hi for the vphadduwd builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vphadduwd ((__v8hi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epu16(__m128i __A)
+{ /* Casts A to __v8hi for the vphadduwq builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vphadduwq ((__v8hi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epu32(__m128i __A)
+{ /* Casts A to __v4si for the vphaddudq builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vphaddudq ((__v4si)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsubw_epi8(__m128i __A)
+{ /* Casts A to __v16qi for the vphsubbw builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vphsubbw ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsubd_epi16(__m128i __A)
+{ /* Casts A to __v8hi for the vphsubwd builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vphsubwd ((__v8hi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsubq_epi32(__m128i __A)
+{ /* Casts A to __v4si for the vphsubdq builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vphsubdq ((__v4si)__A);
+}
+
+/* Vector conditional move and permute */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
+{ /* Passes A, B and C to the vpcmov builtin without any element-type cast. */
+ return (__m128i) __builtin_ia32_vpcmov (__A, __B, __C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
+{ /* Casts A, B and C to __v16qi for the vpperm builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpperm ((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
+}
+
+/* Packed Integer Rotates and Shifts
+ Rotates - Non-Immediate form */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rot_epi8(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v16qi for the vprotb builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vprotb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rot_epi16(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v8hi for the vprotw builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vprotw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rot_epi32(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v4si for the vprotd builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vprotd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rot_epi64(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v2di for the vprotq builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vprotq ((__v2di)__A, (__v2di)__B);
+}
+
+/* Rotates - Immediate form: inline functions under __OPTIMIZE__, equivalent macros otherwise. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roti_epi8(__m128i __A, const int __B)
+{ /* A is cast to __v16qi; the int __B is passed through to the vprotbi builtin. */
+ return (__m128i) __builtin_ia32_vprotbi ((__v16qi)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roti_epi16(__m128i __A, const int __B)
+{ /* A is cast to __v8hi; the int __B is passed through to the vprotwi builtin. */
+ return (__m128i) __builtin_ia32_vprotwi ((__v8hi)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roti_epi32(__m128i __A, const int __B)
+{ /* A is cast to __v4si; the int __B is passed through to the vprotdi builtin. */
+ return (__m128i) __builtin_ia32_vprotdi ((__v4si)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roti_epi64(__m128i __A, const int __B)
+{ /* A is cast to __v2di; the int __B is passed through to the vprotqi builtin. */
+ return (__m128i) __builtin_ia32_vprotqi ((__v2di)__A, __B);
+}
+#else
+#define _mm_roti_epi8(A, N) \
+ ((__m128i) __builtin_ia32_vprotbi ((__v16qi)(__m128i)(A), (int)(N)))
+#define _mm_roti_epi16(A, N) \
+ ((__m128i) __builtin_ia32_vprotwi ((__v8hi)(__m128i)(A), (int)(N)))
+#define _mm_roti_epi32(A, N) \
+ ((__m128i) __builtin_ia32_vprotdi ((__v4si)(__m128i)(A), (int)(N)))
+#define _mm_roti_epi64(A, N) \
+ ((__m128i) __builtin_ia32_vprotqi ((__v2di)(__m128i)(A), (int)(N)))
+#endif
+
+/* Shifts */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shl_epi8(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v16qi for the vpshlb builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpshlb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shl_epi16(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v8hi for the vpshlw builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpshlw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shl_epi32(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v4si for the vpshld builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpshld ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shl_epi64(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v2di for the vpshlq builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpshlq ((__v2di)__A, (__v2di)__B);
+}
+
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha_epi8(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v16qi for the vpshab builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpshab ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha_epi16(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v8hi for the vpshaw builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpshaw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha_epi32(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v4si for the vpshad builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpshad ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha_epi64(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v2di for the vpshaq builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpshaq ((__v2di)__A, (__v2di)__B);
+}
+
+/* Compare and Predicate Generation
+ pcom (integer, unsigned bytes) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epu8(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v16qi for the vpcomltub builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomltub ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epu8(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v16qi for the vpcomleub builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomleub ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epu8(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v16qi for the vpcomgtub builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomgtub ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epu8(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v16qi for the vpcomgeub builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomgeub ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epu8(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v16qi for the vpcomequb builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomequb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epu8(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v16qi for the vpcomnequb builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomnequb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epu8(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v16qi for the vpcomfalseub builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomfalseub ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epu8(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v16qi for the vpcomtrueub builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomtrueub ((__v16qi)__A, (__v16qi)__B);
+}
+
+/* pcom (integer, unsigned words) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epu16(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v8hi for the vpcomltuw builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomltuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epu16(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v8hi for the vpcomleuw builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomleuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epu16(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v8hi for the vpcomgtuw builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomgtuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epu16(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v8hi for the vpcomgeuw builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomgeuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epu16(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v8hi for the vpcomequw builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomequw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epu16(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v8hi for the vpcomnequw builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomnequw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epu16(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v8hi for the vpcomfalseuw builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomfalseuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epu16(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v8hi for the vpcomtrueuw builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomtrueuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* pcom (integer, unsigned double words) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epu32(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v4si for the vpcomltud builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomltud ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epu32(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v4si for the vpcomleud builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomleud ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epu32(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v4si for the vpcomgtud builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomgtud ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epu32(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v4si for the vpcomgeud builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomgeud ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epu32(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v4si for the vpcomequd builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomequd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epu32(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v4si for the vpcomnequd builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomnequd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epu32(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v4si for the vpcomfalseud builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomfalseud ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epu32(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v4si for the vpcomtrueud builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomtrueud ((__v4si)__A, (__v4si)__B);
+}
+
+/* pcom (integer, unsigned quad words) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epu64(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v2di for the vpcomltuq builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomltuq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epu64(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v2di for the vpcomleuq builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomleuq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epu64(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v2di for the vpcomgtuq builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomgtuq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epu64(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v2di for the vpcomgeuq builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomgeuq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epu64(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v2di for the vpcomequq builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomequq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epu64(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v2di for the vpcomnequq builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomnequq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epu64(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v2di for the vpcomfalseuq builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomfalseuq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epu64(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v2di for the vpcomtrueuq builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomtrueuq ((__v2di)__A, (__v2di)__B);
+}
+
+/*pcom (integer, signed bytes) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epi8(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v16qi for the vpcomltb builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomltb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epi8(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v16qi for the vpcomleb builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomleb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epi8(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v16qi for the vpcomgtb builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomgtb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epi8(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v16qi for the vpcomgeb builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomgeb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epi8(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v16qi for the vpcomeqb builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomeqb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epi8(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v16qi for the vpcomneqb builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomneqb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epi8(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v16qi for the vpcomfalseb builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomfalseb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epi8(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v16qi for the vpcomtrueb builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomtrueb ((__v16qi)__A, (__v16qi)__B);
+}
+
+/*pcom (integer, signed words) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epi16(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v8hi for the vpcomltw builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomltw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epi16(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v8hi for the vpcomlew builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomlew ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epi16(__m128i __A, __m128i __B)
+{ /* Casts A and B to __v8hi for the vpcomgtw builtin; result returned as __m128i. */
+ return (__m128i) __builtin_ia32_vpcomgtw ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* Lane-wise "greater than or equal" comparison of the signed words of
+   __A and __B (viewed as __v8hi), performed by the vpcomgew builtin.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epi16(__m128i __A, __m128i __B)
+{
+  __v8hi __lhs = (__v8hi)__A;
+  __v8hi __rhs = (__v8hi)__B;
+  return (__m128i) __builtin_ia32_vpcomgew (__lhs, __rhs);
+}
+
+/* Lane-wise equality comparison of the words of __A and __B (viewed
+   as __v8hi), performed by the vpcomeqw builtin.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epi16(__m128i __A, __m128i __B)
+{
+  __v8hi __lhs = (__v8hi)__A;
+  __v8hi __rhs = (__v8hi)__B;
+  return (__m128i) __builtin_ia32_vpcomeqw (__lhs, __rhs);
+}
+
+/* Lane-wise inequality comparison of the words of __A and __B (viewed
+   as __v8hi), performed by the vpcomneqw builtin.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epi16(__m128i __A, __m128i __B)
+{
+  __v8hi __lhs = (__v8hi)__A;
+  __v8hi __rhs = (__v8hi)__B;
+  return (__m128i) __builtin_ia32_vpcomneqw (__lhs, __rhs);
+}
+
+/* Apply the "false" predicate of the word comparison family to __A
+   and __B (viewed as __v8hi) through the vpcomfalsew builtin.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epi16(__m128i __A, __m128i __B)
+{
+  __v8hi __lhs = (__v8hi)__A;
+  __v8hi __rhs = (__v8hi)__B;
+  return (__m128i) __builtin_ia32_vpcomfalsew (__lhs, __rhs);
+}
+
+/* Apply the "true" predicate of the word comparison family to __A
+   and __B (viewed as __v8hi) through the vpcomtruew builtin.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epi16(__m128i __A, __m128i __B)
+{
+  __v8hi __lhs = (__v8hi)__A;
+  __v8hi __rhs = (__v8hi)__B;
+  return (__m128i) __builtin_ia32_vpcomtruew (__lhs, __rhs);
+}
+
+/*pcom (integer, signed double words) */
+
+/* Lane-wise "less than" comparison of the signed double words of __A
+   and __B (viewed as __v4si), performed by the vpcomltd builtin.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epi32(__m128i __A, __m128i __B)
+{
+  __v4si __lhs = (__v4si)__A;
+  __v4si __rhs = (__v4si)__B;
+  return (__m128i) __builtin_ia32_vpcomltd (__lhs, __rhs);
+}
+
+/* Lane-wise "less than or equal" comparison of the signed double words
+   of __A and __B (viewed as __v4si), performed by the vpcomled
+   builtin.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epi32(__m128i __A, __m128i __B)
+{
+  __v4si __lhs = (__v4si)__A;
+  __v4si __rhs = (__v4si)__B;
+  return (__m128i) __builtin_ia32_vpcomled (__lhs, __rhs);
+}
+
+/* Lane-wise "greater than" comparison of the signed double words of
+   __A and __B (viewed as __v4si), performed by the vpcomgtd builtin.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epi32(__m128i __A, __m128i __B)
+{
+  __v4si __lhs = (__v4si)__A;
+  __v4si __rhs = (__v4si)__B;
+  return (__m128i) __builtin_ia32_vpcomgtd (__lhs, __rhs);
+}
+
+/* Lane-wise "greater than or equal" comparison of the signed double
+   words of __A and __B (viewed as __v4si), performed by the vpcomged
+   builtin.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epi32(__m128i __A, __m128i __B)
+{
+  __v4si __lhs = (__v4si)__A;
+  __v4si __rhs = (__v4si)__B;
+  return (__m128i) __builtin_ia32_vpcomged (__lhs, __rhs);
+}
+
+/* Lane-wise equality comparison of the double words of __A and __B
+   (viewed as __v4si), performed by the vpcomeqd builtin.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epi32(__m128i __A, __m128i __B)
+{
+  __v4si __lhs = (__v4si)__A;
+  __v4si __rhs = (__v4si)__B;
+  return (__m128i) __builtin_ia32_vpcomeqd (__lhs, __rhs);
+}
+
+/* Lane-wise inequality comparison of the double words of __A and __B
+   (viewed as __v4si), performed by the vpcomneqd builtin.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epi32(__m128i __A, __m128i __B)
+{
+  __v4si __lhs = (__v4si)__A;
+  __v4si __rhs = (__v4si)__B;
+  return (__m128i) __builtin_ia32_vpcomneqd (__lhs, __rhs);
+}
+
+/* Apply the "false" predicate of the double-word comparison family to
+   __A and __B (viewed as __v4si) through the vpcomfalsed builtin.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epi32(__m128i __A, __m128i __B)
+{
+  __v4si __lhs = (__v4si)__A;
+  __v4si __rhs = (__v4si)__B;
+  return (__m128i) __builtin_ia32_vpcomfalsed (__lhs, __rhs);
+}
+
+/* Apply the "true" predicate of the double-word comparison family to
+   __A and __B (viewed as __v4si) through the vpcomtrued builtin.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epi32(__m128i __A, __m128i __B)
+{
+  __v4si __lhs = (__v4si)__A;
+  __v4si __rhs = (__v4si)__B;
+  return (__m128i) __builtin_ia32_vpcomtrued (__lhs, __rhs);
+}
+
+/*pcom (integer, signed quad words) */
+
+/* Lane-wise "less than" comparison of the signed quad words of __A and
+   __B (viewed as __v2di), performed by the vpcomltq builtin.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epi64(__m128i __A, __m128i __B)
+{
+  __v2di __lhs = (__v2di)__A;
+  __v2di __rhs = (__v2di)__B;
+  return (__m128i) __builtin_ia32_vpcomltq (__lhs, __rhs);
+}
+
+/* Lane-wise "less than or equal" comparison of the signed quad words
+   of __A and __B (viewed as __v2di), performed by the vpcomleq
+   builtin.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epi64(__m128i __A, __m128i __B)
+{
+  __v2di __lhs = (__v2di)__A;
+  __v2di __rhs = (__v2di)__B;
+  return (__m128i) __builtin_ia32_vpcomleq (__lhs, __rhs);
+}
+
+/* Lane-wise "greater than" comparison of the signed quad words of __A
+   and __B (viewed as __v2di), performed by the vpcomgtq builtin.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epi64(__m128i __A, __m128i __B)
+{
+  __v2di __lhs = (__v2di)__A;
+  __v2di __rhs = (__v2di)__B;
+  return (__m128i) __builtin_ia32_vpcomgtq (__lhs, __rhs);
+}
+
+/* Lane-wise "greater than or equal" comparison of the signed quad
+   words of __A and __B (viewed as __v2di), performed by the vpcomgeq
+   builtin.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epi64(__m128i __A, __m128i __B)
+{
+  __v2di __lhs = (__v2di)__A;
+  __v2di __rhs = (__v2di)__B;
+  return (__m128i) __builtin_ia32_vpcomgeq (__lhs, __rhs);
+}
+
+/* Lane-wise equality comparison of the quad words of __A and __B
+   (viewed as __v2di), performed by the vpcomeqq builtin.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epi64(__m128i __A, __m128i __B)
+{
+  __v2di __lhs = (__v2di)__A;
+  __v2di __rhs = (__v2di)__B;
+  return (__m128i) __builtin_ia32_vpcomeqq (__lhs, __rhs);
+}
+
+/* Lane-wise inequality comparison of the quad words of __A and __B
+   (viewed as __v2di), performed by the vpcomneqq builtin.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epi64(__m128i __A, __m128i __B)
+{
+  __v2di __lhs = (__v2di)__A;
+  __v2di __rhs = (__v2di)__B;
+  return (__m128i) __builtin_ia32_vpcomneqq (__lhs, __rhs);
+}
+
+/* Apply the "false" predicate of the quad-word comparison family to
+   __A and __B (viewed as __v2di) through the vpcomfalseq builtin.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epi64(__m128i __A, __m128i __B)
+{
+  __v2di __lhs = (__v2di)__A;
+  __v2di __rhs = (__v2di)__B;
+  return (__m128i) __builtin_ia32_vpcomfalseq (__lhs, __rhs);
+}
+
+/* Apply the "true" predicate of the quad-word comparison family to
+   __A and __B (viewed as __v2di) through the vpcomtrueq builtin.  */
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epi64(__m128i __A, __m128i __B)
+{
+  __v2di __lhs = (__v2di)__A;
+  __v2di __rhs = (__v2di)__B;
+  return (__m128i) __builtin_ia32_vpcomtrueq (__lhs, __rhs);
+}
+
+/* FRCZ */
+
+/* Run the FRCZ builtin for packed single precision (vfrczps) on __A,
+   viewed as a __v4sf vector, and hand back the result as __m128.  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_frcz_ps (__m128 __A)
+{
+  __v4sf __src = (__v4sf)__A;
+  return (__m128) __builtin_ia32_vfrczps (__src);
+}
+
+/* Run the FRCZ builtin for packed double precision (vfrczpd) on __A,
+   viewed as a __v2df vector, and hand back the result as __m128d.  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_frcz_pd (__m128d __A)
+{
+  __v2df __src = (__v2df)__A;
+  return (__m128d) __builtin_ia32_vfrczpd (__src);
+}
+
+/* Scalar single-precision FRCZ: apply the vfrczss builtin to __B, then
+   combine that result with __A through the movss builtin.
+   NOTE(review): movss presumably keeps the upper elements of __A and
+   takes only the low element from the FRCZ result -- confirm against
+   the builtin's documentation.  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_frcz_ss (__m128 __A, __m128 __B)
+{
+  __v4sf __frcz = (__v4sf) __builtin_ia32_vfrczss ((__v4sf)__B);
+  return (__m128) __builtin_ia32_movss ((__v4sf)__A, __frcz);
+}
+
+/* Scalar double-precision FRCZ: apply the vfrczsd builtin to __B, then
+   combine that result with __A through the movsd builtin.
+   NOTE(review): movsd presumably keeps the upper element of __A and
+   takes only the low element from the FRCZ result -- confirm against
+   the builtin's documentation.  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_frcz_sd (__m128d __A, __m128d __B)
+{
+  __v2df __frcz = (__v2df) __builtin_ia32_vfrczsd ((__v2df)__B);
+  return (__m128d) __builtin_ia32_movsd ((__v2df)__A, __frcz);
+}
+
+/* 256-bit variant: run the vfrczps256 builtin on __A, viewed as a
+   __v8sf vector, and hand back the result as __m256.  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_frcz_ps (__m256 __A)
+{
+  __v8sf __src = (__v8sf)__A;
+  return (__m256) __builtin_ia32_vfrczps256 (__src);
+}
+
+/* 256-bit variant: run the vfrczpd256 builtin on __A, viewed as a
+   __v4df vector, and hand back the result as __m256d.  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_frcz_pd (__m256d __A)
+{
+  __v4df __src = (__v4df)__A;
+  return (__m256d) __builtin_ia32_vfrczpd256 (__src);
+}
+
+/* PERMIL2 */
+
+#ifdef __OPTIMIZE__
+/* Two-source permute of packed doubles: __X and __Y are the data
+   operands, __C the integer selector vector and __I an integer control
+   value; all four are forwarded to the vpermil2pd builtin.  __I is
+   passed through untouched so that it can still be folded to a
+   constant after inlining.  */
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permute2_pd (__m128d __X, __m128d __Y, __m128i __C, const int __I)
+{
+  __v2df __src1 = (__v2df)__X;
+  __v2df __src2 = (__v2df)__Y;
+  __v2di __sel = (__v2di)__C;
+  return (__m128d) __builtin_ia32_vpermil2pd (__src1, __src2, __sel, __I);
+}
+
+/* 256-bit two-source permute of packed doubles: __X and __Y are the
+   data operands, __C the integer selector vector and __I an integer
+   control value; all four are forwarded to the vpermil2pd256 builtin.
+   __I is passed through untouched so that it can still be folded to a
+   constant after inlining.  */
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute2_pd (__m256d __X, __m256d __Y, __m256i __C, const int __I)
+{
+  __v4df __src1 = (__v4df)__X;
+  __v4df __src2 = (__v4df)__Y;
+  __v4di __sel = (__v4di)__C;
+  return (__m256d) __builtin_ia32_vpermil2pd256 (__src1, __src2, __sel, __I);
+}
+
+/* Two-source permute of packed floats: __X and __Y are the data
+   operands, __C the integer selector vector and __I an integer control
+   value; all four are forwarded to the vpermil2ps builtin.  __I is
+   passed through untouched so that it can still be folded to a
+   constant after inlining.  */
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permute2_ps (__m128 __X, __m128 __Y, __m128i __C, const int __I)
+{
+  __v4sf __src1 = (__v4sf)__X;
+  __v4sf __src2 = (__v4sf)__Y;
+  __v4si __sel = (__v4si)__C;
+  return (__m128) __builtin_ia32_vpermil2ps (__src1, __src2, __sel, __I);
+}
+
+/* 256-bit two-source permute of packed floats: __X and __Y are the
+   data operands, __C the integer selector vector and __I an integer
+   control value; all four are forwarded to the vpermil2ps256 builtin.
+   __I is passed through untouched so that it can still be folded to a
+   constant after inlining.  */
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I)
+{
+  __v8sf __src1 = (__v8sf)__X;
+  __v8sf __src2 = (__v8sf)__Y;
+  __v8si __sel = (__v8si)__C;
+  return (__m256) __builtin_ia32_vpermil2ps256 (__src1, __src2, __sel, __I);
+}
+#else
+/* Macro form of _mm_permute2_pd used when __OPTIMIZE__ is not defined.
+   X and Y are __m128d data operands, C is the __m128i selector and I
+   the integer control value.  C is cast through __m128i (not __m128d)
+   so the macro agrees with the inline definition's parameter type.  */
+#define _mm_permute2_pd(X, Y, C, I) \
+ ((__m128d) __builtin_ia32_vpermil2pd ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (__v2di)(__m128i)(C), \
+ (int)(I)))
+
+/* Macro form of _mm256_permute2_pd used when __OPTIMIZE__ is not
+   defined.  X and Y are __m256d data operands, C is the __m256i
+   selector and I the integer control value.  C is cast through __m256i
+   (not __m256d) so the macro agrees with the inline definition's
+   parameter type.  */
+#define _mm256_permute2_pd(X, Y, C, I) \
+ ((__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), \
+ (__v4di)(__m256i)(C), \
+ (int)(I)))
+
+/* Macro form of _mm_permute2_ps used when __OPTIMIZE__ is not defined.
+   X and Y are __m128 data operands, C is the __m128i selector and I
+   the integer control value.  C is cast through __m128i (not __m128)
+   so the macro agrees with the inline definition's parameter type.  */
+#define _mm_permute2_ps(X, Y, C, I) \
+ ((__m128) __builtin_ia32_vpermil2ps ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), \
+ (__v4si)(__m128i)(C), \
+ (int)(I)))
+
+/* Macro form of _mm256_permute2_ps used when __OPTIMIZE__ is not
+   defined.  X and Y are __m256 data operands, C is the __m256i
+   selector and I the integer control value.  C is cast through __m256i
+   (not __m256) so the macro agrees with the inline definition's
+   parameter type.  */
+#define _mm256_permute2_ps(X, Y, C, I) \
+ ((__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), \
+ (__v8si)(__m256i)(C), \
+ (int)(I)))
+#endif /* __OPTIMIZE__ */
+
+#ifdef __DISABLE_XOP__
+#undef __DISABLE_XOP__
+#pragma GCC pop_options
+#endif /* __DISABLE_XOP__ */
+
+#endif /* _XOPMMINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/xsaveintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/xsaveintrin.h
new file mode 100644
index 0000000..47be25f
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/xsaveintrin.h
@@ -0,0 +1,72 @@
+/* Copyright (C) 2012-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* #if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED */
+/* # error "Never use <xsaveintrin.h> directly; include <x86intrin.h> instead." */
+/* #endif */
+
+#ifndef _XSAVEINTRIN_H_INCLUDED
+#define _XSAVEINTRIN_H_INCLUDED
+
+#ifndef __XSAVE__
+#pragma GCC push_options
+#pragma GCC target("xsave")
+#define __DISABLE_XSAVE__
+#endif /* __XSAVE__ */
+
+/* Forward __P and __M to the xsave builtin.
+   NOTE(review): __P is presumably the save area and __M the feature
+   mask -- confirm against the XSAVE instruction documentation.
+   The builtin is called as a plain statement: ISO C does not allow
+   "return <expression>;" in a function returning void.  */
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_xsave (void *__P, long long __M)
+{
+  __builtin_ia32_xsave (__P, __M);
+}
+
+/* Forward __P and __M to the xrstor builtin.
+   NOTE(review): __P is presumably the save area and __M the feature
+   mask -- confirm against the XRSTOR instruction documentation.
+   The builtin is called as a plain statement: ISO C does not allow
+   "return <expression>;" in a function returning void.  */
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_xrstor (void *__P, long long __M)
+{
+  __builtin_ia32_xrstor (__P, __M);
+}
+
+#ifdef __x86_64__
+/* Forward __P and __M to the xsave64 builtin (only compiled when
+   __x86_64__ is defined).
+   NOTE(review): __P is presumably the save area and __M the feature
+   mask -- confirm against the XSAVE64 instruction documentation.
+   The builtin is called as a plain statement: ISO C does not allow
+   "return <expression>;" in a function returning void.  */
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_xsave64 (void *__P, long long __M)
+{
+  __builtin_ia32_xsave64 (__P, __M);
+}
+
+/* Forward __P and __M to the xrstor64 builtin (only compiled when
+   __x86_64__ is defined).
+   NOTE(review): __P is presumably the save area and __M the feature
+   mask -- confirm against the XRSTOR64 instruction documentation.
+   The builtin is called as a plain statement: ISO C does not allow
+   "return <expression>;" in a function returning void.  */
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_xrstor64 (void *__P, long long __M)
+{
+  __builtin_ia32_xrstor64 (__P, __M);
+}
+#endif
+
+#ifdef __DISABLE_XSAVE__
+#undef __DISABLE_XSAVE__
+#pragma GCC pop_options
+#endif /* __DISABLE_XSAVE__ */
+
+#endif /* _XSAVEINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/xsaveoptintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/xsaveoptintrin.h
new file mode 100644
index 0000000..d7534b4
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/xsaveoptintrin.h
@@ -0,0 +1,58 @@
+/* Copyright (C) 2012-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* #if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED */
+/* # error "Never use <xsaveoptintrin.h> directly; include <x86intrin.h> instead." */
+/* #endif */
+
+#ifndef _XSAVEOPTINTRIN_H_INCLUDED
+#define _XSAVEOPTINTRIN_H_INCLUDED
+
+#ifndef __XSAVEOPT__
+#pragma GCC push_options
+#pragma GCC target("xsaveopt")
+#define __DISABLE_XSAVEOPT__
+#endif /* __XSAVEOPT__ */
+
+/* Forward __P and __M to the xsaveopt builtin.
+   NOTE(review): __P is presumably the save area and __M the feature
+   mask -- confirm against the XSAVEOPT instruction documentation.
+   The builtin is called as a plain statement: ISO C does not allow
+   "return <expression>;" in a function returning void.  */
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_xsaveopt (void *__P, long long __M)
+{
+  __builtin_ia32_xsaveopt (__P, __M);
+}
+
+#ifdef __x86_64__
+/* Forward __P and __M to the xsaveopt64 builtin (only compiled when
+   __x86_64__ is defined).
+   NOTE(review): __P is presumably the save area and __M the feature
+   mask -- confirm against the XSAVEOPT64 instruction documentation.
+   The builtin is called as a plain statement: ISO C does not allow
+   "return <expression>;" in a function returning void.  */
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_xsaveopt64 (void *__P, long long __M)
+{
+  __builtin_ia32_xsaveopt64 (__P, __M);
+}
+#endif
+
+#ifdef __DISABLE_XSAVEOPT__
+#undef __DISABLE_XSAVEOPT__
+#pragma GCC pop_options
+#endif /* __DISABLE_XSAVEOPT__ */
+
+#endif /* _XSAVEOPTINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/include/xtestintrin.h b/lib/gcc/x86_64-linux-android/4.9/include/xtestintrin.h
new file mode 100644
index 0000000..ba79e5c
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/include/xtestintrin.h
@@ -0,0 +1,51 @@
+/* Copyright (C) 2012-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+# error "Never use <xtestintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _XTESTINTRIN_H_INCLUDED
+#define _XTESTINTRIN_H_INCLUDED
+
+#ifndef __RTM__
+#pragma GCC push_options
+#pragma GCC target("rtm")
+#define __DISABLE_RTM__
+#endif /* __RTM__ */
+
+/* Query the xtest builtin.  The result is non-zero when the
+   instruction executes inside an RTM or HLE code region and zero
+   otherwise.  */
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_xtest (void)
+{
+  int __in_region = __builtin_ia32_xtest ();
+  return __in_region;
+}
+
+#ifdef __DISABLE_RTM__
+#undef __DISABLE_RTM__
+#pragma GCC pop_options
+#endif /* __DISABLE_RTM__ */
+
+#endif /* _XTESTINTRIN_H_INCLUDED */
diff --git a/lib/gcc/x86_64-linux-android/4.9/libgcc.a b/lib/gcc/x86_64-linux-android/4.9/libgcc.a
new file mode 100644
index 0000000..eca4a22
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/libgcc.a
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/libgcov.a b/lib/gcc/x86_64-linux-android/4.9/libgcov.a
new file mode 100644
index 0000000..971c904
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/libgcov.a
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/x32/crtbegin.o b/lib/gcc/x86_64-linux-android/4.9/x32/crtbegin.o
new file mode 100644
index 0000000..2842d16
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/x32/crtbegin.o
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/x32/crtbeginS.o b/lib/gcc/x86_64-linux-android/4.9/x32/crtbeginS.o
new file mode 100644
index 0000000..db7fb2e
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/x32/crtbeginS.o
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/x32/crtbeginT.o b/lib/gcc/x86_64-linux-android/4.9/x32/crtbeginT.o
new file mode 100644
index 0000000..2842d16
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/x32/crtbeginT.o
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/x32/crtend.o b/lib/gcc/x86_64-linux-android/4.9/x32/crtend.o
new file mode 100644
index 0000000..05fe6f7
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/x32/crtend.o
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/x32/crtendS.o b/lib/gcc/x86_64-linux-android/4.9/x32/crtendS.o
new file mode 100644
index 0000000..05fe6f7
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/x32/crtendS.o
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/x32/crtfastmath.o b/lib/gcc/x86_64-linux-android/4.9/x32/crtfastmath.o
new file mode 100644
index 0000000..6041f6b
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/x32/crtfastmath.o
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/x32/crtprec32.o b/lib/gcc/x86_64-linux-android/4.9/x32/crtprec32.o
new file mode 100644
index 0000000..a876c44
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/x32/crtprec32.o
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/x32/crtprec64.o b/lib/gcc/x86_64-linux-android/4.9/x32/crtprec64.o
new file mode 100644
index 0000000..788d7ce
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/x32/crtprec64.o
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/x32/crtprec80.o b/lib/gcc/x86_64-linux-android/4.9/x32/crtprec80.o
new file mode 100644
index 0000000..74ca907
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/x32/crtprec80.o
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/x32/libgcc.a b/lib/gcc/x86_64-linux-android/4.9/x32/libgcc.a
new file mode 100644
index 0000000..33fd595
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/x32/libgcc.a
Binary files differ
diff --git a/lib/gcc/x86_64-linux-android/4.9/x32/libgcov.a b/lib/gcc/x86_64-linux-android/4.9/x32/libgcov.a
new file mode 100644
index 0000000..973c66e
--- /dev/null
+++ b/lib/gcc/x86_64-linux-android/4.9/x32/libgcov.a
Binary files differ
diff --git a/lib64/libiberty.a b/lib64/libiberty.a
new file mode 100644
index 0000000..7e81e85
--- /dev/null
+++ b/lib64/libiberty.a
Binary files differ
diff --git a/libexec/gcc/x86_64-linux-android/4.9/cc1 b/libexec/gcc/x86_64-linux-android/4.9/cc1
new file mode 100755
index 0000000..8224c06
--- /dev/null
+++ b/libexec/gcc/x86_64-linux-android/4.9/cc1
Binary files differ
diff --git a/libexec/gcc/x86_64-linux-android/4.9/cc1plus b/libexec/gcc/x86_64-linux-android/4.9/cc1plus
new file mode 100755
index 0000000..c6716ee
--- /dev/null
+++ b/libexec/gcc/x86_64-linux-android/4.9/cc1plus
Binary files differ
diff --git a/libexec/gcc/x86_64-linux-android/4.9/collect2 b/libexec/gcc/x86_64-linux-android/4.9/collect2
new file mode 100755
index 0000000..35cbdc1
--- /dev/null
+++ b/libexec/gcc/x86_64-linux-android/4.9/collect2
Binary files differ
diff --git a/libexec/gcc/x86_64-linux-android/4.9/liblto_plugin.so b/libexec/gcc/x86_64-linux-android/4.9/liblto_plugin.so
new file mode 120000
index 0000000..f25ba88
--- /dev/null
+++ b/libexec/gcc/x86_64-linux-android/4.9/liblto_plugin.so
@@ -0,0 +1 @@
+liblto_plugin.so.0.0.0 \ No newline at end of file
diff --git a/libexec/gcc/x86_64-linux-android/4.9/liblto_plugin.so.0 b/libexec/gcc/x86_64-linux-android/4.9/liblto_plugin.so.0
new file mode 120000
index 0000000..f25ba88
--- /dev/null
+++ b/libexec/gcc/x86_64-linux-android/4.9/liblto_plugin.so.0
@@ -0,0 +1 @@
+liblto_plugin.so.0.0.0 \ No newline at end of file
diff --git a/libexec/gcc/x86_64-linux-android/4.9/liblto_plugin.so.0.0.0 b/libexec/gcc/x86_64-linux-android/4.9/liblto_plugin.so.0.0.0
new file mode 100755
index 0000000..d3a5b0e
--- /dev/null
+++ b/libexec/gcc/x86_64-linux-android/4.9/liblto_plugin.so.0.0.0
Binary files differ
diff --git a/libexec/gcc/x86_64-linux-android/4.9/lto-wrapper b/libexec/gcc/x86_64-linux-android/4.9/lto-wrapper
new file mode 100755
index 0000000..741fa53
--- /dev/null
+++ b/libexec/gcc/x86_64-linux-android/4.9/lto-wrapper
Binary files differ
diff --git a/libexec/gcc/x86_64-linux-android/4.9/lto1 b/libexec/gcc/x86_64-linux-android/4.9/lto1
new file mode 100755
index 0000000..9ff0c07
--- /dev/null
+++ b/libexec/gcc/x86_64-linux-android/4.9/lto1
Binary files differ
diff --git a/libexec/gcc/x86_64-linux-android/4.9/plugin/gengtype b/libexec/gcc/x86_64-linux-android/4.9/plugin/gengtype
new file mode 100755
index 0000000..88acc84
--- /dev/null
+++ b/libexec/gcc/x86_64-linux-android/4.9/plugin/gengtype
Binary files differ
diff --git a/share/gdb/python/gdb/__init__.py b/share/gdb/python/gdb/__init__.py
new file mode 100644
index 0000000..6311583
--- /dev/null
+++ b/share/gdb/python/gdb/__init__.py
@@ -0,0 +1,124 @@
+# Copyright (C) 2010-2013 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import traceback
+import os
+import sys
+import _gdb
+
+if sys.version_info[0] > 2:
+ # Python 3 moved "reload"
+ from imp import reload
+
+from _gdb import *
+
+class _GdbFile (object):
+ # These two are needed in Python 3
+ encoding = "UTF-8"
+ errors = "strict"
+
+ def close(self):
+ # Do nothing.
+ return None
+
+ def isatty(self):
+ return False
+
+ def writelines(self, iterable):
+ for line in iterable:
+ self.write(line)
+
+ def flush(self):
+ flush()
+
+class GdbOutputFile (_GdbFile):
+ def write(self, s):
+ write(s, stream=STDOUT)
+
+sys.stdout = GdbOutputFile()
+
+class GdbOutputErrorFile (_GdbFile):
+ def write(self, s):
+ write(s, stream=STDERR)
+
+sys.stderr = GdbOutputErrorFile()
+
+# Default prompt hook does nothing.
+prompt_hook = None
+
+# Ensure that sys.argv is set to something.
+# We do not use PySys_SetArgvEx because it did not appear until 2.6.6.
+sys.argv = ['']
+
+# Initial pretty printers.
+pretty_printers = []
+
+# Initial type printers.
+type_printers = []
+
+# Convenience variable to GDB's python directory
+PYTHONDIR = os.path.dirname(os.path.dirname(__file__))
+
+# Auto-load all functions/commands.
+
+# Packages to auto-load.
+
+packages = [
+ 'function',
+ 'command'
+]
+
+# pkgutil.iter_modules is not available prior to Python 2.6. Instead,
+# manually iterate the list, collating the Python files in each module
+# path. Construct the module name, and import.
+
+def auto_load_packages():
+ for package in packages:
+ location = os.path.join(os.path.dirname(__file__), package)
+ if os.path.exists(location):
+ py_files = filter(lambda x: x.endswith('.py')
+ and x != '__init__.py',
+ os.listdir(location))
+
+ for py_file in py_files:
+ # Construct from foo.py, gdb.module.foo
+ modname = "%s.%s.%s" % ( __name__, package, py_file[:-3] )
+ try:
+ if modname in sys.modules:
+ # reload modules with duplicate names
+ reload(__import__(modname))
+ else:
+ __import__(modname)
+ except:
+ sys.stderr.write (traceback.format_exc() + "\n")
+
+auto_load_packages()
+
+def GdbSetPythonDirectory(dir):
+ """Update sys.path, reload gdb and auto-load packages."""
+ global PYTHONDIR
+
+ try:
+ sys.path.remove(PYTHONDIR)
+ except ValueError:
+ pass
+ sys.path.insert(0, dir)
+
+ PYTHONDIR = dir
+
+ # note that reload overwrites the gdb module without deleting existing
+ # attributes
+ reload(__import__(__name__))
+ auto_load_packages()
diff --git a/share/gdb/python/gdb/command/__init__.py b/share/gdb/python/gdb/command/__init__.py
new file mode 100644
index 0000000..21eaef8
--- /dev/null
+++ b/share/gdb/python/gdb/command/__init__.py
@@ -0,0 +1,16 @@
+# Copyright (C) 2010-2013 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
diff --git a/share/gdb/python/gdb/command/explore.py b/share/gdb/python/gdb/command/explore.py
new file mode 100644
index 0000000..dd77875
--- /dev/null
+++ b/share/gdb/python/gdb/command/explore.py
@@ -0,0 +1,760 @@
+# GDB 'explore' command.
+# Copyright (C) 2012-2013 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""Implementation of the GDB 'explore' command using the GDB Python API."""
+
+import gdb
+import sys
+
+if sys.version_info[0] > 2:
+ # Python 3 renamed raw_input to input
+ raw_input = input
+
+class Explorer(object):
+ """Internal class which invokes other explorers."""
+
+ # This map is filled by the Explorer.init_env() function
+ type_code_to_explorer_map = { }
+
+ _SCALAR_TYPE_LIST = (
+ gdb.TYPE_CODE_CHAR,
+ gdb.TYPE_CODE_INT,
+ gdb.TYPE_CODE_BOOL,
+ gdb.TYPE_CODE_FLT,
+ gdb.TYPE_CODE_VOID,
+ gdb.TYPE_CODE_ENUM,
+ )
+
+ @staticmethod
+ def guard_expr(expr):
+ length = len(expr)
+ guard = False
+
+ if expr[0] == '(' and expr[length-1] == ')':
+ pass
+ else:
+ i = 0
+ while i < length:
+ c = expr[i]
+ if (c == '_' or ('a' <= c and c <= 'z') or
+ ('A' <= c and c <= 'Z') or ('0' <= c and c <= '9')):
+ pass
+ else:
+ guard = True
+ break
+ i += 1
+
+ if guard:
+ return "(" + expr + ")"
+ else:
+ return expr
+
+ @staticmethod
+ def explore_expr(expr, value, is_child):
+ """Main function to explore an expression value.
+
+ Arguments:
+ expr: The expression string that is being explored.
+ value: The gdb.Value value of the expression.
+ is_child: Boolean value to indicate if the expression is a child.
+ An expression is a child if it is derived from the main
+ expression entered by the user. For example, if the user
+ entered an expression which evaluates to a struct, then
+ when exploring the fields of the struct, is_child is set
+ to True internally.
+
+ Returns:
+ No return value.
+ """
+ type_code = value.type.code
+ if type_code in Explorer.type_code_to_explorer_map:
+ explorer_class = Explorer.type_code_to_explorer_map[type_code]
+ while explorer_class.explore_expr(expr, value, is_child):
+ pass
+ else:
+ print ("Explorer for type '%s' not yet available.\n" %
+ str(value.type))
+
+ @staticmethod
+ def explore_type(name, datatype, is_child):
+ """Main function to explore a data type.
+
+ Arguments:
+ name: The string representing the path to the data type being
+ explored.
+ datatype: The gdb.Type value of the data type being explored.
+ is_child: Boolean value to indicate if the name is a child.
+ A name is a child if it is derived from the main name
+ entered by the user. For example, if the user entered
+ the name of struct type, then when exploring the fields
+ of the struct, is_child is set to True internally.
+
+ Returns:
+ No return value.
+ """
+ type_code = datatype.code
+ if type_code in Explorer.type_code_to_explorer_map:
+ explorer_class = Explorer.type_code_to_explorer_map[type_code]
+ while explorer_class.explore_type(name, datatype, is_child):
+ pass
+ else:
+ print ("Explorer for type '%s' not yet available.\n" %
+ str(datatype))
+
+ @staticmethod
+ def init_env():
+ """Initializes the Explorer environment.
+ This function should be invoked before starting any exploration. If
+ invoked before an exploration, it need not be invoked for subsequent
+ explorations.
+ """
+ Explorer.type_code_to_explorer_map = {
+ gdb.TYPE_CODE_CHAR : ScalarExplorer,
+ gdb.TYPE_CODE_INT : ScalarExplorer,
+ gdb.TYPE_CODE_BOOL : ScalarExplorer,
+ gdb.TYPE_CODE_FLT : ScalarExplorer,
+ gdb.TYPE_CODE_VOID : ScalarExplorer,
+ gdb.TYPE_CODE_ENUM : ScalarExplorer,
+ gdb.TYPE_CODE_STRUCT : CompoundExplorer,
+ gdb.TYPE_CODE_UNION : CompoundExplorer,
+ gdb.TYPE_CODE_PTR : PointerExplorer,
+ gdb.TYPE_CODE_REF : ReferenceExplorer,
+ gdb.TYPE_CODE_TYPEDEF : TypedefExplorer,
+ gdb.TYPE_CODE_ARRAY : ArrayExplorer
+ }
+
+ @staticmethod
+ def is_scalar_type(type):
+ """Checks whether a type is a scalar type.
+ A type is a scalar type of its type is
+ gdb.TYPE_CODE_CHAR or
+ gdb.TYPE_CODE_INT or
+ gdb.TYPE_CODE_BOOL or
+ gdb.TYPE_CODE_FLT or
+ gdb.TYPE_CODE_VOID or
+ gdb.TYPE_CODE_ENUM.
+
+ Arguments:
+ type: The type to be checked.
+
+ Returns:
+ 'True' if 'type' is a scalar type. 'False' otherwise.
+ """
+ return type.code in Explorer._SCALAR_TYPE_LIST
+
+ @staticmethod
+ def return_to_parent_value():
+ """A utility function which prints that the current exploration session
+ is returning to the parent value. Useful when exploring values.
+ """
+ print ("\nReturning to parent value...\n")
+
+ @staticmethod
+ def return_to_parent_value_prompt():
+ """A utility function which prompts the user to press the 'enter' key
+ so that the exploration session can shift back to the parent value.
+ Useful when exploring values.
+ """
+ raw_input("\nPress enter to return to parent value: ")
+
+ @staticmethod
+ def return_to_enclosing_type():
+ """A utility function which prints that the current exploration session
+ is returning to the enclosing type. Useful when exploring types.
+ """
+ print ("\nReturning to enclosing type...\n")
+
+ @staticmethod
+ def return_to_enclosing_type_prompt():
+ """A utility function which prompts the user to press the 'enter' key
+ so that the exploration session can shift back to the enclosing type.
+ Useful when exploring types.
+ """
+ raw_input("\nPress enter to return to enclosing type: ")
+
+
+class ScalarExplorer(object):
+ """Internal class used to explore scalar values."""
+
+ @staticmethod
+ def explore_expr(expr, value, is_child):
+ """Function to explore scalar values.
+ See Explorer.explore_expr and Explorer.is_scalar_type for more
+ information.
+ """
+ print ("'%s' is a scalar value of type '%s'." %
+ (expr, value.type))
+ print ("%s = %s" % (expr, str(value)))
+
+ if is_child:
+ Explorer.return_to_parent_value_prompt()
+ Explorer.return_to_parent_value()
+
+ return False
+
+ @staticmethod
+ def explore_type(name, datatype, is_child):
+ """Function to explore scalar types.
+ See Explorer.explore_type and Explorer.is_scalar_type for more
+ information.
+ """
+ if datatype.code == gdb.TYPE_CODE_ENUM:
+ if is_child:
+ print ("%s is of an enumerated type '%s'." %
+ (name, str(datatype)))
+ else:
+ print ("'%s' is an enumerated type." % name)
+ else:
+ if is_child:
+ print ("%s is of a scalar type '%s'." %
+ (name, str(datatype)))
+ else:
+ print ("'%s' is a scalar type." % name)
+
+ if is_child:
+ Explorer.return_to_enclosing_type_prompt()
+ Explorer.return_to_enclosing_type()
+
+ return False
+
+
+class PointerExplorer(object):
+ """Internal class used to explore pointer values."""
+
+ @staticmethod
+ def explore_expr(expr, value, is_child):
+ """Function to explore pointer values.
+ See Explorer.explore_expr for more information.
+ """
+ print ("'%s' is a pointer to a value of type '%s'" %
+ (expr, str(value.type.target())))
+ option = raw_input("Continue exploring it as a pointer to a single "
+ "value [y/n]: ")
+ if option == "y":
+ deref_value = None
+ try:
+ deref_value = value.dereference()
+ str(deref_value)
+ except gdb.MemoryError:
+ print ("'%s' a pointer pointing to an invalid memory "
+ "location." % expr)
+ if is_child:
+ Explorer.return_to_parent_value_prompt()
+ return False
+ Explorer.explore_expr("*%s" % Explorer.guard_expr(expr),
+ deref_value, is_child)
+ return False
+
+ option = raw_input("Continue exploring it as a pointer to an "
+ "array [y/n]: ")
+ if option == "y":
+ while True:
+ index = 0
+ try:
+ index = int(raw_input("Enter the index of the element you "
+ "want to explore in '%s': " % expr))
+ except ValueError:
+ break
+ element_expr = "%s[%d]" % (Explorer.guard_expr(expr), index)
+ element = value[index]
+ try:
+ str(element)
+ except gdb.MemoryError:
+ print ("Cannot read value at index %d." % index)
+ continue
+ Explorer.explore_expr(element_expr, element, True)
+ return False
+
+ if is_child:
+ Explorer.return_to_parent_value()
+ return False
+
+ @staticmethod
+ def explore_type(name, datatype, is_child):
+ """Function to explore pointer types.
+ See Explorer.explore_type for more information.
+ """
+ target_type = datatype.target()
+ print ("\n%s is a pointer to a value of type '%s'." %
+ (name, str(target_type)))
+
+ Explorer.explore_type("the pointee type of %s" % name,
+ target_type,
+ is_child)
+ return False
+
+
+class ReferenceExplorer(object):
+ """Internal class used to explore reference (TYPE_CODE_REF) values."""
+
+ @staticmethod
+ def explore_expr(expr, value, is_child):
+ """Function to explore array values.
+ See Explorer.explore_expr for more information.
+ """
+ referenced_value = value.referenced_value()
+ Explorer.explore_expr(expr, referenced_value, is_child)
+ return False
+
+ @staticmethod
+ def explore_type(name, datatype, is_child):
+ """Function to explore pointer types.
+ See Explorer.explore_type for more information.
+ """
+ target_type = datatype.target()
+ Explorer.explore_type(name, target_type, is_child)
+ return False
+
+
+class ArrayExplorer(object):
+ """Internal class used to explore arrays."""
+
+ @staticmethod
+ def explore_expr(expr, value, is_child):
+ """Function to explore array values.
+ See Explorer.explore_expr for more information.
+ """
+ target_type = value.type.target()
+ print ("'%s' is an array of '%s'." % (expr, str(target_type)))
+ index = 0
+ try:
+ index = int(raw_input("Enter the index of the element you want to "
+ "explore in '%s': " % expr))
+ except ValueError:
+ if is_child:
+ Explorer.return_to_parent_value()
+ return False
+
+ element = None
+ try:
+ element = value[index]
+ str(element)
+ except gdb.MemoryError:
+ print ("Cannot read value at index %d." % index)
+ raw_input("Press enter to continue... ")
+ return True
+
+ Explorer.explore_expr("%s[%d]" % (Explorer.guard_expr(expr), index),
+ element, True)
+ return True
+
+ @staticmethod
+ def explore_type(name, datatype, is_child):
+ """Function to explore array types.
+ See Explorer.explore_type for more information.
+ """
+ target_type = datatype.target()
+ print ("%s is an array of '%s'." % (name, str(target_type)))
+
+ Explorer.explore_type("the array element of %s" % name, target_type,
+ is_child)
+ return False
+
+
+class CompoundExplorer(object):
+ """Internal class used to explore struct, classes and unions."""
+
+ @staticmethod
+ def _print_fields(print_list):
+ """Internal function which prints the fields of a struct/class/union.
+ """
+ # PRINT_LIST holds (name, description) pairs. The first pass finds
+ # the longest name so that the second pass can pad every name to the
+ # same width via the "%*s" conversion.
+ max_field_name_length = 0
+ for pair in print_list:
+ if max_field_name_length < len(pair[0]):
+ max_field_name_length = len(pair[0])
+
+ for pair in print_list:
+ print (" %*s = %s" % (max_field_name_length, pair[0], pair[1]))
+
+ @staticmethod
+ def _get_real_field_count(fields):
+ """Return the number of entries in FIELDS that are not artificial."""
+ real_field_count = 0;
+ for field in fields:
+ if not field.artificial:
+ real_field_count = real_field_count + 1
+
+ return real_field_count
+
+ @staticmethod
+ def explore_expr(expr, value, is_child):
+ """Function to explore structs/classes and union values.
+ See Explorer.explore_expr for more information.
+ """
+ datatype = value.type
+ type_code = datatype.code
+ fields = datatype.fields()
+
+ # Anything that is not TYPE_CODE_STRUCT is described as a union.
+ if type_code == gdb.TYPE_CODE_STRUCT:
+ type_desc = "struct/class"
+ else:
+ type_desc = "union"
+
+ if CompoundExplorer._get_real_field_count(fields) == 0:
+ print ("The value of '%s' is a %s of type '%s' with no fields." %
+ (expr, type_desc, str(value.type)))
+ if is_child:
+ Explorer.return_to_parent_value_prompt()
+ return False
+
+ print ("The value of '%s' is a %s of type '%s' with the following "
+ "fields:\n" % (expr, type_desc, str(value.type)))
+
+ # choice_to_compound_field_map is keyed by the choice number as a
+ # string, so the text read by raw_input below can be looked up
+ # without conversion. Each entry is (expression text, field value).
+ has_explorable_fields = False
+ choice_to_compound_field_map = { }
+ current_choice = 0
+ print_list = [ ]
+ for field in fields:
+ if field.artificial:
+ continue
+ # NOTE(review): field.name is concatenated and used as a key here;
+ # presumably never None for the types explored - confirm for
+ # anonymous members.
+ field_full_name = Explorer.guard_expr(expr) + "." + field.name
+ # A base class sub-object is obtained by casting the whole value
+ # to the base type; ordinary fields are looked up by name.
+ if field.is_base_class:
+ field_value = value.cast(field.type)
+ else:
+ field_value = value[field.name]
+ literal_value = ""
+ if type_code == gdb.TYPE_CODE_UNION:
+ literal_value = ("<Enter %d to explore this field of type "
+ "'%s'>" % (current_choice, str(field.type)))
+ has_explorable_fields = True
+ else:
+ if Explorer.is_scalar_type(field.type):
+ literal_value = ("%s .. (Value of type '%s')" %
+ (str(field_value), str(field.type)))
+ else:
+ if field.is_base_class:
+ field_desc = "base class"
+ else:
+ field_desc = "field"
+ literal_value = ("<Enter %d to explore this %s of type "
+ "'%s'>" %
+ (current_choice, field_desc,
+ str(field.type)))
+ has_explorable_fields = True
+
+ choice_to_compound_field_map[str(current_choice)] = (
+ field_full_name, field_value)
+ current_choice = current_choice + 1
+
+ print_list.append((field.name, literal_value))
+
+ CompoundExplorer._print_fields(print_list)
+ print ("")
+
+ # Input that is not a known choice number (including an empty line)
+ # ends the exploration of this value.
+ if has_explorable_fields:
+ choice = raw_input("Enter the field number of choice: ")
+ if choice in choice_to_compound_field_map:
+ Explorer.explore_expr(choice_to_compound_field_map[choice][0],
+ choice_to_compound_field_map[choice][1],
+ True)
+ return True
+ else:
+ if is_child:
+ Explorer.return_to_parent_value()
+ else:
+ if is_child:
+ Explorer.return_to_parent_value_prompt()
+
+ return False
+
+ @staticmethod
+ def explore_type(name, datatype, is_child):
+ """Function to explore struct/class and union types.
+ See Explorer.explore_type for more information.
+ """
+ type_code = datatype.code
+ type_desc = ""
+ if type_code == gdb.TYPE_CODE_STRUCT:
+ type_desc = "struct/class"
+ else:
+ type_desc = "union"
+
+ fields = datatype.fields()
+ if CompoundExplorer._get_real_field_count(fields) == 0:
+ if is_child:
+ print ("%s is a %s of type '%s' with no fields." %
+ (name, type_desc, str(datatype)))
+ Explorer.return_to_enclosing_type_prompt()
+ else:
+ print ("'%s' is a %s with no fields." % (name, type_desc))
+ return False
+
+ # For a child, NAME is printed without surrounding quotes; for a
+ # top-level type it is quoted.
+ if is_child:
+ print ("%s is a %s of type '%s' "
+ "with the following fields:\n" %
+ (name, type_desc, str(datatype)))
+ else:
+ print ("'%s' is a %s with the following "
+ "fields:\n" %
+ (name, type_desc))
+
+ # Each map entry is (field name, field type, "base class" or "field"),
+ # keyed by the choice number as a string.
+ has_explorable_fields = False
+ current_choice = 0
+ choice_to_compound_field_map = { }
+ print_list = [ ]
+ for field in fields:
+ if field.artificial:
+ continue
+ if field.is_base_class:
+ field_desc = "base class"
+ else:
+ field_desc = "field"
+ rhs = ("<Enter %d to explore this %s of type '%s'>" %
+ (current_choice, field_desc, str(field.type)))
+ print_list.append((field.name, rhs))
+ choice_to_compound_field_map[str(current_choice)] = (
+ field.name, field.type, field_desc)
+ current_choice = current_choice + 1
+
+ CompoundExplorer._print_fields(print_list)
+ print ("")
+
+ if len(choice_to_compound_field_map) > 0:
+ choice = raw_input("Enter the field number of choice: ")
+ if choice in choice_to_compound_field_map:
+ if is_child:
+ new_name = ("%s '%s' of %s" %
+ (choice_to_compound_field_map[choice][2],
+ choice_to_compound_field_map[choice][0],
+ name))
+ else:
+ new_name = ("%s '%s' of '%s'" %
+ (choice_to_compound_field_map[choice][2],
+ choice_to_compound_field_map[choice][0],
+ name))
+ Explorer.explore_type(new_name,
+ choice_to_compound_field_map[choice][1], True)
+ return True
+ else:
+ if is_child:
+ Explorer.return_to_enclosing_type()
+ else:
+ if is_child:
+ Explorer.return_to_enclosing_type_prompt()
+
+ return False
+
+
+class TypedefExplorer(object):
+ """Internal class used to explore values whose type is a typedef."""
+
+ @staticmethod
+ def explore_expr(expr, value, is_child):
+ """Function to explore typedef values.
+ See Explorer.explore_expr for more information.
+ """
+ actual_type = value.type.strip_typedefs()
+ print ("The value of '%s' is of type '%s' "
+ "which is a typedef of type '%s'" %
+ (expr, str(value.type), str(actual_type)))
+
+ Explorer.explore_expr(expr, value.cast(actual_type), is_child)
+ return False
+
+ @staticmethod
+ def explore_type(name, datatype, is_child):
+ """Function to explore typedef types.
+ See Explorer.explore_type for more information.
+ """
+ actual_type = datatype.strip_typedefs()
+ if is_child:
+ print ("The type of %s is a typedef of type '%s'." %
+ (name, str(actual_type)))
+ else:
+ print ("The type '%s' is a typedef of type '%s'." %
+ (name, str(actual_type)))
+
+ Explorer.explore_type(name, actual_type, is_child)
+ return False
+
+
+class ExploreUtils(object):
+ """Internal class which provides utilities for the main command classes."""
+
+ @staticmethod
+ def check_args(name, arg_str):
+ """Utility to check if adequate number of arguments are passed to an
+ explore command.
+
+ Arguments:
+ name: The name of the explore command.
+ arg_str: The argument string passed to the explore command.
+
+ Returns:
+ True if adequate arguments are passed, false otherwise.
+
+ Raises:
+ gdb.GdbError if adequate arguments are not passed.
+ """
+ if len(arg_str) < 1:
+ raise gdb.GdbError("ERROR: '%s' requires an argument."
+ % name)
+ return False
+ else:
+ return True
+
+ @staticmethod
+ def get_type_from_str(type_str):
+ """A utility function to deduce the gdb.Type value from a string
+ representing the type.
+
+ Arguments:
+ type_str: The type string from which the gdb.Type value should be
+ deduced.
+
+ Returns:
+ The deduced gdb.Type value if possible, None otherwise.
+ """
+ try:
+ # Assume the current language to be C/C++ and make a try.
+ return gdb.parse_and_eval("(%s *)0" % type_str).type.target()
+ except RuntimeError:
+ # If assumption of current language to be C/C++ was wrong, then
+ # lookup the type using the API.
+ try:
+ return gdb.lookup_type(type_str)
+ except RuntimeError:
+ return None
+
+ @staticmethod
+ def get_value_from_str(value_str):
+ """A utility function to deduce the gdb.Value value from a string
+ representing the value.
+
+ Arguments:
+ value_str: The value string from which the gdb.Value value should
+ be deduced.
+
+ Returns:
+ The deduced gdb.Value value if possible, None otherwise.
+ """
+ try:
+ return gdb.parse_and_eval(value_str)
+ except RuntimeError:
+ return None
+
+
+class ExploreCommand(gdb.Command):
+ """Explore a value or a type valid in the current context.
+
+ Usage:
+
+ explore ARG
+
+ - ARG is either a valid expression or a type name.
+ - At any stage of exploration, hit the return key (instead of a
+ choice, if any) to return to the enclosing type or value.
+ """
+
+ def __init__(self):
+ super(ExploreCommand, self).__init__(name = "explore",
+ command_class = gdb.COMMAND_DATA,
+ prefix = True)
+
+ def invoke(self, arg_str, from_tty):
+ if ExploreUtils.check_args("explore", arg_str) == False:
+ return
+
+ # Check if it is a value
+ value = ExploreUtils.get_value_from_str(arg_str)
+ if value is not None:
+ Explorer.explore_expr(arg_str, value, False)
+ return
+
+ # If it is not a value, check if it is a type
+ datatype = ExploreUtils.get_type_from_str(arg_str)
+ if datatype is not None:
+ Explorer.explore_type(arg_str, datatype, False)
+ return
+
+ # If it is neither a value nor a type, raise an error.
+ raise gdb.GdbError(
+ ("'%s' neither evaluates to a value nor is a type "
+ "in the current context." %
+ arg_str))
+
+
+class ExploreValueCommand(gdb.Command):
+ """Explore value of an expression valid in the current context.
+
+ Usage:
+
+ explore value ARG
+
+ - ARG is a valid expression.
+ - At any stage of exploration, hit the return key (instead of a
+ choice, if any) to return to the enclosing value.
+ """
+
+ def __init__(self):
+ super(ExploreValueCommand, self).__init__(
+ name = "explore value", command_class = gdb.COMMAND_DATA)
+
+ def invoke(self, arg_str, from_tty):
+ if ExploreUtils.check_args("explore value", arg_str) == False:
+ return
+
+ value = ExploreUtils.get_value_from_str(arg_str)
+ if value is None:
+ raise gdb.GdbError(
+ (" '%s' does not evaluate to a value in the current "
+ "context." %
+ arg_str))
+ return
+
+ Explorer.explore_expr(arg_str, value, False)
+
+
+class ExploreTypeCommand(gdb.Command):
+ """Explore a type or the type of an expression valid in the current
+ context.
+
+ Usage:
+
+ explore type ARG
+
+ - ARG is a valid expression or a type name.
+ - At any stage of exploration, hit the return key (instead of a
+ choice, if any) to return to the enclosing type.
+ """
+
+ def __init__(self):
+ super(ExploreTypeCommand, self).__init__(
+ name = "explore type", command_class = gdb.COMMAND_DATA)
+
+ def invoke(self, arg_str, from_tty):
+ if ExploreUtils.check_args("explore type", arg_str) == False:
+ return
+
+ datatype = ExploreUtils.get_type_from_str(arg_str)
+ if datatype is not None:
+ Explorer.explore_type(arg_str, datatype, False)
+ return
+
+ value = ExploreUtils.get_value_from_str(arg_str)
+ if value is not None:
+ print ("'%s' is of type '%s'." % (arg_str, str(value.type)))
+ Explorer.explore_type(str(value.type), value.type, False)
+ return
+
+ raise gdb.GdbError(("'%s' is not a type or value in the current "
+ "context." % arg_str))
+
+
+# NOTE(review): Explorer is defined earlier in this file; init_env
+# presumably prepares the state the explorer classes rely on - confirm.
+Explorer.init_env()
+
+# Instantiate each command class once at import time; the gdb.Command
+# constructor is what makes the command known to GDB. ExploreCommand is
+# created first because it is the prefix of the other two.
+ExploreCommand()
+ExploreValueCommand()
+ExploreTypeCommand()
diff --git a/share/gdb/python/gdb/command/pretty_printers.py b/share/gdb/python/gdb/command/pretty_printers.py
new file mode 100644
index 0000000..7b03e3a
--- /dev/null
+++ b/share/gdb/python/gdb/command/pretty_printers.py
@@ -0,0 +1,368 @@
+# Pretty-printer commands.
+# Copyright (C) 2010-2013 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""GDB commands for working with pretty-printers."""
+
+import copy
+import gdb
+import re
+
+
+def parse_printer_regexps(arg):
+ """Internal utility to parse a pretty-printer command argv.
+
+ Arguments:
+ arg: The arguments to the command. The format is:
+ [object-regexp [name-regexp]].
+ Individual printers in a collection are named as
+ printer-name;subprinter-name.
+
+ Returns:
+ The result is a 3-tuple of compiled regular expressions, except that
+ the resulting compiled subprinter regexp is None if not provided.
+
+ Raises:
+ SyntaxError: an error processing ARG
+ """
+
+ argv = gdb.string_to_argv(arg);
+ argc = len(argv)
+ object_regexp = "" # match everything
+ name_regexp = "" # match everything
+ subname_regexp = None
+ if argc > 3:
+ raise SyntaxError("too many arguments")
+ if argc >= 1:
+ object_regexp = argv[0]
+ if argc >= 2:
+ name_subname = argv[1].split(";", 1)
+ name_regexp = name_subname[0]
+ if len(name_subname) == 2:
+ subname_regexp = name_subname[1]
+ # That re.compile raises SyntaxError was determined empirically.
+ # We catch it and reraise it to provide a slightly more useful
+ # error message for the user.
+ try:
+ object_re = re.compile(object_regexp)
+ except SyntaxError:
+ raise SyntaxError("invalid object regexp: %s" % object_regexp)
+ try:
+ name_re = re.compile (name_regexp)
+ except SyntaxError:
+ raise SyntaxError("invalid name regexp: %s" % name_regexp)
+ if subname_regexp is not None:
+ try:
+ subname_re = re.compile(subname_regexp)
+ except SyntaxError:
+ raise SyntaxError("invalid subname regexp: %s" % subname_regexp)
+ else:
+ subname_re = None
+ return(object_re, name_re, subname_re)
+
+
+def printer_enabled_p(printer):
+ """Internal utility to see if printer (or subprinter) is enabled."""
+ if hasattr(printer, "enabled"):
+ return printer.enabled
+ else:
+ return True
+
+
+class InfoPrettyPrinter(gdb.Command):
+ """GDB command to list all registered pretty-printers.
+
+ Usage: info pretty-printer [object-regexp [name-regexp]]
+
+ OBJECT-REGEXP is a regular expression matching the objects to list.
+ Objects are "global", the program space's file, and the objfiles within
+ that program space.
+
+ NAME-REGEXP matches the name of the pretty-printer.
+ Individual printers in a collection are named as
+ printer-name;subprinter-name.
+ """
+
+ def __init__ (self):
+ super(InfoPrettyPrinter, self).__init__("info pretty-printer",
+ gdb.COMMAND_DATA)
+
+ @staticmethod
+ def enabled_string(printer):
+ """Return "" if PRINTER is enabled, otherwise " [disabled]"."""
+ if printer_enabled_p(printer):
+ return ""
+ else:
+ return " [disabled]"
+
+ @staticmethod
+ def printer_name(printer):
+ """Return the printer's name."""
+ if hasattr(printer, "name"):
+ return printer.name
+ if hasattr(printer, "__name__"):
+ return printer.__name__
+ # This "shouldn't happen", but the public API allows for
+ # direct additions to the pretty-printer list, and we shouldn't
+ # crash because someone added a bogus printer.
+ # Plus we want to give the user a way to list unknown printers.
+ return "unknown"
+
+ def list_pretty_printers(self, pretty_printers, name_re, subname_re):
+ """Print a list of pretty-printers."""
+ # A potential enhancement is to provide an option to list printers in
+ # "lookup order" (i.e. unsorted).
+ sorted_pretty_printers = sorted (copy.copy(pretty_printers),
+ key = self.printer_name)
+ for printer in sorted_pretty_printers:
+ name = self.printer_name(printer)
+ enabled = self.enabled_string(printer)
+ if name_re.match(name):
+ print (" %s%s" % (name, enabled))
+ if (hasattr(printer, "subprinters") and
+ printer.subprinters is not None):
+ sorted_subprinters = sorted (copy.copy(printer.subprinters),
+ key = self.printer_name)
+ for subprinter in sorted_subprinters:
+ if (not subname_re or
+ subname_re.match(subprinter.name)):
+ print (" %s%s" %
+ (subprinter.name,
+ self.enabled_string(subprinter)))
+
+ def invoke1(self, title, printer_list,
+ obj_name_to_match, object_re, name_re, subname_re):
+ """Subroutine of invoke to simplify it."""
+ if printer_list and object_re.match(obj_name_to_match):
+ print (title)
+ self.list_pretty_printers(printer_list, name_re, subname_re)
+
+ def invoke(self, arg, from_tty):
+ """GDB calls this to perform the command."""
+ (object_re, name_re, subname_re) = parse_printer_regexps(arg)
+ self.invoke1("global pretty-printers:", gdb.pretty_printers,
+ "global", object_re, name_re, subname_re)
+ cp = gdb.current_progspace()
+ self.invoke1("progspace %s pretty-printers:" % cp.filename,
+ cp.pretty_printers, "progspace",
+ object_re, name_re, subname_re)
+ for objfile in gdb.objfiles():
+ self.invoke1(" objfile %s pretty-printers:" % objfile.filename,
+ objfile.pretty_printers, objfile.filename,
+ object_re, name_re, subname_re)
+
+
+def count_enabled_printers(pretty_printers):
+ """Return a 2-tuple of number of enabled and total printers."""
+ enabled = 0
+ total = 0
+ for printer in pretty_printers:
+ if (hasattr(printer, "subprinters")
+ and printer.subprinters is not None):
+ if printer_enabled_p(printer):
+ for subprinter in printer.subprinters:
+ if printer_enabled_p(subprinter):
+ enabled += 1
+ total += len(printer.subprinters)
+ else:
+ if printer_enabled_p(printer):
+ enabled += 1
+ total += 1
+ return (enabled, total)
+
+
+def count_all_enabled_printers():
+ """Return a 2-tuble of the enabled state and total number of all printers.
+ This includes subprinters.
+ """
+ enabled_count = 0
+ total_count = 0
+ (t_enabled, t_total) = count_enabled_printers(gdb.pretty_printers)
+ enabled_count += t_enabled
+ total_count += t_total
+ (t_enabled, t_total) = count_enabled_printers(gdb.current_progspace().pretty_printers)
+ enabled_count += t_enabled
+ total_count += t_total
+ for objfile in gdb.objfiles():
+ (t_enabled, t_total) = count_enabled_printers(objfile.pretty_printers)
+ enabled_count += t_enabled
+ total_count += t_total
+ return (enabled_count, total_count)
+
+
+def pluralize(text, n, suffix="s"):
+ """Return TEXT pluralized if N != 1."""
+ if n != 1:
+ return "%s%s" % (text, suffix)
+ else:
+ return text
+
+
+def show_pretty_printer_enabled_summary():
+ """Print the number of printers enabled/disabled.
+ We count subprinters individually.
+ """
+ (enabled_count, total_count) = count_all_enabled_printers()
+ print ("%d of %d printers enabled" % (enabled_count, total_count))
+
+
+def do_enable_pretty_printer_1 (pretty_printers, name_re, subname_re, flag):
+ """Worker for enabling/disabling pretty-printers.
+
+ Arguments:
+ pretty_printers: list of pretty-printers
+ name_re: regular-expression object to select printers
+ subname_re: regular expression object to select subprinters or None
+ if all are affected
+ flag: True for Enable, False for Disable
+
+ Returns:
+ The number of printers affected.
+ This is just for informational purposes for the user.
+ """
+ total = 0
+ for printer in pretty_printers:
+ # A printer is selected when its "name" attribute or its "__name__"
+ # attribute (whichever exist) matches name_re.
+ if (hasattr(printer, "name") and name_re.match(printer.name) or
+ hasattr(printer, "__name__") and name_re.match(printer.__name__)):
+ if (hasattr(printer, "subprinters") and
+ printer.subprinters is not None):
+ if not subname_re:
+ # Only record printers that change state.
+ if printer_enabled_p(printer) != flag:
+ for subprinter in printer.subprinters:
+ if printer_enabled_p(subprinter):
+ total += 1
+ # NOTE: We preserve individual subprinter settings.
+ printer.enabled = flag
+ else:
+ # NOTE: Whether this actually disables the subprinter
+ # depends on whether the printer's lookup function supports
+ # the "enable" API. We can only assume it does.
+ for subprinter in printer.subprinters:
+ if subname_re.match(subprinter.name):
+ # Only record printers that change state.
+ # A subprinter is only counted while its parent printer
+ # is enabled.
+ if (printer_enabled_p(printer) and
+ printer_enabled_p(subprinter) != flag):
+ total += 1
+ subprinter.enabled = flag
+ else:
+ # This printer has no subprinters.
+ # If the user does "disable pretty-printer .* .* foo"
+ # should we disable printers that don't have subprinters?
+ # How do we apply "foo" in this context? Since there is no
+ # "foo" subprinter it feels like we should skip this printer.
+ # There's still the issue of how to handle
+ # "disable pretty-printer .* .* .*", and every other variation
+ # that can match everything. For now punt and only support
+ # "disable pretty-printer .* .*" (i.e. subname is elided)
+ # to disable everything.
+ if not subname_re:
+ # Only record printers that change state.
+ if printer_enabled_p(printer) != flag:
+ total += 1
+ printer.enabled = flag
+ return total
+
+
+def do_enable_pretty_printer (arg, flag):
+ """Internal worker for enabling/disabling pretty-printers."""
+ (object_re, name_re, subname_re) = parse_printer_regexps(arg)
+
+ total = 0
+ if object_re.match("global"):
+ total += do_enable_pretty_printer_1(gdb.pretty_printers,
+ name_re, subname_re, flag)
+ cp = gdb.current_progspace()
+ if object_re.match("progspace"):
+ total += do_enable_pretty_printer_1(cp.pretty_printers,
+ name_re, subname_re, flag)
+ for objfile in gdb.objfiles():
+ if object_re.match(objfile.filename):
+ total += do_enable_pretty_printer_1(objfile.pretty_printers,
+ name_re, subname_re, flag)
+
+ if flag:
+ state = "enabled"
+ else:
+ state = "disabled"
+ print ("%d %s %s" % (total, pluralize("printer", total), state))
+
+ # Print the total list of printers currently enabled/disabled.
+ # This is to further assist the user in determining whether the result
+ # is expected. Since we use regexps to select it's useful.
+ show_pretty_printer_enabled_summary()
+
+
+# Enable/Disable one or more pretty-printers.
+#
+# This is intended for use when a broken pretty-printer is shipped/installed
+# and the user wants to disable that printer without disabling all the other
+# printers.
+#
+# A useful addition would be -v (verbose) to show each printer affected.
+
+class EnablePrettyPrinter (gdb.Command):
+ """GDB command to enable the specified pretty-printer.
+
+ Usage: enable pretty-printer [object-regexp [name-regexp]]
+
+ OBJECT-REGEXP is a regular expression matching the objects to examine.
+ Objects are "global", the program space's file, and the objfiles within
+ that program space.
+
+ NAME-REGEXP matches the name of the pretty-printer.
+ Individual printers in a collection are named as
+ printer-name;subprinter-name.
+ """
+
+ # Passes the command name "enable pretty-printer" and the
+ # gdb.COMMAND_DATA class to the gdb.Command constructor.
+ def __init__(self):
+ super(EnablePrettyPrinter, self).__init__("enable pretty-printer",
+ gdb.COMMAND_DATA)
+
+ def invoke(self, arg, from_tty):
+ """GDB calls this to perform the command."""
+ # All parsing and reporting is done by do_enable_pretty_printer;
+ # True selects "enable". FROM_TTY is not used.
+ do_enable_pretty_printer(arg, True)
+
+
+class DisablePrettyPrinter (gdb.Command):
+ """GDB command to disable the specified pretty-printer.
+
+ Usage: disable pretty-printer [object-regexp [name-regexp]]
+
+ OBJECT-REGEXP is a regular expression matching the objects to examine.
+ Objects are "global", the program space's file, and the objfiles within
+ that program space.
+
+ NAME-REGEXP matches the name of the pretty-printer.
+ Individual printers in a collection are named as
+ printer-name;subprinter-name.
+ """
+
+ # Passes the command name "disable pretty-printer" and the
+ # gdb.COMMAND_DATA class to the gdb.Command constructor.
+ def __init__(self):
+ super(DisablePrettyPrinter, self).__init__("disable pretty-printer",
+ gdb.COMMAND_DATA)
+
+ def invoke(self, arg, from_tty):
+ """GDB calls this to perform the command."""
+ # All parsing and reporting is done by do_enable_pretty_printer;
+ # False selects "disable". FROM_TTY is not used.
+ do_enable_pretty_printer(arg, False)
+
+
+def register_pretty_printer_commands():
+ """Call from a top level script to install the pretty-printer commands."""
+ # Each command class is instantiated once; the instances are not kept.
+ InfoPrettyPrinter()
+ EnablePrettyPrinter()
+ DisablePrettyPrinter()
+
+# The commands are also installed as a side effect of importing this module.
+register_pretty_printer_commands()
diff --git a/share/gdb/python/gdb/command/prompt.py b/share/gdb/python/gdb/command/prompt.py
new file mode 100644
index 0000000..394e40c
--- /dev/null
+++ b/share/gdb/python/gdb/command/prompt.py
@@ -0,0 +1,66 @@
+# Extended prompt.
+# Copyright (C) 2011-2013 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""GDB command for working with extended prompts."""
+
+import gdb
+import gdb.prompt
+
+class _ExtendedPrompt(gdb.Parameter):
+
+ """Set the extended prompt.
+
+Usage: set extended-prompt VALUE
+
+Substitutions are applied to VALUE to compute the real prompt.
+
+The currently defined substitutions are:
+
+"""
+ # Add the prompt library's dynamically generated help to the
+ # __doc__ string.
+ __doc__ = __doc__ + gdb.prompt.prompt_help()
+
+ set_doc = "Set the extended prompt."
+ show_doc = "Show the extended prompt."
+
+ def __init__(self):
+ super(_ExtendedPrompt, self).__init__("extended-prompt",
+ gdb.COMMAND_SUPPORT,
+ gdb.PARAM_STRING_NOESCAPE)
+ self.value = ''
+ self.hook_set = False
+
+ def get_show_string (self, pvalue):
+ if self.value is not '':
+ return "The extended prompt is: " + self.value
+ else:
+ return "The extended prompt is not set."
+
+ def get_set_string (self):
+ if self.hook_set == False:
+ gdb.prompt_hook = self.before_prompt_hook
+ self.hook_set = True
+ return ""
+
+ def before_prompt_hook(self, current):
+ if self.value is not '':
+ newprompt = gdb.prompt.substitute_prompt(self.value)
+ return newprompt.replace('\\', '\\\\')
+ else:
+ return None
+
+_ExtendedPrompt()
diff --git a/share/gdb/python/gdb/command/type_printers.py b/share/gdb/python/gdb/command/type_printers.py
new file mode 100644
index 0000000..81f2ea1
--- /dev/null
+++ b/share/gdb/python/gdb/command/type_printers.py
@@ -0,0 +1,125 @@
+# Type printer commands.
+# Copyright (C) 2010-2013 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import copy
+import gdb
+
+"""GDB commands for working with type-printers."""
+
+class InfoTypePrinter(gdb.Command):
+ """GDB command to list all registered type-printers.
+
+ Usage: info type-printers
+ """
+
+ def __init__ (self):
+ super(InfoTypePrinter, self).__init__("info type-printers",
+ gdb.COMMAND_DATA)
+
+ def list_type_printers(self, type_printers):
+ """Print a list of type printers."""
+ # A potential enhancement is to provide an option to list printers in
+ # "lookup order" (i.e. unsorted).
+ sorted_type_printers = sorted (copy.copy(type_printers),
+ key = lambda x: x.name)
+ for printer in sorted_type_printers:
+ if printer.enabled:
+ enabled = ''
+ else:
+ enabled = " [disabled]"
+ print (" %s%s" % (printer.name, enabled))
+
+ def invoke(self, arg, from_tty):
+ """GDB calls this to perform the command."""
+ sep = ''
+ for objfile in gdb.objfiles():
+ if objfile.type_printers:
+ print ("%sType printers for %s:" % (sep, objfile.name))
+ self.list_type_printers(objfile.type_printers)
+ sep = '\n'
+ if gdb.current_progspace().type_printers:
+ print ("%sType printers for program space:" % sep)
+ self.list_type_printers(gdb.current_progspace().type_printers)
+ sep = '\n'
+ if gdb.type_printers:
+ print ("%sGlobal type printers:" % sep)
+ self.list_type_printers(gdb.type_printers)
+
+class _EnableOrDisableCommand(gdb.Command):
+ def __init__(self, setting, name):
+ super(_EnableOrDisableCommand, self).__init__(name, gdb.COMMAND_DATA)
+ self.setting = setting
+
+ def set_some(self, name, printers):
+ result = False
+ for p in printers:
+ if name == p.name:
+ p.enabled = self.setting
+ result = True
+ return result
+
+ def invoke(self, arg, from_tty):
+ """GDB calls this to perform the command."""
+ for name in arg.split():
+ ok = False
+ for objfile in gdb.objfiles():
+ if self.set_some(name, objfile.type_printers):
+ ok = True
+ if self.set_some(name, gdb.current_progspace().type_printers):
+ ok = True
+ if self.set_some(name, gdb.type_printers):
+ ok = True
+ if not ok:
+ print ("No type printer named '%s'" % name)
+
+ def add_some(self, result, word, printers):
+ for p in printers:
+ if p.name.startswith(word):
+ result.append(p.name)
+
+ def complete(self, text, word):
+ result = []
+ for objfile in gdb.objfiles():
+ self.add_some(result, word, objfile.type_printers)
+ self.add_some(result, word, gdb.current_progspace().type_printers)
+ self.add_some(result, word, gdb.type_printers)
+ return result
+
+class EnableTypePrinter(_EnableOrDisableCommand):
+ """GDB command to enable the specified type printer.
+
+ Usage: enable type-printer NAME
+
+ NAME is the name of the type-printer.
+ """
+
+ # True is the value the base class stores in each matching printer's
+ # "enabled" attribute; the string is the command name.
+ def __init__(self):
+ super(EnableTypePrinter, self).__init__(True, "enable type-printer")
+
+class DisableTypePrinter(_EnableOrDisableCommand):
+ """GDB command to disable the specified type-printer.
+
+ Usage: disable type-printer NAME
+
+ NAME is the name of the type-printer.
+ """
+
+ # False is the value the base class stores in each matching printer's
+ # "enabled" attribute; the string is the command name.
+ def __init__(self):
+ super(DisableTypePrinter, self).__init__(False, "disable type-printer")
+
+InfoTypePrinter()
+EnableTypePrinter()
+DisableTypePrinter()
diff --git a/share/gdb/python/gdb/function/__init__.py b/share/gdb/python/gdb/function/__init__.py
new file mode 100644
index 0000000..755bff9
--- /dev/null
+++ b/share/gdb/python/gdb/function/__init__.py
@@ -0,0 +1,14 @@
+# Copyright (C) 2012-2013 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
diff --git a/share/gdb/python/gdb/function/strfns.py b/share/gdb/python/gdb/function/strfns.py
new file mode 100644
index 0000000..efdf950
--- /dev/null
+++ b/share/gdb/python/gdb/function/strfns.py
@@ -0,0 +1,108 @@
+# Useful gdb string convenience functions.
+# Copyright (C) 2012-2013 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""$_memeq, $_strlen, $_streq, $_regex"""
+
+import gdb
+import re
+
+
+class _MemEq(gdb.Function):
+    """$_memeq - compare bytes of memory
+
+Usage:
+  $_memeq(a, b, len)
+
+Returns:
+  True if len bytes at a and b compare equally.
+"""
+    def __init__(self):
+        super(_MemEq, self).__init__("_memeq")
+
+    def invoke(self, a, b, length):
+        # A negative length is an error; zero bytes always compare equal.
+        if length < 0:
+            raise ValueError("length must be non-negative")
+        if length == 0:
+            return True
+        # Type.vector takes [low_bound,] high_bound, so LENGTH chars
+        # need a high bound of LENGTH - 1.
+        char_type = gdb.lookup_type("char")
+        vector_ptr_type = char_type.vector(length - 1).pointer()
+        # View both operands as pointers to that vector type and compare
+        # the pointed-to vectors as whole values.
+        lhs_ptr = a.reinterpret_cast(vector_ptr_type)
+        rhs_ptr = b.reinterpret_cast(vector_ptr_type)
+        return lhs_ptr.dereference() == rhs_ptr.dereference()
+
+
+class _StrLen(gdb.Function):
+    """$_strlen - compute string length
+
+Usage:
+  $_strlen(a)
+
+Returns:
+  Length of string a, assumed to be a string in the current language.
+"""
+    def __init__(self):
+        super(_StrLen, self).__init__("_strlen")
+
+    def invoke(self, a):
+        # Convert the value to a Python string and measure that.
+        return len(a.string())
+
+
+class _StrEq(gdb.Function):
+    """$_streq - check string equality
+
+Usage:
+  $_streq(a, b)
+
+Returns:
+  True if a and b are identical strings in the current language.
+
+Example (amd64-linux):
+  catch syscall open
+  cond $bpnum $_streq((char*) $rdi, "foo")
+"""
+    def __init__(self):
+        super(_StrEq, self).__init__("_streq")
+
+    def invoke(self, a, b):
+        # Compare the two values after converting each to a Python string.
+        lhs = a.string()
+        rhs = b.string()
+        return lhs == rhs
+
+
+class _RegEx(gdb.Function):
+    """$_regex - check if a string matches a regular expression
+
+Usage:
+  $_regex(string, regex)
+
+Returns:
+  True if string str (in the current language) matches the
+  regular expression regex.
+"""
+    def __init__(self):
+        super(_RegEx, self).__init__("_regex")
+
+    def invoke(self, string, regex):
+        subject = string.string()
+        pattern = regex.string()
+        # re.match anchors at the start of SUBJECT only; the pattern does
+        # not have to consume the whole string.
+        return re.match(pattern, subject) is not None
+
+
+# GDB will import us automagically via gdb/__init__.py.
+# Create one instance of each convenience function defined above; each
+# constructor passes the function's name ("_memeq", "_strlen", "_streq",
+# "_regex") to gdb.Function.
+_MemEq()
+_StrLen()
+_StrEq()
+_RegEx()
diff --git a/share/gdb/python/gdb/printing.py b/share/gdb/python/gdb/printing.py
new file mode 100644
index 0000000..785a407
--- /dev/null
+++ b/share/gdb/python/gdb/printing.py
@@ -0,0 +1,263 @@
+# Pretty-printer utilities.
+# Copyright (C) 2010-2013 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""Utilities for working with pretty-printers."""
+
+import gdb
+import gdb.types
+import re
+import sys
+
+if sys.version_info >= (3,):
+    # Python 3 has neither basestring nor long; alias them to str and
+    # int so the rest of this module can use the Python 2 spellings.
+    basestring = str
+    long = int
+
+class PrettyPrinter(object):
+    """Base class for a named pretty-printer that can be switched on and off.
+
+    Attributes:
+        name: A string that is unique among all printers in the context
+            where the printer is registered (objfile, progspace, or
+            global(gdb)), and that meaningfully describes what it can
+            pretty-print, e.g. "StringPiece" or "protobufs".
+        subprinters: An iterable whose elements each have a "name"
+            attribute and, potentially, an "enabled" attribute; or None
+            if there are no subprinters.
+        enabled: A boolean indicating if the printer is enabled.
+
+    Subprinters cover the case where "one" pretty-printer is really a
+    collection of several.  E.g., the libstdc++ pretty-printer has a
+    printer for each of several different types, selected by regexps.
+    """
+
+    # Subprinter support could live in subclasses, but keeping it here
+    # formalizes it and simplifies commands/pretty_printers.py.
+
+    def __init__(self, name, subprinters=None):
+        self.enabled = True
+        self.subprinters = subprinters
+        self.name = name
+
+    def __call__(self, val):
+        # Subclasses are expected to override this.
+        raise NotImplementedError("PrettyPrinter __call__")
+
+
+class SubPrettyPrinter(object):
+    """Optional base class for the members of a printer collection.
+
+    Sub-pretty-printers are not required to derive from this class; it
+    just records the attributes they are expected to have.
+
+    Attributes:
+        name: The name of the subprinter.
+        enabled: A boolean indicating if the subprinter is enabled.
+    """
+
+    def __init__(self, name):
+        self.enabled = True
+        self.name = name
+
+
+def register_pretty_printer(obj, printer, replace=False):
+    """Register pretty-printer PRINTER with OBJ.
+
+    The printer is added to the front of the search list, thus one can override
+    an existing printer if one needs to.  Use a different name when overriding
+    an existing printer, otherwise an exception will be raised; multiple
+    printers with the same name are disallowed.
+
+    Arguments:
+        obj: Either an objfile, progspace, or None (in which case the printer
+            is registered globally).  Passing the gdb module itself is
+            treated the same as None.
+        printer: Either a function of one argument (old way) or any object
+            which has attributes: name, enabled, __call__.
+        replace: If True replace any existing copy of the printer.
+            Otherwise if the printer already exists raise an exception.
+
+    Returns:
+        Nothing.
+
+    Raises:
+        TypeError: A problem with the type of the printer.
+        ValueError: The printer's name contains a semicolon ";".
+        RuntimeError: A printer with the same name is already registered.
+
+    If the caller wants the printer to be listable and disableable, it must
+    follow the PrettyPrinter API.  This applies to the old way (functions) too.
+    If printer is an object, __call__ is a method of two arguments:
+    self, and the value to be pretty-printed.  See PrettyPrinter.
+    """
+
+    # Watch for both __name__ and name.
+    # Functions get the former for free, but we don't want to use an
+    # attribute named __foo__ for pretty-printers-as-objects.
+    # If printer has both, we use `name'.
+    if not hasattr(printer, "__name__") and not hasattr(printer, "name"):
+        raise TypeError("printer missing attribute: name")
+    if hasattr(printer, "name") and not hasattr(printer, "enabled"):
+        raise TypeError("printer missing attribute: enabled")
+    if not hasattr(printer, "__call__"):
+        raise TypeError("printer missing attribute: __call__")
+
+    # Name used by the verbose messages only.  The checks above guarantee
+    # that at least one of the two attributes exists.
+    if hasattr(printer, "name"):
+        name = printer.name
+    else:
+        name = printer.__name__
+
+    if obj is None or obj is gdb:
+        if gdb.parameter("verbose"):
+            gdb.write("Registering global %s pretty-printer ...\n" % name)
+        obj = gdb
+    else:
+        if gdb.parameter("verbose"):
+            gdb.write("Registering %s pretty-printer for %s ...\n" %
+                      (name, obj.filename))
+
+    if hasattr(printer, "name"):
+        if not isinstance(printer.name, basestring):
+            raise TypeError("printer name is not a string")
+        # If printer provides a name, make sure it doesn't contain ";".
+        # Semicolon is used by the info/enable/disable pretty-printer commands
+        # to delimit subprinters.
+        if printer.name.find(";") >= 0:
+            raise ValueError("semicolon ';' in printer name")
+        # Also make sure the name is unique.
+        # Alas, we can't do the same for functions and __name__, they could
+        # all have a canonical name like "lookup_function".
+        # PERF: gdb records printers in a list, making this inefficient.
+        for i, p in enumerate(obj.pretty_printers):
+            if hasattr(p, "name") and p.name == printer.name:
+                if not replace:
+                    raise RuntimeError("pretty-printer already registered: %s" %
+                                       printer.name)
+                # Deleting while iterating is safe only because the loop
+                # stops immediately afterwards.
+                del obj.pretty_printers[i]
+                break
+
+    obj.pretty_printers.insert(0, printer)
+
+
+class RegexpCollectionPrettyPrinter(PrettyPrinter):
+    """A pretty-printer made of subprinters selected by regular expression.
+
+    Intended usage:
+
+    pretty_printer = RegexpCollectionPrettyPrinter("my_library")
+    pretty_printer.add_printer("myclass1", "^myclass1$", MyClass1Printer)
+    ...
+    pretty_printer.add_printer("myclassN", "^myclassN$", MyClassNPrinter)
+    register_pretty_printer(obj, pretty_printer)
+    """
+
+    class RegexpSubprinter(SubPrettyPrinter):
+        # One entry of the collection: a name, the regexp source text,
+        # its compiled form, and the factory that builds the printer.
+        def __init__(self, name, regexp, gen_printer):
+            super(RegexpCollectionPrettyPrinter.RegexpSubprinter, self).__init__(name)
+            self.regexp = regexp
+            self.gen_printer = gen_printer
+            self.compiled_re = re.compile(regexp)
+
+    def __init__(self, name):
+        # Start with an empty subprinter list that add_printer appends to.
+        super(RegexpCollectionPrettyPrinter, self).__init__(name, [])
+
+    def add_printer(self, name, regexp, gen_printer):
+        """Append a subprinter to the end of the collection.
+
+        Arguments:
+            name: The name of the subprinter.
+            regexp: The regular expression, as a string.
+            gen_printer: A function/method that given a value returns an
+                object to pretty-print it.
+
+        Returns:
+            Nothing.
+        """
+
+        # NOTE: A previous version used the regexp itself as the printer
+        # name.  That was awkward to pass to the enable/disable commands
+        # (it's cumbersome to make a regexp of a regexp), so the name is
+        # now a separate parameter.
+
+        subprinter = self.RegexpSubprinter(name, regexp, gen_printer)
+        self.subprinters.append(subprinter)
+
+    def __call__(self, val):
+        """Return a printer for VAL from the first matching subprinter, or None."""
+
+        # Matching is done against the tag of the value's basic type.
+        tag = gdb.types.get_basic_type(val.type).tag
+        if not tag:
+            return None
+
+        # Subprinters are tried in the order they were added; disabled
+        # ones are skipped, and the regexp may match anywhere in the tag.
+        for sub in self.subprinters:
+            if not sub.enabled:
+                continue
+            if sub.compiled_re.search(tag):
+                return sub.gen_printer(val)
+
+        # No enabled subprinter matched.
+        return None
+
+# A helper class for printing enum types.  This class is instantiated
+# with a list of enumerators to print a particular Value.
+class _EnumInstance:
+    def __init__(self, enumerators, val):
+        # enumerators: sequence of (name, value) pairs, as built by
+        # FlagEnumerationPrinter.  val: the value to describe.
+        self.enumerators = enumerators
+        self.val = val
+
+    def to_string(self):
+        """Return VAL as "0x<hex> [FLAG | FLAG | ...]".
+
+        Enumerators are tried in list order; each one that shares a bit
+        with what is left of the value is named and its bits are cleared.
+        Bits that no enumerator accounts for (or a value that matches
+        nothing at all) are reported as "<unknown: 0x...>".
+        """
+        # Convert once; both the bit tests and the final "%x" format need
+        # a plain Python integer rather than the original value object.
+        value = long(self.val)
+        remaining = value
+        flag_list = []
+        any_found = False
+        for (e_name, e_value) in self.enumerators:
+            if remaining & e_value != 0:
+                flag_list.append(e_name)
+                remaining = remaining & ~e_value
+                any_found = True
+        if not any_found or remaining != 0:
+            # Leftover value.
+            flag_list.append('<unknown: 0x%x>' % remaining)
+        return "0x%x [%s]" % (value, " | ".join(flag_list))
+
+class FlagEnumerationPrinter(PrettyPrinter):
+    """A pretty-printer which can be used to print a flag-style enumeration.
+    A flag-style enumeration is one where the enumerators are or'd
+    together to create values.  The new printer will print these
+    symbolically using '|' notation.  The printer must be registered
+    manually.  This printer is most useful when an enum is flag-like,
+    but has some overlap.  GDB's built-in printing will not handle
+    this case, but this printer will attempt to."""
+
+    def __init__(self, enum_type):
+        # ENUM_TYPE is the enum's type name; it doubles as the printer
+        # name and is looked up on first use.
+        super(FlagEnumerationPrinter, self).__init__(enum_type)
+        self.initialized = False
+
+    def __call__(self, val):
+        if not self.initialized:
+            flags = gdb.lookup_type(self.name)
+            enumerators = []
+            for field in flags.fields():
+                enumerators.append((field.name, field.enumval))
+            # Sorting the enumerators by value usually does the right
+            # thing.  Entries are (name, value) tuples, so the value is
+            # element 1.
+            enumerators.sort(key = lambda x: x[1])
+            self.enumerators = enumerators
+            # Mark as initialized only once the table exists, so a failed
+            # lookup is retried instead of leaving self.enumerators unset.
+            self.initialized = True
+
+        if self.enabled:
+            return _EnumInstance(self.enumerators, val)
+        else:
+            return None
diff --git a/share/gdb/python/gdb/prompt.py b/share/gdb/python/gdb/prompt.py
new file mode 100644
index 0000000..bb1975b
--- /dev/null
+++ b/share/gdb/python/gdb/prompt.py
@@ -0,0 +1,148 @@
+# Extended prompt utilities.
+# Copyright (C) 2011-2013 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+""" Extended prompt library functions."""
+
+import gdb
+import os
+
+def _prompt_pwd(ignore):
+    "The current working directory."
+    # The docstring above is emitted verbatim by prompt_help, so it is
+    # left unchanged.  os.getcwdu exists only on Python 2; fall back to
+    # os.getcwd where it is absent (Python 3), which returns text there.
+    getcwd = getattr(os, "getcwdu", os.getcwd)
+    return getcwd()
+
+def _prompt_object_attr(func, what, attr, nattr):
+    """Fetch an attribute of the object returned by FUNC for the prompt.
+
+    ATTR names the attribute; when it is None, NATTR is used instead.
+    If FUNC raises gdb.error the result is '<no WHAT>'.  A callable
+    attribute is called with no arguments and its result returned.
+    """
+    wanted = nattr if attr is None else attr
+    try:
+        target = func()
+    except gdb.error:
+        return '<no %s>' % what
+    if not hasattr(target, wanted):
+        return '<no attribute %s on current %s>' % (wanted, what)
+    value = getattr(target, wanted)
+    return value() if callable(value) else value
+
+# Handlers for the individual prompt escapes.  substitute_prompt calls
+# each one with the text found between "{" and "}" after the escape, or
+# with None when no argument was given.  prompt_help prints each
+# handler's docstring, so the docstrings are user-visible text.
+
+# \f: attribute of the selected frame; defaults to 'name'.
+def _prompt_frame(attr):
+ "The selected frame; an argument names a frame parameter."
+ return _prompt_object_attr(gdb.selected_frame, 'frame', attr, 'name')
+
+# \t: attribute of the selected thread; defaults to 'num'.
+def _prompt_thread(attr):
+ "The selected thread; an argument names a thread parameter."
+ return _prompt_object_attr(gdb.selected_thread, 'thread', attr, 'num')
+
+# \v: the argument is ignored.
+def _prompt_version(attr):
+ "The version of GDB."
+ return gdb.VERSION
+
+# \e: the argument is ignored.
+def _prompt_esc(attr):
+ "The ESC character."
+ return '\033'
+
+# \\: the argument is ignored.
+def _prompt_bs(attr):
+ "A backslash."
+ return '\\'
+
+# \n: the argument is ignored.
+def _prompt_n(attr):
+ "A newline."
+ return '\n'
+
+# \r: the argument is ignored.
+def _prompt_r(attr):
+ "A carriage return."
+ return '\r'
+
+# \p: ATTR is passed straight to gdb.parameter, including None when the
+# escape was written without an argument.
+def _prompt_param(attr):
+ "A parameter's value; the argument names the parameter."
+ return gdb.parameter(attr)
+
+# \[: returns the byte \001; the argument is ignored.
+def _prompt_noprint_begin(attr):
+ "Begins a sequence of non-printing characters."
+ return '\001'
+
+# \]: returns the byte \002; the argument is ignored.
+def _prompt_noprint_end(attr):
+ "Ends a sequence of non-printing characters."
+ return '\002'
+
+# Maps the character that follows a backslash in a prompt string to the
+# handler that produces its replacement.  substitute_prompt looks escapes
+# up here, and prompt_help lists the keys in sorted order.
+prompt_substitutions = {
+ 'e': _prompt_esc,
+ '\\': _prompt_bs,
+ 'n': _prompt_n,
+ 'r': _prompt_r,
+ 'v': _prompt_version,
+ 'w': _prompt_pwd,
+ 'f': _prompt_frame,
+ 't': _prompt_thread,
+ 'p': _prompt_param,
+ '[': _prompt_noprint_begin,
+ ']': _prompt_noprint_end
+}
+
+def prompt_help():
+    """Build the help text for the prompt escapes.
+
+    One line is produced per entry of prompt_substitutions, in sorted key
+    order, using the handler's __doc__ as the description; a fixed
+    explanation of the argument syntax follows."""
+
+    entries = [' \\%s\t%s\n' % (key, prompt_substitutions[key].__doc__)
+               for key in sorted (prompt_substitutions.keys())]
+    trailer = """
+A substitution can be used in a simple form, like "\\f".
+An argument can also be passed to it, like "\\f{name}".
+The meaning of the argument depends on the particular substitution."""
+    return ''.join(entries) + trailer
+
+def substitute_prompt(prompt):
+    """Perform substitutions on PROMPT.
+
+    A backslash followed by a key of prompt_substitutions is replaced by
+    str() of the handler's result; an immediately following "{...}"
+    supplies the handler's argument.  A backslash followed by any other
+    character yields that character, and a trailing lone backslash is
+    dropped."""
+
+    pieces = []
+    end = len(prompt)
+    pos = 0
+    while pos < end:
+        ch = prompt[pos]
+        if ch != '\\':
+            # Ordinary character: copy it through.
+            pieces.append(ch)
+            pos += 1
+            continue
+
+        # Step over the backslash to the escape character itself.
+        pos += 1
+        if pos >= end:
+            break
+        cmdch = prompt[pos]
+
+        if cmdch not in prompt_substitutions:
+            # Unrecognized escapes are turned into the escaped
+            # character itself.
+            pieces.append(cmdch)
+            pos += 1
+            continue
+
+        handler = prompt_substitutions[cmdch]
+        arg = None
+        if pos + 1 < end and prompt[pos + 1] == '{':
+            closing = prompt.find('}', pos + 1)
+            # Just ignore formatting errors: without a closing brace the
+            # handler gets no argument and the "{..." text is then copied
+            # through as ordinary characters.
+            if closing != -1:
+                arg = prompt[pos + 2 : closing]
+                pos = closing
+        pieces.append(str(handler(arg)))
+        pos += 1
+
+    return ''.join(pieces)
diff --git a/share/gdb/python/gdb/types.py b/share/gdb/python/gdb/types.py
new file mode 100644
index 0000000..ffc817c
--- /dev/null
+++ b/share/gdb/python/gdb/types.py
@@ -0,0 +1,176 @@
+# Type utilities.
+# Copyright (C) 2010-2013 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""Utilities for working with gdb.Types."""
+
+import gdb
+
+
+def get_basic_type(type_):
+    """Return the "basic" type of a type.
+
+    Arguments:
+        type_: The type to reduce to its basic type.
+
+    Returns:
+        type_ with references and typedefs replaced by the type they
+        refer to, and with const/volatile qualifiers stripped.
+    """
+
+    # Keep peeling until the type is neither a reference nor a typedef;
+    # a reference's target may itself be a typedef and vice versa.
+    peelable = (gdb.TYPE_CODE_REF, gdb.TYPE_CODE_TYPEDEF)
+    while type_.code in peelable:
+        if type_.code == gdb.TYPE_CODE_REF:
+            type_ = type_.target()
+        else:
+            type_ = type_.strip_typedefs()
+    return type_.unqualified()
+
+
+def has_field(type_, field):
+    """Return True if a type has the specified field.
+
+    Arguments:
+        type_: The type to examine.
+            It must be one of gdb.TYPE_CODE_STRUCT, gdb.TYPE_CODE_UNION.
+        field: The name of the field to look up.
+
+    Returns:
+        True if the field is present either in type_ or any baseclass.
+
+    Raises:
+        TypeError: The type is not a struct or union.
+    """
+
+    type_ = get_basic_type(type_)
+    if type_.code not in (gdb.TYPE_CODE_STRUCT, gdb.TYPE_CODE_UNION):
+        raise TypeError("not a struct or union")
+    for member in type_.fields():
+        if member.is_base_class:
+            # Base classes are searched recursively.
+            found = has_field(member.type, field)
+        else:
+            # NOTE: member.name could be None
+            found = member.name == field
+        if found:
+            return True
+    return False
+
+
+def make_enum_dict(enum_type):
+    """Return a dictionary from a program's enum type.
+
+    Arguments:
+        enum_type: The enum to compute the dictionary for.
+
+    Returns:
+        A dictionary mapping each enumerator's name to its value.
+
+    Raises:
+        TypeError: The type is not an enum.
+    """
+
+    if enum_type.code != gdb.TYPE_CODE_ENUM:
+        raise TypeError("not an enum type")
+    # The enum's value is stored in "enumval".
+    return dict((field.name, field.enumval) for field in enum_type.fields())
+
+
+def deep_items (type_):
+    """Return an iterator that recursively traverses anonymous fields.
+
+    Arguments:
+        type_: The type to traverse.  It should be one of
+            gdb.TYPE_CODE_STRUCT or gdb.TYPE_CODE_UNION.
+
+    Returns:
+        an iterator similar to gdb.Type.iteritems(), i.e., it returns
+        pairs of key, value, but for any anonymous struct or union
+        field that field is traversed recursively, depth-first.
+    """
+    for name, member in type_.iteritems ():
+        if not name:
+            # A member with no name: yield the members of its type in
+            # place of the member itself.
+            for pair in deep_items (member.type):
+                yield pair
+        else:
+            yield name, member
+
+class TypePrinter(object):
+    """The base class for type printers.
+
+    Instances of this type can be used to substitute type names during
+    'ptype'.
+
+    A type printer must have at least 'name' and 'enabled' attributes,
+    and supply an 'instantiate' method.
+
+    'instantiate' returns either None or an object with a 'recognize'
+    method.  'recognize' accepts a gdb.Type and returns either None,
+    meaning that the type was not recognized, or a string naming the
+    type.
+    """
+
+    def __init__(self, name):
+        self.enabled = True
+        self.name = name
+
+    def instantiate(self):
+        # The base class supplies no recognizer.
+        return None
+
+# Helper for get_type_recognizers: append to RESULT the recognizer of
+# every enabled printer in PLIST whose instantiate() returns one.
+def _get_some_type_recognizers(result, plist):
+    for printer in plist:
+        if not printer.enabled:
+            continue
+        recognizer = printer.instantiate()
+        if recognizer is not None:
+            result.append(recognizer)
+    return None
+
+def get_type_recognizers():
+    "Return a list of the enabled type recognizers for the current context."
+    recognizers = []
+
+    # Scopes are consulted in this order: every objfile, then the
+    # current program space, then the global list.
+    for objfile in gdb.objfiles():
+        _get_some_type_recognizers(recognizers, objfile.type_printers)
+    progspace_printers = gdb.current_progspace().type_printers
+    _get_some_type_recognizers(recognizers, progspace_printers)
+    _get_some_type_recognizers(recognizers, gdb.type_printers)
+
+    return recognizers
+
+def apply_type_recognizers(recognizers, type_obj):
+    """Apply the given list of type recognizers to the type TYPE_OBJ.
+    The recognizers are tried in order and the first name that is not
+    None is returned.  If no recognizer recognizes TYPE_OBJ, this
+    returns None."""
+    for recognizer in recognizers:
+        name = recognizer.recognize(type_obj)
+        if name is None:
+            continue
+        return name
+    return None
+
+def register_type_printer(locus, printer):
+    """Register a type printer.
+    PRINTER is the type printer instance.
+    LOCUS is either an objfile, a program space, or None, indicating
+    global registration.
+    The printer is inserted at the front of LOCUS's type_printers list."""
+
+    target = gdb if locus is None else locus
+    target.type_printers.insert(0, printer)
diff --git a/share/gdb/syscalls/amd64-linux.xml b/share/gdb/syscalls/amd64-linux.xml
new file mode 100644
index 0000000..bf3da5d
--- /dev/null
+++ b/share/gdb/syscalls/amd64-linux.xml
@@ -0,0 +1,314 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2009-2013 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-syscalls.dtd">
+
+<!-- This file was generated using the following file:
+
+ /usr/src/linux/arch/x86/include/asm/unistd_64.h
+
+ The file mentioned above belongs to the Linux Kernel. -->
+
+<syscalls_info>
+ <syscall name="read" number="0"/>
+ <syscall name="write" number="1"/>
+ <syscall name="open" number="2"/>
+ <syscall name="close" number="3"/>
+ <syscall name="stat" number="4"/>
+ <syscall name="fstat" number="5"/>
+ <syscall name="lstat" number="6"/>
+ <syscall name="poll" number="7"/>
+ <syscall name="lseek" number="8"/>
+ <syscall name="mmap" number="9"/>
+ <syscall name="mprotect" number="10"/>
+ <syscall name="munmap" number="11"/>
+ <syscall name="brk" number="12"/>
+ <syscall name="rt_sigaction" number="13"/>
+ <syscall name="rt_sigprocmask" number="14"/>
+ <syscall name="rt_sigreturn" number="15"/>
+ <syscall name="ioctl" number="16"/>
+ <syscall name="pread64" number="17"/>
+ <syscall name="pwrite64" number="18"/>
+ <syscall name="readv" number="19"/>
+ <syscall name="writev" number="20"/>
+ <syscall name="access" number="21"/>
+ <syscall name="pipe" number="22"/>
+ <syscall name="select" number="23"/>
+ <syscall name="sched_yield" number="24"/>
+ <syscall name="mremap" number="25"/>
+ <syscall name="msync" number="26"/>
+ <syscall name="mincore" number="27"/>
+ <syscall name="madvise" number="28"/>
+ <syscall name="shmget" number="29"/>
+ <syscall name="shmat" number="30"/>
+ <syscall name="shmctl" number="31"/>
+ <syscall name="dup" number="32"/>
+ <syscall name="dup2" number="33"/>
+ <syscall name="pause" number="34"/>
+ <syscall name="nanosleep" number="35"/>
+ <syscall name="getitimer" number="36"/>
+ <syscall name="alarm" number="37"/>
+ <syscall name="setitimer" number="38"/>
+ <syscall name="getpid" number="39"/>
+ <syscall name="sendfile" number="40"/>
+ <syscall name="socket" number="41"/>
+ <syscall name="connect" number="42"/>
+ <syscall name="accept" number="43"/>
+ <syscall name="sendto" number="44"/>
+ <syscall name="recvfrom" number="45"/>
+ <syscall name="sendmsg" number="46"/>
+ <syscall name="recvmsg" number="47"/>
+ <syscall name="shutdown" number="48"/>
+ <syscall name="bind" number="49"/>
+ <syscall name="listen" number="50"/>
+ <syscall name="getsockname" number="51"/>
+ <syscall name="getpeername" number="52"/>
+ <syscall name="socketpair" number="53"/>
+ <syscall name="setsockopt" number="54"/>
+ <syscall name="getsockopt" number="55"/>
+ <syscall name="clone" number="56"/>
+ <syscall name="fork" number="57"/>
+ <syscall name="vfork" number="58"/>
+ <syscall name="execve" number="59"/>
+ <syscall name="exit" number="60"/>
+ <syscall name="wait4" number="61"/>
+ <syscall name="kill" number="62"/>
+ <syscall name="uname" number="63"/>
+ <syscall name="semget" number="64"/>
+ <syscall name="semop" number="65"/>
+ <syscall name="semctl" number="66"/>
+ <syscall name="shmdt" number="67"/>
+ <syscall name="msgget" number="68"/>
+ <syscall name="msgsnd" number="69"/>
+ <syscall name="msgrcv" number="70"/>
+ <syscall name="msgctl" number="71"/>
+ <syscall name="fcntl" number="72"/>
+ <syscall name="flock" number="73"/>
+ <syscall name="fsync" number="74"/>
+ <syscall name="fdatasync" number="75"/>
+ <syscall name="truncate" number="76"/>
+ <syscall name="ftruncate" number="77"/>
+ <syscall name="getdents" number="78"/>
+ <syscall name="getcwd" number="79"/>
+ <syscall name="chdir" number="80"/>
+ <syscall name="fchdir" number="81"/>
+ <syscall name="rename" number="82"/>
+ <syscall name="mkdir" number="83"/>
+ <syscall name="rmdir" number="84"/>
+ <syscall name="creat" number="85"/>
+ <syscall name="link" number="86"/>
+ <syscall name="unlink" number="87"/>
+ <syscall name="symlink" number="88"/>
+ <syscall name="readlink" number="89"/>
+ <syscall name="chmod" number="90"/>
+ <syscall name="fchmod" number="91"/>
+ <syscall name="chown" number="92"/>
+ <syscall name="fchown" number="93"/>
+ <syscall name="lchown" number="94"/>
+ <syscall name="umask" number="95"/>
+ <syscall name="gettimeofday" number="96"/>
+ <syscall name="getrlimit" number="97"/>
+ <syscall name="getrusage" number="98"/>
+ <syscall name="sysinfo" number="99"/>
+ <syscall name="times" number="100"/>
+ <syscall name="ptrace" number="101"/>
+ <syscall name="getuid" number="102"/>
+ <syscall name="syslog" number="103"/>
+ <syscall name="getgid" number="104"/>
+ <syscall name="setuid" number="105"/>
+ <syscall name="setgid" number="106"/>
+ <syscall name="geteuid" number="107"/>
+ <syscall name="getegid" number="108"/>
+ <syscall name="setpgid" number="109"/>
+ <syscall name="getppid" number="110"/>
+ <syscall name="getpgrp" number="111"/>
+ <syscall name="setsid" number="112"/>
+ <syscall name="setreuid" number="113"/>
+ <syscall name="setregid" number="114"/>
+ <syscall name="getgroups" number="115"/>
+ <syscall name="setgroups" number="116"/>
+ <syscall name="setresuid" number="117"/>
+ <syscall name="getresuid" number="118"/>
+ <syscall name="setresgid" number="119"/>
+ <syscall name="getresgid" number="120"/>
+ <syscall name="getpgid" number="121"/>
+ <syscall name="setfsuid" number="122"/>
+ <syscall name="setfsgid" number="123"/>
+ <syscall name="getsid" number="124"/>
+ <syscall name="capget" number="125"/>
+ <syscall name="capset" number="126"/>
+ <syscall name="rt_sigpending" number="127"/>
+ <syscall name="rt_sigtimedwait" number="128"/>
+ <syscall name="rt_sigqueueinfo" number="129"/>
+ <syscall name="rt_sigsuspend" number="130"/>
+ <syscall name="sigaltstack" number="131"/>
+ <syscall name="utime" number="132"/>
+ <syscall name="mknod" number="133"/>
+ <syscall name="uselib" number="134"/>
+ <syscall name="personality" number="135"/>
+ <syscall name="ustat" number="136"/>
+ <syscall name="statfs" number="137"/>
+ <syscall name="fstatfs" number="138"/>
+ <syscall name="sysfs" number="139"/>
+ <syscall name="getpriority" number="140"/>
+ <syscall name="setpriority" number="141"/>
+ <syscall name="sched_setparam" number="142"/>
+ <syscall name="sched_getparam" number="143"/>
+ <syscall name="sched_setscheduler" number="144"/>
+ <syscall name="sched_getscheduler" number="145"/>
+ <syscall name="sched_get_priority_max" number="146"/>
+ <syscall name="sched_get_priority_min" number="147"/>
+ <syscall name="sched_rr_get_interval" number="148"/>
+ <syscall name="mlock" number="149"/>
+ <syscall name="munlock" number="150"/>
+ <syscall name="mlockall" number="151"/>
+ <syscall name="munlockall" number="152"/>
+ <syscall name="vhangup" number="153"/>
+ <syscall name="modify_ldt" number="154"/>
+ <syscall name="pivot_root" number="155"/>
+ <syscall name="_sysctl" number="156"/>
+ <syscall name="prctl" number="157"/>
+ <syscall name="arch_prctl" number="158"/>
+ <syscall name="adjtimex" number="159"/>
+ <syscall name="setrlimit" number="160"/>
+ <syscall name="chroot" number="161"/>
+ <syscall name="sync" number="162"/>
+ <syscall name="acct" number="163"/>
+ <syscall name="settimeofday" number="164"/>
+ <syscall name="mount" number="165"/>
+ <syscall name="umount2" number="166"/>
+ <syscall name="swapon" number="167"/>
+ <syscall name="swapoff" number="168"/>
+ <syscall name="reboot" number="169"/>
+ <syscall name="sethostname" number="170"/>
+ <syscall name="setdomainname" number="171"/>
+ <syscall name="iopl" number="172"/>
+ <syscall name="ioperm" number="173"/>
+ <syscall name="create_module" number="174"/>
+ <syscall name="init_module" number="175"/>
+ <syscall name="delete_module" number="176"/>
+ <syscall name="get_kernel_syms" number="177"/>
+ <syscall name="query_module" number="178"/>
+ <syscall name="quotactl" number="179"/>
+ <syscall name="nfsservctl" number="180"/>
+ <syscall name="getpmsg" number="181"/>
+ <syscall name="putpmsg" number="182"/>
+ <syscall name="afs_syscall" number="183"/>
+ <syscall name="tuxcall" number="184"/>
+ <syscall name="security" number="185"/>
+ <syscall name="gettid" number="186"/>
+ <syscall name="readahead" number="187"/>
+ <syscall name="setxattr" number="188"/>
+ <syscall name="lsetxattr" number="189"/>
+ <syscall name="fsetxattr" number="190"/>
+ <syscall name="getxattr" number="191"/>
+ <syscall name="lgetxattr" number="192"/>
+ <syscall name="fgetxattr" number="193"/>
+ <syscall name="listxattr" number="194"/>
+ <syscall name="llistxattr" number="195"/>
+ <syscall name="flistxattr" number="196"/>
+ <syscall name="removexattr" number="197"/>
+ <syscall name="lremovexattr" number="198"/>
+ <syscall name="fremovexattr" number="199"/>
+ <syscall name="tkill" number="200"/>
+ <syscall name="time" number="201"/>
+ <syscall name="futex" number="202"/>
+ <syscall name="sched_setaffinity" number="203"/>
+ <syscall name="sched_getaffinity" number="204"/>
+ <syscall name="set_thread_area" number="205"/>
+ <syscall name="io_setup" number="206"/>
+ <syscall name="io_destroy" number="207"/>
+ <syscall name="io_getevents" number="208"/>
+ <syscall name="io_submit" number="209"/>
+ <syscall name="io_cancel" number="210"/>
+ <syscall name="get_thread_area" number="211"/>
+ <syscall name="lookup_dcookie" number="212"/>
+ <syscall name="epoll_create" number="213"/>
+ <syscall name="epoll_ctl_old" number="214"/>
+ <syscall name="epoll_wait_old" number="215"/>
+ <syscall name="remap_file_pages" number="216"/>
+ <syscall name="getdents64" number="217"/>
+ <syscall name="set_tid_address" number="218"/>
+ <syscall name="restart_syscall" number="219"/>
+ <syscall name="semtimedop" number="220"/>
+ <syscall name="fadvise64" number="221"/>
+ <syscall name="timer_create" number="222"/>
+ <syscall name="timer_settime" number="223"/>
+ <syscall name="timer_gettime" number="224"/>
+ <syscall name="timer_getoverrun" number="225"/>
+ <syscall name="timer_delete" number="226"/>
+ <syscall name="clock_settime" number="227"/>
+ <syscall name="clock_gettime" number="228"/>
+ <syscall name="clock_getres" number="229"/>
+ <syscall name="clock_nanosleep" number="230"/>
+ <syscall name="exit_group" number="231"/>
+ <syscall name="epoll_wait" number="232"/>
+ <syscall name="epoll_ctl" number="233"/>
+ <syscall name="tgkill" number="234"/>
+ <syscall name="utimes" number="235"/>
+ <syscall name="vserver" number="236"/>
+ <syscall name="mbind" number="237"/>
+ <syscall name="set_mempolicy" number="238"/>
+ <syscall name="get_mempolicy" number="239"/>
+ <syscall name="mq_open" number="240"/>
+ <syscall name="mq_unlink" number="241"/>
+ <syscall name="mq_timedsend" number="242"/>
+ <syscall name="mq_timedreceive" number="243"/>
+ <syscall name="mq_notify" number="244"/>
+ <syscall name="mq_getsetattr" number="245"/>
+ <syscall name="kexec_load" number="246"/>
+ <syscall name="waitid" number="247"/>
+ <syscall name="add_key" number="248"/>
+ <syscall name="request_key" number="249"/>
+ <syscall name="keyctl" number="250"/>
+ <syscall name="ioprio_set" number="251"/>
+ <syscall name="ioprio_get" number="252"/>
+ <syscall name="inotify_init" number="253"/>
+ <syscall name="inotify_add_watch" number="254"/>
+ <syscall name="inotify_rm_watch" number="255"/>
+ <syscall name="migrate_pages" number="256"/>
+ <syscall name="openat" number="257"/>
+ <syscall name="mkdirat" number="258"/>
+ <syscall name="mknodat" number="259"/>
+ <syscall name="fchownat" number="260"/>
+ <syscall name="futimesat" number="261"/>
+ <syscall name="newfstatat" number="262"/>
+ <syscall name="unlinkat" number="263"/>
+ <syscall name="renameat" number="264"/>
+ <syscall name="linkat" number="265"/>
+ <syscall name="symlinkat" number="266"/>
+ <syscall name="readlinkat" number="267"/>
+ <syscall name="fchmodat" number="268"/>
+ <syscall name="faccessat" number="269"/>
+ <syscall name="pselect6" number="270"/>
+ <syscall name="ppoll" number="271"/>
+ <syscall name="unshare" number="272"/>
+ <syscall name="set_robust_list" number="273"/>
+ <syscall name="get_robust_list" number="274"/>
+ <syscall name="splice" number="275"/>
+ <syscall name="tee" number="276"/>
+ <syscall name="sync_file_range" number="277"/>
+ <syscall name="vmsplice" number="278"/>
+ <syscall name="move_pages" number="279"/>
+ <syscall name="utimensat" number="280"/>
+ <syscall name="epoll_pwait" number="281"/>
+ <syscall name="signalfd" number="282"/>
+ <syscall name="timerfd_create" number="283"/>
+ <syscall name="eventfd" number="284"/>
+ <syscall name="fallocate" number="285"/>
+ <syscall name="timerfd_settime" number="286"/>
+ <syscall name="timerfd_gettime" number="287"/>
+ <syscall name="accept4" number="288"/>
+ <syscall name="signalfd4" number="289"/>
+ <syscall name="eventfd2" number="290"/>
+ <syscall name="epoll_create1" number="291"/>
+ <syscall name="dup3" number="292"/>
+ <syscall name="pipe2" number="293"/>
+ <syscall name="inotify_init1" number="294"/>
+ <syscall name="preadv" number="295"/>
+ <syscall name="pwritev" number="296"/>
+</syscalls_info>
diff --git a/share/gdb/syscalls/gdb-syscalls.dtd b/share/gdb/syscalls/gdb-syscalls.dtd
new file mode 100644
index 0000000..05c1ccf
--- /dev/null
+++ b/share/gdb/syscalls/gdb-syscalls.dtd
@@ -0,0 +1,14 @@
+<!-- Copyright (C) 2009-2013 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!-- The root element of a syscall info is <syscalls_info>. -->
+
+<!ELEMENT syscalls_info (syscall*)>
+
+<!ELEMENT syscall EMPTY>
+<!ATTLIST syscall
+ name CDATA #REQUIRED
+ number CDATA #REQUIRED>
diff --git a/share/gdb/syscalls/i386-linux.xml b/share/gdb/syscalls/i386-linux.xml
new file mode 100644
index 0000000..80512d8
--- /dev/null
+++ b/share/gdb/syscalls/i386-linux.xml
@@ -0,0 +1,340 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2009-2013 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE syscalls_info SYSTEM "gdb-syscalls.dtd">
+
+<!-- This file was generated using the following file:
+
+ /usr/src/linux/arch/x86/include/asm/unistd_32.h
+
+ The file mentioned above belongs to the Linux Kernel. -->
+
+<syscalls_info>
+ <syscall name="restart_syscall" number="0"/>
+ <syscall name="exit" number="1"/>
+ <syscall name="fork" number="2"/>
+ <syscall name="read" number="3"/>
+ <syscall name="write" number="4"/>
+ <syscall name="open" number="5"/>
+ <syscall name="close" number="6"/>
+ <syscall name="waitpid" number="7"/>
+ <syscall name="creat" number="8"/>
+ <syscall name="link" number="9"/>
+ <syscall name="unlink" number="10"/>
+ <syscall name="execve" number="11"/>
+ <syscall name="chdir" number="12"/>
+ <syscall name="time" number="13"/>
+ <syscall name="mknod" number="14"/>
+ <syscall name="chmod" number="15"/>
+ <syscall name="lchown" number="16"/>
+ <syscall name="break" number="17"/>
+ <syscall name="oldstat" number="18"/>
+ <syscall name="lseek" number="19"/>
+ <syscall name="getpid" number="20"/>
+ <syscall name="mount" number="21"/>
+ <syscall name="umount" number="22"/>
+ <syscall name="setuid" number="23"/>
+ <syscall name="getuid" number="24"/>
+ <syscall name="stime" number="25"/>
+ <syscall name="ptrace" number="26"/>
+ <syscall name="alarm" number="27"/>
+ <syscall name="oldfstat" number="28"/>
+ <syscall name="pause" number="29"/>
+ <syscall name="utime" number="30"/>
+ <syscall name="stty" number="31"/>
+ <syscall name="gtty" number="32"/>
+ <syscall name="access" number="33"/>
+ <syscall name="nice" number="34"/>
+ <syscall name="ftime" number="35"/>
+ <syscall name="sync" number="36"/>
+ <syscall name="kill" number="37"/>
+ <syscall name="rename" number="38"/>
+ <syscall name="mkdir" number="39"/>
+ <syscall name="rmdir" number="40"/>
+ <syscall name="dup" number="41"/>
+ <syscall name="pipe" number="42"/>
+ <syscall name="times" number="43"/>
+ <syscall name="prof" number="44"/>
+ <syscall name="brk" number="45"/>
+ <syscall name="setgid" number="46"/>
+ <syscall name="getgid" number="47"/>
+ <syscall name="signal" number="48"/>
+ <syscall name="geteuid" number="49"/>
+ <syscall name="getegid" number="50"/>
+ <syscall name="acct" number="51"/>
+ <syscall name="umount2" number="52"/>
+ <syscall name="lock" number="53"/>
+ <syscall name="ioctl" number="54"/>
+ <syscall name="fcntl" number="55"/>
+ <syscall name="mpx" number="56"/>
+ <syscall name="setpgid" number="57"/>
+ <syscall name="ulimit" number="58"/>
+ <syscall name="oldolduname" number="59"/>
+ <syscall name="umask" number="60"/>
+ <syscall name="chroot" number="61"/>
+ <syscall name="ustat" number="62"/>
+ <syscall name="dup2" number="63"/>
+ <syscall name="getppid" number="64"/>
+ <syscall name="getpgrp" number="65"/>
+ <syscall name="setsid" number="66"/>
+ <syscall name="sigaction" number="67"/>
+ <syscall name="sgetmask" number="68"/>
+ <syscall name="ssetmask" number="69"/>
+ <syscall name="setreuid" number="70"/>
+ <syscall name="setregid" number="71"/>
+ <syscall name="sigsuspend" number="72"/>
+ <syscall name="sigpending" number="73"/>
+ <syscall name="sethostname" number="74"/>
+ <syscall name="setrlimit" number="75"/>
+ <syscall name="getrlimit" number="76"/>
+ <syscall name="getrusage" number="77"/>
+ <syscall name="gettimeofday" number="78"/>
+ <syscall name="settimeofday" number="79"/>
+ <syscall name="getgroups" number="80"/>
+ <syscall name="setgroups" number="81"/>
+ <syscall name="select" number="82"/>
+ <syscall name="symlink" number="83"/>
+ <syscall name="oldlstat" number="84"/>
+ <syscall name="readlink" number="85"/>
+ <syscall name="uselib" number="86"/>
+ <syscall name="swapon" number="87"/>
+ <syscall name="reboot" number="88"/>
+ <syscall name="readdir" number="89"/>
+ <syscall name="mmap" number="90"/>
+ <syscall name="munmap" number="91"/>
+ <syscall name="truncate" number="92"/>
+ <syscall name="ftruncate" number="93"/>
+ <syscall name="fchmod" number="94"/>
+ <syscall name="fchown" number="95"/>
+ <syscall name="getpriority" number="96"/>
+ <syscall name="setpriority" number="97"/>
+ <syscall name="profil" number="98"/>
+ <syscall name="statfs" number="99"/>
+ <syscall name="fstatfs" number="100"/>
+ <syscall name="ioperm" number="101"/>
+ <syscall name="socketcall" number="102"/>
+ <syscall name="syslog" number="103"/>
+ <syscall name="setitimer" number="104"/>
+ <syscall name="getitimer" number="105"/>
+ <syscall name="stat" number="106"/>
+ <syscall name="lstat" number="107"/>
+ <syscall name="fstat" number="108"/>
+ <syscall name="olduname" number="109"/>
+ <syscall name="iopl" number="110"/>
+ <syscall name="vhangup" number="111"/>
+ <syscall name="idle" number="112"/>
+ <syscall name="vm86old" number="113"/>
+ <syscall name="wait4" number="114"/>
+ <syscall name="swapoff" number="115"/>
+ <syscall name="sysinfo" number="116"/>
+ <syscall name="ipc" number="117"/>
+ <syscall name="fsync" number="118"/>
+ <syscall name="sigreturn" number="119"/>
+ <syscall name="clone" number="120"/>
+ <syscall name="setdomainname" number="121"/>
+ <syscall name="uname" number="122"/>
+ <syscall name="modify_ldt" number="123"/>
+ <syscall name="adjtimex" number="124"/>
+ <syscall name="mprotect" number="125"/>
+ <syscall name="sigprocmask" number="126"/>
+ <syscall name="create_module" number="127"/>
+ <syscall name="init_module" number="128"/>
+ <syscall name="delete_module" number="129"/>
+ <syscall name="get_kernel_syms" number="130"/>
+ <syscall name="quotactl" number="131"/>
+ <syscall name="getpgid" number="132"/>
+ <syscall name="fchdir" number="133"/>
+ <syscall name="bdflush" number="134"/>
+ <syscall name="sysfs" number="135"/>
+ <syscall name="personality" number="136"/>
+ <syscall name="afs_syscall" number="137"/>
+ <syscall name="setfsuid" number="138"/>
+ <syscall name="setfsgid" number="139"/>
+ <syscall name="_llseek" number="140"/>
+ <syscall name="getdents" number="141"/>
+ <syscall name="_newselect" number="142"/>
+ <syscall name="flock" number="143"/>
+ <syscall name="msync" number="144"/>
+ <syscall name="readv" number="145"/>
+ <syscall name="writev" number="146"/>
+ <syscall name="getsid" number="147"/>
+ <syscall name="fdatasync" number="148"/>
+ <syscall name="_sysctl" number="149"/>
+ <syscall name="mlock" number="150"/>
+ <syscall name="munlock" number="151"/>
+ <syscall name="mlockall" number="152"/>
+ <syscall name="munlockall" number="153"/>
+ <syscall name="sched_setparam" number="154"/>
+ <syscall name="sched_getparam" number="155"/>
+ <syscall name="sched_setscheduler" number="156"/>
+ <syscall name="sched_getscheduler" number="157"/>
+ <syscall name="sched_yield" number="158"/>
+ <syscall name="sched_get_priority_max" number="159"/>
+ <syscall name="sched_get_priority_min" number="160"/>
+ <syscall name="sched_rr_get_interval" number="161"/>
+ <syscall name="nanosleep" number="162"/>
+ <syscall name="mremap" number="163"/>
+ <syscall name="setresuid" number="164"/>
+ <syscall name="getresuid" number="165"/>
+ <syscall name="vm86" number="166"/>
+ <syscall name="query_module" number="167"/>
+ <syscall name="poll" number="168"/>
+ <syscall name="nfsservctl" number="169"/>
+ <syscall name="setresgid" number="170"/>
+ <syscall name="getresgid" number="171"/>
+ <syscall name="prctl" number="172"/>
+ <syscall name="rt_sigreturn" number="173"/>
+ <syscall name="rt_sigaction" number="174"/>
+ <syscall name="rt_sigprocmask" number="175"/>
+ <syscall name="rt_sigpending" number="176"/>
+ <syscall name="rt_sigtimedwait" number="177"/>
+ <syscall name="rt_sigqueueinfo" number="178"/>
+ <syscall name="rt_sigsuspend" number="179"/>
+ <syscall name="pread64" number="180"/>
+ <syscall name="pwrite64" number="181"/>
+ <syscall name="chown" number="182"/>
+ <syscall name="getcwd" number="183"/>
+ <syscall name="capget" number="184"/>
+ <syscall name="capset" number="185"/>
+ <syscall name="sigaltstack" number="186"/>
+ <syscall name="sendfile" number="187"/>
+ <syscall name="getpmsg" number="188"/>
+ <syscall name="putpmsg" number="189"/>
+ <syscall name="vfork" number="190"/>
+ <syscall name="ugetrlimit" number="191"/>
+ <syscall name="mmap2" number="192"/>
+ <syscall name="truncate64" number="193"/>
+ <syscall name="ftruncate64" number="194"/>
+ <syscall name="stat64" number="195"/>
+ <syscall name="lstat64" number="196"/>
+ <syscall name="fstat64" number="197"/>
+ <syscall name="lchown32" number="198"/>
+ <syscall name="getuid32" number="199"/>
+ <syscall name="getgid32" number="200"/>
+ <syscall name="geteuid32" number="201"/>
+ <syscall name="getegid32" number="202"/>
+ <syscall name="setreuid32" number="203"/>
+ <syscall name="setregid32" number="204"/>
+ <syscall name="getgroups32" number="205"/>
+ <syscall name="setgroups32" number="206"/>
+ <syscall name="fchown32" number="207"/>
+ <syscall name="setresuid32" number="208"/>
+ <syscall name="getresuid32" number="209"/>
+ <syscall name="setresgid32" number="210"/>
+ <syscall name="getresgid32" number="211"/>
+ <syscall name="chown32" number="212"/>
+ <syscall name="setuid32" number="213"/>
+ <syscall name="setgid32" number="214"/>
+ <syscall name="setfsuid32" number="215"/>
+ <syscall name="setfsgid32" number="216"/>
+ <syscall name="pivot_root" number="217"/>
+ <syscall name="mincore" number="218"/>
+ <syscall name="madvise" number="219"/>
+ <syscall name="madvise1" number="220"/>
+ <syscall name="getdents64" number="221"/>
+ <syscall name="fcntl64" number="222"/>
+ <syscall name="gettid" number="224"/>
+ <syscall name="readahead" number="225"/>
+ <syscall name="setxattr" number="226"/>
+ <syscall name="lsetxattr" number="227"/>
+ <syscall name="fsetxattr" number="228"/>
+ <syscall name="getxattr" number="229"/>
+ <syscall name="lgetxattr" number="230"/>
+ <syscall name="fgetxattr" number="231"/>
+ <syscall name="listxattr" number="232"/>
+ <syscall name="llistxattr" number="233"/>
+ <syscall name="flistxattr" number="234"/>
+ <syscall name="removexattr" number="235"/>
+ <syscall name="lremovexattr" number="236"/>
+ <syscall name="fremovexattr" number="237"/>
+ <syscall name="tkill" number="238"/>
+ <syscall name="sendfile64" number="239"/>
+ <syscall name="futex" number="240"/>
+ <syscall name="sched_setaffinity" number="241"/>
+ <syscall name="sched_getaffinity" number="242"/>
+ <syscall name="set_thread_area" number="243"/>
+ <syscall name="get_thread_area" number="244"/>
+ <syscall name="io_setup" number="245"/>
+ <syscall name="io_destroy" number="246"/>
+ <syscall name="io_getevents" number="247"/>
+ <syscall name="io_submit" number="248"/>
+ <syscall name="io_cancel" number="249"/>
+ <syscall name="fadvise64" number="250"/>
+ <syscall name="exit_group" number="252"/>
+ <syscall name="lookup_dcookie" number="253"/>
+ <syscall name="epoll_create" number="254"/>
+ <syscall name="epoll_ctl" number="255"/>
+ <syscall name="epoll_wait" number="256"/>
+ <syscall name="remap_file_pages" number="257"/>
+ <syscall name="set_tid_address" number="258"/>
+ <syscall name="timer_create" number="259"/>
+ <syscall name="timer_settime" number="260"/>
+ <syscall name="timer_gettime" number="261"/>
+ <syscall name="timer_getoverrun" number="262"/>
+ <syscall name="timer_delete" number="263"/>
+ <syscall name="clock_settime" number="264"/>
+ <syscall name="clock_gettime" number="265"/>
+ <syscall name="clock_getres" number="266"/>
+ <syscall name="clock_nanosleep" number="267"/>
+ <syscall name="statfs64" number="268"/>
+ <syscall name="fstatfs64" number="269"/>
+ <syscall name="tgkill" number="270"/>
+ <syscall name="utimes" number="271"/>
+ <syscall name="fadvise64_64" number="272"/>
+ <syscall name="vserver" number="273"/>
+ <syscall name="mbind" number="274"/>
+ <syscall name="get_mempolicy" number="275"/>
+ <syscall name="set_mempolicy" number="276"/>
+ <syscall name="mq_open" number="277"/>
+ <syscall name="mq_unlink" number="278"/>
+ <syscall name="mq_timedsend" number="279"/>
+ <syscall name="mq_timedreceive" number="280"/>
+ <syscall name="mq_notify" number="281"/>
+ <syscall name="mq_getsetattr" number="282"/>
+ <syscall name="kexec_load" number="283"/>
+ <syscall name="waitid" number="284"/>
+ <syscall name="add_key" number="286"/>
+ <syscall name="request_key" number="287"/>
+ <syscall name="keyctl" number="288"/>
+ <syscall name="ioprio_set" number="289"/>
+ <syscall name="ioprio_get" number="290"/>
+ <syscall name="inotify_init" number="291"/>
+ <syscall name="inotify_add_watch" number="292"/>
+ <syscall name="inotify_rm_watch" number="293"/>
+ <syscall name="migrate_pages" number="294"/>
+ <syscall name="openat" number="295"/>
+ <syscall name="mkdirat" number="296"/>
+ <syscall name="mknodat" number="297"/>
+ <syscall name="fchownat" number="298"/>
+ <syscall name="futimesat" number="299"/>
+ <syscall name="fstatat64" number="300"/>
+ <syscall name="unlinkat" number="301"/>
+ <syscall name="renameat" number="302"/>
+ <syscall name="linkat" number="303"/>
+ <syscall name="symlinkat" number="304"/>
+ <syscall name="readlinkat" number="305"/>
+ <syscall name="fchmodat" number="306"/>
+ <syscall name="faccessat" number="307"/>
+ <syscall name="pselect6" number="308"/>
+ <syscall name="ppoll" number="309"/>
+ <syscall name="unshare" number="310"/>
+ <syscall name="set_robust_list" number="311"/>
+ <syscall name="get_robust_list" number="312"/>
+ <syscall name="splice" number="313"/>
+ <syscall name="sync_file_range" number="314"/>
+ <syscall name="tee" number="315"/>
+ <syscall name="vmsplice" number="316"/>
+ <syscall name="move_pages" number="317"/>
+ <syscall name="getcpu" number="318"/>
+ <syscall name="epoll_pwait" number="319"/>
+ <syscall name="utimensat" number="320"/>
+ <syscall name="signalfd" number="321"/>
+ <syscall name="timerfd_create" number="322"/>
+ <syscall name="eventfd" number="323"/>
+ <syscall name="fallocate" number="324"/>
+ <syscall name="timerfd_settime" number="325"/>
+</syscalls_info>
diff --git a/share/gdb/syscalls/mips-n32-linux.xml b/share/gdb/syscalls/mips-n32-linux.xml
new file mode 100644
index 0000000..b4e2181
--- /dev/null
+++ b/share/gdb/syscalls/mips-n32-linux.xml
@@ -0,0 +1,319 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2011-2013 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE syscalls_info SYSTEM "gdb-syscalls.dtd">
+
+<!-- This file was generated using the following file:
+
+ /usr/src/linux/arch/mips/include/asm/unistd.h
+
+ The file mentioned above belongs to the Linux Kernel. -->
+
+<syscalls_info>
+ <syscall name="read" number="6000"/>
+ <syscall name="write" number="6001"/>
+ <syscall name="open" number="6002"/>
+ <syscall name="close" number="6003"/>
+ <syscall name="stat" number="6004"/>
+ <syscall name="fstat" number="6005"/>
+ <syscall name="lstat" number="6006"/>
+ <syscall name="poll" number="6007"/>
+ <syscall name="lseek" number="6008"/>
+ <syscall name="mmap" number="6009"/>
+ <syscall name="mprotect" number="6010"/>
+ <syscall name="munmap" number="6011"/>
+ <syscall name="brk" number="6012"/>
+ <syscall name="rt_sigaction" number="6013"/>
+ <syscall name="rt_sigprocmask" number="6014"/>
+ <syscall name="ioctl" number="6015"/>
+ <syscall name="pread64" number="6016"/>
+ <syscall name="pwrite64" number="6017"/>
+ <syscall name="readv" number="6018"/>
+ <syscall name="writev" number="6019"/>
+ <syscall name="access" number="6020"/>
+ <syscall name="pipe" number="6021"/>
+ <syscall name="_newselect" number="6022"/>
+ <syscall name="sched_yield" number="6023"/>
+ <syscall name="mremap" number="6024"/>
+ <syscall name="msync" number="6025"/>
+ <syscall name="mincore" number="6026"/>
+ <syscall name="madvise" number="6027"/>
+ <syscall name="shmget" number="6028"/>
+ <syscall name="shmat" number="6029"/>
+ <syscall name="shmctl" number="6030"/>
+ <syscall name="dup" number="6031"/>
+ <syscall name="dup2" number="6032"/>
+ <syscall name="pause" number="6033"/>
+ <syscall name="nanosleep" number="6034"/>
+ <syscall name="getitimer" number="6035"/>
+ <syscall name="setitimer" number="6036"/>
+ <syscall name="alarm" number="6037"/>
+ <syscall name="getpid" number="6038"/>
+ <syscall name="sendfile" number="6039"/>
+ <syscall name="socket" number="6040"/>
+ <syscall name="connect" number="6041"/>
+ <syscall name="accept" number="6042"/>
+ <syscall name="sendto" number="6043"/>
+ <syscall name="recvfrom" number="6044"/>
+ <syscall name="sendmsg" number="6045"/>
+ <syscall name="recvmsg" number="6046"/>
+ <syscall name="shutdown" number="6047"/>
+ <syscall name="bind" number="6048"/>
+ <syscall name="listen" number="6049"/>
+ <syscall name="getsockname" number="6050"/>
+ <syscall name="getpeername" number="6051"/>
+ <syscall name="socketpair" number="6052"/>
+ <syscall name="setsockopt" number="6053"/>
+ <syscall name="getsockopt" number="6054"/>
+ <syscall name="clone" number="6055"/>
+ <syscall name="fork" number="6056"/>
+ <syscall name="execve" number="6057"/>
+ <syscall name="exit" number="6058"/>
+ <syscall name="wait4" number="6059"/>
+ <syscall name="kill" number="6060"/>
+ <syscall name="uname" number="6061"/>
+ <syscall name="semget" number="6062"/>
+ <syscall name="semop" number="6063"/>
+ <syscall name="semctl" number="6064"/>
+ <syscall name="shmdt" number="6065"/>
+ <syscall name="msgget" number="6066"/>
+ <syscall name="msgsnd" number="6067"/>
+ <syscall name="msgrcv" number="6068"/>
+ <syscall name="msgctl" number="6069"/>
+ <syscall name="fcntl" number="6070"/>
+ <syscall name="flock" number="6071"/>
+ <syscall name="fsync" number="6072"/>
+ <syscall name="fdatasync" number="6073"/>
+ <syscall name="truncate" number="6074"/>
+ <syscall name="ftruncate" number="6075"/>
+ <syscall name="getdents" number="6076"/>
+ <syscall name="getcwd" number="6077"/>
+ <syscall name="chdir" number="6078"/>
+ <syscall name="fchdir" number="6079"/>
+ <syscall name="rename" number="6080"/>
+ <syscall name="mkdir" number="6081"/>
+ <syscall name="rmdir" number="6082"/>
+ <syscall name="creat" number="6083"/>
+ <syscall name="link" number="6084"/>
+ <syscall name="unlink" number="6085"/>
+ <syscall name="symlink" number="6086"/>
+ <syscall name="readlink" number="6087"/>
+ <syscall name="chmod" number="6088"/>
+ <syscall name="fchmod" number="6089"/>
+ <syscall name="chown" number="6090"/>
+ <syscall name="fchown" number="6091"/>
+ <syscall name="lchown" number="6092"/>
+ <syscall name="umask" number="6093"/>
+ <syscall name="gettimeofday" number="6094"/>
+ <syscall name="getrlimit" number="6095"/>
+ <syscall name="getrusage" number="6096"/>
+ <syscall name="sysinfo" number="6097"/>
+ <syscall name="times" number="6098"/>
+ <syscall name="ptrace" number="6099"/>
+ <syscall name="getuid" number="6100"/>
+ <syscall name="syslog" number="6101"/>
+ <syscall name="getgid" number="6102"/>
+ <syscall name="setuid" number="6103"/>
+ <syscall name="setgid" number="6104"/>
+ <syscall name="geteuid" number="6105"/>
+ <syscall name="getegid" number="6106"/>
+ <syscall name="setpgid" number="6107"/>
+ <syscall name="getppid" number="6108"/>
+ <syscall name="getpgrp" number="6109"/>
+ <syscall name="setsid" number="6110"/>
+ <syscall name="setreuid" number="6111"/>
+ <syscall name="setregid" number="6112"/>
+ <syscall name="getgroups" number="6113"/>
+ <syscall name="setgroups" number="6114"/>
+ <syscall name="setresuid" number="6115"/>
+ <syscall name="getresuid" number="6116"/>
+ <syscall name="setresgid" number="6117"/>
+ <syscall name="getresgid" number="6118"/>
+ <syscall name="getpgid" number="6119"/>
+ <syscall name="setfsuid" number="6120"/>
+ <syscall name="setfsgid" number="6121"/>
+ <syscall name="getsid" number="6122"/>
+ <syscall name="capget" number="6123"/>
+ <syscall name="capset" number="6124"/>
+ <syscall name="rt_sigpending" number="6125"/>
+ <syscall name="rt_sigtimedwait" number="6126"/>
+ <syscall name="rt_sigqueueinfo" number="6127"/>
+ <syscall name="rt_sigsuspend" number="6128"/>
+ <syscall name="sigaltstack" number="6129"/>
+ <syscall name="utime" number="6130"/>
+ <syscall name="mknod" number="6131"/>
+ <syscall name="personality" number="6132"/>
+ <syscall name="ustat" number="6133"/>
+ <syscall name="statfs" number="6134"/>
+ <syscall name="fstatfs" number="6135"/>
+ <syscall name="sysfs" number="6136"/>
+ <syscall name="getpriority" number="6137"/>
+ <syscall name="setpriority" number="6138"/>
+ <syscall name="sched_setparam" number="6139"/>
+ <syscall name="sched_getparam" number="6140"/>
+ <syscall name="sched_setscheduler" number="6141"/>
+ <syscall name="sched_getscheduler" number="6142"/>
+ <syscall name="sched_get_priority_max" number="6143"/>
+ <syscall name="sched_get_priority_min" number="6144"/>
+ <syscall name="sched_rr_get_interval" number="6145"/>
+ <syscall name="mlock" number="6146"/>
+ <syscall name="munlock" number="6147"/>
+ <syscall name="mlockall" number="6148"/>
+ <syscall name="munlockall" number="6149"/>
+ <syscall name="vhangup" number="6150"/>
+ <syscall name="pivot_root" number="6151"/>
+ <syscall name="_sysctl" number="6152"/>
+ <syscall name="prctl" number="6153"/>
+ <syscall name="adjtimex" number="6154"/>
+ <syscall name="setrlimit" number="6155"/>
+ <syscall name="chroot" number="6156"/>
+ <syscall name="sync" number="6157"/>
+ <syscall name="acct" number="6158"/>
+ <syscall name="settimeofday" number="6159"/>
+ <syscall name="mount" number="6160"/>
+ <syscall name="umount2" number="6161"/>
+ <syscall name="swapon" number="6162"/>
+ <syscall name="swapoff" number="6163"/>
+ <syscall name="reboot" number="6164"/>
+ <syscall name="sethostname" number="6165"/>
+ <syscall name="setdomainname" number="6166"/>
+ <syscall name="create_module" number="6167"/>
+ <syscall name="init_module" number="6168"/>
+ <syscall name="delete_module" number="6169"/>
+ <syscall name="get_kernel_syms" number="6170"/>
+ <syscall name="query_module" number="6171"/>
+ <syscall name="quotactl" number="6172"/>
+ <syscall name="nfsservctl" number="6173"/>
+ <syscall name="getpmsg" number="6174"/>
+ <syscall name="putpmsg" number="6175"/>
+ <syscall name="afs_syscall" number="6176"/>
+ <syscall name="reserved177" number="6177"/>
+ <syscall name="gettid" number="6178"/>
+ <syscall name="readahead" number="6179"/>
+ <syscall name="setxattr" number="6180"/>
+ <syscall name="lsetxattr" number="6181"/>
+ <syscall name="fsetxattr" number="6182"/>
+ <syscall name="getxattr" number="6183"/>
+ <syscall name="lgetxattr" number="6184"/>
+ <syscall name="fgetxattr" number="6185"/>
+ <syscall name="listxattr" number="6186"/>
+ <syscall name="llistxattr" number="6187"/>
+ <syscall name="flistxattr" number="6188"/>
+ <syscall name="removexattr" number="6189"/>
+ <syscall name="lremovexattr" number="6190"/>
+ <syscall name="fremovexattr" number="6191"/>
+ <syscall name="tkill" number="6192"/>
+ <syscall name="reserved193" number="6193"/>
+ <syscall name="futex" number="6194"/>
+ <syscall name="sched_setaffinity" number="6195"/>
+ <syscall name="sched_getaffinity" number="6196"/>
+ <syscall name="cacheflush" number="6197"/>
+ <syscall name="cachectl" number="6198"/>
+ <syscall name="sysmips" number="6199"/>
+ <syscall name="io_setup" number="6200"/>
+ <syscall name="io_destroy" number="6201"/>
+ <syscall name="io_getevents" number="6202"/>
+ <syscall name="io_submit" number="6203"/>
+ <syscall name="io_cancel" number="6204"/>
+ <syscall name="exit_group" number="6205"/>
+ <syscall name="lookup_dcookie" number="6206"/>
+ <syscall name="epoll_create" number="6207"/>
+ <syscall name="epoll_ctl" number="6208"/>
+ <syscall name="epoll_wait" number="6209"/>
+ <syscall name="remap_file_pages" number="6210"/>
+ <syscall name="rt_sigreturn" number="6211"/>
+ <syscall name="fcntl64" number="6212"/>
+ <syscall name="set_tid_address" number="6213"/>
+ <syscall name="restart_syscall" number="6214"/>
+ <syscall name="semtimedop" number="6215"/>
+ <syscall name="fadvise64" number="6216"/>
+ <syscall name="statfs64" number="6217"/>
+ <syscall name="fstatfs64" number="6218"/>
+ <syscall name="sendfile64" number="6219"/>
+ <syscall name="timer_create" number="6220"/>
+ <syscall name="timer_settime" number="6221"/>
+ <syscall name="timer_gettime" number="6222"/>
+ <syscall name="timer_getoverrun" number="6223"/>
+ <syscall name="timer_delete" number="6224"/>
+ <syscall name="clock_settime" number="6225"/>
+ <syscall name="clock_gettime" number="6226"/>
+ <syscall name="clock_getres" number="6227"/>
+ <syscall name="clock_nanosleep" number="6228"/>
+ <syscall name="tgkill" number="6229"/>
+ <syscall name="utimes" number="6230"/>
+ <syscall name="mbind" number="6231"/>
+ <syscall name="get_mempolicy" number="6232"/>
+ <syscall name="set_mempolicy" number="6233"/>
+ <syscall name="mq_open" number="6234"/>
+ <syscall name="mq_unlink" number="6235"/>
+ <syscall name="mq_timedsend" number="6236"/>
+ <syscall name="mq_timedreceive" number="6237"/>
+ <syscall name="mq_notify" number="6238"/>
+ <syscall name="mq_getsetattr" number="6239"/>
+ <syscall name="vserver" number="6240"/>
+ <syscall name="waitid" number="6241"/>
+ <syscall name="add_key" number="6243"/>
+ <syscall name="request_key" number="6244"/>
+ <syscall name="keyctl" number="6245"/>
+ <syscall name="set_thread_area" number="6246"/>
+ <syscall name="inotify_init" number="6247"/>
+ <syscall name="inotify_add_watch" number="6248"/>
+ <syscall name="inotify_rm_watch" number="6249"/>
+ <syscall name="migrate_pages" number="6250"/>
+ <syscall name="openat" number="6251"/>
+ <syscall name="mkdirat" number="6252"/>
+ <syscall name="mknodat" number="6253"/>
+ <syscall name="fchownat" number="6254"/>
+ <syscall name="futimesat" number="6255"/>
+ <syscall name="newfstatat" number="6256"/>
+ <syscall name="unlinkat" number="6257"/>
+ <syscall name="renameat" number="6258"/>
+ <syscall name="linkat" number="6259"/>
+ <syscall name="symlinkat" number="6260"/>
+ <syscall name="readlinkat" number="6261"/>
+ <syscall name="fchmodat" number="6262"/>
+ <syscall name="faccessat" number="6263"/>
+ <syscall name="pselect6" number="6264"/>
+ <syscall name="ppoll" number="6265"/>
+ <syscall name="unshare" number="6266"/>
+ <syscall name="splice" number="6267"/>
+ <syscall name="sync_file_range" number="6268"/>
+ <syscall name="tee" number="6269"/>
+ <syscall name="vmsplice" number="6270"/>
+ <syscall name="move_pages" number="6271"/>
+ <syscall name="set_robust_list" number="6272"/>
+ <syscall name="get_robust_list" number="6273"/>
+ <syscall name="kexec_load" number="6274"/>
+ <syscall name="getcpu" number="6275"/>
+ <syscall name="epoll_pwait" number="6276"/>
+ <syscall name="ioprio_set" number="6277"/>
+ <syscall name="ioprio_get" number="6278"/>
+ <syscall name="utimensat" number="6279"/>
+ <syscall name="signalfd" number="6280"/>
+ <syscall name="timerfd" number="6281"/>
+ <syscall name="eventfd" number="6282"/>
+ <syscall name="fallocate" number="6283"/>
+ <syscall name="timerfd_create" number="6284"/>
+ <syscall name="timerfd_gettime" number="6285"/>
+ <syscall name="timerfd_settime" number="6286"/>
+ <syscall name="signalfd4" number="6287"/>
+ <syscall name="eventfd2" number="6288"/>
+ <syscall name="epoll_create1" number="6289"/>
+ <syscall name="dup3" number="6290"/>
+ <syscall name="pipe2" number="6291"/>
+ <syscall name="inotify_init1" number="6292"/>
+ <syscall name="preadv" number="6293"/>
+ <syscall name="pwritev" number="6294"/>
+ <syscall name="rt_tgsigqueueinfo" number="6295"/>
+ <syscall name="perf_event_open" number="6296"/>
+ <syscall name="accept4" number="6297"/>
+ <syscall name="recvmmsg" number="6298"/>
+ <syscall name="getdents64" number="6299"/>
+ <syscall name="fanotify_init" number="6300"/>
+ <syscall name="fanotify_mark" number="6301"/>
+ <syscall name="prlimit64" number="6302"/>
+</syscalls_info>
diff --git a/share/gdb/syscalls/mips-n64-linux.xml b/share/gdb/syscalls/mips-n64-linux.xml
new file mode 100644
index 0000000..896e0c0
--- /dev/null
+++ b/share/gdb/syscalls/mips-n64-linux.xml
@@ -0,0 +1,312 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2011-2013 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-syscalls.dtd">
+
+<!-- This file was generated using the following file:
+
+ /usr/src/linux/arch/mips/include/asm/unistd.h
+
+ The file mentioned above belongs to the Linux Kernel. -->
+
+<syscalls_info>
+ <syscall name="read" number="5000"/>
+ <syscall name="write" number="5001"/>
+ <syscall name="open" number="5002"/>
+ <syscall name="close" number="5003"/>
+ <syscall name="stat" number="5004"/>
+ <syscall name="fstat" number="5005"/>
+ <syscall name="lstat" number="5006"/>
+ <syscall name="poll" number="5007"/>
+ <syscall name="lseek" number="5008"/>
+ <syscall name="mmap" number="5009"/>
+ <syscall name="mprotect" number="5010"/>
+ <syscall name="munmap" number="5011"/>
+ <syscall name="brk" number="5012"/>
+ <syscall name="rt_sigaction" number="5013"/>
+ <syscall name="rt_sigprocmask" number="5014"/>
+ <syscall name="ioctl" number="5015"/>
+ <syscall name="pread64" number="5016"/>
+ <syscall name="pwrite64" number="5017"/>
+ <syscall name="readv" number="5018"/>
+ <syscall name="writev" number="5019"/>
+ <syscall name="access" number="5020"/>
+ <syscall name="pipe" number="5021"/>
+ <syscall name="_newselect" number="5022"/>
+ <syscall name="sched_yield" number="5023"/>
+ <syscall name="mremap" number="5024"/>
+ <syscall name="msync" number="5025"/>
+ <syscall name="mincore" number="5026"/>
+ <syscall name="madvise" number="5027"/>
+ <syscall name="shmget" number="5028"/>
+ <syscall name="shmat" number="5029"/>
+ <syscall name="shmctl" number="5030"/>
+ <syscall name="dup" number="5031"/>
+ <syscall name="dup2" number="5032"/>
+ <syscall name="pause" number="5033"/>
+ <syscall name="nanosleep" number="5034"/>
+ <syscall name="getitimer" number="5035"/>
+ <syscall name="setitimer" number="5036"/>
+ <syscall name="alarm" number="5037"/>
+ <syscall name="getpid" number="5038"/>
+ <syscall name="sendfile" number="5039"/>
+ <syscall name="socket" number="5040"/>
+ <syscall name="connect" number="5041"/>
+ <syscall name="accept" number="5042"/>
+ <syscall name="sendto" number="5043"/>
+ <syscall name="recvfrom" number="5044"/>
+ <syscall name="sendmsg" number="5045"/>
+ <syscall name="recvmsg" number="5046"/>
+ <syscall name="shutdown" number="5047"/>
+ <syscall name="bind" number="5048"/>
+ <syscall name="listen" number="5049"/>
+ <syscall name="getsockname" number="5050"/>
+ <syscall name="getpeername" number="5051"/>
+ <syscall name="socketpair" number="5052"/>
+ <syscall name="setsockopt" number="5053"/>
+ <syscall name="getsockopt" number="5054"/>
+ <syscall name="clone" number="5055"/>
+ <syscall name="fork" number="5056"/>
+ <syscall name="execve" number="5057"/>
+ <syscall name="exit" number="5058"/>
+ <syscall name="wait4" number="5059"/>
+ <syscall name="kill" number="5060"/>
+ <syscall name="uname" number="5061"/>
+ <syscall name="semget" number="5062"/>
+ <syscall name="semop" number="5063"/>
+ <syscall name="semctl" number="5064"/>
+ <syscall name="shmdt" number="5065"/>
+ <syscall name="msgget" number="5066"/>
+ <syscall name="msgsnd" number="5067"/>
+ <syscall name="msgrcv" number="5068"/>
+ <syscall name="msgctl" number="5069"/>
+ <syscall name="fcntl" number="5070"/>
+ <syscall name="flock" number="5071"/>
+ <syscall name="fsync" number="5072"/>
+ <syscall name="fdatasync" number="5073"/>
+ <syscall name="truncate" number="5074"/>
+ <syscall name="ftruncate" number="5075"/>
+ <syscall name="getdents" number="5076"/>
+ <syscall name="getcwd" number="5077"/>
+ <syscall name="chdir" number="5078"/>
+ <syscall name="fchdir" number="5079"/>
+ <syscall name="rename" number="5080"/>
+ <syscall name="mkdir" number="5081"/>
+ <syscall name="rmdir" number="5082"/>
+ <syscall name="creat" number="5083"/>
+ <syscall name="link" number="5084"/>
+ <syscall name="unlink" number="5085"/>
+ <syscall name="symlink" number="5086"/>
+ <syscall name="readlink" number="5087"/>
+ <syscall name="chmod" number="5088"/>
+ <syscall name="fchmod" number="5089"/>
+ <syscall name="chown" number="5090"/>
+ <syscall name="fchown" number="5091"/>
+ <syscall name="lchown" number="5092"/>
+ <syscall name="umask" number="5093"/>
+ <syscall name="gettimeofday" number="5094"/>
+ <syscall name="getrlimit" number="5095"/>
+ <syscall name="getrusage" number="5096"/>
+ <syscall name="sysinfo" number="5097"/>
+ <syscall name="times" number="5098"/>
+ <syscall name="ptrace" number="5099"/>
+ <syscall name="getuid" number="5100"/>
+ <syscall name="syslog" number="5101"/>
+ <syscall name="getgid" number="5102"/>
+ <syscall name="setuid" number="5103"/>
+ <syscall name="setgid" number="5104"/>
+ <syscall name="geteuid" number="5105"/>
+ <syscall name="getegid" number="5106"/>
+ <syscall name="setpgid" number="5107"/>
+ <syscall name="getppid" number="5108"/>
+ <syscall name="getpgrp" number="5109"/>
+ <syscall name="setsid" number="5110"/>
+ <syscall name="setreuid" number="5111"/>
+ <syscall name="setregid" number="5112"/>
+ <syscall name="getgroups" number="5113"/>
+ <syscall name="setgroups" number="5114"/>
+ <syscall name="setresuid" number="5115"/>
+ <syscall name="getresuid" number="5116"/>
+ <syscall name="setresgid" number="5117"/>
+ <syscall name="getresgid" number="5118"/>
+ <syscall name="getpgid" number="5119"/>
+ <syscall name="setfsuid" number="5120"/>
+ <syscall name="setfsgid" number="5121"/>
+ <syscall name="getsid" number="5122"/>
+ <syscall name="capget" number="5123"/>
+ <syscall name="capset" number="5124"/>
+ <syscall name="rt_sigpending" number="5125"/>
+ <syscall name="rt_sigtimedwait" number="5126"/>
+ <syscall name="rt_sigqueueinfo" number="5127"/>
+ <syscall name="rt_sigsuspend" number="5128"/>
+ <syscall name="sigaltstack" number="5129"/>
+ <syscall name="utime" number="5130"/>
+ <syscall name="mknod" number="5131"/>
+ <syscall name="personality" number="5132"/>
+ <syscall name="ustat" number="5133"/>
+ <syscall name="statfs" number="5134"/>
+ <syscall name="fstatfs" number="5135"/>
+ <syscall name="sysfs" number="5136"/>
+ <syscall name="getpriority" number="5137"/>
+ <syscall name="setpriority" number="5138"/>
+ <syscall name="sched_setparam" number="5139"/>
+ <syscall name="sched_getparam" number="5140"/>
+ <syscall name="sched_setscheduler" number="5141"/>
+ <syscall name="sched_getscheduler" number="5142"/>
+ <syscall name="sched_get_priority_max" number="5143"/>
+ <syscall name="sched_get_priority_min" number="5144"/>
+ <syscall name="sched_rr_get_interval" number="5145"/>
+ <syscall name="mlock" number="5146"/>
+ <syscall name="munlock" number="5147"/>
+ <syscall name="mlockall" number="5148"/>
+ <syscall name="munlockall" number="5149"/>
+ <syscall name="vhangup" number="5150"/>
+ <syscall name="pivot_root" number="5151"/>
+ <syscall name="_sysctl" number="5152"/>
+ <syscall name="prctl" number="5153"/>
+ <syscall name="adjtimex" number="5154"/>
+ <syscall name="setrlimit" number="5155"/>
+ <syscall name="chroot" number="5156"/>
+ <syscall name="sync" number="5157"/>
+ <syscall name="acct" number="5158"/>
+ <syscall name="settimeofday" number="5159"/>
+ <syscall name="mount" number="5160"/>
+ <syscall name="umount2" number="5161"/>
+ <syscall name="swapon" number="5162"/>
+ <syscall name="swapoff" number="5163"/>
+ <syscall name="reboot" number="5164"/>
+ <syscall name="sethostname" number="5165"/>
+ <syscall name="setdomainname" number="5166"/>
+ <syscall name="create_module" number="5167"/>
+ <syscall name="init_module" number="5168"/>
+ <syscall name="delete_module" number="5169"/>
+ <syscall name="get_kernel_syms" number="5170"/>
+ <syscall name="query_module" number="5171"/>
+ <syscall name="quotactl" number="5172"/>
+ <syscall name="nfsservctl" number="5173"/>
+ <syscall name="getpmsg" number="5174"/>
+ <syscall name="putpmsg" number="5175"/>
+ <syscall name="afs_syscall" number="5176"/>
+ <syscall name="gettid" number="5178"/>
+ <syscall name="readahead" number="5179"/>
+ <syscall name="setxattr" number="5180"/>
+ <syscall name="lsetxattr" number="5181"/>
+ <syscall name="fsetxattr" number="5182"/>
+ <syscall name="getxattr" number="5183"/>
+ <syscall name="lgetxattr" number="5184"/>
+ <syscall name="fgetxattr" number="5185"/>
+ <syscall name="listxattr" number="5186"/>
+ <syscall name="llistxattr" number="5187"/>
+ <syscall name="flistxattr" number="5188"/>
+ <syscall name="removexattr" number="5189"/>
+ <syscall name="lremovexattr" number="5190"/>
+ <syscall name="fremovexattr" number="5191"/>
+ <syscall name="tkill" number="5192"/>
+ <syscall name="futex" number="5194"/>
+ <syscall name="sched_setaffinity" number="5195"/>
+ <syscall name="sched_getaffinity" number="5196"/>
+ <syscall name="cacheflush" number="5197"/>
+ <syscall name="cachectl" number="5198"/>
+ <syscall name="sysmips" number="5199"/>
+ <syscall name="io_setup" number="5200"/>
+ <syscall name="io_destroy" number="5201"/>
+ <syscall name="io_getevents" number="5202"/>
+ <syscall name="io_submit" number="5203"/>
+ <syscall name="io_cancel" number="5204"/>
+ <syscall name="exit_group" number="5205"/>
+ <syscall name="lookup_dcookie" number="5206"/>
+ <syscall name="epoll_create" number="5207"/>
+ <syscall name="epoll_ctl" number="5208"/>
+ <syscall name="epoll_wait" number="5209"/>
+ <syscall name="remap_file_pages" number="5210"/>
+ <syscall name="rt_sigreturn" number="5211"/>
+ <syscall name="set_tid_address" number="5212"/>
+ <syscall name="restart_syscall" number="5213"/>
+ <syscall name="semtimedop" number="5214"/>
+ <syscall name="fadvise64" number="5215"/>
+ <syscall name="timer_create" number="5216"/>
+ <syscall name="timer_settime" number="5217"/>
+ <syscall name="timer_gettime" number="5218"/>
+ <syscall name="timer_getoverrun" number="5219"/>
+ <syscall name="timer_delete" number="5220"/>
+ <syscall name="clock_settime" number="5221"/>
+ <syscall name="clock_gettime" number="5222"/>
+ <syscall name="clock_getres" number="5223"/>
+ <syscall name="clock_nanosleep" number="5224"/>
+ <syscall name="tgkill" number="5225"/>
+ <syscall name="utimes" number="5226"/>
+ <syscall name="mbind" number="5227"/>
+ <syscall name="get_mempolicy" number="5228"/>
+ <syscall name="set_mempolicy" number="5229"/>
+ <syscall name="mq_open" number="5230"/>
+ <syscall name="mq_unlink" number="5231"/>
+ <syscall name="mq_timedsend" number="5232"/>
+ <syscall name="mq_timedreceive" number="5233"/>
+ <syscall name="mq_notify" number="5234"/>
+ <syscall name="mq_getsetattr" number="5235"/>
+ <syscall name="vserver" number="5236"/>
+ <syscall name="waitid" number="5237"/>
+ <syscall name="add_key" number="5239"/>
+ <syscall name="request_key" number="5240"/>
+ <syscall name="keyctl" number="5241"/>
+ <syscall name="set_thread_area" number="5242"/>
+ <syscall name="inotify_init" number="5243"/>
+ <syscall name="inotify_add_watch" number="5244"/>
+ <syscall name="inotify_rm_watch" number="5245"/>
+ <syscall name="migrate_pages" number="5246"/>
+ <syscall name="openat" number="5247"/>
+ <syscall name="mkdirat" number="5248"/>
+ <syscall name="mknodat" number="5249"/>
+ <syscall name="fchownat" number="5250"/>
+ <syscall name="futimesat" number="5251"/>
+ <syscall name="newfstatat" number="5252"/>
+ <syscall name="unlinkat" number="5253"/>
+ <syscall name="renameat" number="5254"/>
+ <syscall name="linkat" number="5255"/>
+ <syscall name="symlinkat" number="5256"/>
+ <syscall name="readlinkat" number="5257"/>
+ <syscall name="fchmodat" number="5258"/>
+ <syscall name="faccessat" number="5259"/>
+ <syscall name="pselect6" number="5260"/>
+ <syscall name="ppoll" number="5261"/>
+ <syscall name="unshare" number="5262"/>
+ <syscall name="splice" number="5263"/>
+ <syscall name="sync_file_range" number="5264"/>
+ <syscall name="tee" number="5265"/>
+ <syscall name="vmsplice" number="5266"/>
+ <syscall name="move_pages" number="5267"/>
+ <syscall name="set_robust_list" number="5268"/>
+ <syscall name="get_robust_list" number="5269"/>
+ <syscall name="kexec_load" number="5270"/>
+ <syscall name="getcpu" number="5271"/>
+ <syscall name="epoll_pwait" number="5272"/>
+ <syscall name="ioprio_set" number="5273"/>
+ <syscall name="ioprio_get" number="5274"/>
+ <syscall name="utimensat" number="5275"/>
+ <syscall name="signalfd" number="5276"/>
+ <syscall name="timerfd" number="5277"/>
+ <syscall name="eventfd" number="5278"/>
+ <syscall name="fallocate" number="5279"/>
+ <syscall name="timerfd_create" number="5280"/>
+ <syscall name="timerfd_gettime" number="5281"/>
+ <syscall name="timerfd_settime" number="5282"/>
+ <syscall name="signalfd4" number="5283"/>
+ <syscall name="eventfd2" number="5284"/>
+ <syscall name="epoll_create1" number="5285"/>
+ <syscall name="dup3" number="5286"/>
+ <syscall name="pipe2" number="5287"/>
+ <syscall name="inotify_init1" number="5288"/>
+ <syscall name="preadv" number="5289"/>
+ <syscall name="pwritev" number="5290"/>
+ <syscall name="rt_tgsigqueueinfo" number="5291"/>
+ <syscall name="perf_event_open" number="5292"/>
+ <syscall name="accept4" number="5293"/>
+ <syscall name="recvmmsg" number="5294"/>
+ <syscall name="fanotify_init" number="5295"/>
+ <syscall name="fanotify_mark" number="5296"/>
+ <syscall name="prlimit64" number="5297"/>
+</syscalls_info>
diff --git a/share/gdb/syscalls/mips-o32-linux.xml b/share/gdb/syscalls/mips-o32-linux.xml
new file mode 100644
index 0000000..2b11247
--- /dev/null
+++ b/share/gdb/syscalls/mips-o32-linux.xml
@@ -0,0 +1,347 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2011-2013 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-syscalls.dtd">
+
+<!-- This file was generated using the following file:
+
+ /usr/src/linux/arch/mips/include/asm/unistd.h
+
+ The file mentioned above belongs to the Linux Kernel. -->
+
+<syscalls_info>
+ <syscall name="syscall" number="4000"/>
+ <syscall name="exit" number="4001"/>
+ <syscall name="fork" number="4002"/>
+ <syscall name="read" number="4003"/>
+ <syscall name="write" number="4004"/>
+ <syscall name="open" number="4005"/>
+ <syscall name="close" number="4006"/>
+ <syscall name="waitpid" number="4007"/>
+ <syscall name="creat" number="4008"/>
+ <syscall name="link" number="4009"/>
+ <syscall name="unlink" number="4010"/>
+ <syscall name="execve" number="4011"/>
+ <syscall name="chdir" number="4012"/>
+ <syscall name="time" number="4013"/>
+ <syscall name="mknod" number="4014"/>
+ <syscall name="chmod" number="4015"/>
+ <syscall name="lchown" number="4016"/>
+ <syscall name="break" number="4017"/>
+ <syscall name="lseek" number="4019"/>
+ <syscall name="getpid" number="4020"/>
+ <syscall name="mount" number="4021"/>
+ <syscall name="umount" number="4022"/>
+ <syscall name="setuid" number="4023"/>
+ <syscall name="getuid" number="4024"/>
+ <syscall name="stime" number="4025"/>
+ <syscall name="ptrace" number="4026"/>
+ <syscall name="alarm" number="4027"/>
+ <syscall name="pause" number="4029"/>
+ <syscall name="utime" number="4030"/>
+ <syscall name="stty" number="4031"/>
+ <syscall name="gtty" number="4032"/>
+ <syscall name="access" number="4033"/>
+ <syscall name="nice" number="4034"/>
+ <syscall name="ftime" number="4035"/>
+ <syscall name="sync" number="4036"/>
+ <syscall name="kill" number="4037"/>
+ <syscall name="rename" number="4038"/>
+ <syscall name="mkdir" number="4039"/>
+ <syscall name="rmdir" number="4040"/>
+ <syscall name="dup" number="4041"/>
+ <syscall name="pipe" number="4042"/>
+ <syscall name="times" number="4043"/>
+ <syscall name="prof" number="4044"/>
+ <syscall name="brk" number="4045"/>
+ <syscall name="setgid" number="4046"/>
+ <syscall name="getgid" number="4047"/>
+ <syscall name="signal" number="4048"/>
+ <syscall name="geteuid" number="4049"/>
+ <syscall name="getegid" number="4050"/>
+ <syscall name="acct" number="4051"/>
+ <syscall name="umount2" number="4052"/>
+ <syscall name="lock" number="4053"/>
+ <syscall name="ioctl" number="4054"/>
+ <syscall name="fcntl" number="4055"/>
+ <syscall name="mpx" number="4056"/>
+ <syscall name="setpgid" number="4057"/>
+ <syscall name="ulimit" number="4058"/>
+ <syscall name="umask" number="4060"/>
+ <syscall name="chroot" number="4061"/>
+ <syscall name="ustat" number="4062"/>
+ <syscall name="dup2" number="4063"/>
+ <syscall name="getppid" number="4064"/>
+ <syscall name="getpgrp" number="4065"/>
+ <syscall name="setsid" number="4066"/>
+ <syscall name="sigaction" number="4067"/>
+ <syscall name="sgetmask" number="4068"/>
+ <syscall name="ssetmask" number="4069"/>
+ <syscall name="setreuid" number="4070"/>
+ <syscall name="setregid" number="4071"/>
+ <syscall name="sigsuspend" number="4072"/>
+ <syscall name="sigpending" number="4073"/>
+ <syscall name="sethostname" number="4074"/>
+ <syscall name="setrlimit" number="4075"/>
+ <syscall name="getrlimit" number="4076"/>
+ <syscall name="getrusage" number="4077"/>
+ <syscall name="gettimeofday" number="4078"/>
+ <syscall name="settimeofday" number="4079"/>
+ <syscall name="getgroups" number="4080"/>
+ <syscall name="setgroups" number="4081"/>
+ <syscall name="symlink" number="4083"/>
+ <syscall name="readlink" number="4085"/>
+ <syscall name="uselib" number="4086"/>
+ <syscall name="swapon" number="4087"/>
+ <syscall name="reboot" number="4088"/>
+ <syscall name="readdir" number="4089"/>
+ <syscall name="mmap" number="4090"/>
+ <syscall name="munmap" number="4091"/>
+ <syscall name="truncate" number="4092"/>
+ <syscall name="ftruncate" number="4093"/>
+ <syscall name="fchmod" number="4094"/>
+ <syscall name="fchown" number="4095"/>
+ <syscall name="getpriority" number="4096"/>
+ <syscall name="setpriority" number="4097"/>
+ <syscall name="profil" number="4098"/>
+ <syscall name="statfs" number="4099"/>
+ <syscall name="fstatfs" number="4100"/>
+ <syscall name="ioperm" number="4101"/>
+ <syscall name="socketcall" number="4102"/>
+ <syscall name="syslog" number="4103"/>
+ <syscall name="setitimer" number="4104"/>
+ <syscall name="getitimer" number="4105"/>
+ <syscall name="stat" number="4106"/>
+ <syscall name="lstat" number="4107"/>
+ <syscall name="fstat" number="4108"/>
+ <syscall name="iopl" number="4110"/>
+ <syscall name="vhangup" number="4111"/>
+ <syscall name="idle" number="4112"/>
+ <syscall name="vm86" number="4113"/>
+ <syscall name="wait4" number="4114"/>
+ <syscall name="swapoff" number="4115"/>
+ <syscall name="sysinfo" number="4116"/>
+ <syscall name="ipc" number="4117"/>
+ <syscall name="fsync" number="4118"/>
+ <syscall name="sigreturn" number="4119"/>
+ <syscall name="clone" number="4120"/>
+ <syscall name="setdomainname" number="4121"/>
+ <syscall name="uname" number="4122"/>
+ <syscall name="modify_ldt" number="4123"/>
+ <syscall name="adjtimex" number="4124"/>
+ <syscall name="mprotect" number="4125"/>
+ <syscall name="sigprocmask" number="4126"/>
+ <syscall name="create_module" number="4127"/>
+ <syscall name="init_module" number="4128"/>
+ <syscall name="delete_module" number="4129"/>
+ <syscall name="get_kernel_syms" number="4130"/>
+ <syscall name="quotactl" number="4131"/>
+ <syscall name="getpgid" number="4132"/>
+ <syscall name="fchdir" number="4133"/>
+ <syscall name="bdflush" number="4134"/>
+ <syscall name="sysfs" number="4135"/>
+ <syscall name="personality" number="4136"/>
+ <syscall name="afs_syscall" number="4137"/>
+ <syscall name="setfsuid" number="4138"/>
+ <syscall name="setfsgid" number="4139"/>
+ <syscall name="_llseek" number="4140"/>
+ <syscall name="getdents" number="4141"/>
+ <syscall name="_newselect" number="4142"/>
+ <syscall name="flock" number="4143"/>
+ <syscall name="msync" number="4144"/>
+ <syscall name="readv" number="4145"/>
+ <syscall name="writev" number="4146"/>
+ <syscall name="cacheflush" number="4147"/>
+ <syscall name="cachectl" number="4148"/>
+ <syscall name="sysmips" number="4149"/>
+ <syscall name="getsid" number="4151"/>
+ <syscall name="fdatasync" number="4152"/>
+ <syscall name="_sysctl" number="4153"/>
+ <syscall name="mlock" number="4154"/>
+ <syscall name="munlock" number="4155"/>
+ <syscall name="mlockall" number="4156"/>
+ <syscall name="munlockall" number="4157"/>
+ <syscall name="sched_setparam" number="4158"/>
+ <syscall name="sched_getparam" number="4159"/>
+ <syscall name="sched_setscheduler" number="4160"/>
+ <syscall name="sched_getscheduler" number="4161"/>
+ <syscall name="sched_yield" number="4162"/>
+ <syscall name="sched_get_priority_max" number="4163"/>
+ <syscall name="sched_get_priority_min" number="4164"/>
+ <syscall name="sched_rr_get_interval" number="4165"/>
+ <syscall name="nanosleep" number="4166"/>
+ <syscall name="mremap" number="4167"/>
+ <syscall name="accept" number="4168"/>
+ <syscall name="bind" number="4169"/>
+ <syscall name="connect" number="4170"/>
+ <syscall name="getpeername" number="4171"/>
+ <syscall name="getsockname" number="4172"/>
+ <syscall name="getsockopt" number="4173"/>
+ <syscall name="listen" number="4174"/>
+ <syscall name="recv" number="4175"/>
+ <syscall name="recvfrom" number="4176"/>
+ <syscall name="recvmsg" number="4177"/>
+ <syscall name="send" number="4178"/>
+ <syscall name="sendmsg" number="4179"/>
+ <syscall name="sendto" number="4180"/>
+ <syscall name="setsockopt" number="4181"/>
+ <syscall name="shutdown" number="4182"/>
+ <syscall name="socket" number="4183"/>
+ <syscall name="socketpair" number="4184"/>
+ <syscall name="setresuid" number="4185"/>
+ <syscall name="getresuid" number="4186"/>
+ <syscall name="query_module" number="4187"/>
+ <syscall name="poll" number="4188"/>
+ <syscall name="nfsservctl" number="4189"/>
+ <syscall name="setresgid" number="4190"/>
+ <syscall name="getresgid" number="4191"/>
+ <syscall name="prctl" number="4192"/>
+ <syscall name="rt_sigreturn" number="4193"/>
+ <syscall name="rt_sigaction" number="4194"/>
+ <syscall name="rt_sigprocmask" number="4195"/>
+ <syscall name="rt_sigpending" number="4196"/>
+ <syscall name="rt_sigtimedwait" number="4197"/>
+ <syscall name="rt_sigqueueinfo" number="4198"/>
+ <syscall name="rt_sigsuspend" number="4199"/>
+ <syscall name="pread64" number="4200"/>
+ <syscall name="pwrite64" number="4201"/>
+ <syscall name="chown" number="4202"/>
+ <syscall name="getcwd" number="4203"/>
+ <syscall name="capget" number="4204"/>
+ <syscall name="capset" number="4205"/>
+ <syscall name="sigaltstack" number="4206"/>
+ <syscall name="sendfile" number="4207"/>
+ <syscall name="getpmsg" number="4208"/>
+ <syscall name="putpmsg" number="4209"/>
+ <syscall name="mmap2" number="4210"/>
+ <syscall name="truncate64" number="4211"/>
+ <syscall name="ftruncate64" number="4212"/>
+ <syscall name="stat64" number="4213"/>
+ <syscall name="lstat64" number="4214"/>
+ <syscall name="fstat64" number="4215"/>
+ <syscall name="pivot_root" number="4216"/>
+ <syscall name="mincore" number="4217"/>
+ <syscall name="madvise" number="4218"/>
+ <syscall name="getdents64" number="4219"/>
+ <syscall name="fcntl64" number="4220"/>
+ <syscall name="gettid" number="4222"/>
+ <syscall name="readahead" number="4223"/>
+ <syscall name="setxattr" number="4224"/>
+ <syscall name="lsetxattr" number="4225"/>
+ <syscall name="fsetxattr" number="4226"/>
+ <syscall name="getxattr" number="4227"/>
+ <syscall name="lgetxattr" number="4228"/>
+ <syscall name="fgetxattr" number="4229"/>
+ <syscall name="listxattr" number="4230"/>
+ <syscall name="llistxattr" number="4231"/>
+ <syscall name="flistxattr" number="4232"/>
+ <syscall name="removexattr" number="4233"/>
+ <syscall name="lremovexattr" number="4234"/>
+ <syscall name="fremovexattr" number="4235"/>
+ <syscall name="tkill" number="4236"/>
+ <syscall name="sendfile64" number="4237"/>
+ <syscall name="futex" number="4238"/>
+ <syscall name="sched_setaffinity" number="4239"/>
+ <syscall name="sched_getaffinity" number="4240"/>
+ <syscall name="io_setup" number="4241"/>
+ <syscall name="io_destroy" number="4242"/>
+ <syscall name="io_getevents" number="4243"/>
+ <syscall name="io_submit" number="4244"/>
+ <syscall name="io_cancel" number="4245"/>
+ <syscall name="exit_group" number="4246"/>
+ <syscall name="lookup_dcookie" number="4247"/>
+ <syscall name="epoll_create" number="4248"/>
+ <syscall name="epoll_ctl" number="4249"/>
+ <syscall name="epoll_wait" number="4250"/>
+ <syscall name="remap_file_pages" number="4251"/>
+ <syscall name="set_tid_address" number="4252"/>
+ <syscall name="restart_syscall" number="4253"/>
+ <syscall name="fadvise64" number="4254"/>
+ <syscall name="statfs64" number="4255"/>
+ <syscall name="fstatfs64" number="4256"/>
+ <syscall name="timer_create" number="4257"/>
+ <syscall name="timer_settime" number="4258"/>
+ <syscall name="timer_gettime" number="4259"/>
+ <syscall name="timer_getoverrun" number="4260"/>
+ <syscall name="timer_delete" number="4261"/>
+ <syscall name="clock_settime" number="4262"/>
+ <syscall name="clock_gettime" number="4263"/>
+ <syscall name="clock_getres" number="4264"/>
+ <syscall name="clock_nanosleep" number="4265"/>
+ <syscall name="tgkill" number="4266"/>
+ <syscall name="utimes" number="4267"/>
+ <syscall name="mbind" number="4268"/>
+ <syscall name="get_mempolicy" number="4269"/>
+ <syscall name="set_mempolicy" number="4270"/>
+ <syscall name="mq_open" number="4271"/>
+ <syscall name="mq_unlink" number="4272"/>
+ <syscall name="mq_timedsend" number="4273"/>
+ <syscall name="mq_timedreceive" number="4274"/>
+ <syscall name="mq_notify" number="4275"/>
+ <syscall name="mq_getsetattr" number="4276"/>
+ <syscall name="vserver" number="4277"/>
+ <syscall name="waitid" number="4278"/>
+ <syscall name="add_key" number="4280"/>
+ <syscall name="request_key" number="4281"/>
+ <syscall name="keyctl" number="4282"/>
+ <syscall name="set_thread_area" number="4283"/>
+ <syscall name="inotify_init" number="4284"/>
+ <syscall name="inotify_add_watch" number="4285"/>
+ <syscall name="inotify_rm_watch" number="4286"/>
+ <syscall name="migrate_pages" number="4287"/>
+ <syscall name="openat" number="4288"/>
+ <syscall name="mkdirat" number="4289"/>
+ <syscall name="mknodat" number="4290"/>
+ <syscall name="fchownat" number="4291"/>
+ <syscall name="futimesat" number="4292"/>
+ <syscall name="fstatat64" number="4293"/>
+ <syscall name="unlinkat" number="4294"/>
+ <syscall name="renameat" number="4295"/>
+ <syscall name="linkat" number="4296"/>
+ <syscall name="symlinkat" number="4297"/>
+ <syscall name="readlinkat" number="4298"/>
+ <syscall name="fchmodat" number="4299"/>
+ <syscall name="faccessat" number="4300"/>
+ <syscall name="pselect6" number="4301"/>
+ <syscall name="ppoll" number="4302"/>
+ <syscall name="unshare" number="4303"/>
+ <syscall name="splice" number="4304"/>
+ <syscall name="sync_file_range" number="4305"/>
+ <syscall name="tee" number="4306"/>
+ <syscall name="vmsplice" number="4307"/>
+ <syscall name="move_pages" number="4308"/>
+ <syscall name="set_robust_list" number="4309"/>
+ <syscall name="get_robust_list" number="4310"/>
+ <syscall name="kexec_load" number="4311"/>
+ <syscall name="getcpu" number="4312"/>
+ <syscall name="epoll_pwait" number="4313"/>
+ <syscall name="ioprio_set" number="4314"/>
+ <syscall name="ioprio_get" number="4315"/>
+ <syscall name="utimensat" number="4316"/>
+ <syscall name="signalfd" number="4317"/>
+ <syscall name="timerfd" number="4318"/>
+ <syscall name="eventfd" number="4319"/>
+ <syscall name="fallocate" number="4320"/>
+ <syscall name="timerfd_create" number="4321"/>
+ <syscall name="timerfd_gettime" number="4322"/>
+ <syscall name="timerfd_settime" number="4323"/>
+ <syscall name="signalfd4" number="4324"/>
+ <syscall name="eventfd2" number="4325"/>
+ <syscall name="epoll_create1" number="4326"/>
+ <syscall name="dup3" number="4327"/>
+ <syscall name="pipe2" number="4328"/>
+ <syscall name="inotify_init1" number="4329"/>
+ <syscall name="preadv" number="4330"/>
+ <syscall name="pwritev" number="4331"/>
+ <syscall name="rt_tgsigqueueinfo" number="4332"/>
+ <syscall name="perf_event_open" number="4333"/>
+ <syscall name="accept4" number="4334"/>
+ <syscall name="recvmmsg" number="4335"/>
+ <syscall name="fanotify_init" number="4336"/>
+ <syscall name="fanotify_mark" number="4337"/>
+ <syscall name="prlimit64" number="4338"/>
+</syscalls_info>
diff --git a/share/gdb/syscalls/ppc-linux.xml b/share/gdb/syscalls/ppc-linux.xml
new file mode 100644
index 0000000..dd4eba6
--- /dev/null
+++ b/share/gdb/syscalls/ppc-linux.xml
@@ -0,0 +1,310 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2009-2013 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-syscalls.dtd">
+
+<!-- This file was generated using the following file:
+
+ /usr/src/linux/arch/powerpc/include/asm/unistd.h
+
+ The file mentioned above belongs to the Linux Kernel. -->
+
+<syscalls_info>
+ <syscall name="restart_syscall" number="0"/>
+ <syscall name="exit" number="1"/>
+ <syscall name="fork" number="2"/>
+ <syscall name="read" number="3"/>
+ <syscall name="write" number="4"/>
+ <syscall name="open" number="5"/>
+ <syscall name="close" number="6"/>
+ <syscall name="waitpid" number="7"/>
+ <syscall name="creat" number="8"/>
+ <syscall name="link" number="9"/>
+ <syscall name="unlink" number="10"/>
+ <syscall name="execve" number="11"/>
+ <syscall name="chdir" number="12"/>
+ <syscall name="time" number="13"/>
+ <syscall name="mknod" number="14"/>
+ <syscall name="chmod" number="15"/>
+ <syscall name="lchown" number="16"/>
+ <syscall name="break" number="17"/>
+ <syscall name="oldstat" number="18"/>
+ <syscall name="lseek" number="19"/>
+ <syscall name="getpid" number="20"/>
+ <syscall name="mount" number="21"/>
+ <syscall name="umount" number="22"/>
+ <syscall name="setuid" number="23"/>
+ <syscall name="getuid" number="24"/>
+ <syscall name="stime" number="25"/>
+ <syscall name="ptrace" number="26"/>
+ <syscall name="alarm" number="27"/>
+ <syscall name="oldfstat" number="28"/>
+ <syscall name="pause" number="29"/>
+ <syscall name="utime" number="30"/>
+ <syscall name="stty" number="31"/>
+ <syscall name="gtty" number="32"/>
+ <syscall name="access" number="33"/>
+ <syscall name="nice" number="34"/>
+ <syscall name="ftime" number="35"/>
+ <syscall name="sync" number="36"/>
+ <syscall name="kill" number="37"/>
+ <syscall name="rename" number="38"/>
+ <syscall name="mkdir" number="39"/>
+ <syscall name="rmdir" number="40"/>
+ <syscall name="dup" number="41"/>
+ <syscall name="pipe" number="42"/>
+ <syscall name="times" number="43"/>
+ <syscall name="prof" number="44"/>
+ <syscall name="brk" number="45"/>
+ <syscall name="setgid" number="46"/>
+ <syscall name="getgid" number="47"/>
+ <syscall name="signal" number="48"/>
+ <syscall name="geteuid" number="49"/>
+ <syscall name="getegid" number="50"/>
+ <syscall name="acct" number="51"/>
+ <syscall name="umount2" number="52"/>
+ <syscall name="lock" number="53"/>
+ <syscall name="ioctl" number="54"/>
+ <syscall name="fcntl" number="55"/>
+ <syscall name="mpx" number="56"/>
+ <syscall name="setpgid" number="57"/>
+ <syscall name="ulimit" number="58"/>
+ <syscall name="oldolduname" number="59"/>
+ <syscall name="umask" number="60"/>
+ <syscall name="chroot" number="61"/>
+ <syscall name="ustat" number="62"/>
+ <syscall name="dup2" number="63"/>
+ <syscall name="getppid" number="64"/>
+ <syscall name="getpgrp" number="65"/>
+ <syscall name="setsid" number="66"/>
+ <syscall name="sigaction" number="67"/>
+ <syscall name="sgetmask" number="68"/>
+ <syscall name="ssetmask" number="69"/>
+ <syscall name="setreuid" number="70"/>
+ <syscall name="setregid" number="71"/>
+ <syscall name="sigsuspend" number="72"/>
+ <syscall name="sigpending" number="73"/>
+ <syscall name="sethostname" number="74"/>
+ <syscall name="setrlimit" number="75"/>
+ <syscall name="getrlimit" number="76"/>
+ <syscall name="getrusage" number="77"/>
+ <syscall name="gettimeofday" number="78"/>
+ <syscall name="settimeofday" number="79"/>
+ <syscall name="getgroups" number="80"/>
+ <syscall name="setgroups" number="81"/>
+ <syscall name="select" number="82"/>
+ <syscall name="symlink" number="83"/>
+ <syscall name="oldlstat" number="84"/>
+ <syscall name="readlink" number="85"/>
+ <syscall name="uselib" number="86"/>
+ <syscall name="swapon" number="87"/>
+ <syscall name="reboot" number="88"/>
+ <syscall name="readdir" number="89"/>
+ <syscall name="mmap" number="90"/>
+ <syscall name="munmap" number="91"/>
+ <syscall name="truncate" number="92"/>
+ <syscall name="ftruncate" number="93"/>
+ <syscall name="fchmod" number="94"/>
+ <syscall name="fchown" number="95"/>
+ <syscall name="getpriority" number="96"/>
+ <syscall name="setpriority" number="97"/>
+ <syscall name="profil" number="98"/>
+ <syscall name="statfs" number="99"/>
+ <syscall name="fstatfs" number="100"/>
+ <syscall name="ioperm" number="101"/>
+ <syscall name="socketcall" number="102"/>
+ <syscall name="syslog" number="103"/>
+ <syscall name="setitimer" number="104"/>
+ <syscall name="getitimer" number="105"/>
+ <syscall name="stat" number="106"/>
+ <syscall name="lstat" number="107"/>
+ <syscall name="fstat" number="108"/>
+ <syscall name="olduname" number="109"/>
+ <syscall name="iopl" number="110"/>
+ <syscall name="vhangup" number="111"/>
+ <syscall name="idle" number="112"/>
+ <syscall name="vm86" number="113"/>
+ <syscall name="wait4" number="114"/>
+ <syscall name="swapoff" number="115"/>
+ <syscall name="sysinfo" number="116"/>
+ <syscall name="ipc" number="117"/>
+ <syscall name="fsync" number="118"/>
+ <syscall name="sigreturn" number="119"/>
+ <syscall name="clone" number="120"/>
+ <syscall name="setdomainname" number="121"/>
+ <syscall name="uname" number="122"/>
+ <syscall name="modify_ldt" number="123"/>
+ <syscall name="adjtimex" number="124"/>
+ <syscall name="mprotect" number="125"/>
+ <syscall name="sigprocmask" number="126"/>
+ <syscall name="create_module" number="127"/>
+ <syscall name="init_module" number="128"/>
+ <syscall name="delete_module" number="129"/>
+ <syscall name="get_kernel_syms" number="130"/>
+ <syscall name="quotactl" number="131"/>
+ <syscall name="getpgid" number="132"/>
+ <syscall name="fchdir" number="133"/>
+ <syscall name="bdflush" number="134"/>
+ <syscall name="sysfs" number="135"/>
+ <syscall name="personality" number="136"/>
+ <syscall name="afs_syscall" number="137"/>
+ <syscall name="setfsuid" number="138"/>
+ <syscall name="setfsgid" number="139"/>
+ <syscall name="_llseek" number="140"/>
+ <syscall name="getdents" number="141"/>
+ <syscall name="_newselect" number="142"/>
+ <syscall name="flock" number="143"/>
+ <syscall name="msync" number="144"/>
+ <syscall name="readv" number="145"/>
+ <syscall name="writev" number="146"/>
+ <syscall name="getsid" number="147"/>
+ <syscall name="fdatasync" number="148"/>
+ <syscall name="_sysctl" number="149"/>
+ <syscall name="mlock" number="150"/>
+ <syscall name="munlock" number="151"/>
+ <syscall name="mlockall" number="152"/>
+ <syscall name="munlockall" number="153"/>
+ <syscall name="sched_setparam" number="154"/>
+ <syscall name="sched_getparam" number="155"/>
+ <syscall name="sched_setscheduler" number="156"/>
+ <syscall name="sched_getscheduler" number="157"/>
+ <syscall name="sched_yield" number="158"/>
+ <syscall name="sched_get_priority_max" number="159"/>
+ <syscall name="sched_get_priority_min" number="160"/>
+ <syscall name="sched_rr_get_interval" number="161"/>
+ <syscall name="nanosleep" number="162"/>
+ <syscall name="mremap" number="163"/>
+ <syscall name="setresuid" number="164"/>
+ <syscall name="getresuid" number="165"/>
+ <syscall name="query_module" number="166"/>
+ <syscall name="poll" number="167"/>
+ <syscall name="nfsservctl" number="168"/>
+ <syscall name="setresgid" number="169"/>
+ <syscall name="getresgid" number="170"/>
+ <syscall name="prctl" number="171"/>
+ <syscall name="rt_sigreturn" number="172"/>
+ <syscall name="rt_sigaction" number="173"/>
+ <syscall name="rt_sigprocmask" number="174"/>
+ <syscall name="rt_sigpending" number="175"/>
+ <syscall name="rt_sigtimedwait" number="176"/>
+ <syscall name="rt_sigqueueinfo" number="177"/>
+ <syscall name="rt_sigsuspend" number="178"/>
+ <syscall name="pread64" number="179"/>
+ <syscall name="pwrite64" number="180"/>
+ <syscall name="chown" number="181"/>
+ <syscall name="getcwd" number="182"/>
+ <syscall name="capget" number="183"/>
+ <syscall name="capset" number="184"/>
+ <syscall name="sigaltstack" number="185"/>
+ <syscall name="sendfile" number="186"/>
+ <syscall name="getpmsg" number="187"/>
+ <syscall name="putpmsg" number="188"/>
+ <syscall name="vfork" number="189"/>
+ <syscall name="ugetrlimit" number="190"/>
+ <syscall name="readahead" number="191"/>
+ <syscall name="mmap2" number="192"/>
+ <syscall name="truncate64" number="193"/>
+ <syscall name="ftruncate64" number="194"/>
+ <syscall name="stat64" number="195"/>
+ <syscall name="lstat64" number="196"/>
+ <syscall name="fstat64" number="197"/>
+ <syscall name="pciconfig_read" number="198"/>
+ <syscall name="pciconfig_write" number="199"/>
+ <syscall name="pciconfig_iobase" number="200"/>
+ <syscall name="multiplexer" number="201"/>
+ <syscall name="getdents64" number="202"/>
+ <syscall name="pivot_root" number="203"/>
+ <syscall name="fcntl64" number="204"/>
+ <syscall name="madvise" number="205"/>
+ <syscall name="mincore" number="206"/>
+ <syscall name="gettid" number="207"/>
+ <syscall name="tkill" number="208"/>
+ <syscall name="setxattr" number="209"/>
+ <syscall name="lsetxattr" number="210"/>
+ <syscall name="fsetxattr" number="211"/>
+ <syscall name="getxattr" number="212"/>
+ <syscall name="lgetxattr" number="213"/>
+ <syscall name="fgetxattr" number="214"/>
+ <syscall name="listxattr" number="215"/>
+ <syscall name="llistxattr" number="216"/>
+ <syscall name="flistxattr" number="217"/>
+ <syscall name="removexattr" number="218"/>
+ <syscall name="lremovexattr" number="219"/>
+ <syscall name="fremovexattr" number="220"/>
+ <syscall name="futex" number="221"/>
+ <syscall name="sched_setaffinity" number="222"/>
+ <syscall name="sched_getaffinity" number="223"/>
+ <syscall name="tuxcall" number="225"/>
+ <syscall name="sendfile64" number="226"/>
+ <syscall name="io_setup" number="227"/>
+ <syscall name="io_destroy" number="228"/>
+ <syscall name="io_getevents" number="229"/>
+ <syscall name="io_submit" number="230"/>
+ <syscall name="io_cancel" number="231"/>
+ <syscall name="set_tid_address" number="232"/>
+ <syscall name="fadvise64" number="233"/>
+ <syscall name="exit_group" number="234"/>
+ <syscall name="lookup_dcookie" number="235"/>
+ <syscall name="epoll_create" number="236"/>
+ <syscall name="epoll_ctl" number="237"/>
+ <syscall name="epoll_wait" number="238"/>
+ <syscall name="remap_file_pages" number="239"/>
+ <syscall name="timer_create" number="240"/>
+ <syscall name="timer_settime" number="241"/>
+ <syscall name="timer_gettime" number="242"/>
+ <syscall name="timer_getoverrun" number="243"/>
+ <syscall name="timer_delete" number="244"/>
+ <syscall name="clock_settime" number="245"/>
+ <syscall name="clock_gettime" number="246"/>
+ <syscall name="clock_getres" number="247"/>
+ <syscall name="clock_nanosleep" number="248"/>
+ <syscall name="swapcontext" number="249"/>
+ <syscall name="tgkill" number="250"/>
+ <syscall name="utimes" number="251"/>
+ <syscall name="statfs64" number="252"/>
+ <syscall name="fstatfs64" number="253"/>
+ <syscall name="fadvise64_64" number="254"/>
+ <syscall name="rtas" number="255"/>
+ <syscall name="sys_debug_setcontext" number="256"/>
+ <syscall name="mbind" number="259"/>
+ <syscall name="get_mempolicy" number="260"/>
+ <syscall name="set_mempolicy" number="261"/>
+ <syscall name="mq_open" number="262"/>
+ <syscall name="mq_unlink" number="263"/>
+ <syscall name="mq_timedsend" number="264"/>
+ <syscall name="mq_timedreceive" number="265"/>
+ <syscall name="mq_notify" number="266"/>
+ <syscall name="mq_getsetattr" number="267"/>
+ <syscall name="kexec_load" number="268"/>
+ <syscall name="add_key" number="269"/>
+ <syscall name="request_key" number="270"/>
+ <syscall name="keyctl" number="271"/>
+ <syscall name="waitid" number="272"/>
+ <syscall name="ioprio_set" number="273"/>
+ <syscall name="ioprio_get" number="274"/>
+ <syscall name="inotify_init" number="275"/>
+ <syscall name="inotify_add_watch" number="276"/>
+ <syscall name="inotify_rm_watch" number="277"/>
+ <syscall name="spu_run" number="278"/>
+ <syscall name="spu_create" number="279"/>
+ <syscall name="pselect6" number="280"/>
+ <syscall name="ppoll" number="281"/>
+ <syscall name="unshare" number="282"/>
+ <syscall name="openat" number="286"/>
+ <syscall name="mkdirat" number="287"/>
+ <syscall name="mknodat" number="288"/>
+ <syscall name="fchownat" number="289"/>
+ <syscall name="futimesat" number="290"/>
+ <syscall name="fstatat64" number="291"/>
+ <syscall name="unlinkat" number="292"/>
+ <syscall name="renameat" number="293"/>
+ <syscall name="linkat" number="294"/>
+ <syscall name="symlinkat" number="295"/>
+ <syscall name="readlinkat" number="296"/>
+ <syscall name="fchmodat" number="297"/>
+ <syscall name="faccessat" number="298"/>
+</syscalls_info>
diff --git a/share/gdb/syscalls/ppc64-linux.xml b/share/gdb/syscalls/ppc64-linux.xml
new file mode 100644
index 0000000..ad56db1
--- /dev/null
+++ b/share/gdb/syscalls/ppc64-linux.xml
@@ -0,0 +1,295 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2009-2013 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-syscalls.dtd">
+
+<!-- This file was generated using the following file:
+
+ /usr/src/linux/arch/powerpc/include/asm/unistd.h
+
+ The file mentioned above belongs to the Linux Kernel. -->
+
+<syscalls_info>
+ <syscall name="restart_syscall" number="0"/>
+ <syscall name="exit" number="1"/>
+ <syscall name="fork" number="2"/>
+ <syscall name="read" number="3"/>
+ <syscall name="write" number="4"/>
+ <syscall name="open" number="5"/>
+ <syscall name="close" number="6"/>
+ <syscall name="waitpid" number="7"/>
+ <syscall name="creat" number="8"/>
+ <syscall name="link" number="9"/>
+ <syscall name="unlink" number="10"/>
+ <syscall name="execve" number="11"/>
+ <syscall name="chdir" number="12"/>
+ <syscall name="time" number="13"/>
+ <syscall name="mknod" number="14"/>
+ <syscall name="chmod" number="15"/>
+ <syscall name="lchown" number="16"/>
+ <syscall name="break" number="17"/>
+ <syscall name="oldstat" number="18"/>
+ <syscall name="lseek" number="19"/>
+ <syscall name="getpid" number="20"/>
+ <syscall name="mount" number="21"/>
+ <syscall name="umount" number="22"/>
+ <syscall name="setuid" number="23"/>
+ <syscall name="getuid" number="24"/>
+ <syscall name="stime" number="25"/>
+ <syscall name="ptrace" number="26"/>
+ <syscall name="alarm" number="27"/>
+ <syscall name="oldfstat" number="28"/>
+ <syscall name="pause" number="29"/>
+ <syscall name="utime" number="30"/>
+ <syscall name="stty" number="31"/>
+ <syscall name="gtty" number="32"/>
+ <syscall name="access" number="33"/>
+ <syscall name="nice" number="34"/>
+ <syscall name="ftime" number="35"/>
+ <syscall name="sync" number="36"/>
+ <syscall name="kill" number="37"/>
+ <syscall name="rename" number="38"/>
+ <syscall name="mkdir" number="39"/>
+ <syscall name="rmdir" number="40"/>
+ <syscall name="dup" number="41"/>
+ <syscall name="pipe" number="42"/>
+ <syscall name="times" number="43"/>
+ <syscall name="prof" number="44"/>
+ <syscall name="brk" number="45"/>
+ <syscall name="setgid" number="46"/>
+ <syscall name="getgid" number="47"/>
+ <syscall name="signal" number="48"/>
+ <syscall name="geteuid" number="49"/>
+ <syscall name="getegid" number="50"/>
+ <syscall name="acct" number="51"/>
+ <syscall name="umount2" number="52"/>
+ <syscall name="lock" number="53"/>
+ <syscall name="ioctl" number="54"/>
+ <syscall name="fcntl" number="55"/>
+ <syscall name="mpx" number="56"/>
+ <syscall name="setpgid" number="57"/>
+ <syscall name="ulimit" number="58"/>
+ <syscall name="oldolduname" number="59"/>
+ <syscall name="umask" number="60"/>
+ <syscall name="chroot" number="61"/>
+ <syscall name="ustat" number="62"/>
+ <syscall name="dup2" number="63"/>
+ <syscall name="getppid" number="64"/>
+ <syscall name="getpgrp" number="65"/>
+ <syscall name="setsid" number="66"/>
+ <syscall name="sigaction" number="67"/>
+ <syscall name="sgetmask" number="68"/>
+ <syscall name="ssetmask" number="69"/>
+ <syscall name="setreuid" number="70"/>
+ <syscall name="setregid" number="71"/>
+ <syscall name="sigsuspend" number="72"/>
+ <syscall name="sigpending" number="73"/>
+ <syscall name="sethostname" number="74"/>
+ <syscall name="setrlimit" number="75"/>
+ <syscall name="getrlimit" number="76"/>
+ <syscall name="getrusage" number="77"/>
+ <syscall name="gettimeofday" number="78"/>
+ <syscall name="settimeofday" number="79"/>
+ <syscall name="getgroups" number="80"/>
+ <syscall name="setgroups" number="81"/>
+ <syscall name="select" number="82"/>
+ <syscall name="symlink" number="83"/>
+ <syscall name="oldlstat" number="84"/>
+ <syscall name="readlink" number="85"/>
+ <syscall name="uselib" number="86"/>
+ <syscall name="swapon" number="87"/>
+ <syscall name="reboot" number="88"/>
+ <syscall name="readdir" number="89"/>
+ <syscall name="mmap" number="90"/>
+ <syscall name="munmap" number="91"/>
+ <syscall name="truncate" number="92"/>
+ <syscall name="ftruncate" number="93"/>
+ <syscall name="fchmod" number="94"/>
+ <syscall name="fchown" number="95"/>
+ <syscall name="getpriority" number="96"/>
+ <syscall name="setpriority" number="97"/>
+ <syscall name="profil" number="98"/>
+ <syscall name="statfs" number="99"/>
+ <syscall name="fstatfs" number="100"/>
+ <syscall name="ioperm" number="101"/>
+ <syscall name="socketcall" number="102"/>
+ <syscall name="syslog" number="103"/>
+ <syscall name="setitimer" number="104"/>
+ <syscall name="getitimer" number="105"/>
+ <syscall name="stat" number="106"/>
+ <syscall name="lstat" number="107"/>
+ <syscall name="fstat" number="108"/>
+ <syscall name="olduname" number="109"/>
+ <syscall name="iopl" number="110"/>
+ <syscall name="vhangup" number="111"/>
+ <syscall name="idle" number="112"/>
+ <syscall name="vm86" number="113"/>
+ <syscall name="wait4" number="114"/>
+ <syscall name="swapoff" number="115"/>
+ <syscall name="sysinfo" number="116"/>
+ <syscall name="ipc" number="117"/>
+ <syscall name="fsync" number="118"/>
+ <syscall name="sigreturn" number="119"/>
+ <syscall name="clone" number="120"/>
+ <syscall name="setdomainname" number="121"/>
+ <syscall name="uname" number="122"/>
+ <syscall name="modify_ldt" number="123"/>
+ <syscall name="adjtimex" number="124"/>
+ <syscall name="mprotect" number="125"/>
+ <syscall name="sigprocmask" number="126"/>
+ <syscall name="create_module" number="127"/>
+ <syscall name="init_module" number="128"/>
+ <syscall name="delete_module" number="129"/>
+ <syscall name="get_kernel_syms" number="130"/>
+ <syscall name="quotactl" number="131"/>
+ <syscall name="getpgid" number="132"/>
+ <syscall name="fchdir" number="133"/>
+ <syscall name="bdflush" number="134"/>
+ <syscall name="sysfs" number="135"/>
+ <syscall name="personality" number="136"/>
+ <syscall name="afs_syscall" number="137"/>
+ <syscall name="setfsuid" number="138"/>
+ <syscall name="setfsgid" number="139"/>
+ <syscall name="_llseek" number="140"/>
+ <syscall name="getdents" number="141"/>
+ <syscall name="_newselect" number="142"/>
+ <syscall name="flock" number="143"/>
+ <syscall name="msync" number="144"/>
+ <syscall name="readv" number="145"/>
+ <syscall name="writev" number="146"/>
+ <syscall name="getsid" number="147"/>
+ <syscall name="fdatasync" number="148"/>
+ <syscall name="_sysctl" number="149"/>
+ <syscall name="mlock" number="150"/>
+ <syscall name="munlock" number="151"/>
+ <syscall name="mlockall" number="152"/>
+ <syscall name="munlockall" number="153"/>
+ <syscall name="sched_setparam" number="154"/>
+ <syscall name="sched_getparam" number="155"/>
+ <syscall name="sched_setscheduler" number="156"/>
+ <syscall name="sched_getscheduler" number="157"/>
+ <syscall name="sched_yield" number="158"/>
+ <syscall name="sched_get_priority_max" number="159"/>
+ <syscall name="sched_get_priority_min" number="160"/>
+ <syscall name="sched_rr_get_interval" number="161"/>
+ <syscall name="nanosleep" number="162"/>
+ <syscall name="mremap" number="163"/>
+ <syscall name="setresuid" number="164"/>
+ <syscall name="getresuid" number="165"/>
+ <syscall name="query_module" number="166"/>
+ <syscall name="poll" number="167"/>
+ <syscall name="nfsservctl" number="168"/>
+ <syscall name="setresgid" number="169"/>
+ <syscall name="getresgid" number="170"/>
+ <syscall name="prctl" number="171"/>
+ <syscall name="rt_sigreturn" number="172"/>
+ <syscall name="rt_sigaction" number="173"/>
+ <syscall name="rt_sigprocmask" number="174"/>
+ <syscall name="rt_sigpending" number="175"/>
+ <syscall name="rt_sigtimedwait" number="176"/>
+ <syscall name="rt_sigqueueinfo" number="177"/>
+ <syscall name="rt_sigsuspend" number="178"/>
+ <syscall name="pread64" number="179"/>
+ <syscall name="pwrite64" number="180"/>
+ <syscall name="chown" number="181"/>
+ <syscall name="getcwd" number="182"/>
+ <syscall name="capget" number="183"/>
+ <syscall name="capset" number="184"/>
+ <syscall name="sigaltstack" number="185"/>
+ <syscall name="sendfile" number="186"/>
+ <syscall name="getpmsg" number="187"/>
+ <syscall name="putpmsg" number="188"/>
+ <syscall name="vfork" number="189"/>
+ <syscall name="ugetrlimit" number="190"/>
+ <syscall name="readahead" number="191"/>
+ <syscall name="pciconfig_read" number="198"/>
+ <syscall name="pciconfig_write" number="199"/>
+ <syscall name="pciconfig_iobase" number="200"/>
+ <syscall name="multiplexer" number="201"/>
+ <syscall name="getdents64" number="202"/>
+ <syscall name="pivot_root" number="203"/>
+ <syscall name="madvise" number="205"/>
+ <syscall name="mincore" number="206"/>
+ <syscall name="gettid" number="207"/>
+ <syscall name="tkill" number="208"/>
+ <syscall name="setxattr" number="209"/>
+ <syscall name="lsetxattr" number="210"/>
+ <syscall name="fsetxattr" number="211"/>
+ <syscall name="getxattr" number="212"/>
+ <syscall name="lgetxattr" number="213"/>
+ <syscall name="fgetxattr" number="214"/>
+ <syscall name="listxattr" number="215"/>
+ <syscall name="llistxattr" number="216"/>
+ <syscall name="flistxattr" number="217"/>
+ <syscall name="removexattr" number="218"/>
+ <syscall name="lremovexattr" number="219"/>
+ <syscall name="fremovexattr" number="220"/>
+ <syscall name="futex" number="221"/>
+ <syscall name="sched_setaffinity" number="222"/>
+ <syscall name="sched_getaffinity" number="223"/>
+ <syscall name="tuxcall" number="225"/>
+ <syscall name="io_setup" number="227"/>
+ <syscall name="io_destroy" number="228"/>
+ <syscall name="io_getevents" number="229"/>
+ <syscall name="io_submit" number="230"/>
+ <syscall name="io_cancel" number="231"/>
+ <syscall name="set_tid_address" number="232"/>
+ <syscall name="fadvise64" number="233"/>
+ <syscall name="exit_group" number="234"/>
+ <syscall name="lookup_dcookie" number="235"/>
+ <syscall name="epoll_create" number="236"/>
+ <syscall name="epoll_ctl" number="237"/>
+ <syscall name="epoll_wait" number="238"/>
+ <syscall name="remap_file_pages" number="239"/>
+ <syscall name="timer_create" number="240"/>
+ <syscall name="timer_settime" number="241"/>
+ <syscall name="timer_gettime" number="242"/>
+ <syscall name="timer_getoverrun" number="243"/>
+ <syscall name="timer_delete" number="244"/>
+ <syscall name="clock_settime" number="245"/>
+ <syscall name="clock_gettime" number="246"/>
+ <syscall name="clock_getres" number="247"/>
+ <syscall name="clock_nanosleep" number="248"/>
+ <syscall name="swapcontext" number="249"/>
+ <syscall name="tgkill" number="250"/>
+ <syscall name="utimes" number="251"/>
+ <syscall name="statfs64" number="252"/>
+ <syscall name="fstatfs64" number="253"/>
+ <syscall name="rtas" number="255"/>
+ <syscall name="sys_debug_setcontext" number="256"/>
+ <syscall name="mbind" number="259"/>
+ <syscall name="get_mempolicy" number="260"/>
+ <syscall name="set_mempolicy" number="261"/>
+ <syscall name="mq_open" number="262"/>
+ <syscall name="mq_unlink" number="263"/>
+ <syscall name="mq_timedsend" number="264"/>
+ <syscall name="mq_timedreceive" number="265"/>
+ <syscall name="mq_notify" number="266"/>
+ <syscall name="mq_getsetattr" number="267"/>
+ <syscall name="kexec_load" number="268"/>
+ <syscall name="add_key" number="269"/>
+ <syscall name="request_key" number="270"/>
+ <syscall name="keyctl" number="271"/>
+ <syscall name="waitid" number="272"/>
+ <syscall name="ioprio_set" number="273"/>
+ <syscall name="ioprio_get" number="274"/>
+ <syscall name="inotify_init" number="275"/>
+ <syscall name="inotify_add_watch" number="276"/>
+ <syscall name="inotify_rm_watch" number="277"/>
+ <syscall name="spu_run" number="278"/>
+ <syscall name="spu_create" number="279"/>
+ <syscall name="pselect6" number="280"/>
+ <syscall name="ppoll" number="281"/>
+ <syscall name="unshare" number="282"/>
+ <syscall name="unlinkat" number="292"/>
+ <syscall name="renameat" number="293"/>
+ <syscall name="linkat" number="294"/>
+ <syscall name="symlinkat" number="295"/>
+ <syscall name="readlinkat" number="296"/>
+ <syscall name="fchmodat" number="297"/>
+ <syscall name="faccessat" number="298"/>
+</syscalls_info>
diff --git a/share/gdb/syscalls/sparc-linux.xml b/share/gdb/syscalls/sparc-linux.xml
new file mode 100644
index 0000000..7673621
--- /dev/null
+++ b/share/gdb/syscalls/sparc-linux.xml
@@ -0,0 +1,344 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2010-2013 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-syscalls.dtd">
+
+<!-- This file was generated using the following file:
+
+ /usr/src/linux/arch/sparc/include/asm/unistd.h
+
+ The file mentioned above belongs to the Linux Kernel. -->
+
+<syscalls_info>
+ <syscall name="restart_syscall" number="0"/>
+ <syscall name="exit" number="1"/>
+ <syscall name="fork" number="2"/>
+ <syscall name="read" number="3"/>
+ <syscall name="write" number="4"/>
+ <syscall name="open" number="5"/>
+ <syscall name="close" number="6"/>
+ <syscall name="wait4" number="7"/>
+ <syscall name="creat" number="8"/>
+ <syscall name="link" number="9"/>
+ <syscall name="unlink" number="10"/>
+ <syscall name="execv" number="11"/>
+ <syscall name="chdir" number="12"/>
+ <syscall name="chown" number="13"/>
+ <syscall name="mknod" number="14"/>
+ <syscall name="chmod" number="15"/>
+ <syscall name="lchown" number="16"/>
+ <syscall name="brk" number="17"/>
+ <syscall name="perfctr" number="18"/>
+ <syscall name="lseek" number="19"/>
+ <syscall name="getpid" number="20"/>
+ <syscall name="capget" number="21"/>
+ <syscall name="capset" number="22"/>
+ <syscall name="setuid" number="23"/>
+ <syscall name="getuid" number="24"/>
+ <syscall name="vmsplice" number="25"/>
+ <syscall name="ptrace" number="26"/>
+ <syscall name="alarm" number="27"/>
+ <syscall name="sigaltstack" number="28"/>
+ <syscall name="pause" number="29"/>
+ <syscall name="utime" number="30"/>
+ <syscall name="lchown32" number="31"/>
+ <syscall name="fchown32" number="32"/>
+ <syscall name="access" number="33"/>
+ <syscall name="nice" number="34"/>
+ <syscall name="chown32" number="35"/>
+ <syscall name="sync" number="36"/>
+ <syscall name="kill" number="37"/>
+ <syscall name="stat" number="38"/>
+ <syscall name="sendfile" number="39"/>
+ <syscall name="lstat" number="40"/>
+ <syscall name="dup" number="41"/>
+ <syscall name="pipe" number="42"/>
+ <syscall name="times" number="43"/>
+ <syscall name="getuid32" number="44"/>
+ <syscall name="umount2" number="45"/>
+ <syscall name="setgid" number="46"/>
+ <syscall name="getgid" number="47"/>
+ <syscall name="signal" number="48"/>
+ <syscall name="geteuid" number="49"/>
+ <syscall name="getegid" number="50"/>
+ <syscall name="acct" number="51"/>
+ <syscall name="getgid32" number="53"/>
+ <syscall name="ioctl" number="54"/>
+ <syscall name="reboot" number="55"/>
+ <syscall name="mmap2" number="56"/>
+ <syscall name="symlink" number="57"/>
+ <syscall name="readlink" number="58"/>
+ <syscall name="execve" number="59"/>
+ <syscall name="umask" number="60"/>
+ <syscall name="chroot" number="61"/>
+ <syscall name="fstat" number="62"/>
+ <syscall name="fstat64" number="63"/>
+ <syscall name="getpagesize" number="64"/>
+ <syscall name="msync" number="65"/>
+ <syscall name="vfork" number="66"/>
+ <syscall name="pread64" number="67"/>
+ <syscall name="pwrite64" number="68"/>
+ <syscall name="geteuid32" number="69"/>
+ <syscall name="getegid32" number="70"/>
+ <syscall name="mmap" number="71"/>
+ <syscall name="setreuid32" number="72"/>
+ <syscall name="munmap" number="73"/>
+ <syscall name="mprotect" number="74"/>
+ <syscall name="madvise" number="75"/>
+ <syscall name="vhangup" number="76"/>
+ <syscall name="truncate64" number="77"/>
+ <syscall name="mincore" number="78"/>
+ <syscall name="getgroups" number="79"/>
+ <syscall name="setgroups" number="80"/>
+ <syscall name="getpgrp" number="81"/>
+ <syscall name="setgroups32" number="82"/>
+ <syscall name="setitimer" number="83"/>
+ <syscall name="ftruncate64" number="84"/>
+ <syscall name="swapon" number="85"/>
+ <syscall name="getitimer" number="86"/>
+ <syscall name="setuid32" number="87"/>
+ <syscall name="sethostname" number="88"/>
+ <syscall name="setgid32" number="89"/>
+ <syscall name="dup2" number="90"/>
+ <syscall name="setfsuid32" number="91"/>
+ <syscall name="fcntl" number="92"/>
+ <syscall name="select" number="93"/>
+ <syscall name="setfsgid32" number="94"/>
+ <syscall name="fsync" number="95"/>
+ <syscall name="setpriority" number="96"/>
+ <syscall name="socket" number="97"/>
+ <syscall name="connect" number="98"/>
+ <syscall name="accept" number="99"/>
+ <syscall name="getpriority" number="100"/>
+ <syscall name="rt_sigreturn" number="101"/>
+ <syscall name="rt_sigaction" number="102"/>
+ <syscall name="rt_sigprocmask" number="103"/>
+ <syscall name="rt_sigpending" number="104"/>
+ <syscall name="rt_sigtimedwait" number="105"/>
+ <syscall name="rt_sigqueueinfo" number="106"/>
+ <syscall name="rt_sigsuspend" number="107"/>
+ <syscall name="setresuid32" number="108"/>
+ <syscall name="getresuid32" number="109"/>
+ <syscall name="setresgid32" number="110"/>
+ <syscall name="getresgid32" number="111"/>
+ <syscall name="setregid32" number="112"/>
+ <syscall name="recvmsg" number="113"/>
+ <syscall name="sendmsg" number="114"/>
+ <syscall name="getgroups32" number="115"/>
+ <syscall name="gettimeofday" number="116"/>
+ <syscall name="getrusage" number="117"/>
+ <syscall name="getsockopt" number="118"/>
+ <syscall name="getcwd" number="119"/>
+ <syscall name="readv" number="120"/>
+ <syscall name="writev" number="121"/>
+ <syscall name="settimeofday" number="122"/>
+ <syscall name="fchown" number="123"/>
+ <syscall name="fchmod" number="124"/>
+ <syscall name="recvfrom" number="125"/>
+ <syscall name="setreuid" number="126"/>
+ <syscall name="setregid" number="127"/>
+ <syscall name="rename" number="128"/>
+ <syscall name="truncate" number="129"/>
+ <syscall name="ftruncate" number="130"/>
+ <syscall name="flock" number="131"/>
+ <syscall name="lstat64" number="132"/>
+ <syscall name="sendto" number="133"/>
+ <syscall name="shutdown" number="134"/>
+ <syscall name="socketpair" number="135"/>
+ <syscall name="mkdir" number="136"/>
+ <syscall name="rmdir" number="137"/>
+ <syscall name="utimes" number="138"/>
+ <syscall name="stat64" number="139"/>
+ <syscall name="sendfile64" number="140"/>
+ <syscall name="getpeername" number="141"/>
+ <syscall name="futex" number="142"/>
+ <syscall name="gettid" number="143"/>
+ <syscall name="getrlimit" number="144"/>
+ <syscall name="setrlimit" number="145"/>
+ <syscall name="pivot_root" number="146"/>
+ <syscall name="prctl" number="147"/>
+ <syscall name="pciconfig_read" number="148"/>
+ <syscall name="pciconfig_write" number="149"/>
+ <syscall name="getsockname" number="150"/>
+ <syscall name="inotify_init" number="151"/>
+ <syscall name="inotify_add_watch" number="152"/>
+ <syscall name="poll" number="153"/>
+ <syscall name="getdents64" number="154"/>
+ <syscall name="fcntl64" number="155"/>
+ <syscall name="inotify_rm_watch" number="156"/>
+ <syscall name="statfs" number="157"/>
+ <syscall name="fstatfs" number="158"/>
+ <syscall name="umount" number="159"/>
+ <syscall name="sched_set_affinity" number="160"/>
+ <syscall name="sched_get_affinity" number="161"/>
+ <syscall name="getdomainname" number="162"/>
+ <syscall name="setdomainname" number="163"/>
+ <syscall name="quotactl" number="165"/>
+ <syscall name="set_tid_address" number="166"/>
+ <syscall name="mount" number="167"/>
+ <syscall name="ustat" number="168"/>
+ <syscall name="setxattr" number="169"/>
+ <syscall name="lsetxattr" number="170"/>
+ <syscall name="fsetxattr" number="171"/>
+ <syscall name="getxattr" number="172"/>
+ <syscall name="lgetxattr" number="173"/>
+ <syscall name="getdents" number="174"/>
+ <syscall name="setsid" number="175"/>
+ <syscall name="fchdir" number="176"/>
+ <syscall name="fgetxattr" number="177"/>
+ <syscall name="listxattr" number="178"/>
+ <syscall name="llistxattr" number="179"/>
+ <syscall name="flistxattr" number="180"/>
+ <syscall name="removexattr" number="181"/>
+ <syscall name="lremovexattr" number="182"/>
+ <syscall name="sigpending" number="183"/>
+ <syscall name="query_module" number="184"/>
+ <syscall name="setpgid" number="185"/>
+ <syscall name="fremovexattr" number="186"/>
+ <syscall name="tkill" number="187"/>
+ <syscall name="exit_group" number="188"/>
+ <syscall name="uname" number="189"/>
+ <syscall name="init_module" number="190"/>
+ <syscall name="personality" number="191"/>
+ <syscall name="remap_file_pages" number="192"/>
+ <syscall name="epoll_create" number="193"/>
+ <syscall name="epoll_ctl" number="194"/>
+ <syscall name="epoll_wait" number="195"/>
+ <syscall name="ioprio_set" number="196"/>
+ <syscall name="getppid" number="197"/>
+ <syscall name="sigaction" number="198"/>
+ <syscall name="sgetmask" number="199"/>
+ <syscall name="ssetmask" number="200"/>
+ <syscall name="sigsuspend" number="201"/>
+ <syscall name="oldlstat" number="202"/>
+ <syscall name="uselib" number="203"/>
+ <syscall name="readdir" number="204"/>
+ <syscall name="readahead" number="205"/>
+ <syscall name="socketcall" number="206"/>
+ <syscall name="syslog" number="207"/>
+ <syscall name="lookup_dcookie" number="208"/>
+ <syscall name="fadvise64" number="209"/>
+ <syscall name="fadvise64_64" number="210"/>
+ <syscall name="tgkill" number="211"/>
+ <syscall name="waitpid" number="212"/>
+ <syscall name="swapoff" number="213"/>
+ <syscall name="sysinfo" number="214"/>
+ <syscall name="ipc" number="215"/>
+ <syscall name="sigreturn" number="216"/>
+ <syscall name="clone" number="217"/>
+ <syscall name="ioprio_get" number="218"/>
+ <syscall name="adjtimex" number="219"/>
+ <syscall name="sigprocmask" number="220"/>
+ <syscall name="create_module" number="221"/>
+ <syscall name="delete_module" number="222"/>
+ <syscall name="get_kernel_syms" number="223"/>
+ <syscall name="getpgid" number="224"/>
+ <syscall name="bdflush" number="225"/>
+ <syscall name="sysfs" number="226"/>
+ <syscall name="afs_syscall" number="227"/>
+ <syscall name="setfsuid" number="228"/>
+ <syscall name="setfsgid" number="229"/>
+ <syscall name="_newselect" number="230"/>
+ <syscall name="time" number="231"/>
+ <syscall name="splice" number="232"/>
+ <syscall name="stime" number="233"/>
+ <syscall name="statfs64" number="234"/>
+ <syscall name="fstatfs64" number="235"/>
+ <syscall name="_llseek" number="236"/>
+ <syscall name="mlock" number="237"/>
+ <syscall name="munlock" number="238"/>
+ <syscall name="mlockall" number="239"/>
+ <syscall name="munlockall" number="240"/>
+ <syscall name="sched_setparam" number="241"/>
+ <syscall name="sched_getparam" number="242"/>
+ <syscall name="sched_setscheduler" number="243"/>
+ <syscall name="sched_getscheduler" number="244"/>
+ <syscall name="sched_yield" number="245"/>
+ <syscall name="sched_get_priority_max" number="246"/>
+ <syscall name="sched_get_priority_min" number="247"/>
+ <syscall name="sched_rr_get_interval" number="248"/>
+ <syscall name="nanosleep" number="249"/>
+ <syscall name="mremap" number="250"/>
+ <syscall name="_sysctl" number="251"/>
+ <syscall name="getsid" number="252"/>
+ <syscall name="fdatasync" number="253"/>
+ <syscall name="nfsservctl" number="254"/>
+ <syscall name="sync_file_range" number="255"/>
+ <syscall name="clock_settime" number="256"/>
+ <syscall name="clock_gettime" number="257"/>
+ <syscall name="clock_getres" number="258"/>
+ <syscall name="clock_nanosleep" number="259"/>
+ <syscall name="sched_getaffinity" number="260"/>
+ <syscall name="sched_setaffinity" number="261"/>
+ <syscall name="timer_settime" number="262"/>
+ <syscall name="timer_gettime" number="263"/>
+ <syscall name="timer_getoverrun" number="264"/>
+ <syscall name="timer_delete" number="265"/>
+ <syscall name="timer_create" number="266"/>
+ <syscall name="vserver" number="267"/>
+ <syscall name="io_setup" number="268"/>
+ <syscall name="io_destroy" number="269"/>
+ <syscall name="io_submit" number="270"/>
+ <syscall name="io_cancel" number="271"/>
+ <syscall name="io_getevents" number="272"/>
+ <syscall name="mq_open" number="273"/>
+ <syscall name="mq_unlink" number="274"/>
+ <syscall name="mq_timedsend" number="275"/>
+ <syscall name="mq_timedreceive" number="276"/>
+ <syscall name="mq_notify" number="277"/>
+ <syscall name="mq_getsetattr" number="278"/>
+ <syscall name="waitid" number="279"/>
+ <syscall name="tee" number="280"/>
+ <syscall name="add_key" number="281"/>
+ <syscall name="request_key" number="282"/>
+ <syscall name="keyctl" number="283"/>
+ <syscall name="openat" number="284"/>
+ <syscall name="mkdirat" number="285"/>
+ <syscall name="mknodat" number="286"/>
+ <syscall name="fchownat" number="287"/>
+ <syscall name="futimesat" number="288"/>
+ <syscall name="fstatat64" number="289"/>
+ <syscall name="unlinkat" number="290"/>
+ <syscall name="renameat" number="291"/>
+ <syscall name="linkat" number="292"/>
+ <syscall name="symlinkat" number="293"/>
+ <syscall name="readlinkat" number="294"/>
+ <syscall name="fchmodat" number="295"/>
+ <syscall name="faccessat" number="296"/>
+ <syscall name="pselect6" number="297"/>
+ <syscall name="ppoll" number="298"/>
+ <syscall name="unshare" number="299"/>
+ <syscall name="set_robust_list" number="300"/>
+ <syscall name="get_robust_list" number="301"/>
+ <syscall name="migrate_pages" number="302"/>
+ <syscall name="mbind" number="303"/>
+ <syscall name="get_mempolicy" number="304"/>
+ <syscall name="set_mempolicy" number="305"/>
+ <syscall name="kexec_load" number="306"/>
+ <syscall name="move_pages" number="307"/>
+ <syscall name="getcpu" number="308"/>
+ <syscall name="epoll_pwait" number="309"/>
+ <syscall name="utimensat" number="310"/>
+ <syscall name="signalfd" number="311"/>
+ <syscall name="timerfd_create" number="312"/>
+ <syscall name="eventfd" number="313"/>
+ <syscall name="fallocate" number="314"/>
+ <syscall name="timerfd_settime" number="315"/>
+ <syscall name="timerfd_gettime" number="316"/>
+ <syscall name="signalfd4" number="317"/>
+ <syscall name="eventfd2" number="318"/>
+ <syscall name="epoll_create1" number="319"/>
+ <syscall name="dup3" number="320"/>
+ <syscall name="pipe2" number="321"/>
+ <syscall name="inotify_init1" number="322"/>
+ <syscall name="accept4" number="323"/>
+ <syscall name="preadv" number="324"/>
+ <syscall name="pwritev" number="325"/>
+ <syscall name="rt_tgsigqueueinfo" number="326"/>
+ <syscall name="perf_event_open" number="327"/>
+ <syscall name="recvmmsg" number="328"/>
+</syscalls_info>
diff --git a/share/gdb/syscalls/sparc64-linux.xml b/share/gdb/syscalls/sparc64-linux.xml
new file mode 100644
index 0000000..4403ca3
--- /dev/null
+++ b/share/gdb/syscalls/sparc64-linux.xml
@@ -0,0 +1,326 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2010-2013 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-syscalls.dtd">
+
+<!-- This file was generated using the following file:
+
+ /usr/src/linux/arch/sparc/include/asm/unistd.h
+
+ The file mentioned above belongs to the Linux Kernel. -->
+
+<syscalls_info>
+ <syscall name="restart_syscall" number="0"/>
+ <syscall name="exit" number="1"/>
+ <syscall name="fork" number="2"/>
+ <syscall name="read" number="3"/>
+ <syscall name="write" number="4"/>
+ <syscall name="open" number="5"/>
+ <syscall name="close" number="6"/>
+ <syscall name="wait4" number="7"/>
+ <syscall name="creat" number="8"/>
+ <syscall name="link" number="9"/>
+ <syscall name="unlink" number="10"/>
+ <syscall name="execv" number="11"/>
+ <syscall name="chdir" number="12"/>
+ <syscall name="chown" number="13"/>
+ <syscall name="mknod" number="14"/>
+ <syscall name="chmod" number="15"/>
+ <syscall name="lchown" number="16"/>
+ <syscall name="brk" number="17"/>
+ <syscall name="perfctr" number="18"/>
+ <syscall name="lseek" number="19"/>
+ <syscall name="getpid" number="20"/>
+ <syscall name="capget" number="21"/>
+ <syscall name="capset" number="22"/>
+ <syscall name="setuid" number="23"/>
+ <syscall name="getuid" number="24"/>
+ <syscall name="vmsplice" number="25"/>
+ <syscall name="ptrace" number="26"/>
+ <syscall name="alarm" number="27"/>
+ <syscall name="sigaltstack" number="28"/>
+ <syscall name="pause" number="29"/>
+ <syscall name="utime" number="30"/>
+ <syscall name="access" number="33"/>
+ <syscall name="nice" number="34"/>
+ <syscall name="sync" number="36"/>
+ <syscall name="kill" number="37"/>
+ <syscall name="stat" number="38"/>
+ <syscall name="sendfile" number="39"/>
+ <syscall name="lstat" number="40"/>
+ <syscall name="dup" number="41"/>
+ <syscall name="pipe" number="42"/>
+ <syscall name="times" number="43"/>
+ <syscall name="umount2" number="45"/>
+ <syscall name="setgid" number="46"/>
+ <syscall name="getgid" number="47"/>
+ <syscall name="signal" number="48"/>
+ <syscall name="geteuid" number="49"/>
+ <syscall name="getegid" number="50"/>
+ <syscall name="acct" number="51"/>
+ <syscall name="memory_ordering" number="52"/>
+ <syscall name="ioctl" number="54"/>
+ <syscall name="reboot" number="55"/>
+ <syscall name="symlink" number="57"/>
+ <syscall name="readlink" number="58"/>
+ <syscall name="execve" number="59"/>
+ <syscall name="umask" number="60"/>
+ <syscall name="chroot" number="61"/>
+ <syscall name="fstat" number="62"/>
+ <syscall name="fstat64" number="63"/>
+ <syscall name="getpagesize" number="64"/>
+ <syscall name="msync" number="65"/>
+ <syscall name="vfork" number="66"/>
+ <syscall name="pread64" number="67"/>
+ <syscall name="pwrite64" number="68"/>
+ <syscall name="mmap" number="71"/>
+ <syscall name="munmap" number="73"/>
+ <syscall name="mprotect" number="74"/>
+ <syscall name="madvise" number="75"/>
+ <syscall name="vhangup" number="76"/>
+ <syscall name="mincore" number="78"/>
+ <syscall name="getgroups" number="79"/>
+ <syscall name="setgroups" number="80"/>
+ <syscall name="getpgrp" number="81"/>
+ <syscall name="setitimer" number="83"/>
+ <syscall name="swapon" number="85"/>
+ <syscall name="getitimer" number="86"/>
+ <syscall name="sethostname" number="88"/>
+ <syscall name="dup2" number="90"/>
+ <syscall name="fcntl" number="92"/>
+ <syscall name="select" number="93"/>
+ <syscall name="fsync" number="95"/>
+ <syscall name="setpriority" number="96"/>
+ <syscall name="socket" number="97"/>
+ <syscall name="connect" number="98"/>
+ <syscall name="accept" number="99"/>
+ <syscall name="getpriority" number="100"/>
+ <syscall name="rt_sigreturn" number="101"/>
+ <syscall name="rt_sigaction" number="102"/>
+ <syscall name="rt_sigprocmask" number="103"/>
+ <syscall name="rt_sigpending" number="104"/>
+ <syscall name="rt_sigtimedwait" number="105"/>
+ <syscall name="rt_sigqueueinfo" number="106"/>
+ <syscall name="rt_sigsuspend" number="107"/>
+ <syscall name="setresuid" number="108"/>
+ <syscall name="getresuid" number="109"/>
+ <syscall name="setresgid" number="110"/>
+ <syscall name="getresgid" number="111"/>
+ <syscall name="recvmsg" number="113"/>
+ <syscall name="sendmsg" number="114"/>
+ <syscall name="gettimeofday" number="116"/>
+ <syscall name="getrusage" number="117"/>
+ <syscall name="getsockopt" number="118"/>
+ <syscall name="getcwd" number="119"/>
+ <syscall name="readv" number="120"/>
+ <syscall name="writev" number="121"/>
+ <syscall name="settimeofday" number="122"/>
+ <syscall name="fchown" number="123"/>
+ <syscall name="fchmod" number="124"/>
+ <syscall name="recvfrom" number="125"/>
+ <syscall name="setreuid" number="126"/>
+ <syscall name="setregid" number="127"/>
+ <syscall name="rename" number="128"/>
+ <syscall name="truncate" number="129"/>
+ <syscall name="ftruncate" number="130"/>
+ <syscall name="flock" number="131"/>
+ <syscall name="lstat64" number="132"/>
+ <syscall name="sendto" number="133"/>
+ <syscall name="shutdown" number="134"/>
+ <syscall name="socketpair" number="135"/>
+ <syscall name="mkdir" number="136"/>
+ <syscall name="rmdir" number="137"/>
+ <syscall name="utimes" number="138"/>
+ <syscall name="stat64" number="139"/>
+ <syscall name="sendfile64" number="140"/>
+ <syscall name="getpeername" number="141"/>
+ <syscall name="futex" number="142"/>
+ <syscall name="gettid" number="143"/>
+ <syscall name="getrlimit" number="144"/>
+ <syscall name="setrlimit" number="145"/>
+ <syscall name="pivot_root" number="146"/>
+ <syscall name="prctl" number="147"/>
+ <syscall name="pciconfig_read" number="148"/>
+ <syscall name="pciconfig_write" number="149"/>
+ <syscall name="getsockname" number="150"/>
+ <syscall name="inotify_init" number="151"/>
+ <syscall name="inotify_add_watch" number="152"/>
+ <syscall name="poll" number="153"/>
+ <syscall name="getdents64" number="154"/>
+ <syscall name="inotify_rm_watch" number="156"/>
+ <syscall name="statfs" number="157"/>
+ <syscall name="fstatfs" number="158"/>
+ <syscall name="umount" number="159"/>
+ <syscall name="sched_set_affinity" number="160"/>
+ <syscall name="sched_get_affinity" number="161"/>
+ <syscall name="getdomainname" number="162"/>
+ <syscall name="setdomainname" number="163"/>
+ <syscall name="utrap_install" number="164"/>
+ <syscall name="quotactl" number="165"/>
+ <syscall name="set_tid_address" number="166"/>
+ <syscall name="mount" number="167"/>
+ <syscall name="ustat" number="168"/>
+ <syscall name="setxattr" number="169"/>
+ <syscall name="lsetxattr" number="170"/>
+ <syscall name="fsetxattr" number="171"/>
+ <syscall name="getxattr" number="172"/>
+ <syscall name="lgetxattr" number="173"/>
+ <syscall name="getdents" number="174"/>
+ <syscall name="setsid" number="175"/>
+ <syscall name="fchdir" number="176"/>
+ <syscall name="fgetxattr" number="177"/>
+ <syscall name="listxattr" number="178"/>
+ <syscall name="llistxattr" number="179"/>
+ <syscall name="flistxattr" number="180"/>
+ <syscall name="removexattr" number="181"/>
+ <syscall name="lremovexattr" number="182"/>
+ <syscall name="sigpending" number="183"/>
+ <syscall name="query_module" number="184"/>
+ <syscall name="setpgid" number="185"/>
+ <syscall name="fremovexattr" number="186"/>
+ <syscall name="tkill" number="187"/>
+ <syscall name="exit_group" number="188"/>
+ <syscall name="uname" number="189"/>
+ <syscall name="init_module" number="190"/>
+ <syscall name="personality" number="191"/>
+ <syscall name="remap_file_pages" number="192"/>
+ <syscall name="epoll_create" number="193"/>
+ <syscall name="epoll_ctl" number="194"/>
+ <syscall name="epoll_wait" number="195"/>
+ <syscall name="ioprio_set" number="196"/>
+ <syscall name="getppid" number="197"/>
+ <syscall name="sigaction" number="198"/>
+ <syscall name="sgetmask" number="199"/>
+ <syscall name="ssetmask" number="200"/>
+ <syscall name="sigsuspend" number="201"/>
+ <syscall name="oldlstat" number="202"/>
+ <syscall name="uselib" number="203"/>
+ <syscall name="readdir" number="204"/>
+ <syscall name="readahead" number="205"/>
+ <syscall name="socketcall" number="206"/>
+ <syscall name="syslog" number="207"/>
+ <syscall name="lookup_dcookie" number="208"/>
+ <syscall name="fadvise64" number="209"/>
+ <syscall name="fadvise64_64" number="210"/>
+ <syscall name="tgkill" number="211"/>
+ <syscall name="waitpid" number="212"/>
+ <syscall name="swapoff" number="213"/>
+ <syscall name="sysinfo" number="214"/>
+ <syscall name="ipc" number="215"/>
+ <syscall name="sigreturn" number="216"/>
+ <syscall name="clone" number="217"/>
+ <syscall name="ioprio_get" number="218"/>
+ <syscall name="adjtimex" number="219"/>
+ <syscall name="sigprocmask" number="220"/>
+ <syscall name="create_module" number="221"/>
+ <syscall name="delete_module" number="222"/>
+ <syscall name="get_kernel_syms" number="223"/>
+ <syscall name="getpgid" number="224"/>
+ <syscall name="bdflush" number="225"/>
+ <syscall name="sysfs" number="226"/>
+ <syscall name="afs_syscall" number="227"/>
+ <syscall name="setfsuid" number="228"/>
+ <syscall name="setfsgid" number="229"/>
+ <syscall name="_newselect" number="230"/>
+ <syscall name="splice" number="232"/>
+ <syscall name="stime" number="233"/>
+ <syscall name="statfs64" number="234"/>
+ <syscall name="fstatfs64" number="235"/>
+ <syscall name="_llseek" number="236"/>
+ <syscall name="mlock" number="237"/>
+ <syscall name="munlock" number="238"/>
+ <syscall name="mlockall" number="239"/>
+ <syscall name="munlockall" number="240"/>
+ <syscall name="sched_setparam" number="241"/>
+ <syscall name="sched_getparam" number="242"/>
+ <syscall name="sched_setscheduler" number="243"/>
+ <syscall name="sched_getscheduler" number="244"/>
+ <syscall name="sched_yield" number="245"/>
+ <syscall name="sched_get_priority_max" number="246"/>
+ <syscall name="sched_get_priority_min" number="247"/>
+ <syscall name="sched_rr_get_interval" number="248"/>
+ <syscall name="nanosleep" number="249"/>
+ <syscall name="mremap" number="250"/>
+ <syscall name="_sysctl" number="251"/>
+ <syscall name="getsid" number="252"/>
+ <syscall name="fdatasync" number="253"/>
+ <syscall name="nfsservctl" number="254"/>
+ <syscall name="sync_file_range" number="255"/>
+ <syscall name="clock_settime" number="256"/>
+ <syscall name="clock_gettime" number="257"/>
+ <syscall name="clock_getres" number="258"/>
+ <syscall name="clock_nanosleep" number="259"/>
+ <syscall name="sched_getaffinity" number="260"/>
+ <syscall name="sched_setaffinity" number="261"/>
+ <syscall name="timer_settime" number="262"/>
+ <syscall name="timer_gettime" number="263"/>
+ <syscall name="timer_getoverrun" number="264"/>
+ <syscall name="timer_delete" number="265"/>
+ <syscall name="timer_create" number="266"/>
+ <syscall name="vserver" number="267"/>
+ <syscall name="io_setup" number="268"/>
+ <syscall name="io_destroy" number="269"/>
+ <syscall name="io_submit" number="270"/>
+ <syscall name="io_cancel" number="271"/>
+ <syscall name="io_getevents" number="272"/>
+ <syscall name="mq_open" number="273"/>
+ <syscall name="mq_unlink" number="274"/>
+ <syscall name="mq_timedsend" number="275"/>
+ <syscall name="mq_timedreceive" number="276"/>
+ <syscall name="mq_notify" number="277"/>
+ <syscall name="mq_getsetattr" number="278"/>
+ <syscall name="waitid" number="279"/>
+ <syscall name="tee" number="280"/>
+ <syscall name="add_key" number="281"/>
+ <syscall name="request_key" number="282"/>
+ <syscall name="keyctl" number="283"/>
+ <syscall name="openat" number="284"/>
+ <syscall name="mkdirat" number="285"/>
+ <syscall name="mknodat" number="286"/>
+ <syscall name="fchownat" number="287"/>
+ <syscall name="futimesat" number="288"/>
+ <syscall name="fstatat64" number="289"/>
+ <syscall name="unlinkat" number="290"/>
+ <syscall name="renameat" number="291"/>
+ <syscall name="linkat" number="292"/>
+ <syscall name="symlinkat" number="293"/>
+ <syscall name="readlinkat" number="294"/>
+ <syscall name="fchmodat" number="295"/>
+ <syscall name="faccessat" number="296"/>
+ <syscall name="pselect6" number="297"/>
+ <syscall name="ppoll" number="298"/>
+ <syscall name="unshare" number="299"/>
+ <syscall name="set_robust_list" number="300"/>
+ <syscall name="get_robust_list" number="301"/>
+ <syscall name="migrate_pages" number="302"/>
+ <syscall name="mbind" number="303"/>
+ <syscall name="get_mempolicy" number="304"/>
+ <syscall name="set_mempolicy" number="305"/>
+ <syscall name="kexec_load" number="306"/>
+ <syscall name="move_pages" number="307"/>
+ <syscall name="getcpu" number="308"/>
+ <syscall name="epoll_pwait" number="309"/>
+ <syscall name="utimensat" number="310"/>
+ <syscall name="signalfd" number="311"/>
+ <syscall name="timerfd_create" number="312"/>
+ <syscall name="eventfd" number="313"/>
+ <syscall name="fallocate" number="314"/>
+ <syscall name="timerfd_settime" number="315"/>
+ <syscall name="timerfd_gettime" number="316"/>
+ <syscall name="signalfd4" number="317"/>
+ <syscall name="eventfd2" number="318"/>
+ <syscall name="epoll_create1" number="319"/>
+ <syscall name="dup3" number="320"/>
+ <syscall name="pipe2" number="321"/>
+ <syscall name="inotify_init1" number="322"/>
+ <syscall name="accept4" number="323"/>
+ <syscall name="preadv" number="324"/>
+ <syscall name="pwritev" number="325"/>
+ <syscall name="rt_tgsigqueueinfo" number="326"/>
+ <syscall name="perf_event_open" number="327"/>
+ <syscall name="recvmmsg" number="328"/>
+</syscalls_info>
diff --git a/x86_64-linux-android/bin/ar b/x86_64-linux-android/bin/ar
new file mode 120000
index 0000000..96f839e
--- /dev/null
+++ b/x86_64-linux-android/bin/ar
@@ -0,0 +1 @@
+../../bin/x86_64-linux-android-ar \ No newline at end of file
diff --git a/x86_64-linux-android/bin/as b/x86_64-linux-android/bin/as
new file mode 120000
index 0000000..22c6e67
--- /dev/null
+++ b/x86_64-linux-android/bin/as
@@ -0,0 +1 @@
+../../bin/x86_64-linux-android-as \ No newline at end of file
diff --git a/x86_64-linux-android/bin/ld b/x86_64-linux-android/bin/ld
new file mode 120000
index 0000000..32f02e0
--- /dev/null
+++ b/x86_64-linux-android/bin/ld
@@ -0,0 +1 @@
+../../bin/x86_64-linux-android-ld \ No newline at end of file
diff --git a/x86_64-linux-android/bin/ld.bfd b/x86_64-linux-android/bin/ld.bfd
new file mode 120000
index 0000000..78dec82
--- /dev/null
+++ b/x86_64-linux-android/bin/ld.bfd
@@ -0,0 +1 @@
+../../bin/x86_64-linux-android-ld.bfd \ No newline at end of file
diff --git a/x86_64-linux-android/bin/ld.gold b/x86_64-linux-android/bin/ld.gold
new file mode 120000
index 0000000..cc9a1e9
--- /dev/null
+++ b/x86_64-linux-android/bin/ld.gold
@@ -0,0 +1 @@
+../../bin/x86_64-linux-android-ld.gold \ No newline at end of file
diff --git a/x86_64-linux-android/bin/ld.mcld b/x86_64-linux-android/bin/ld.mcld
new file mode 120000
index 0000000..027d1e9
--- /dev/null
+++ b/x86_64-linux-android/bin/ld.mcld
@@ -0,0 +1 @@
+../../../../../../toolchains/llvm-3.4/prebuilt/linux-x86_64/bin/ld.mcld \ No newline at end of file
diff --git a/x86_64-linux-android/bin/nm b/x86_64-linux-android/bin/nm
new file mode 120000
index 0000000..2e87749
--- /dev/null
+++ b/x86_64-linux-android/bin/nm
@@ -0,0 +1 @@
+../../bin/x86_64-linux-android-nm \ No newline at end of file
diff --git a/x86_64-linux-android/bin/objcopy b/x86_64-linux-android/bin/objcopy
new file mode 120000
index 0000000..22d7916
--- /dev/null
+++ b/x86_64-linux-android/bin/objcopy
@@ -0,0 +1 @@
+../../bin/x86_64-linux-android-objcopy \ No newline at end of file
diff --git a/x86_64-linux-android/bin/objdump b/x86_64-linux-android/bin/objdump
new file mode 120000
index 0000000..150ba55
--- /dev/null
+++ b/x86_64-linux-android/bin/objdump
@@ -0,0 +1 @@
+../../bin/x86_64-linux-android-objdump \ No newline at end of file
diff --git a/x86_64-linux-android/bin/ranlib b/x86_64-linux-android/bin/ranlib
new file mode 120000
index 0000000..13f26b2
--- /dev/null
+++ b/x86_64-linux-android/bin/ranlib
@@ -0,0 +1 @@
+../../bin/x86_64-linux-android-ranlib \ No newline at end of file
diff --git a/x86_64-linux-android/bin/strip b/x86_64-linux-android/bin/strip
new file mode 120000
index 0000000..7404ebb
--- /dev/null
+++ b/x86_64-linux-android/bin/strip
@@ -0,0 +1 @@
+../../bin/x86_64-linux-android-strip \ No newline at end of file
diff --git a/x86_64-linux-android/lib/ldscripts/elf32_x86_64.x b/x86_64-linux-android/lib/ldscripts/elf32_x86_64.x
new file mode 100644
index 0000000..0ad5063
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf32_x86_64.x
@@ -0,0 +1,227 @@
+/* Default linker script, for normal executables */
+/* Modified for Android. */
+OUTPUT_FORMAT("elf32-x86-64", "elf32-x86-64",
+ "elf32-x86-64")
+OUTPUT_ARCH(i386:x64-32)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0x400000); . = 0x400000 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.init : { *(.rela.init) }
+ .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
+ .rela.fini : { *(.rela.fini) }
+ .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
+ .rela.data.rel.ro : { *(.rela.data.rel.ro .rela.data.rel.ro.* .rela.gnu.linkonce.d.rel.ro.*) }
+ .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
+ .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
+ .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
+ .rela.ctors : { *(.rela.ctors) }
+ .rela.dtors : { *(.rela.dtors) }
+ .rela.got : { *(.rela.got) }
+ .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
+ .rela.ldata : { *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*) }
+ .rela.lbss : { *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*) }
+ .rela.lrodata : { *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*) }
+ .rela.ifunc : { *(.rela.ifunc) }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+ the linker would then create the section even if it turns out to
+ be empty, which isn't pretty. */
+ . = ALIGN(32 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend.o file contains the
+ end-of-ctors marker, and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .);
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(32 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(32 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 32 / 8 : 1);
+ }
+ . = ALIGN(32 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xbn b/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xbn
new file mode 100644
index 0000000..fd61792
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xbn
@@ -0,0 +1,224 @@
+/* Script for -N: mix text and data on same page; don't align data */
+OUTPUT_FORMAT("elf32-x86-64", "elf32-x86-64",
+ "elf32-x86-64")
+OUTPUT_ARCH(i386:x64-32)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0x400000); . = 0x400000 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.init : { *(.rela.init) }
+ .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
+ .rela.fini : { *(.rela.fini) }
+ .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
+ .rela.data.rel.ro : { *(.rela.data.rel.ro .rela.data.rel.ro.* .rela.gnu.linkonce.d.rel.ro.*) }
+ .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
+ .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
+ .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
+ .rela.ctors : { *(.rela.ctors) }
+ .rela.dtors : { *(.rela.dtors) }
+ .rela.got : { *(.rela.got) }
+ .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
+ .rela.ldata : { *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*) }
+ .rela.lbss : { *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*) }
+ .rela.lrodata : { *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*) }
+ .rela.ifunc : { *(.rela.ifunc) }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* With -N, text and data share pages, so the data segment is not
+ aligned to a page boundary; the location counter is left as is. */
+ . = .;
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+ the linker would then create the section even if it turns out to
+ be empty, which isn't pretty. */
+ . = ALIGN(32 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend.o file contains the
+ end-of-ctors marker, and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(32 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(32 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 32 / 8 : 1);
+ }
+ . = ALIGN(32 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xc b/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xc
new file mode 100644
index 0000000..96f8ad3
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xc
@@ -0,0 +1,228 @@
+/* Script for -z combreloc: combine and sort reloc sections */
+OUTPUT_FORMAT("elf32-x86-64", "elf32-x86-64",
+ "elf32-x86-64")
+OUTPUT_ARCH(i386:x64-32)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0x400000); . = 0x400000 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.dyn :
+ {
+ *(.rela.init)
+ *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
+ *(.rela.fini)
+ *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
+ *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
+ *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
+ *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
+ *(.rela.ctors)
+ *(.rela.dtors)
+ *(.rela.got)
+ *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
+ *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*)
+ *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*)
+ *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*)
+ *(.rela.ifunc)
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+ the linker would then create the section even if it turns out to
+ be empty, which isn't pretty. */
+ . = ALIGN(32 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend.o file contains the
+ end-of-ctors marker, and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .);
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(32 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(32 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 32 / 8 : 1);
+ }
+ . = ALIGN(32 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xd b/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xd
new file mode 100644
index 0000000..eeecf60
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xd
@@ -0,0 +1,226 @@
+/* Script for ld -pie: link position independent executable */
+OUTPUT_FORMAT("elf32-x86-64", "elf32-x86-64",
+ "elf32-x86-64")
+OUTPUT_ARCH(i386:x64-32)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0); . = 0 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.init : { *(.rela.init) }
+ .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
+ .rela.fini : { *(.rela.fini) }
+ .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
+ .rela.data.rel.ro : { *(.rela.data.rel.ro .rela.data.rel.ro.* .rela.gnu.linkonce.d.rel.ro.*) }
+ .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
+ .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
+ .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
+ .rela.ctors : { *(.rela.ctors) }
+ .rela.dtors : { *(.rela.dtors) }
+ .rela.got : { *(.rela.got) }
+ .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
+ .rela.ldata : { *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*) }
+ .rela.lbss : { *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*) }
+ .rela.lrodata : { *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*) }
+ .rela.ifunc : { *(.rela.ifunc) }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+ the linker would then create the section even if it turns out to
+ be empty, which isn't pretty. */
+ . = ALIGN(32 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend.o file contains the
+ end-of-ctors marker, and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .);
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(32 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(32 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 32 / 8 : 1);
+ }
+ . = ALIGN(32 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xdc b/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xdc
new file mode 100644
index 0000000..a19b462
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xdc
@@ -0,0 +1,228 @@
+/* Script for -pie -z combreloc: position independent executable, combine & sort relocs */
+OUTPUT_FORMAT("elf32-x86-64", "elf32-x86-64",
+ "elf32-x86-64")
+OUTPUT_ARCH(i386:x64-32)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0); . = 0 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.dyn : /* -z combreloc: every non-PLT relocation input listed below is gathered into this one output section */
+ {
+ *(.rela.init)
+ *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
+ *(.rela.fini)
+ *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
+ *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
+ *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
+ *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
+ *(.rela.ctors)
+ *(.rela.dtors)
+ *(.rela.got)
+ *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
+ *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*)
+ *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*)
+ *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*)
+ *(.rela.ifunc)
+ }
+ .rela.plt : /* PLT relocations; the hidden __rela_iplt_start/__rela_iplt_end symbols bracket the .rela.iplt input */
+ {
+ *(.rela.plt)
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+ the linker would then create the section even if it turns out to
+ be empty, which isn't pretty. */
+ . = ALIGN(32 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array : /* order: crtbegin's .init_array, then the priority-sorted suffixed .init_array/.ctors inputs, then plain .init_array plus .ctors from files other than crtbegin/crtend */
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors)) /* EXCLUDE_FILE here is written directly before .ctors; NOTE(review): per the ld manual it should restrict only that pattern - confirm */
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array : /* order: crtbegin's .fini_array, then the priority-sorted suffixed .fini_array/.dtors inputs, then plain .fini_array plus .dtors from files other than crtbegin/crtend */
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors)) /* EXCLUDE_FILE here is written directly before .dtors; NOTE(review): per the ld manual it should restrict only that pattern - confirm */
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end-of-ctors marker, so it must come last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors : /* order: crtbegin's .dtors first, then .dtors from files other than crtend, then the sorted suffixed .dtors inputs, and finally any .dtors still unplaced */
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .);
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(32 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(32 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 32 / 8 : 1);
+ }
+ . = ALIGN(32 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xdw b/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xdw
new file mode 100644
index 0000000..ee71093
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xdw
@@ -0,0 +1,227 @@
+/* Script for -pie -z combreloc -z now -z relro: position independent executable, combine & sort relocs */
+OUTPUT_FORMAT("elf32-x86-64", "elf32-x86-64",
+ "elf32-x86-64")
+OUTPUT_ARCH(i386:x64-32)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0); . = 0 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.dyn :
+ {
+ *(.rela.init)
+ *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
+ *(.rela.fini)
+ *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
+ *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
+ *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
+ *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
+ *(.rela.ctors)
+ *(.rela.dtors)
+ *(.rela.got)
+ *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
+ *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*)
+ *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*)
+ *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*)
+ *(.rela.ifunc)
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+ the linker would then create the section even if it turns out to
+ be empty, which isn't pretty. */
+ . = ALIGN(32 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end-of-ctors marker, so it must come last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got.plt) *(.igot.plt) *(.got) *(.igot) } /* -z now variant: .got.plt and .igot.plt are folded into .got, ahead of the DATA_SEGMENT_RELRO_END that follows */
+ . = DATA_SEGMENT_RELRO_END (0, .);
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(32 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(32 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 32 / 8 : 1);
+ }
+ . = ALIGN(32 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xn b/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xn
new file mode 100644
index 0000000..7580acb
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xn
@@ -0,0 +1,226 @@
+/* Script for -n: mix text and data on same page */
+OUTPUT_FORMAT("elf32-x86-64", "elf32-x86-64",
+ "elf32-x86-64")
+OUTPUT_ARCH(i386:x64-32)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0x400000); . = 0x400000 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.init : { *(.rela.init) }
+ .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
+ .rela.fini : { *(.rela.fini) }
+ .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
+ .rela.data.rel.ro : { *(.rela.data.rel.ro .rela.data.rel.ro.* .rela.gnu.linkonce.d.rel.ro.*) }
+ .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
+ .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
+ .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
+ .rela.ctors : { *(.rela.ctors) }
+ .rela.dtors : { *(.rela.dtors) }
+ .rela.got : { *(.rela.got) }
+ .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
+ .rela.ldata : { *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*) }
+ .rela.lbss : { *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*) }
+ .rela.lrodata : { *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*) }
+ .rela.ifunc : { *(.rela.ifunc) }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+ the linker would then create the section even if it turns out to
+ be empty, which isn't pretty. */
+ . = ALIGN(32 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end-of-ctors marker, so it must come last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .);
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(32 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(32 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 32 / 8 : 1);
+ }
+ . = ALIGN(32 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xr b/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xr
new file mode 100644
index 0000000..de6c8da
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xr
@@ -0,0 +1,154 @@
+/* Script for ld -r: link without relocation */
+OUTPUT_FORMAT("elf32-x86-64", "elf32-x86-64",
+ "elf32-x86-64")
+OUTPUT_ARCH(i386:x64-32)
+ /* For some reason, the Solaris linker makes bad executables
+ if gld -r is used and the intermediate file has sections starting
+ at non-zero addresses. Could be a Solaris ld bug, could be a GNU ld
+ bug. But for now assigning the zero vmas works. */
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ .interp 0 : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash 0 : { *(.hash) }
+ .gnu.hash 0 : { *(.gnu.hash) }
+ .dynsym 0 : { *(.dynsym) }
+ .dynstr 0 : { *(.dynstr) }
+ .gnu.version 0 : { *(.gnu.version) }
+ .gnu.version_d 0: { *(.gnu.version_d) }
+ .gnu.version_r 0: { *(.gnu.version_r) }
+ .rela.init 0 : { *(.rela.init) }
+ .rela.text 0 : { *(.rela.text) }
+ .rela.fini 0 : { *(.rela.fini) }
+ .rela.rodata 0 : { *(.rela.rodata) }
+ .rela.data.rel.ro 0 : { *(.rela.data.rel.ro) }
+ .rela.data 0 : { *(.rela.data) }
+ .rela.tdata 0 : { *(.rela.tdata) }
+ .rela.tbss 0 : { *(.rela.tbss) }
+ .rela.ctors 0 : { *(.rela.ctors) }
+ .rela.dtors 0 : { *(.rela.dtors) }
+ .rela.got 0 : { *(.rela.got) }
+ .rela.bss 0 : { *(.rela.bss) }
+ .rela.ldata 0 : { *(.rela.ldata) }
+ .rela.lbss 0 : { *(.rela.lbss) }
+ .rela.lrodata 0 : { *(.rela.lrodata) }
+ .rela.ifunc 0 : { *(.rela.ifunc) }
+ .rela.plt 0 :
+ {
+ *(.rela.plt)
+ }
+ .init 0 :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt 0 : { *(.plt) *(.iplt) }
+ .text 0 :
+ {
+ *(.text .stub)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini 0 :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ .rodata 0 : { *(.rodata) }
+ .rodata1 0 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame 0 : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table 0 : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges 0 : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Final-link scripts (e.g. -pie, -n) adjust the data segment address here,
+ page-aligning it to make life easier for apriori; ld -r output does not. */
+ /* Exception handling */
+ .eh_frame 0 : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table 0 : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges 0 : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata 0 : { *(.tdata) }
+ .tbss 0 : { *(.tbss) }
+ /* Final-link scripts (e.g. -pie, -n) align here so that their
+ __preinit_array_start label is properly aligned, keeping the label
+ outside the section so an empty section is not created. This -r
+ script keeps the alignment but defines no such label. */
+ . = ALIGN(32 / 8);
+ .preinit_array 0 :
+ {
+ KEEP (*(.preinit_array))
+ }
+ .jcr 0 : { KEEP (*(.jcr)) }
+ .dynamic 0 : { *(.dynamic) }
+ .got 0 : { *(.got) *(.igot) }
+ .got.plt 0 : { *(.got.plt) *(.igot.plt) }
+ .data 0 :
+ {
+ *(.data)
+ }
+ .data1 0 : { *(.data1) }
+ .bss 0 :
+ {
+ *(.dynbss)
+ *(.bss)
+ *(COMMON)
+ /* Final-link scripts (e.g. -pie, -n) align here so that .bss occupies
+ space up to _end, and align again after .bss in case it disappears.
+ This -r script applies neither alignment and defines no _end. */
+ }
+ .lbss 0 :
+ {
+ *(.dynlbss)
+ *(.lbss)
+ *(LARGE_COMMON)
+ }
+ .lrodata 0 :
+ {
+ *(.lrodata)
+ }
+ .ldata 0 :
+ {
+ *(.ldata)
+ }
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xs b/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xs
new file mode 100644
index 0000000..54cb13b
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xs
@@ -0,0 +1,217 @@
+/* Script for ld --shared: link shared library (output format elf32-x86-64) */
+OUTPUT_FORMAT("elf32-x86-64", "elf32-x86-64",
+ "elf32-x86-64")
+OUTPUT_ARCH(i386:x64-32)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ . = 0 + SIZEOF_HEADERS; /* Link at base address 0, leaving room for the output file's headers. */
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.init : { *(.rela.init) } /* Relocations: this script keeps one output section per relocated section. */
+ .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
+ .rela.fini : { *(.rela.fini) }
+ .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
+ .rela.data.rel.ro : { *(.rela.data.rel.ro .rela.data.rel.ro.* .rela.gnu.linkonce.d.rel.ro.*) }
+ .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
+ .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
+ .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
+ .rela.ctors : { *(.rela.ctors) }
+ .rela.dtors : { *(.rela.dtors) }
+ .rela.got : { *(.rela.got) }
+ .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
+ .rela.ldata : { *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*) }
+ .rela.lbss : { *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*) }
+ .rela.lrodata : { *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*) }
+ .rela.ifunc : { *(.rela.ifunc) }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ *(.rela.iplt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init))) /* KEEP: never dropped by --gc-sections; SORT_NONE: exempt from --sort-section. */
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .); /* PROVIDE: defined only if referenced and not defined by an input file. */
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) } /* ONLY_IF_RO: created here only if all matching inputs are read-only. */
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling: writable (ONLY_IF_RW) counterparts of the ONLY_IF_RO sections above. */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+ the linker would then create the section even if it turns out to
+ be empty, which isn't pretty. */
+ . = ALIGN(32 / 8); /* 32 / 8 = 4-byte alignment. */
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*))) /* Ordered by the init priority encoded in the section name. */
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors)) /* EXCLUDE_FILE filters only the .ctors pattern that follows it. */
+ }
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors)) /* EXCLUDE_FILE filters only the .dtors pattern that follows it. */
+ }
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end-of-ctors marker, so it must come last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors)) /* Same ordering scheme as .ctors: crtbegin first, crtend's section last. */
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .); /* With -z relro the RELRO area ends 24 bytes into .got.plt (offset 0 if .got.plt is smaller). */
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .; /* No-op assignment to dot: ties __bss_start to .bss so orphan sections are not placed between them. */
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(32 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(32 / 8);
+ . = SEGMENT_START("ldata-segment", .); /* Uses the base given for "ldata-segment" on the command line, else the current address. */
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) : /* Next page boundary plus the current offset within the page. */
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 32 / 8 : 1); /* Dot is section-relative here: pad to 4 bytes only when the section is non-empty. */
+ }
+ . = ALIGN(32 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.); /* Closes the region opened by DATA_SEGMENT_ALIGN above. */
+ /* Stabs debugging sections (each placed at address 0). */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) } /* Inputs matched here are dropped from the output. */
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xsc b/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xsc
new file mode 100644
index 0000000..521515d
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xsc
@@ -0,0 +1,220 @@
+/* Script for --shared -z combreloc: shared library, combine & sort relocs (output format elf32-x86-64) */
+/* Modified for Android. */
+OUTPUT_FORMAT("elf32-x86-64", "elf32-x86-64",
+ "elf32-x86-64")
+OUTPUT_ARCH(i386:x64-32)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ . = 0 + SIZEOF_HEADERS; /* Link at base address 0, leaving room for the output file's headers. */
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.dyn : /* combreloc: every non-PLT relocation section is gathered into this one output section. */
+ {
+ *(.rela.init)
+ *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
+ *(.rela.fini)
+ *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
+ *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
+ *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
+ *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
+ *(.rela.ctors)
+ *(.rela.dtors)
+ *(.rela.got)
+ *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
+ *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*)
+ *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*)
+ *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*)
+ *(.rela.ifunc)
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ *(.rela.iplt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init))) /* KEEP: never dropped by --gc-sections; SORT_NONE: exempt from --sort-section. */
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .); /* PROVIDE: defined only if referenced and not defined by an input file. */
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) } /* ONLY_IF_RO: created here only if all matching inputs are read-only. */
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling: writable (ONLY_IF_RW) counterparts of the ONLY_IF_RO sections above. */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+ the linker would then create the section even if it turns out to
+ be empty, which isn't pretty. */
+ . = ALIGN(32 / 8); /* 32 / 8 = 4-byte alignment. */
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*))) /* Ordered by the init priority encoded in the section name. */
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors)) /* EXCLUDE_FILE filters only the .ctors pattern that follows it. */
+ }
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors)) /* EXCLUDE_FILE filters only the .dtors pattern that follows it. */
+ }
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end-of-ctors marker, so it must come last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors)) /* Same ordering scheme as .ctors: crtbegin first, crtend's section last. */
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .); /* With -z relro the RELRO area ends 24 bytes into .got.plt (offset 0 if .got.plt is smaller). */
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .; /* No-op assignment to dot: ties __bss_start to .bss so orphan sections are not placed between them. */
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(32 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(32 / 8);
+ . = SEGMENT_START("ldata-segment", .); /* Uses the base given for "ldata-segment" on the command line, else the current address. */
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) : /* Next page boundary plus the current offset within the page. */
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 32 / 8 : 1); /* Dot is section-relative here: pad to 4 bytes only when the section is non-empty. */
+ }
+ . = ALIGN(32 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.); /* Closes the region opened by DATA_SEGMENT_ALIGN above. */
+ /* Stabs debugging sections (each placed at address 0). */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) } /* Inputs matched here are dropped from the output. */
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xsw b/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xsw
new file mode 100644
index 0000000..aada16e
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xsw
@@ -0,0 +1,218 @@
+/* Script for --shared -z combreloc -z now -z relro: shared library, combine & sort relocs (output format elf32-x86-64) */
+OUTPUT_FORMAT("elf32-x86-64", "elf32-x86-64",
+ "elf32-x86-64")
+OUTPUT_ARCH(i386:x64-32)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ . = 0 + SIZEOF_HEADERS; /* Link at base address 0, leaving room for the output file's headers. */
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.dyn : /* combreloc: every non-PLT relocation section is gathered into this one output section. */
+ {
+ *(.rela.init)
+ *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
+ *(.rela.fini)
+ *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
+ *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
+ *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
+ *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
+ *(.rela.ctors)
+ *(.rela.dtors)
+ *(.rela.got)
+ *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
+ *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*)
+ *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*)
+ *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*)
+ *(.rela.ifunc)
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ *(.rela.iplt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init))) /* KEEP: never dropped by --gc-sections; SORT_NONE: exempt from --sort-section. */
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .); /* PROVIDE: defined only if referenced and not defined by an input file. */
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) } /* ONLY_IF_RO: created here only if all matching inputs are read-only. */
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling: writable (ONLY_IF_RW) counterparts of the ONLY_IF_RO sections above. */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+ the linker would then create the section even if it turns out to
+ be empty, which isn't pretty. */
+ . = ALIGN(32 / 8); /* 32 / 8 = 4-byte alignment. */
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*))) /* Ordered by the init priority encoded in the section name. */
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors)) /* EXCLUDE_FILE filters only the .ctors pattern that follows it. */
+ }
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors)) /* EXCLUDE_FILE filters only the .dtors pattern that follows it. */
+ }
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end-of-ctors marker, so it must come last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors)) /* Same ordering scheme as .ctors: crtbegin first, crtend's section last. */
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got.plt) *(.igot.plt) *(.got) *(.igot) } /* .got.plt is folded into .got here, so all of it precedes the RELRO end below. */
+ . = DATA_SEGMENT_RELRO_END (0, .); /* With -z relro the RELRO area ends exactly here (offset 0). */
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .; /* No-op assignment to dot: ties __bss_start to .bss so orphan sections are not placed between them. */
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(32 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(32 / 8);
+ . = SEGMENT_START("ldata-segment", .); /* Uses the base given for "ldata-segment" on the command line, else the current address. */
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) : /* Next page boundary plus the current offset within the page. */
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 32 / 8 : 1); /* Dot is section-relative here: pad to 4 bytes only when the section is non-empty. */
+ }
+ . = ALIGN(32 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.); /* Closes the region opened by DATA_SEGMENT_ALIGN above. */
+ /* Stabs debugging sections (each placed at address 0). */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) } /* Inputs matched here are dropped from the output. */
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xu b/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xu
new file mode 100644
index 0000000..5c5fd9f
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xu
@@ -0,0 +1,155 @@
+/* Script for ld -Ur: link without relocation, but do create constructors (output format elf32-x86-64) */
+OUTPUT_FORMAT("elf32-x86-64", "elf32-x86-64",
+ "elf32-x86-64")
+OUTPUT_ARCH(i386:x64-32)
+ /* For some reason, the Solaris linker makes bad executables
+ if gld -r is used and the intermediate file has sections starting
+ at non-zero addresses. Could be a Solaris ld bug, could be a GNU ld
+ bug. But for now assigning the zero vmas works. */
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ .interp 0 : { *(.interp) } /* The "0" is an explicit VMA of zero; see the note above SECTIONS. */
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash 0 : { *(.hash) }
+ .gnu.hash 0 : { *(.gnu.hash) }
+ .dynsym 0 : { *(.dynsym) }
+ .dynstr 0 : { *(.dynstr) }
+ .gnu.version 0 : { *(.gnu.version) }
+ .gnu.version_d 0: { *(.gnu.version_d) }
+ .gnu.version_r 0: { *(.gnu.version_r) }
+ .rela.init 0 : { *(.rela.init) } /* Relocation sections: exact names only, no wildcard suffixes, in this script. */
+ .rela.text 0 : { *(.rela.text) }
+ .rela.fini 0 : { *(.rela.fini) }
+ .rela.rodata 0 : { *(.rela.rodata) }
+ .rela.data.rel.ro 0 : { *(.rela.data.rel.ro) }
+ .rela.data 0 : { *(.rela.data) }
+ .rela.tdata 0 : { *(.rela.tdata) }
+ .rela.tbss 0 : { *(.rela.tbss) }
+ .rela.ctors 0 : { *(.rela.ctors) }
+ .rela.dtors 0 : { *(.rela.dtors) }
+ .rela.got 0 : { *(.rela.got) }
+ .rela.bss 0 : { *(.rela.bss) }
+ .rela.ldata 0 : { *(.rela.ldata) }
+ .rela.lbss 0 : { *(.rela.lbss) }
+ .rela.lrodata 0 : { *(.rela.lrodata) }
+ .rela.ifunc 0 : { *(.rela.ifunc) }
+ .rela.plt 0 :
+ {
+ *(.rela.plt)
+ }
+ .init 0 :
+ {
+ KEEP (*(SORT_NONE(.init))) /* KEEP: never dropped by --gc-sections; SORT_NONE: exempt from --sort-section. */
+ }
+ .plt 0 : { *(.plt) *(.iplt) }
+ .text 0 :
+ {
+ *(.text .stub)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini 0 :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ .rodata 0 : { *(.rodata) }
+ .rodata1 0 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame 0 : ONLY_IF_RO { KEEP (*(.eh_frame)) } /* ONLY_IF_RO: created here only if all matching inputs are read-only. */
+ .gcc_except_table 0 : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges 0 : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. (Not done in this -Ur script: no statement follows.) */
+ /* Exception handling: writable (ONLY_IF_RW) counterparts of the ONLY_IF_RO sections above. */
+ .eh_frame 0 : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table 0 : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges 0 : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata 0 : { *(.tdata) }
+ .tbss 0 : { *(.tbss) }
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+ the linker would then create the section even if it turns out to
+ be empty, which isn't pretty. */
+ . = ALIGN(32 / 8); /* 32 / 8 = 4-byte alignment. */
+ .preinit_array 0 :
+ {
+ KEEP (*(.preinit_array))
+ }
+ .jcr 0 : { KEEP (*(.jcr)) }
+ .dynamic 0 : { *(.dynamic) }
+ .got 0 : { *(.got) *(.igot) }
+ .got.plt 0 : { *(.got.plt) *(.igot.plt) }
+ .data 0 :
+ {
+ *(.data)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 0 : { *(.data1) }
+ .bss 0 :
+ {
+ *(.dynbss)
+ *(.bss)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections.
+ (This -Ur script contains no ALIGN statement here and defines no _end.) */
+ }
+ .lbss 0 :
+ {
+ *(.dynlbss)
+ *(.lbss)
+ *(LARGE_COMMON)
+ }
+ .lrodata 0 :
+ {
+ *(.lrodata)
+ }
+ .ldata 0 :
+ {
+ *(.ldata)
+ }
+ /* Stabs debugging sections (each placed at address 0). */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xw b/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xw
new file mode 100644
index 0000000..a3119b0
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf32_x86_64.xw
@@ -0,0 +1,227 @@
+/* Script for -z combreloc -z now -z relro: combine and sort reloc sections */
+OUTPUT_FORMAT("elf32-x86-64", "elf32-x86-64",
+ "elf32-x86-64")
+OUTPUT_ARCH(i386:x64-32)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0x400000); . = 0x400000 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.dyn :
+ {
+ *(.rela.init)
+ *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
+ *(.rela.fini)
+ *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
+ *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
+ *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
+ *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
+ *(.rela.ctors)
+ *(.rela.dtors)
+ *(.rela.got)
+ *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
+ *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*)
+ *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*)
+ *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*)
+ *(.rela.ifunc)
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+ the linker would then create the section even if it turns out to
+ be empty, which isn't pretty. */
+ . = ALIGN(32 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end-of-ctors marker, and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got.plt) *(.igot.plt) *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (0, .);
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(32 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(32 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 32 / 8 : 1);
+ }
+ . = ALIGN(32 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_i386.x b/x86_64-linux-android/lib/ldscripts/elf_i386.x
new file mode 100644
index 0000000..75807ad
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_i386.x
@@ -0,0 +1,209 @@
+/* Default linker script, for normal executables */
+/* Modified for Android. */
+OUTPUT_FORMAT("elf32-i386", "elf32-i386",
+ "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0x08048000); . = 0x08048000 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rel.init : { *(.rel.init) }
+ .rel.text : { *(.rel.text .rel.text.* .rel.gnu.linkonce.t.*) }
+ .rel.fini : { *(.rel.fini) }
+ .rel.rodata : { *(.rel.rodata .rel.rodata.* .rel.gnu.linkonce.r.*) }
+ .rel.data.rel.ro : { *(.rel.data.rel.ro .rel.data.rel.ro.* .rel.gnu.linkonce.d.rel.ro.*) }
+ .rel.data : { *(.rel.data .rel.data.* .rel.gnu.linkonce.d.*) }
+ .rel.tdata : { *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*) }
+ .rel.tbss : { *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*) }
+ .rel.ctors : { *(.rel.ctors) }
+ .rel.dtors : { *(.rel.dtors) }
+ .rel.got : { *(.rel.got) }
+ .rel.bss : { *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*) }
+ .rel.ifunc : { *(.rel.ifunc) }
+ .rel.plt :
+ {
+ *(.rel.plt)
+ PROVIDE_HIDDEN (__rel_iplt_start = .);
+ *(.rel.iplt)
+ PROVIDE_HIDDEN (__rel_iplt_end = .);
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+ the linker would then create the section even if it turns out to
+ be empty, which isn't pretty. */
+ . = ALIGN(32 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end-of-ctors marker, and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 12 ? 12 : 0, .);
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(32 / 8);
+ }
+ . = ALIGN(32 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ . = ALIGN(32 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_i386.xbn b/x86_64-linux-android/lib/ldscripts/elf_i386.xbn
new file mode 100644
index 0000000..137fdd2
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_i386.xbn
@@ -0,0 +1,206 @@
+/* Script for -N: mix text and data on same page; don't align data */
+OUTPUT_FORMAT("elf32-i386", "elf32-i386",
+ "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0x08048000); . = 0x08048000 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rel.init : { *(.rel.init) }
+ .rel.text : { *(.rel.text .rel.text.* .rel.gnu.linkonce.t.*) }
+ .rel.fini : { *(.rel.fini) }
+ .rel.rodata : { *(.rel.rodata .rel.rodata.* .rel.gnu.linkonce.r.*) }
+ .rel.data.rel.ro : { *(.rel.data.rel.ro .rel.data.rel.ro.* .rel.gnu.linkonce.d.rel.ro.*) }
+ .rel.data : { *(.rel.data .rel.data.* .rel.gnu.linkonce.d.*) }
+ .rel.tdata : { *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*) }
+ .rel.tbss : { *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*) }
+ .rel.ctors : { *(.rel.ctors) }
+ .rel.dtors : { *(.rel.dtors) }
+ .rel.got : { *(.rel.got) }
+ .rel.bss : { *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*) }
+ .rel.ifunc : { *(.rel.ifunc) }
+ .rel.plt :
+ {
+ *(.rel.plt)
+ PROVIDE_HIDDEN (__rel_iplt_start = .);
+ *(.rel.iplt)
+ PROVIDE_HIDDEN (__rel_iplt_end = .);
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Data segment start. This -N script does not page-align the data
+ segment, so the location counter is kept at its current value. */
+ . = .;
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+ the linker would then create the section even if it turns out to
+ be empty, which isn't pretty. */
+ . = ALIGN(32 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end-of-ctors marker, and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(32 / 8);
+ }
+ . = ALIGN(32 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ . = ALIGN(32 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_i386.xc b/x86_64-linux-android/lib/ldscripts/elf_i386.xc
new file mode 100644
index 0000000..41f2a97
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_i386.xc
@@ -0,0 +1,211 @@
+/* Script for -z combreloc: combine and sort reloc sections */
+OUTPUT_FORMAT("elf32-i386", "elf32-i386",
+ "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0x08048000); . = 0x08048000 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rel.dyn :
+ {
+ *(.rel.init)
+ *(.rel.text .rel.text.* .rel.gnu.linkonce.t.*)
+ *(.rel.fini)
+ *(.rel.rodata .rel.rodata.* .rel.gnu.linkonce.r.*)
+ *(.rel.data.rel.ro .rel.data.rel.ro.* .rel.gnu.linkonce.d.rel.ro.*)
+ *(.rel.data .rel.data.* .rel.gnu.linkonce.d.*)
+ *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*)
+ *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*)
+ *(.rel.ctors)
+ *(.rel.dtors)
+ *(.rel.got)
+ *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*)
+ *(.rel.ifunc)
+ }
+ .rel.plt :
+ {
+ *(.rel.plt)
+ PROVIDE_HIDDEN (__rel_iplt_start = .);
+ *(.rel.iplt)
+ PROVIDE_HIDDEN (__rel_iplt_end = .);
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+ the linker would then create the section even if it turns out to
+ be empty, which isn't pretty. */
+ . = ALIGN(32 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end-of-ctors marker, and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 12 ? 12 : 0, .);
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(32 / 8);
+ }
+ . = ALIGN(32 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ . = ALIGN(32 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_i386.xd b/x86_64-linux-android/lib/ldscripts/elf_i386.xd
new file mode 100644
index 0000000..62119c1
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_i386.xd
@@ -0,0 +1,208 @@
+/* Script for ld -pie: link position independent executable. Output is elf32-i386 with entry symbol _start; __executable_start is 0. */
+OUTPUT_FORMAT("elf32-i386", "elf32-i386",
+ "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0); . = 0 + SIZEOF_HEADERS; /* layout starts just past the file headers */
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rel.init : { *(.rel.init) } /* relocations: each .rel.* group gets its own output section */
+ .rel.text : { *(.rel.text .rel.text.* .rel.gnu.linkonce.t.*) }
+ .rel.fini : { *(.rel.fini) }
+ .rel.rodata : { *(.rel.rodata .rel.rodata.* .rel.gnu.linkonce.r.*) }
+ .rel.data.rel.ro : { *(.rel.data.rel.ro .rel.data.rel.ro.* .rel.gnu.linkonce.d.rel.ro.*) }
+ .rel.data : { *(.rel.data .rel.data.* .rel.gnu.linkonce.d.*) }
+ .rel.tdata : { *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*) }
+ .rel.tbss : { *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*) }
+ .rel.ctors : { *(.rel.ctors) }
+ .rel.dtors : { *(.rel.dtors) }
+ .rel.got : { *(.rel.got) }
+ .rel.bss : { *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*) }
+ .rel.ifunc : { *(.rel.ifunc) }
+ .rel.plt : /* __rel_iplt_start/__rel_iplt_end bracket the .rel.iplt entries */
+ {
+ *(.rel.plt)
+ PROVIDE_HIDDEN (__rel_iplt_start = .);
+ *(.rel.iplt)
+ PROVIDE_HIDDEN (__rel_iplt_end = .);
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text : /* unlikely, exit, startup and hot code are laid out ahead of ordinary .text */
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .); /* end-of-text markers (three spellings of the same address) */
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) } /* read-only placement; an ONLY_IF_RW twin follows in the data segment */
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling: ONLY_IF_RW counterparts of the ONLY_IF_RO sections above. */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+ the linker would then create the section even if it turns out to
+ be empty, which isn't pretty. */
+ . = ALIGN(32 / 8); /* 32 / 8 = 4-byte alignment */
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array)) /* entries from crtbegin objects come first */
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array)) /* entries from crtbegin objects come first */
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end of ctors marker and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors : /* same ordering scheme as .ctors: crtbegin first, crtend last */
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 12 ? 12 : 0, .); /* first argument is 12 once .got.plt is at least 12 bytes, otherwise 0 */
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(32 / 8);
+ }
+ . = ALIGN(32 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ . = ALIGN(32 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ; /* further aliases, all set to the same address as _end */
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections (each given address 0, like the DWARF ones below). */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) } /* matching input sections are left out of the output */
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_i386.xdc b/x86_64-linux-android/lib/ldscripts/elf_i386.xdc
new file mode 100644
index 0000000..e7d5332
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_i386.xdc
@@ -0,0 +1,211 @@
+/* Script for -pie -z combreloc: position independent executable, combine & sort relocs. Output is elf32-i386 with entry symbol _start; __executable_start is 0. */
+OUTPUT_FORMAT("elf32-i386", "elf32-i386",
+ "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0); . = 0 + SIZEOF_HEADERS; /* layout starts just past the file headers */
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rel.dyn : /* every non-PLT .rel.* input group is gathered into this single output section */
+ {
+ *(.rel.init)
+ *(.rel.text .rel.text.* .rel.gnu.linkonce.t.*)
+ *(.rel.fini)
+ *(.rel.rodata .rel.rodata.* .rel.gnu.linkonce.r.*)
+ *(.rel.data.rel.ro .rel.data.rel.ro.* .rel.gnu.linkonce.d.rel.ro.*)
+ *(.rel.data .rel.data.* .rel.gnu.linkonce.d.*)
+ *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*)
+ *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*)
+ *(.rel.ctors)
+ *(.rel.dtors)
+ *(.rel.got)
+ *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*)
+ *(.rel.ifunc)
+ }
+ .rel.plt : /* __rel_iplt_start/__rel_iplt_end bracket the .rel.iplt entries */
+ {
+ *(.rel.plt)
+ PROVIDE_HIDDEN (__rel_iplt_start = .);
+ *(.rel.iplt)
+ PROVIDE_HIDDEN (__rel_iplt_end = .);
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text : /* unlikely, exit, startup and hot code are laid out ahead of ordinary .text */
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .); /* end-of-text markers (three spellings of the same address) */
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) } /* read-only placement; an ONLY_IF_RW twin follows in the data segment */
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling: ONLY_IF_RW counterparts of the ONLY_IF_RO sections above. */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+ the linker would then create the section even if it turns out to
+ be empty, which isn't pretty. */
+ . = ALIGN(32 / 8); /* 32 / 8 = 4-byte alignment */
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array)) /* entries from crtbegin objects come first */
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array)) /* entries from crtbegin objects come first */
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end of ctors marker and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors : /* same ordering scheme as .ctors: crtbegin first, crtend last */
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 12 ? 12 : 0, .); /* first argument is 12 once .got.plt is at least 12 bytes, otherwise 0 */
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(32 / 8);
+ }
+ . = ALIGN(32 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ . = ALIGN(32 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ; /* further aliases, all set to the same address as _end */
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections (each given address 0, like the DWARF ones below). */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) } /* matching input sections are left out of the output */
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_i386.xdw b/x86_64-linux-android/lib/ldscripts/elf_i386.xdw
new file mode 100644
index 0000000..caf905a
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_i386.xdw
@@ -0,0 +1,210 @@
+/* Script for -pie -z combreloc -z now -z relro: position independent executable, combine & sort relocs. Output is elf32-i386 with entry symbol _start; __executable_start is 0. */
+OUTPUT_FORMAT("elf32-i386", "elf32-i386",
+ "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0); . = 0 + SIZEOF_HEADERS; /* layout starts just past the file headers */
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rel.dyn : /* every non-PLT .rel.* input group is gathered into this single output section */
+ {
+ *(.rel.init)
+ *(.rel.text .rel.text.* .rel.gnu.linkonce.t.*)
+ *(.rel.fini)
+ *(.rel.rodata .rel.rodata.* .rel.gnu.linkonce.r.*)
+ *(.rel.data.rel.ro .rel.data.rel.ro.* .rel.gnu.linkonce.d.rel.ro.*)
+ *(.rel.data .rel.data.* .rel.gnu.linkonce.d.*)
+ *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*)
+ *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*)
+ *(.rel.ctors)
+ *(.rel.dtors)
+ *(.rel.got)
+ *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*)
+ *(.rel.ifunc)
+ }
+ .rel.plt : /* __rel_iplt_start/__rel_iplt_end bracket the .rel.iplt entries */
+ {
+ *(.rel.plt)
+ PROVIDE_HIDDEN (__rel_iplt_start = .);
+ *(.rel.iplt)
+ PROVIDE_HIDDEN (__rel_iplt_end = .);
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text : /* unlikely, exit, startup and hot code are laid out ahead of ordinary .text */
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .); /* end-of-text markers (three spellings of the same address) */
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) } /* read-only placement; an ONLY_IF_RW twin follows in the data segment */
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling: ONLY_IF_RW counterparts of the ONLY_IF_RO sections above. */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+ the linker would then create the section even if it turns out to
+ be empty, which isn't pretty. */
+ . = ALIGN(32 / 8); /* 32 / 8 = 4-byte alignment */
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array)) /* entries from crtbegin objects come first */
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array)) /* entries from crtbegin objects come first */
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end of ctors marker and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors : /* same ordering scheme as .ctors: crtbegin first, crtend last */
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got.plt) *(.igot.plt) *(.got) *(.igot) } /* .got.plt/.igot.plt input is folded into .got; this script has no separate .got.plt output section */
+ . = DATA_SEGMENT_RELRO_END (0, .); /* offset argument is 0: the whole .got, PLT slots included, precedes this point */
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(32 / 8);
+ }
+ . = ALIGN(32 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ . = ALIGN(32 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ; /* further aliases, all set to the same address as _end */
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections (each given address 0, like the DWARF ones below). */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) } /* matching input sections are left out of the output */
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_i386.xn b/x86_64-linux-android/lib/ldscripts/elf_i386.xn
new file mode 100644
index 0000000..df0b9c7
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_i386.xn
@@ -0,0 +1,208 @@
+/* Script for -n: mix text and data on same page. Output is elf32-i386 with entry symbol _start; __executable_start is 0x08048000. */
+OUTPUT_FORMAT("elf32-i386", "elf32-i386",
+ "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0x08048000); . = 0x08048000 + SIZEOF_HEADERS; /* layout starts just past the file headers at the fixed base */
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rel.init : { *(.rel.init) } /* relocations: each .rel.* group gets its own output section */
+ .rel.text : { *(.rel.text .rel.text.* .rel.gnu.linkonce.t.*) }
+ .rel.fini : { *(.rel.fini) }
+ .rel.rodata : { *(.rel.rodata .rel.rodata.* .rel.gnu.linkonce.r.*) }
+ .rel.data.rel.ro : { *(.rel.data.rel.ro .rel.data.rel.ro.* .rel.gnu.linkonce.d.rel.ro.*) }
+ .rel.data : { *(.rel.data .rel.data.* .rel.gnu.linkonce.d.*) }
+ .rel.tdata : { *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*) }
+ .rel.tbss : { *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*) }
+ .rel.ctors : { *(.rel.ctors) }
+ .rel.dtors : { *(.rel.dtors) }
+ .rel.got : { *(.rel.got) }
+ .rel.bss : { *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*) }
+ .rel.ifunc : { *(.rel.ifunc) }
+ .rel.plt : /* __rel_iplt_start/__rel_iplt_end bracket the .rel.iplt entries */
+ {
+ *(.rel.plt)
+ PROVIDE_HIDDEN (__rel_iplt_start = .);
+ *(.rel.iplt)
+ PROVIDE_HIDDEN (__rel_iplt_end = .);
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text : /* unlikely, exit, startup and hot code are laid out ahead of ordinary .text */
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .); /* end-of-text markers (three spellings of the same address) */
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) } /* read-only placement; an ONLY_IF_RW twin follows in the data segment */
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling: ONLY_IF_RW counterparts of the ONLY_IF_RO sections above. */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+ the linker would then create the section even if it turns out to
+ be empty, which isn't pretty. */
+ . = ALIGN(32 / 8); /* 32 / 8 = 4-byte alignment */
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array)) /* entries from crtbegin objects come first */
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array)) /* entries from crtbegin objects come first */
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end of ctors marker and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors : /* same ordering scheme as .ctors: crtbegin first, crtend last */
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 12 ? 12 : 0, .); /* first argument is 12 once .got.plt is at least 12 bytes, otherwise 0 */
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(32 / 8);
+ }
+ . = ALIGN(32 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ . = ALIGN(32 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ; /* further aliases, all set to the same address as _end */
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections (each given address 0, like the DWARF ones below). */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) } /* matching input sections are left out of the output */
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_i386.xr b/x86_64-linux-android/lib/ldscripts/elf_i386.xr
new file mode 100644
index 0000000..0062568
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_i386.xr
@@ -0,0 +1,137 @@
+/* Script for ld -r: relocatable (partial) link, output format elf32-i386 */
+OUTPUT_FORMAT("elf32-i386", "elf32-i386",
+ "elf32-i386")
+OUTPUT_ARCH(i386)
+ /* Most output sections below are given an explicit address of 0.
+ Reason: the Solaris linker makes bad executables if gld -r is used
+ and the intermediate file has sections starting at non-zero
+ addresses. Could be a Solaris ld bug, could be a GNU ld bug. */
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ .interp 0 : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash 0 : { *(.hash) }
+ .gnu.hash 0 : { *(.gnu.hash) }
+ .dynsym 0 : { *(.dynsym) }
+ .dynstr 0 : { *(.dynstr) }
+ .gnu.version 0 : { *(.gnu.version) }
+ .gnu.version_d 0: { *(.gnu.version_d) }
+ .gnu.version_r 0: { *(.gnu.version_r) }
+ .rel.init 0 : { *(.rel.init) }
+ .rel.text 0 : { *(.rel.text) }
+ .rel.fini 0 : { *(.rel.fini) }
+ .rel.rodata 0 : { *(.rel.rodata) }
+ .rel.data.rel.ro 0 : { *(.rel.data.rel.ro) }
+ .rel.data 0 : { *(.rel.data) }
+ .rel.tdata 0 : { *(.rel.tdata) }
+ .rel.tbss 0 : { *(.rel.tbss) }
+ .rel.ctors 0 : { *(.rel.ctors) }
+ .rel.dtors 0 : { *(.rel.dtors) }
+ .rel.got 0 : { *(.rel.got) }
+ .rel.bss 0 : { *(.rel.bss) }
+ .rel.ifunc 0 : { *(.rel.ifunc) }
+ .rel.plt 0 :
+ {
+ *(.rel.plt)
+ }
+ .init 0 :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt 0 : { *(.plt) *(.iplt) }
+ .text 0 :
+ {
+ *(.text .stub)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini 0 :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ .rodata 0 : { *(.rodata) }
+ .rodata1 0 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame 0 : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table 0 : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges 0 : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Data-side sections start here. This script contains no statement that
+ adjusts or page-aligns the data segment address at this point. */
+ /* Exception handling sections, emitted here only when the inputs are read-write (ONLY_IF_RW) */
+ .eh_frame 0 : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table 0 : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges 0 : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata 0 : { *(.tdata) }
+ .tbss 0 : { *(.tbss) }
+ /* Align to 4 bytes (32 / 8) before .preinit_array. The alignment is done
+ outside the section rather than inside it, because otherwise the linker
+ would create the section even if it turns out to be empty, which isn't
+ pretty. This script defines no __preinit_array_start label itself. */
+ . = ALIGN(32 / 8);
+ .preinit_array 0 :
+ {
+ KEEP (*(.preinit_array))
+ }
+ .jcr 0 : { KEEP (*(.jcr)) }
+ .dynamic 0 : { *(.dynamic) }
+ .got 0 : { *(.got) *(.igot) }
+ .got.plt 0 : { *(.got.plt) *(.igot.plt) }
+ .data 0 :
+ {
+ *(.data)
+ }
+ .data1 0 : { *(.data1) }
+ .bss 0 :
+ {
+ *(.dynbss)
+ *(.bss)
+ *(COMMON)
+ /* No ALIGN statement follows the input sections in this script, and
+ no _end symbol is defined here, so .bss ends right after the
+ COMMON input sections. */
+ }
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_i386.xs b/x86_64-linux-android/lib/ldscripts/elf_i386.xs
new file mode 100644
index 0000000..b534aa7
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_i386.xs
@@ -0,0 +1,199 @@
+/* Script for ld --shared: link a 32-bit x86 (elf32-i386) shared library */
+OUTPUT_FORMAT("elf32-i386", "elf32-i386",
+ "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ . = 0 + SIZEOF_HEADERS;
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rel.init : { *(.rel.init) }
+ .rel.text : { *(.rel.text .rel.text.* .rel.gnu.linkonce.t.*) }
+ .rel.fini : { *(.rel.fini) }
+ .rel.rodata : { *(.rel.rodata .rel.rodata.* .rel.gnu.linkonce.r.*) }
+ .rel.data.rel.ro : { *(.rel.data.rel.ro .rel.data.rel.ro.* .rel.gnu.linkonce.d.rel.ro.*) }
+ .rel.data : { *(.rel.data .rel.data.* .rel.gnu.linkonce.d.*) }
+ .rel.tdata : { *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*) }
+ .rel.tbss : { *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*) }
+ .rel.ctors : { *(.rel.ctors) }
+ .rel.dtors : { *(.rel.dtors) }
+ .rel.got : { *(.rel.got) }
+ .rel.bss : { *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*) }
+ .rel.ifunc : { *(.rel.ifunc) }
+ .rel.plt :
+ {
+ *(.rel.plt)
+ *(.rel.iplt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment: round up to MAXPAGESIZE first.
+ For 32 bits we want exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling sections, emitted here only when the inputs are read-write (ONLY_IF_RW) */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Align to 4 bytes (32 / 8) before .preinit_array. The alignment is done
+ outside the section rather than inside it, because otherwise the linker
+ would create the section even if it turns out to be empty, which isn't
+ pretty. This script defines no __preinit_array_start label itself. */
+ . = ALIGN(32 / 8);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end-of-ctors marker and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 12 ? 12 : 0, .); /* offset is 12 when .got.plt holds at least 12 bytes, else 0 */
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(32 / 8);
+ }
+ . = ALIGN(32 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ . = ALIGN(32 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_i386.xsc b/x86_64-linux-android/lib/ldscripts/elf_i386.xsc
new file mode 100644
index 0000000..ab707bf
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_i386.xsc
@@ -0,0 +1,203 @@
+/* Script for --shared -z combreloc: shared library, combine & sort relocs (gathered into .rel.dyn below) */
+/* Modified for Android. */
+OUTPUT_FORMAT("elf32-i386", "elf32-i386",
+ "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ . = 0 + SIZEOF_HEADERS;
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rel.dyn :
+ {
+ *(.rel.init)
+ *(.rel.text .rel.text.* .rel.gnu.linkonce.t.*)
+ *(.rel.fini)
+ *(.rel.rodata .rel.rodata.* .rel.gnu.linkonce.r.*)
+ *(.rel.data.rel.ro .rel.data.rel.ro.* .rel.gnu.linkonce.d.rel.ro.*)
+ *(.rel.data .rel.data.* .rel.gnu.linkonce.d.*)
+ *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*)
+ *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*)
+ *(.rel.ctors)
+ *(.rel.dtors)
+ *(.rel.got)
+ *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*)
+ *(.rel.ifunc)
+ }
+ .rel.plt :
+ {
+ *(.rel.plt)
+ *(.rel.iplt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment: round up to MAXPAGESIZE first.
+ For 32 bits we want exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling sections, emitted here only when the inputs are read-write (ONLY_IF_RW) */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Align to 4 bytes (32 / 8) before .preinit_array. The alignment is done
+ outside the section rather than inside it, because otherwise the linker
+ would create the section even if it turns out to be empty, which isn't
+ pretty. This script defines no __preinit_array_start label itself. */
+ . = ALIGN(32 / 8);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end-of-ctors marker and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 12 ? 12 : 0, .); /* offset is 12 when .got.plt holds at least 12 bytes, else 0 */
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(32 / 8);
+ }
+ . = ALIGN(32 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ . = ALIGN(32 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_i386.xsw b/x86_64-linux-android/lib/ldscripts/elf_i386.xsw
new file mode 100644
index 0000000..eb92bf4
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_i386.xsw
@@ -0,0 +1,201 @@
+/* Script for --shared -z combreloc -z now -z relro: shared library, combine & sort relocs (gathered into .rel.dyn below) */
+OUTPUT_FORMAT("elf32-i386", "elf32-i386",
+ "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ . = 0 + SIZEOF_HEADERS;
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rel.dyn :
+ {
+ *(.rel.init)
+ *(.rel.text .rel.text.* .rel.gnu.linkonce.t.*)
+ *(.rel.fini)
+ *(.rel.rodata .rel.rodata.* .rel.gnu.linkonce.r.*)
+ *(.rel.data.rel.ro .rel.data.rel.ro.* .rel.gnu.linkonce.d.rel.ro.*)
+ *(.rel.data .rel.data.* .rel.gnu.linkonce.d.*)
+ *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*)
+ *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*)
+ *(.rel.ctors)
+ *(.rel.dtors)
+ *(.rel.got)
+ *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*)
+ *(.rel.ifunc)
+ }
+ .rel.plt :
+ {
+ *(.rel.plt)
+ *(.rel.iplt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment: round up to MAXPAGESIZE first.
+ For 32 bits we want exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling sections, emitted here only when the inputs are read-write (ONLY_IF_RW) */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Align to 4 bytes (32 / 8) before .preinit_array. The alignment is done
+ outside the section rather than inside it, because otherwise the linker
+ would create the section even if it turns out to be empty, which isn't
+ pretty. This script defines no __preinit_array_start label itself. */
+ . = ALIGN(32 / 8);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end-of-ctors marker and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got.plt) *(.igot.plt) *(.got) *(.igot) } /* .got.plt and .igot.plt inputs are folded into .got; this script has no separate .got.plt output section */
+ . = DATA_SEGMENT_RELRO_END (0, .); /* offset 0: placed directly after .got */
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(32 / 8);
+ }
+ . = ALIGN(32 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ . = ALIGN(32 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_i386.xu b/x86_64-linux-android/lib/ldscripts/elf_i386.xu
new file mode 100644
index 0000000..5726faa
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_i386.xu
@@ -0,0 +1,138 @@
+/* Script for ld -Ur: relocatable link (as for -r) that also creates constructors, output format elf32-i386 */
+OUTPUT_FORMAT("elf32-i386", "elf32-i386",
+ "elf32-i386")
+OUTPUT_ARCH(i386)
+ /* Most output sections below are given an explicit address of 0.
+ Reason: the Solaris linker makes bad executables if gld -r is used
+ and the intermediate file has sections starting at non-zero
+ addresses. Could be a Solaris ld bug, could be a GNU ld bug. */
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ .interp 0 : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash 0 : { *(.hash) }
+ .gnu.hash 0 : { *(.gnu.hash) }
+ .dynsym 0 : { *(.dynsym) }
+ .dynstr 0 : { *(.dynstr) }
+ .gnu.version 0 : { *(.gnu.version) }
+ .gnu.version_d 0: { *(.gnu.version_d) }
+ .gnu.version_r 0: { *(.gnu.version_r) }
+ .rel.init 0 : { *(.rel.init) }
+ .rel.text 0 : { *(.rel.text) }
+ .rel.fini 0 : { *(.rel.fini) }
+ .rel.rodata 0 : { *(.rel.rodata) }
+ .rel.data.rel.ro 0 : { *(.rel.data.rel.ro) }
+ .rel.data 0 : { *(.rel.data) }
+ .rel.tdata 0 : { *(.rel.tdata) }
+ .rel.tbss 0 : { *(.rel.tbss) }
+ .rel.ctors 0 : { *(.rel.ctors) }
+ .rel.dtors 0 : { *(.rel.dtors) }
+ .rel.got 0 : { *(.rel.got) }
+ .rel.bss 0 : { *(.rel.bss) }
+ .rel.ifunc 0 : { *(.rel.ifunc) }
+ .rel.plt 0 :
+ {
+ *(.rel.plt)
+ }
+ .init 0 :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt 0 : { *(.plt) *(.iplt) }
+ .text 0 :
+ {
+ *(.text .stub)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini 0 :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ .rodata 0 : { *(.rodata) }
+ .rodata1 0 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame 0 : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table 0 : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges 0 : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Data-side sections start here. This script contains no statement that
+ adjusts or page-aligns the data segment address at this point. */
+ /* Exception handling sections, emitted here only when the inputs are read-write (ONLY_IF_RW) */
+ .eh_frame 0 : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table 0 : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges 0 : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata 0 : { *(.tdata) }
+ .tbss 0 : { *(.tbss) }
+ /* Align to 4 bytes (32 / 8) before .preinit_array. The alignment is done
+ outside the section rather than inside it, because otherwise the linker
+ would create the section even if it turns out to be empty, which isn't
+ pretty. This script defines no __preinit_array_start label itself. */
+ . = ALIGN(32 / 8);
+ .preinit_array 0 :
+ {
+ KEEP (*(.preinit_array))
+ }
+ .jcr 0 : { KEEP (*(.jcr)) }
+ .dynamic 0 : { *(.dynamic) }
+ .got 0 : { *(.got) *(.igot) }
+ .got.plt 0 : { *(.got.plt) *(.igot.plt) }
+ .data 0 :
+ {
+ *(.data)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 0 : { *(.data1) }
+ .bss 0 :
+ {
+ *(.dynbss)
+ *(.bss)
+ *(COMMON)
+ /* No ALIGN statement follows the input sections in this script, and
+ no _end symbol is defined here, so .bss ends right after the
+ COMMON input sections. */
+ }
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_i386.xw b/x86_64-linux-android/lib/ldscripts/elf_i386.xw
new file mode 100644
index 0000000..f25dd62
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_i386.xw
@@ -0,0 +1,210 @@
+/* Script for -z combreloc -z now -z relro: combine and sort reloc sections */
+OUTPUT_FORMAT("elf32-i386", "elf32-i386",
+ "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0x08048000); . = 0x08048000 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rel.dyn :
+ {
+ *(.rel.init)
+ *(.rel.text .rel.text.* .rel.gnu.linkonce.t.*)
+ *(.rel.fini)
+ *(.rel.rodata .rel.rodata.* .rel.gnu.linkonce.r.*)
+ *(.rel.data.rel.ro .rel.data.rel.ro.* .rel.gnu.linkonce.d.rel.ro.*)
+ *(.rel.data .rel.data.* .rel.gnu.linkonce.d.*)
+ *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*)
+ *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*)
+ *(.rel.ctors)
+ *(.rel.dtors)
+ *(.rel.got)
+ *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*)
+ *(.rel.ifunc)
+ }
+ .rel.plt :
+ {
+ *(.rel.plt)
+ PROVIDE_HIDDEN (__rel_iplt_start = .);
+ *(.rel.iplt)
+ PROVIDE_HIDDEN (__rel_iplt_end = .);
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+ the linker would then create the section even if it turns out to
+ be empty, which isn't pretty. */
+ . = ALIGN(32 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctor section from
+ the crtend.o file until after the sorted ctors.
+ The .ctor section from the crtend file contains the
+ end of ctors marker and it must be last */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got.plt) *(.igot.plt) *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (0, .);
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(32 / 8);
+ }
+ . = ALIGN(32 / 8); /* keep the end of data word-aligned even if .bss was dropped */
+ . = SEGMENT_START("ldata-segment", .); /* stays at `.` unless an "ldata-segment" address is supplied; this script places no sections after it */
+ . = ALIGN(32 / 8); /* re-align in case SEGMENT_START moved the location counter */
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ; /* additional end symbols, all set to the same address as _end */
+ PROVIDE (end = .); /* PROVIDE: defined only if referenced and not defined by an input object */
+ . = DATA_SEGMENT_END (.); /* pairs with the DATA_SEGMENT_ALIGN statement earlier in this script */
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_k1om.x b/x86_64-linux-android/lib/ldscripts/elf_k1om.x
new file mode 100644
index 0000000..f20c044
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_k1om.x
@@ -0,0 +1,230 @@
+/* Default linker script, for normal executables */
+/* Modified for Android. */
+OUTPUT_FORMAT("elf64-k1om", "elf64-k1om",
+ "elf64-k1om")
+OUTPUT_ARCH(k1om)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0x400000); . = 0x400000 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.init : { *(.rela.init) }
+ .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
+ .rela.fini : { *(.rela.fini) }
+ .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
+ .rela.data.rel.ro : { *(.rela.data.rel.ro .rela.data.rel.ro.* .rela.gnu.linkonce.d.rel.ro.*) }
+ .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
+ .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
+ .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
+ .rela.ctors : { *(.rela.ctors) }
+ .rela.dtors : { *(.rela.dtors) }
+ .rela.got : { *(.rela.got) }
+ .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
+ .rela.ldata : { *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*) }
+ .rela.lbss : { *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*) }
+ .rela.lrodata : { *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*) }
+ .rela.iplt :
+ {
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) }
+ .iplt : { *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. Unlike the 32-bit scripts, this
+ does not force a page boundary; DATA_SEGMENT_ALIGN picks the placement. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Align the __preinit_array_start label to the pointer size of this
+ elf64 script (64 / 8, as the .bss alignment uses). The label stays
+ outside the section because defining it inside would make the linker
+ create the section even if it turns out to be empty. */
+ . = ALIGN(64 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctor section from
+ the crtend.o file until after the sorted ctors.
+ The .ctor section from the crtend file contains the
+ end of ctors marker and it must be last */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .);
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_k1om.xbn b/x86_64-linux-android/lib/ldscripts/elf_k1om.xbn
new file mode 100644
index 0000000..1694fe1
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_k1om.xbn
@@ -0,0 +1,227 @@
+/* Script for -N: mix text and data on same page; don't align data */
+OUTPUT_FORMAT("elf64-k1om", "elf64-k1om",
+ "elf64-k1om")
+OUTPUT_ARCH(k1om)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0x400000); . = 0x400000 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.init : { *(.rela.init) }
+ .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
+ .rela.fini : { *(.rela.fini) }
+ .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
+ .rela.data.rel.ro : { *(.rela.data.rel.ro .rela.data.rel.ro.* .rela.gnu.linkonce.d.rel.ro.*) }
+ .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
+ .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
+ .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
+ .rela.ctors : { *(.rela.ctors) }
+ .rela.dtors : { *(.rela.dtors) }
+ .rela.got : { *(.rela.got) }
+ .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
+ .rela.ldata : { *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*) }
+ .rela.lbss : { *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*) }
+ .rela.lrodata : { *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*) }
+ .rela.iplt :
+ {
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) }
+ .iplt : { *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* -N script: text and data share pages, so the data segment address is
+ neither adjusted nor page-aligned here. */
+ . = .;
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Align the __preinit_array_start label to the pointer size of this
+ elf64 script (64 / 8, as the .bss alignment uses). The label stays
+ outside the section because defining it inside would make the linker
+ create the section even if it turns out to be empty. */
+ . = ALIGN(64 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctor section from
+ the crtend.o file until after the sorted ctors.
+ The .ctor section from the crtend file contains the
+ end of ctors marker and it must be last */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_k1om.xc b/x86_64-linux-android/lib/ldscripts/elf_k1om.xc
new file mode 100644
index 0000000..93708bb
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_k1om.xc
@@ -0,0 +1,230 @@
+/* Script for -z combreloc: combine and sort reloc sections */
+OUTPUT_FORMAT("elf64-k1om", "elf64-k1om",
+ "elf64-k1om")
+OUTPUT_ARCH(k1om)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0x400000); . = 0x400000 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.dyn :
+ {
+ *(.rela.init)
+ *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
+ *(.rela.fini)
+ *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
+ *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
+ *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
+ *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
+ *(.rela.ctors)
+ *(.rela.dtors)
+ *(.rela.got)
+ *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
+ *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*)
+ *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*)
+ *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*)
+ PROVIDE_HIDDEN (__rel_iplt_start = .);
+ PROVIDE_HIDDEN (__rel_iplt_end = .);
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) }
+ .iplt : { *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. Unlike the 32-bit scripts, this
+ does not force a page boundary; DATA_SEGMENT_ALIGN picks the placement. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Align the __preinit_array_start label to the pointer size of this
+ elf64 script (64 / 8, as the .bss alignment uses). The label stays
+ outside the section because defining it inside would make the linker
+ create the section even if it turns out to be empty. */
+ . = ALIGN(64 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctor section from
+ the crtend.o file until after the sorted ctors.
+ The .ctor section from the crtend file contains the
+ end of ctors marker and it must be last */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .);
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_k1om.xd b/x86_64-linux-android/lib/ldscripts/elf_k1om.xd
new file mode 100644
index 0000000..ab78d27
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_k1om.xd
@@ -0,0 +1,229 @@
+/* Script for ld -pie: link position independent executable */
+OUTPUT_FORMAT("elf64-k1om", "elf64-k1om",
+ "elf64-k1om")
+OUTPUT_ARCH(k1om)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0); . = 0 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.init : { *(.rela.init) }
+ .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
+ .rela.fini : { *(.rela.fini) }
+ .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
+ .rela.data.rel.ro : { *(.rela.data.rel.ro .rela.data.rel.ro.* .rela.gnu.linkonce.d.rel.ro.*) }
+ .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
+ .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
+ .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
+ .rela.ctors : { *(.rela.ctors) }
+ .rela.dtors : { *(.rela.dtors) }
+ .rela.got : { *(.rela.got) }
+ .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
+ .rela.ldata : { *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*) }
+ .rela.lbss : { *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*) }
+ .rela.lrodata : { *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*) }
+ .rela.iplt :
+ {
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) }
+ .iplt : { *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned. The label
+ is defined outside the section so that no empty section is created, so
+ it must share the 8-byte (64 / 8) alignment of the 64-bit array entries;
+ otherwise it could end up before the padded start of the section. */
+ . = ALIGN(64 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend.o file contains the
+ end-of-ctors marker, and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .);
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_k1om.xdc b/x86_64-linux-android/lib/ldscripts/elf_k1om.xdc
new file mode 100644
index 0000000..6943233
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_k1om.xdc
@@ -0,0 +1,230 @@
+/* Script for -pie -z combreloc: position independent executable, combine & sort relocs */
+OUTPUT_FORMAT("elf64-k1om", "elf64-k1om",
+ "elf64-k1om")
+OUTPUT_ARCH(k1om)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0); . = 0 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.dyn :
+ {
+ *(.rela.init)
+ *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
+ *(.rela.fini)
+ *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
+ *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
+ *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
+ *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
+ *(.rela.ctors)
+ *(.rela.dtors)
+ *(.rela.got)
+ *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
+ *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*)
+ *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*)
+ *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*)
+ PROVIDE_HIDDEN (__rel_iplt_start = .);
+ PROVIDE_HIDDEN (__rel_iplt_end = .);
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) }
+ .iplt : { *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned. The label
+ is defined outside the section so that no empty section is created, so
+ it must share the 8-byte (64 / 8) alignment of the 64-bit array entries;
+ otherwise it could end up before the padded start of the section. */
+ . = ALIGN(64 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend.o file contains the
+ end-of-ctors marker, and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .);
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_k1om.xdw b/x86_64-linux-android/lib/ldscripts/elf_k1om.xdw
new file mode 100644
index 0000000..ed7be82
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_k1om.xdw
@@ -0,0 +1,229 @@
+/* Script for -pie -z combreloc -z now -z relro: position independent executable, combine & sort relocs */
+OUTPUT_FORMAT("elf64-k1om", "elf64-k1om",
+ "elf64-k1om")
+OUTPUT_ARCH(k1om)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0); . = 0 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.dyn :
+ {
+ *(.rela.init)
+ *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
+ *(.rela.fini)
+ *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
+ *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
+ *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
+ *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
+ *(.rela.ctors)
+ *(.rela.dtors)
+ *(.rela.got)
+ *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
+ *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*)
+ *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*)
+ *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*)
+ PROVIDE_HIDDEN (__rel_iplt_start = .);
+ PROVIDE_HIDDEN (__rel_iplt_end = .);
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) }
+ .iplt : { *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned. The label
+ is defined outside the section so that no empty section is created, so
+ it must share the 8-byte (64 / 8) alignment of the 64-bit array entries;
+ otherwise it could end up before the padded start of the section. */
+ . = ALIGN(64 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend.o file contains the
+ end-of-ctors marker, and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got.plt) *(.igot.plt) *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (0, .);
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_k1om.xn b/x86_64-linux-android/lib/ldscripts/elf_k1om.xn
new file mode 100644
index 0000000..977e042
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_k1om.xn
@@ -0,0 +1,229 @@
+/* Script for -n: mix text and data on same page */
+OUTPUT_FORMAT("elf64-k1om", "elf64-k1om",
+ "elf64-k1om")
+OUTPUT_ARCH(k1om)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0x400000); . = 0x400000 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.init : { *(.rela.init) }
+ .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
+ .rela.fini : { *(.rela.fini) }
+ .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
+ .rela.data.rel.ro : { *(.rela.data.rel.ro .rela.data.rel.ro.* .rela.gnu.linkonce.d.rel.ro.*) }
+ .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
+ .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
+ .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
+ .rela.ctors : { *(.rela.ctors) }
+ .rela.dtors : { *(.rela.dtors) }
+ .rela.got : { *(.rela.got) }
+ .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
+ .rela.ldata : { *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*) }
+ .rela.lbss : { *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*) }
+ .rela.lrodata : { *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*) }
+ .rela.iplt :
+ {
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) }
+ .iplt : { *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned. The label
+ is defined outside the section so that no empty section is created, so
+ it must share the 8-byte (64 / 8) alignment of the 64-bit array entries;
+ otherwise it could end up before the padded start of the section. */
+ . = ALIGN(64 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend.o file contains the
+ end-of-ctors marker, and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .);
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_k1om.xr b/x86_64-linux-android/lib/ldscripts/elf_k1om.xr
new file mode 100644
index 0000000..fb08769
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_k1om.xr
@@ -0,0 +1,158 @@
+/* Script for ld -r: link without relocation */
+OUTPUT_FORMAT("elf64-k1om", "elf64-k1om",
+ "elf64-k1om")
+OUTPUT_ARCH(k1om)
+ /* For some reason, the Solaris linker makes bad executables
+ if gld -r is used and the intermediate file has sections starting
+ at non-zero addresses. Could be a Solaris ld bug, could be a GNU ld
+ bug. But for now assigning the zero vmas works. */
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ .interp 0 : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash 0 : { *(.hash) }
+ .gnu.hash 0 : { *(.gnu.hash) }
+ .dynsym 0 : { *(.dynsym) }
+ .dynstr 0 : { *(.dynstr) }
+ .gnu.version 0 : { *(.gnu.version) }
+ .gnu.version_d 0: { *(.gnu.version_d) }
+ .gnu.version_r 0: { *(.gnu.version_r) }
+ .rela.init 0 : { *(.rela.init) }
+ .rela.text 0 : { *(.rela.text) }
+ .rela.fini 0 : { *(.rela.fini) }
+ .rela.rodata 0 : { *(.rela.rodata) }
+ .rela.data.rel.ro 0 : { *(.rela.data.rel.ro) }
+ .rela.data 0 : { *(.rela.data) }
+ .rela.tdata 0 : { *(.rela.tdata) }
+ .rela.tbss 0 : { *(.rela.tbss) }
+ .rela.ctors 0 : { *(.rela.ctors) }
+ .rela.dtors 0 : { *(.rela.dtors) }
+ .rela.got 0 : { *(.rela.got) }
+ .rela.bss 0 : { *(.rela.bss) }
+ .rela.ldata 0 : { *(.rela.ldata) }
+ .rela.lbss 0 : { *(.rela.lbss) }
+ .rela.lrodata 0 : { *(.rela.lrodata) }
+ .rela.iplt 0 :
+ {
+ *(.rela.iplt)
+ }
+ .rela.plt 0 :
+ {
+ *(.rela.plt)
+ }
+ .init 0 :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt 0 : { *(.plt) }
+ .iplt 0 : { *(.iplt) }
+ .text 0 :
+ {
+ *(.text .stub)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini 0 :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ .rodata 0 : { *(.rodata) }
+ .rodata1 0 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame 0 : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table 0 : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges 0 : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Start of the data segment. This relocatable-link script makes no address
+ adjustment here (such as page alignment to make life easier for apriori). */
+ /* Exception handling */
+ .eh_frame 0 : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table 0 : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges 0 : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata 0 : { *(.tdata) }
+ .tbss 0 : { *(.tbss) }
+ /* Align the location counter ahead of .preinit_array so that a
+ __preinit_array_start label would be properly aligned (this script does
+ not define that label itself). Defining a label inside the section instead
+ would make the linker create the section even when it is empty. */
+ . = ALIGN(32 / 8);
+ .preinit_array 0 :
+ {
+ KEEP (*(.preinit_array))
+ }
+ .jcr 0 : { KEEP (*(.jcr)) }
+ .dynamic 0 : { *(.dynamic) }
+ .got 0 : { *(.got) *(.igot) }
+ .got.plt 0 : { *(.got.plt) *(.igot.plt) }
+ .data 0 :
+ {
+ *(.data)
+ }
+ .data1 0 : { *(.data1) }
+ .bss 0 :
+ {
+ *(.dynbss)
+ *(.bss)
+ *(COMMON)
+ /* This relocatable-link script applies no alignment here or after .bss and
+ does not define _end. (Such alignment would make .bss occupy space up to
+ _end and stay correct even if .bss has no input sections.) */
+ }
+ .lbss 0 :
+ {
+ *(.dynlbss)
+ *(.lbss)
+ *(LARGE_COMMON)
+ }
+ .lrodata 0 :
+ {
+ *(.lrodata)
+ }
+ .ldata 0 :
+ {
+ *(.ldata)
+ }
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_k1om.xs b/x86_64-linux-android/lib/ldscripts/elf_k1om.xs
new file mode 100644
index 0000000..e953374
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_k1om.xs
@@ -0,0 +1,220 @@
+/* Script for ld --shared: link shared library */
+OUTPUT_FORMAT("elf64-k1om", "elf64-k1om",
+ "elf64-k1om")
+OUTPUT_ARCH(k1om)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ . = 0 + SIZEOF_HEADERS;
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.init : { *(.rela.init) }
+ .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
+ .rela.fini : { *(.rela.fini) }
+ .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
+ .rela.data.rel.ro : { *(.rela.data.rel.ro .rela.data.rel.ro.* .rela.gnu.linkonce.d.rel.ro.*) }
+ .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
+ .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
+ .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
+ .rela.ctors : { *(.rela.ctors) }
+ .rela.dtors : { *(.rela.dtors) }
+ .rela.got : { *(.rela.got) }
+ .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
+ .rela.ldata : { *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*) }
+ .rela.lbss : { *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*) }
+ .rela.lrodata : { *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*) }
+ .rela.iplt :
+ {
+ *(.rela.iplt)
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) }
+ .iplt : { *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Align ahead of .preinit_array so that a __preinit_array_start label would be
+ properly aligned (this script does not define that label itself). Defining a
+ label inside the section instead would make the linker create the section even
+ when empty. NOTE(review): 32 / 8 here vs. 64 / 8 elsewhere in this script - confirm. */
+ . = ALIGN(32 / 8);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of the
+ constructors, so its .ctors section is placed
+ first. Because the file name is a wildcard, it
+ does not matter whether the user actually links
+ against crtbegin.o (the linker does not look
+ for a file to match a wildcard), nor which
+ directory crtbegin.o is in. The second pattern,
+ *crtbegin*.o, also matches differently named
+ variants of that object file. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* The .ctors section from crtend.o must not be
+ included until after the sorted constructors:
+ it contains the end-of-constructors marker and
+ therefore has to come last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .);
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_k1om.xsc b/x86_64-linux-android/lib/ldscripts/elf_k1om.xsc
new file mode 100644
index 0000000..f98f7a9
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_k1om.xsc
@@ -0,0 +1,220 @@
+/* Script for --shared -z combreloc: shared library, combine & sort relocs */
+/* Modified for Android. */
+OUTPUT_FORMAT("elf64-k1om", "elf64-k1om",
+ "elf64-k1om")
+OUTPUT_ARCH(k1om)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ . = 0 + SIZEOF_HEADERS;
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.dyn :
+ {
+ *(.rela.init)
+ *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
+ *(.rela.fini)
+ *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
+ *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
+ *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
+ *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
+ *(.rela.ctors)
+ *(.rela.dtors)
+ *(.rela.got)
+ *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
+ *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*)
+ *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*)
+ *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*)
+ *(.rela.iplt)
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) }
+ .iplt : { *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Align ahead of .preinit_array so that a __preinit_array_start label would be
+ properly aligned (this script does not define that label itself). Defining a
+ label inside the section instead would make the linker create the section even
+ when empty. NOTE(review): 32 / 8 here vs. 64 / 8 elsewhere in this script - confirm. */
+ . = ALIGN(32 / 8);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of the
+ constructors, so its .ctors section is placed
+ first. Because the file name is a wildcard, it
+ does not matter whether the user actually links
+ against crtbegin.o (the linker does not look
+ for a file to match a wildcard), nor which
+ directory crtbegin.o is in. The second pattern,
+ *crtbegin*.o, also matches differently named
+ variants of that object file. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* The .ctors section from crtend.o must not be
+ included until after the sorted constructors:
+ it contains the end-of-constructors marker and
+ therefore has to come last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .);
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_k1om.xsw b/x86_64-linux-android/lib/ldscripts/elf_k1om.xsw
new file mode 100644
index 0000000..e620cd3
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_k1om.xsw
@@ -0,0 +1,218 @@
+/* Script for --shared -z combreloc -z now -z relro: shared library, combine & sort relocs */
+OUTPUT_FORMAT("elf64-k1om", "elf64-k1om",
+ "elf64-k1om")
+OUTPUT_ARCH(k1om)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ . = 0 + SIZEOF_HEADERS;
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.dyn :
+ {
+ *(.rela.init)
+ *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
+ *(.rela.fini)
+ *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
+ *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
+ *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
+ *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
+ *(.rela.ctors)
+ *(.rela.dtors)
+ *(.rela.got)
+ *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
+ *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*)
+ *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*)
+ *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*)
+ *(.rela.iplt)
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) }
+ .iplt : { *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Align ahead of .preinit_array so that a __preinit_array_start label would be
+ properly aligned (this script does not define that label itself). Defining a
+ label inside the section instead would make the linker create the section even
+ when empty. NOTE(review): 32 / 8 here vs. 64 / 8 elsewhere in this script - confirm. */
+ . = ALIGN(32 / 8);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of the
+ constructors, so its .ctors section is placed
+ first. Because the file name is a wildcard, it
+ does not matter whether the user actually links
+ against crtbegin.o (the linker does not look
+ for a file to match a wildcard), nor which
+ directory crtbegin.o is in. The second pattern,
+ *crtbegin*.o, also matches differently named
+ variants of that object file. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* The .ctors section from crtend.o must not be
+ included until after the sorted constructors:
+ it contains the end-of-constructors marker and
+ therefore has to come last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got.plt) *(.igot.plt) *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (0, .);
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_k1om.xu b/x86_64-linux-android/lib/ldscripts/elf_k1om.xu
new file mode 100644
index 0000000..321a32a
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_k1om.xu
@@ -0,0 +1,159 @@
+/* Script for ld -Ur: link w/out relocation, do create constructors */
+OUTPUT_FORMAT("elf64-k1om", "elf64-k1om",
+ "elf64-k1om")
+OUTPUT_ARCH(k1om)
+ /* For some reason, the Solaris linker makes bad executables
+ if gld -r is used and the intermediate file has sections starting
+ at non-zero addresses. Could be a Solaris ld bug, could be a GNU ld
+ bug. But for now assigning the zero vmas works. */
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ .interp 0 : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash 0 : { *(.hash) }
+ .gnu.hash 0 : { *(.gnu.hash) }
+ .dynsym 0 : { *(.dynsym) }
+ .dynstr 0 : { *(.dynstr) }
+ .gnu.version 0 : { *(.gnu.version) }
+ .gnu.version_d 0: { *(.gnu.version_d) }
+ .gnu.version_r 0: { *(.gnu.version_r) }
+ .rela.init 0 : { *(.rela.init) }
+ .rela.text 0 : { *(.rela.text) }
+ .rela.fini 0 : { *(.rela.fini) }
+ .rela.rodata 0 : { *(.rela.rodata) }
+ .rela.data.rel.ro 0 : { *(.rela.data.rel.ro) }
+ .rela.data 0 : { *(.rela.data) }
+ .rela.tdata 0 : { *(.rela.tdata) }
+ .rela.tbss 0 : { *(.rela.tbss) }
+ .rela.ctors 0 : { *(.rela.ctors) }
+ .rela.dtors 0 : { *(.rela.dtors) }
+ .rela.got 0 : { *(.rela.got) }
+ .rela.bss 0 : { *(.rela.bss) }
+ .rela.ldata 0 : { *(.rela.ldata) }
+ .rela.lbss 0 : { *(.rela.lbss) }
+ .rela.lrodata 0 : { *(.rela.lrodata) }
+ .rela.iplt 0 :
+ {
+ *(.rela.iplt)
+ }
+ .rela.plt 0 :
+ {
+ *(.rela.plt)
+ }
+ .init 0 :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt 0 : { *(.plt) }
+ .iplt 0 : { *(.iplt) }
+ .text 0 :
+ {
+ *(.text .stub)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini 0 :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ .rodata 0 : { *(.rodata) }
+ .rodata1 0 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame 0 : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table 0 : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges 0 : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Start of the data segment. This relocatable-link script makes no address
+ adjustment here (such as page alignment to make life easier for apriori). */
+ /* Exception handling */
+ .eh_frame 0 : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table 0 : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges 0 : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata 0 : { *(.tdata) }
+ .tbss 0 : { *(.tbss) }
+ /* Align the location counter ahead of .preinit_array so that a
+ __preinit_array_start label would be properly aligned (this script does
+ not define that label itself). Defining a label inside the section instead
+ would make the linker create the section even when it is empty. */
+ . = ALIGN(32 / 8);
+ .preinit_array 0 :
+ {
+ KEEP (*(.preinit_array))
+ }
+ .jcr 0 : { KEEP (*(.jcr)) }
+ .dynamic 0 : { *(.dynamic) }
+ .got 0 : { *(.got) *(.igot) }
+ .got.plt 0 : { *(.got.plt) *(.igot.plt) }
+ .data 0 :
+ {
+ *(.data)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 0 : { *(.data1) }
+ .bss 0 :
+ {
+ *(.dynbss)
+ *(.bss)
+ *(COMMON)
+ /* This relocatable-link script applies no alignment here or after .bss and
+ does not define _end. (Such alignment would make .bss occupy space up to
+ _end and stay correct even if .bss has no input sections.) */
+ }
+ .lbss 0 :
+ {
+ *(.dynlbss)
+ *(.lbss)
+ *(LARGE_COMMON)
+ }
+ .lrodata 0 :
+ {
+ *(.lrodata)
+ }
+ .ldata 0 :
+ {
+ *(.ldata)
+ }
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_k1om.xw b/x86_64-linux-android/lib/ldscripts/elf_k1om.xw
new file mode 100644
index 0000000..2dea8c1
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_k1om.xw
@@ -0,0 +1,229 @@
+/* Script for -z combreloc -z now -z relro: combine and sort reloc sections */
+OUTPUT_FORMAT("elf64-k1om", "elf64-k1om",
+ "elf64-k1om")
+OUTPUT_ARCH(k1om)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0x400000); . = 0x400000 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.dyn :
+ {
+ *(.rela.init)
+ *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
+ *(.rela.fini)
+ *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
+ *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
+ *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
+ *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
+ *(.rela.ctors)
+ *(.rela.dtors)
+ *(.rela.got)
+ *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
+ *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*)
+ *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*)
+ *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*)
+ PROVIDE_HIDDEN (__rel_iplt_start = .);
+ PROVIDE_HIDDEN (__rel_iplt_end = .);
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) }
+ .iplt : { *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. The exact page-boundary
+ alignment wanted for apriori applies to 32 bits only; this script is elf64. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Align to the 64-bit word size (64 / 8), as this script already does
+ for .bss and _end, so the __preinit_array_start label cannot land in
+ padding ahead of .preinit_array. The label stays outside the section
+ so the linker does not create the section when it would be empty. */
+ . = ALIGN(64 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end of ctors marker and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got.plt) *(.igot.plt) *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (0, .);
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_l1om.x b/x86_64-linux-android/lib/ldscripts/elf_l1om.x
new file mode 100644
index 0000000..ae3103c
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_l1om.x
@@ -0,0 +1,230 @@
+/* Default linker script, for normal executables */
+/* Modified for Android. */
+OUTPUT_FORMAT("elf64-l1om", "elf64-l1om",
+ "elf64-l1om")
+OUTPUT_ARCH(l1om)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0x400000); . = 0x400000 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.init : { *(.rela.init) }
+ .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
+ .rela.fini : { *(.rela.fini) }
+ .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
+ .rela.data.rel.ro : { *(.rela.data.rel.ro .rela.data.rel.ro.* .rela.gnu.linkonce.d.rel.ro.*) }
+ .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
+ .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
+ .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
+ .rela.ctors : { *(.rela.ctors) }
+ .rela.dtors : { *(.rela.dtors) }
+ .rela.got : { *(.rela.got) }
+ .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
+ .rela.ldata : { *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*) }
+ .rela.lbss : { *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*) }
+ .rela.lrodata : { *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*) }
+ .rela.iplt :
+ {
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) }
+ .iplt : { *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. The exact page-boundary
+ alignment wanted for apriori applies to 32 bits only; this script is elf64. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Align to the 64-bit word size (64 / 8), as this script already does
+ for .bss and _end, so the __preinit_array_start label cannot land in
+ padding ahead of .preinit_array. The label stays outside the section
+ so the linker does not create the section when it would be empty. */
+ . = ALIGN(64 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end of ctors marker and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .);
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_l1om.xbn b/x86_64-linux-android/lib/ldscripts/elf_l1om.xbn
new file mode 100644
index 0000000..433891e
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_l1om.xbn
@@ -0,0 +1,227 @@
+/* Script for -N: mix text and data on same page; don't align data */
+OUTPUT_FORMAT("elf64-l1om", "elf64-l1om",
+ "elf64-l1om")
+OUTPUT_ARCH(l1om)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0x400000); . = 0x400000 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.init : { *(.rela.init) }
+ .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
+ .rela.fini : { *(.rela.fini) }
+ .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
+ .rela.data.rel.ro : { *(.rela.data.rel.ro .rela.data.rel.ro.* .rela.gnu.linkonce.d.rel.ro.*) }
+ .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
+ .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
+ .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
+ .rela.ctors : { *(.rela.ctors) }
+ .rela.dtors : { *(.rela.dtors) }
+ .rela.got : { *(.rela.got) }
+ .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
+ .rela.ldata : { *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*) }
+ .rela.lbss : { *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*) }
+ .rela.lrodata : { *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*) }
+ .rela.iplt :
+ {
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) }
+ .iplt : { *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* No data segment address adjustment is made for -N: text and data share a
+ page. The page-boundary alignment wanted for apriori on 32 bits does not apply. */
+ . = .;
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Align to the 64-bit word size (64 / 8), as this script already does
+ for .bss and _end, so the __preinit_array_start label cannot land in
+ padding ahead of .preinit_array. The label stays outside the section
+ so the linker does not create the section when it would be empty. */
+ . = ALIGN(64 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end of ctors marker and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_l1om.xc b/x86_64-linux-android/lib/ldscripts/elf_l1om.xc
new file mode 100644
index 0000000..6d3e65e
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_l1om.xc
@@ -0,0 +1,230 @@
+/* Script for -z combreloc: combine and sort reloc sections */
+OUTPUT_FORMAT("elf64-l1om", "elf64-l1om",
+ "elf64-l1om")
+OUTPUT_ARCH(l1om)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0x400000); . = 0x400000 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.dyn :
+ {
+ *(.rela.init)
+ *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
+ *(.rela.fini)
+ *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
+ *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
+ *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
+ *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
+ *(.rela.ctors)
+ *(.rela.dtors)
+ *(.rela.got)
+ *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
+ *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*)
+ *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*)
+ *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*)
+ PROVIDE_HIDDEN (__rel_iplt_start = .);
+ PROVIDE_HIDDEN (__rel_iplt_end = .);
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) }
+ .iplt : { *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. The exact page-boundary
+ alignment wanted for apriori applies to 32 bits only; this script is elf64. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Align to the 64-bit word size (64 / 8), as this script already does
+ for .bss and _end, so the __preinit_array_start label cannot land in
+ padding ahead of .preinit_array. The label stays outside the section
+ so the linker does not create the section when it would be empty. */
+ . = ALIGN(64 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end of ctors marker and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .);
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_l1om.xd b/x86_64-linux-android/lib/ldscripts/elf_l1om.xd
new file mode 100644
index 0000000..36b49fb
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_l1om.xd
@@ -0,0 +1,229 @@
+/* Script for ld -pie: link position independent executable */
+OUTPUT_FORMAT("elf64-l1om", "elf64-l1om",
+ "elf64-l1om")
+OUTPUT_ARCH(l1om)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0); . = 0 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.init : { *(.rela.init) }
+ .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
+ .rela.fini : { *(.rela.fini) }
+ .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
+ .rela.data.rel.ro : { *(.rela.data.rel.ro .rela.data.rel.ro.* .rela.gnu.linkonce.d.rel.ro.*) }
+ .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
+ .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
+ .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
+ .rela.ctors : { *(.rela.ctors) }
+ .rela.dtors : { *(.rela.dtors) }
+ .rela.got : { *(.rela.got) }
+ .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
+ .rela.ldata : { *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*) }
+ .rela.lbss : { *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*) }
+ .rela.lrodata : { *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*) }
+ .rela.iplt :
+ {
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) }
+ .iplt : { *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. NOTE(review): OUTPUT_FORMAT here is elf64-l1om, so the 32-bit remark may not describe this script - confirm. The location counter is assigned twice below: first from the MAXPAGESIZE expression, then from DATA_SEGMENT_ALIGN. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+ the linker would then create the section even if it turns out to
+ be empty, which isn't pretty. This is an elf64 output, so align to 64 / 8 bytes (as the .bss ALIGN in this script does) rather than 32 / 8; with only 4-byte alignment the label can land before the start of .preinit_array when that section needs 8-byte alignment. */
+ . = ALIGN(64 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctor section from
+ the crtend.o file until after the sorted ctors.
+ The .ctor section from the crtend file contains the
+ end of ctors marker and it must be last */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) } /* Placed before the RELRO end marker. */
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .); /* Offset is 24 when .got.plt holds at least 24 bytes, else 0; presumably the three reserved 8-byte .got.plt entries - confirm against the ld manual. */
+ .got.plt : { *(.got.plt) *(.igot.plt) } /* Follows the RELRO end marker. */
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_l1om.xdc b/x86_64-linux-android/lib/ldscripts/elf_l1om.xdc
new file mode 100644
index 0000000..33a359e
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_l1om.xdc
@@ -0,0 +1,230 @@
+/* Script for -pie -z combreloc: position independent executable, combine & sort relocs */
+OUTPUT_FORMAT("elf64-l1om", "elf64-l1om",
+ "elf64-l1om")
+OUTPUT_ARCH(l1om)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0); . = 0 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.dyn :
+ {
+ *(.rela.init)
+ *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
+ *(.rela.fini)
+ *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
+ *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
+ *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
+ *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
+ *(.rela.ctors)
+ *(.rela.dtors)
+ *(.rela.got)
+ *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
+ *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*)
+ *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*)
+ *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*)
+ PROVIDE_HIDDEN (__rel_iplt_start = .);
+ PROVIDE_HIDDEN (__rel_iplt_end = .);
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) }
+ .iplt : { *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. NOTE(review): OUTPUT_FORMAT here is elf64-l1om, so the 32-bit remark may not describe this script - confirm. The location counter is assigned twice below: first from the MAXPAGESIZE expression, then from DATA_SEGMENT_ALIGN. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+ the linker would then create the section even if it turns out to
+ be empty, which isn't pretty. This is an elf64 output, so align to 64 / 8 bytes (as the .bss ALIGN in this script does) rather than 32 / 8; with only 4-byte alignment the label can land before the start of .preinit_array when that section needs 8-byte alignment. */
+ . = ALIGN(64 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctor section from
+ the crtend.o file until after the sorted ctors.
+ The .ctor section from the crtend file contains the
+ end of ctors marker and it must be last */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) } /* Placed before the RELRO end marker. */
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .); /* Offset is 24 when .got.plt holds at least 24 bytes, else 0; presumably the three reserved 8-byte .got.plt entries - confirm against the ld manual. */
+ .got.plt : { *(.got.plt) *(.igot.plt) } /* Follows the RELRO end marker. */
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_l1om.xdw b/x86_64-linux-android/lib/ldscripts/elf_l1om.xdw
new file mode 100644
index 0000000..412a745
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_l1om.xdw
@@ -0,0 +1,229 @@
+/* Script for -pie -z combreloc -z now -z relro: position independent executable, combine & sort relocs */
+OUTPUT_FORMAT("elf64-l1om", "elf64-l1om",
+ "elf64-l1om")
+OUTPUT_ARCH(l1om)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0); . = 0 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.dyn :
+ {
+ *(.rela.init)
+ *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
+ *(.rela.fini)
+ *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
+ *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
+ *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
+ *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
+ *(.rela.ctors)
+ *(.rela.dtors)
+ *(.rela.got)
+ *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
+ *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*)
+ *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*)
+ *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*)
+ PROVIDE_HIDDEN (__rel_iplt_start = .);
+ PROVIDE_HIDDEN (__rel_iplt_end = .);
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) }
+ .iplt : { *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. NOTE(review): OUTPUT_FORMAT here is elf64-l1om, so the 32-bit remark may not describe this script - confirm. The location counter is assigned twice below: first from the MAXPAGESIZE expression, then from DATA_SEGMENT_ALIGN. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+ the linker would then create the section even if it turns out to
+ be empty, which isn't pretty. This is an elf64 output, so align to 64 / 8 bytes (as the .bss ALIGN in this script does) rather than 32 / 8; with only 4-byte alignment the label can land before the start of .preinit_array when that section needs 8-byte alignment. */
+ . = ALIGN(64 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctor section from
+ the crtend.o file until after the sorted ctors.
+ The .ctor section from the crtend file contains the
+ end of ctors marker and it must be last */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got.plt) *(.igot.plt) *(.got) *(.igot) } /* .got.plt and .igot.plt are collected into .got in this variant, ahead of the RELRO end marker. */
+ . = DATA_SEGMENT_RELRO_END (0, .); /* Offset 0: this script defines no separate .got.plt output section after the marker. */
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_l1om.xn b/x86_64-linux-android/lib/ldscripts/elf_l1om.xn
new file mode 100644
index 0000000..2a035b3
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_l1om.xn
@@ -0,0 +1,229 @@
+/* Script for -n: mix text and data on same page */
+OUTPUT_FORMAT("elf64-l1om", "elf64-l1om",
+ "elf64-l1om")
+OUTPUT_ARCH(l1om)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0x400000); . = 0x400000 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.init : { *(.rela.init) }
+ .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
+ .rela.fini : { *(.rela.fini) }
+ .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
+ .rela.data.rel.ro : { *(.rela.data.rel.ro .rela.data.rel.ro.* .rela.gnu.linkonce.d.rel.ro.*) }
+ .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
+ .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
+ .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
+ .rela.ctors : { *(.rela.ctors) }
+ .rela.dtors : { *(.rela.dtors) }
+ .rela.got : { *(.rela.got) }
+ .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
+ .rela.ldata : { *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*) }
+ .rela.lbss : { *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*) }
+ .rela.lrodata : { *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*) }
+ .rela.iplt :
+ {
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) }
+ .iplt : { *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. NOTE(review): OUTPUT_FORMAT here is elf64-l1om, so the 32-bit remark may not describe this script - confirm. The location counter is assigned twice below: first from the MAXPAGESIZE expression, then from DATA_SEGMENT_ALIGN. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+ the linker would then create the section even if it turns out to
+ be empty, which isn't pretty. This is an elf64 output, so align to 64 / 8 bytes (as the .bss ALIGN in this script does) rather than 32 / 8; with only 4-byte alignment the label can land before the start of .preinit_array when that section needs 8-byte alignment. */
+ . = ALIGN(64 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctor section from
+ the crtend.o file until after the sorted ctors.
+ The .ctor section from the crtend file contains the
+ end of ctors marker and it must be last */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) } /* Placed before the RELRO end marker. */
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .); /* Offset is 24 when .got.plt holds at least 24 bytes, else 0; presumably the three reserved 8-byte .got.plt entries - confirm against the ld manual. */
+ .got.plt : { *(.got.plt) *(.igot.plt) } /* Follows the RELRO end marker. */
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_l1om.xr b/x86_64-linux-android/lib/ldscripts/elf_l1om.xr
new file mode 100644
index 0000000..f9bd3e9
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_l1om.xr
@@ -0,0 +1,158 @@
+/* Script for ld -r: link without relocation (output remains relocatable) */
+OUTPUT_FORMAT("elf64-l1om", "elf64-l1om",
+ "elf64-l1om")
+OUTPUT_ARCH(l1om)
+ /* Most output sections below are given an explicit VMA of 0. For some
+ reason, the Solaris linker makes bad executables if gld -r is used and
+ the intermediate file has sections starting at non-zero addresses. Could
+ be a Solaris ld bug or a GNU ld bug; for now assigning zero VMAs works. */
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ .interp 0 : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash 0 : { *(.hash) }
+ .gnu.hash 0 : { *(.gnu.hash) }
+ .dynsym 0 : { *(.dynsym) }
+ .dynstr 0 : { *(.dynstr) }
+ .gnu.version 0 : { *(.gnu.version) }
+ .gnu.version_d 0: { *(.gnu.version_d) }
+ .gnu.version_r 0: { *(.gnu.version_r) }
+ .rela.init 0 : { *(.rela.init) }
+ .rela.text 0 : { *(.rela.text) }
+ .rela.fini 0 : { *(.rela.fini) }
+ .rela.rodata 0 : { *(.rela.rodata) }
+ .rela.data.rel.ro 0 : { *(.rela.data.rel.ro) }
+ .rela.data 0 : { *(.rela.data) }
+ .rela.tdata 0 : { *(.rela.tdata) }
+ .rela.tbss 0 : { *(.rela.tbss) }
+ .rela.ctors 0 : { *(.rela.ctors) }
+ .rela.dtors 0 : { *(.rela.dtors) }
+ .rela.got 0 : { *(.rela.got) }
+ .rela.bss 0 : { *(.rela.bss) }
+ .rela.ldata 0 : { *(.rela.ldata) }
+ .rela.lbss 0 : { *(.rela.lbss) }
+ .rela.lrodata 0 : { *(.rela.lrodata) }
+ .rela.iplt 0 :
+ {
+ *(.rela.iplt)
+ }
+ .rela.plt 0 :
+ {
+ *(.rela.plt)
+ }
+ .init 0 :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt 0 : { *(.plt) }
+ .iplt 0 : { *(.iplt) }
+ .text 0 :
+ {
+ *(.text .stub)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini 0 :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ .rodata 0 : { *(.rodata) }
+ .rodata1 0 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame 0 : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table 0 : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges 0 : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* The data-segment address adjustment described by the template (align at
+ a page boundary to make life easier for apriori) is not emitted here. */
+ /* Exception handling */
+ .eh_frame 0 : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table 0 : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges 0 : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata 0 : { *(.tdata) }
+ .tbss 0 : { *(.tbss) }
+ /* Ensure the __preinit_array_start label is properly aligned (no such
+ label is defined in this script; only the alignment remains). We could
+ instead move the label definition inside the section, but the linker
+ would then create the section even if empty, which isn't pretty. */
+ . = ALIGN(32 / 8);
+ .preinit_array 0 :
+ {
+ KEEP (*(.preinit_array))
+ }
+ .jcr 0 : { KEEP (*(.jcr)) }
+ .dynamic 0 : { *(.dynamic) }
+ .got 0 : { *(.got) *(.igot) }
+ .got.plt 0 : { *(.got.plt) *(.igot.plt) }
+ .data 0 :
+ {
+ *(.data)
+ }
+ .data1 0 : { *(.data1) }
+ .bss 0 :
+ {
+ *(.dynbss)
+ *(.bss)
+ *(COMMON)
+ /* NOTE(review): template comment without matching statements here --
+ this script neither aligns inside or after .bss nor defines _end. The
+ template intent: .bss should occupy space up to an aligned _end. */
+ }
+ .lbss 0 :
+ {
+ *(.dynlbss)
+ *(.lbss)
+ *(LARGE_COMMON)
+ }
+ .lrodata 0 :
+ {
+ *(.lrodata)
+ }
+ .ldata 0 :
+ {
+ *(.ldata)
+ }
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_l1om.xs b/x86_64-linux-android/lib/ldscripts/elf_l1om.xs
new file mode 100644
index 0000000..bc8107a
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_l1om.xs
@@ -0,0 +1,220 @@
+/* Script for ld --shared: link shared library */
+OUTPUT_FORMAT("elf64-l1om", "elf64-l1om",
+ "elf64-l1om")
+OUTPUT_ARCH(l1om)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ . = 0 + SIZEOF_HEADERS;
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.init : { *(.rela.init) }
+ .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
+ .rela.fini : { *(.rela.fini) }
+ .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
+ .rela.data.rel.ro : { *(.rela.data.rel.ro .rela.data.rel.ro.* .rela.gnu.linkonce.d.rel.ro.*) }
+ .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
+ .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
+ .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
+ .rela.ctors : { *(.rela.ctors) }
+ .rela.dtors : { *(.rela.dtors) }
+ .rela.got : { *(.rela.got) }
+ .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
+ .rela.ldata : { *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*) }
+ .rela.lbss : { *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*) }
+ .rela.lrodata : { *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*) }
+ .rela.iplt :
+ {
+ *(.rela.iplt)
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) }
+ .iplt : { *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment: the location counter is first set from an
+ ALIGN (MAXPAGESIZE) expression, then DATA_SEGMENT_ALIGN is applied. Template note: for 32 bits we want to align at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned (no such
+ label is defined in this script; only the alignment remains). We could
+ instead move the label definition inside the section, but the linker
+ would then create the section even if empty, which isn't pretty. */
+ . = ALIGN(32 / 8);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end of ctors marker and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .);
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_l1om.xsc b/x86_64-linux-android/lib/ldscripts/elf_l1om.xsc
new file mode 100644
index 0000000..3b6e2b5
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_l1om.xsc
@@ -0,0 +1,220 @@
+/* Script for --shared -z combreloc: shared library, combine & sort relocs */
+/* Modified for Android. */
+OUTPUT_FORMAT("elf64-l1om", "elf64-l1om",
+ "elf64-l1om")
+OUTPUT_ARCH(l1om)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ . = 0 + SIZEOF_HEADERS;
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.dyn :
+ {
+ *(.rela.init)
+ *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
+ *(.rela.fini)
+ *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
+ *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
+ *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
+ *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
+ *(.rela.ctors)
+ *(.rela.dtors)
+ *(.rela.got)
+ *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
+ *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*)
+ *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*)
+ *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*)
+ *(.rela.iplt)
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) }
+ .iplt : { *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment: the location counter is first set from an
+ ALIGN (MAXPAGESIZE) expression, then DATA_SEGMENT_ALIGN is applied. Template note: for 32 bits we want to align at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned (no such
+ label is defined in this script; only the alignment remains). We could
+ instead move the label definition inside the section, but the linker
+ would then create the section even if empty, which isn't pretty. */
+ . = ALIGN(32 / 8);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end of ctors marker and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .);
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_l1om.xsw b/x86_64-linux-android/lib/ldscripts/elf_l1om.xsw
new file mode 100644
index 0000000..23422b2
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_l1om.xsw
@@ -0,0 +1,218 @@
+/* Script for --shared -z combreloc -z now -z relro: shared library, combine & sort relocs */
+OUTPUT_FORMAT("elf64-l1om", "elf64-l1om",
+ "elf64-l1om")
+OUTPUT_ARCH(l1om)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ . = 0 + SIZEOF_HEADERS;
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.dyn :
+ {
+ *(.rela.init)
+ *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
+ *(.rela.fini)
+ *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
+ *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
+ *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
+ *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
+ *(.rela.ctors)
+ *(.rela.dtors)
+ *(.rela.got)
+ *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
+ *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*)
+ *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*)
+ *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*)
+ *(.rela.iplt)
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) }
+ .iplt : { *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment: the location counter is first set from an
+ ALIGN (MAXPAGESIZE) expression, then DATA_SEGMENT_ALIGN is applied. Template note: for 32 bits we want to align at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned (no such
+ label is defined in this script; only the alignment remains). We could
+ instead move the label definition inside the section, but the linker
+ would then create the section even if empty, which isn't pretty. */
+ . = ALIGN(32 / 8);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end of ctors marker and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got.plt) *(.igot.plt) *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (0, .);
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_l1om.xu b/x86_64-linux-android/lib/ldscripts/elf_l1om.xu
new file mode 100644
index 0000000..1a53594
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_l1om.xu
@@ -0,0 +1,159 @@
+/* Script for ld -Ur: link without relocation, but do create constructors (SORT(CONSTRUCTORS) in .data) */
+OUTPUT_FORMAT("elf64-l1om", "elf64-l1om",
+ "elf64-l1om")
+OUTPUT_ARCH(l1om)
+ /* Most output sections below are given an explicit VMA of 0. For some
+ reason, the Solaris linker makes bad executables if gld -r is used and
+ the intermediate file has sections starting at non-zero addresses. Could
+ be a Solaris ld bug or a GNU ld bug; for now assigning zero VMAs works. */
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ .interp 0 : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash 0 : { *(.hash) }
+ .gnu.hash 0 : { *(.gnu.hash) }
+ .dynsym 0 : { *(.dynsym) }
+ .dynstr 0 : { *(.dynstr) }
+ .gnu.version 0 : { *(.gnu.version) }
+ .gnu.version_d 0: { *(.gnu.version_d) }
+ .gnu.version_r 0: { *(.gnu.version_r) }
+ .rela.init 0 : { *(.rela.init) }
+ .rela.text 0 : { *(.rela.text) }
+ .rela.fini 0 : { *(.rela.fini) }
+ .rela.rodata 0 : { *(.rela.rodata) }
+ .rela.data.rel.ro 0 : { *(.rela.data.rel.ro) }
+ .rela.data 0 : { *(.rela.data) }
+ .rela.tdata 0 : { *(.rela.tdata) }
+ .rela.tbss 0 : { *(.rela.tbss) }
+ .rela.ctors 0 : { *(.rela.ctors) }
+ .rela.dtors 0 : { *(.rela.dtors) }
+ .rela.got 0 : { *(.rela.got) }
+ .rela.bss 0 : { *(.rela.bss) }
+ .rela.ldata 0 : { *(.rela.ldata) }
+ .rela.lbss 0 : { *(.rela.lbss) }
+ .rela.lrodata 0 : { *(.rela.lrodata) }
+ .rela.iplt 0 :
+ {
+ *(.rela.iplt)
+ }
+ .rela.plt 0 :
+ {
+ *(.rela.plt)
+ }
+ .init 0 :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt 0 : { *(.plt) }
+ .iplt 0 : { *(.iplt) }
+ .text 0 :
+ {
+ *(.text .stub)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini 0 :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ .rodata 0 : { *(.rodata) }
+ .rodata1 0 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame 0 : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table 0 : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges 0 : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* The data-segment address adjustment described by the template (align at
+ a page boundary to make life easier for apriori) is not emitted here. */
+ /* Exception handling */
+ .eh_frame 0 : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table 0 : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges 0 : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata 0 : { *(.tdata) }
+ .tbss 0 : { *(.tbss) }
+ /* Ensure the __preinit_array_start label is properly aligned (no such
+ label is defined in this script; only the alignment remains). We could
+ instead move the label definition inside the section, but the linker
+ would then create the section even if empty, which isn't pretty. */
+ . = ALIGN(32 / 8);
+ .preinit_array 0 :
+ {
+ KEEP (*(.preinit_array))
+ }
+ .jcr 0 : { KEEP (*(.jcr)) }
+ .dynamic 0 : { *(.dynamic) }
+ .got 0 : { *(.got) *(.igot) }
+ .got.plt 0 : { *(.got.plt) *(.igot.plt) }
+ .data 0 :
+ {
+ *(.data)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 0 : { *(.data1) }
+ .bss 0 :
+ {
+ *(.dynbss)
+ *(.bss)
+ *(COMMON)
+ /* NOTE(review): template comment without matching statements here --
+ this script neither aligns inside or after .bss nor defines _end. The
+ template intent: .bss should occupy space up to an aligned _end. */
+ }
+ .lbss 0 :
+ {
+ *(.dynlbss)
+ *(.lbss)
+ *(LARGE_COMMON)
+ }
+ .lrodata 0 :
+ {
+ *(.lrodata)
+ }
+ .ldata 0 :
+ {
+ *(.ldata)
+ }
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_l1om.xw b/x86_64-linux-android/lib/ldscripts/elf_l1om.xw
new file mode 100644
index 0000000..dccca0a
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_l1om.xw
@@ -0,0 +1,229 @@
+/* Script for -z combreloc -z now -z relro: combine and sort reloc sections */
+OUTPUT_FORMAT("elf64-l1om", "elf64-l1om",
+ "elf64-l1om")
+OUTPUT_ARCH(l1om)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0x400000); . = 0x400000 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.dyn :
+ {
+ *(.rela.init)
+ *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
+ *(.rela.fini)
+ *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
+ *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
+ *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
+ *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
+ *(.rela.ctors)
+ *(.rela.dtors)
+ *(.rela.got)
+ *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
+ *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*)
+ *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*)
+ *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*)
+ PROVIDE_HIDDEN (__rel_iplt_start = .);
+ PROVIDE_HIDDEN (__rel_iplt_end = .);
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) }
+ .iplt : { *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Align the __preinit_array_start label to 64 / 8 bytes, as this script
+ does after .bss, so it cannot sit below an 8-byte-aligned section start.
+ Defining the label inside the section would also work, but the linker
+ would then create the section even if it turns out to be empty. */
+ . = ALIGN(64 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend.o file contains the
+ end-of-ctors marker, and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got.plt) *(.igot.plt) *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (0, .);
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_x86_64.x b/x86_64-linux-android/lib/ldscripts/elf_x86_64.x
new file mode 100644
index 0000000..b7ad5ec
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_x86_64.x
@@ -0,0 +1,227 @@
+/* Default linker script, for normal executables */
+/* Modified for Android. */
+OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64",
+ "elf64-x86-64")
+OUTPUT_ARCH(i386:x86-64)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0x400000); . = 0x400000 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.init : { *(.rela.init) }
+ .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
+ .rela.fini : { *(.rela.fini) }
+ .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
+ .rela.data.rel.ro : { *(.rela.data.rel.ro .rela.data.rel.ro.* .rela.gnu.linkonce.d.rel.ro.*) }
+ .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
+ .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
+ .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
+ .rela.ctors : { *(.rela.ctors) }
+ .rela.dtors : { *(.rela.dtors) }
+ .rela.got : { *(.rela.got) }
+ .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
+ .rela.ldata : { *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*) }
+ .rela.lbss : { *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*) }
+ .rela.lrodata : { *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*) }
+ .rela.ifunc : { *(.rela.ifunc) }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Align the __preinit_array_start label to 64 / 8 bytes, as this script
+ does after .bss, so it cannot sit below an 8-byte-aligned section start.
+ Defining the label inside the section would also work, but the linker
+ would then create the section even if it turns out to be empty. */
+ . = ALIGN(64 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend.o file contains the
+ end-of-ctors marker, and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .);
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_x86_64.xbn b/x86_64-linux-android/lib/ldscripts/elf_x86_64.xbn
new file mode 100644
index 0000000..67a92e0
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_x86_64.xbn
@@ -0,0 +1,224 @@
+/* Script for -N: mix text and data on same page; don't align data */
+OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64",
+ "elf64-x86-64")
+OUTPUT_ARCH(i386:x86-64)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0x400000); . = 0x400000 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.init : { *(.rela.init) }
+ .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
+ .rela.fini : { *(.rela.fini) }
+ .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
+ .rela.data.rel.ro : { *(.rela.data.rel.ro .rela.data.rel.ro.* .rela.gnu.linkonce.d.rel.ro.*) }
+ .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
+ .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
+ .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
+ .rela.ctors : { *(.rela.ctors) }
+ .rela.dtors : { *(.rela.dtors) }
+ .rela.got : { *(.rela.got) }
+ .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
+ .rela.ldata : { *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*) }
+ .rela.lbss : { *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*) }
+ .rela.lrodata : { *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*) }
+ .rela.ifunc : { *(.rela.ifunc) }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Data segment address. This is the -N script (text and data share a
+ page), so no alignment is applied and the location counter is kept as-is. */
+ . = .;
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Align the __preinit_array_start label to 64 / 8 bytes, as this script
+ does after .bss, so it cannot sit below an 8-byte-aligned section start.
+ Defining the label inside the section would also work, but the linker
+ would then create the section even if it turns out to be empty. */
+ . = ALIGN(64 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend.o file contains the
+ end-of-ctors marker, and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_x86_64.xc b/x86_64-linux-android/lib/ldscripts/elf_x86_64.xc
new file mode 100644
index 0000000..7b2a9c5
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_x86_64.xc
@@ -0,0 +1,228 @@
+/* Script for -z combreloc: combine and sort reloc sections */
+OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64",
+ "elf64-x86-64")
+OUTPUT_ARCH(i386:x86-64)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0x400000); . = 0x400000 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.dyn :
+ {
+ *(.rela.init)
+ *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
+ *(.rela.fini)
+ *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
+ *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
+ *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
+ *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
+ *(.rela.ctors)
+ *(.rela.dtors)
+ *(.rela.got)
+ *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
+ *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*)
+ *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*)
+ *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*)
+ *(.rela.ifunc)
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Align the __preinit_array_start label to 64 / 8 bytes, as this script
+ does after .bss, so it cannot sit below an 8-byte-aligned section start.
+ Defining the label inside the section would also work, but the linker
+ would then create the section even if it turns out to be empty. */
+ . = ALIGN(64 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend.o file contains the
+ end-of-ctors marker, and it must be last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .);
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_x86_64.xd b/x86_64-linux-android/lib/ldscripts/elf_x86_64.xd
new file mode 100644
index 0000000..0f41ca0
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_x86_64.xd
@@ -0,0 +1,226 @@
+/* Script for ld -pie: link position independent executable */
+OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64",
+ "elf64-x86-64")
+OUTPUT_ARCH(i386:x86-64)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0); . = 0 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.init : { *(.rela.init) }
+ .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
+ .rela.fini : { *(.rela.fini) }
+ .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
+ .rela.data.rel.ro : { *(.rela.data.rel.ro .rela.data.rel.ro.* .rela.gnu.linkonce.d.rel.ro.*) }
+ .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
+ .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
+ .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
+ .rela.ctors : { *(.rela.ctors) }
+ .rela.dtors : { *(.rela.dtors) }
+ .rela.got : { *(.rela.got) }
+ .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
+ .rela.ldata : { *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*) }
+ .rela.lbss : { *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*) }
+ .rela.lrodata : { *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*) }
+ .rela.ifunc : { *(.rela.ifunc) }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori; note that the first assignment below leaves the offset within the page unchanged. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Align the __preinit_array_start label to the 8-byte pointer size of
+ this 64-bit target; a 4-byte boundary could leave the label ahead of a
+ section whose entries need 8-byte alignment. The label stays outside
+ the section so that an empty .preinit_array is not created for it. */
+ . = ALIGN(64 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end-of-ctors marker, so it must come last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .);
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_x86_64.xdc b/x86_64-linux-android/lib/ldscripts/elf_x86_64.xdc
new file mode 100644
index 0000000..6aedb53
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_x86_64.xdc
@@ -0,0 +1,228 @@
+/* Script for -pie -z combreloc: position independent executable, combine & sort relocs */
+OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64",
+ "elf64-x86-64")
+OUTPUT_ARCH(i386:x86-64)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0); . = 0 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.dyn :
+ {
+ *(.rela.init)
+ *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
+ *(.rela.fini)
+ *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
+ *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
+ *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
+ *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
+ *(.rela.ctors)
+ *(.rela.dtors)
+ *(.rela.got)
+ *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
+ *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*)
+ *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*)
+ *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*)
+ *(.rela.ifunc)
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori; note that the first assignment below leaves the offset within the page unchanged. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Align the __preinit_array_start label to the 8-byte pointer size of
+ this 64-bit target; a 4-byte boundary could leave the label ahead of a
+ section whose entries need 8-byte alignment. The label stays outside
+ the section so that an empty .preinit_array is not created for it. */
+ . = ALIGN(64 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end-of-ctors marker, so it must come last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .);
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_x86_64.xdw b/x86_64-linux-android/lib/ldscripts/elf_x86_64.xdw
new file mode 100644
index 0000000..4631e1d
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_x86_64.xdw
@@ -0,0 +1,227 @@
+/* Script for -pie -z combreloc -z now -z relro: position independent executable, combine & sort relocs */
+OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64",
+ "elf64-x86-64")
+OUTPUT_ARCH(i386:x86-64)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0); . = 0 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.dyn :
+ {
+ *(.rela.init)
+ *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
+ *(.rela.fini)
+ *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
+ *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
+ *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
+ *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
+ *(.rela.ctors)
+ *(.rela.dtors)
+ *(.rela.got)
+ *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
+ *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*)
+ *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*)
+ *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*)
+ *(.rela.ifunc)
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori; note that the first assignment below leaves the offset within the page unchanged. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Align the __preinit_array_start label to the 8-byte pointer size of
+ this 64-bit target; a 4-byte boundary could leave the label ahead of a
+ section whose entries need 8-byte alignment. The label stays outside
+ the section so that an empty .preinit_array is not created for it. */
+ . = ALIGN(64 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end-of-ctors marker, so it must come last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got.plt) *(.igot.plt) *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (0, .);
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_x86_64.xn b/x86_64-linux-android/lib/ldscripts/elf_x86_64.xn
new file mode 100644
index 0000000..6aa7208
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_x86_64.xn
@@ -0,0 +1,226 @@
+/* Script for -n: mix text and data on same page */
+OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64",
+ "elf64-x86-64")
+OUTPUT_ARCH(i386:x86-64)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0x400000); . = 0x400000 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.init : { *(.rela.init) }
+ .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
+ .rela.fini : { *(.rela.fini) }
+ .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
+ .rela.data.rel.ro : { *(.rela.data.rel.ro .rela.data.rel.ro.* .rela.gnu.linkonce.d.rel.ro.*) }
+ .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
+ .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
+ .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
+ .rela.ctors : { *(.rela.ctors) }
+ .rela.dtors : { *(.rela.dtors) }
+ .rela.got : { *(.rela.got) }
+ .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
+ .rela.ldata : { *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*) }
+ .rela.lbss : { *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*) }
+ .rela.lrodata : { *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*) }
+ .rela.ifunc : { *(.rela.ifunc) }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori; note that the first assignment below leaves the offset within the page unchanged. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Align the __preinit_array_start label to the 8-byte pointer size of
+ this 64-bit target; a 4-byte boundary could leave the label ahead of a
+ section whose entries need 8-byte alignment. The label stays outside
+ the section so that an empty .preinit_array is not created for it. */
+ . = ALIGN(64 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .);
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ PROVIDE_HIDDEN (__init_array_end = .);
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array))
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end-of-ctors marker, so it must come last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .);
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_x86_64.xr b/x86_64-linux-android/lib/ldscripts/elf_x86_64.xr
new file mode 100644
index 0000000..7f92872
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_x86_64.xr
@@ -0,0 +1,154 @@
+/* Script for ld -r: link without relocation */
+OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64",
+ "elf64-x86-64")
+OUTPUT_ARCH(i386:x86-64)
+ /* For some reason, the Solaris linker makes bad executables
+ if gld -r is used and the intermediate file has sections starting
+ at non-zero addresses. Could be a Solaris ld bug, could be a GNU ld
+ bug. But for now assigning the zero vmas works. */
+SECTIONS
+{
+ /* Read-only sections, merged into text segment. Per the note above, output sections are given start address 0. */
+ .interp 0 : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) } /* NOTE(review): no explicit 0 address, unlike its neighbours -- confirm intended */
+ .hash 0 : { *(.hash) }
+ .gnu.hash 0 : { *(.gnu.hash) }
+ .dynsym 0 : { *(.dynsym) }
+ .dynstr 0 : { *(.dynstr) }
+ .gnu.version 0 : { *(.gnu.version) }
+ .gnu.version_d 0: { *(.gnu.version_d) }
+ .gnu.version_r 0: { *(.gnu.version_r) }
+ .rela.init 0 : { *(.rela.init) }
+ .rela.text 0 : { *(.rela.text) }
+ .rela.fini 0 : { *(.rela.fini) }
+ .rela.rodata 0 : { *(.rela.rodata) }
+ .rela.data.rel.ro 0 : { *(.rela.data.rel.ro) }
+ .rela.data 0 : { *(.rela.data) }
+ .rela.tdata 0 : { *(.rela.tdata) }
+ .rela.tbss 0 : { *(.rela.tbss) }
+ .rela.ctors 0 : { *(.rela.ctors) }
+ .rela.dtors 0 : { *(.rela.dtors) }
+ .rela.got 0 : { *(.rela.got) }
+ .rela.bss 0 : { *(.rela.bss) }
+ .rela.ldata 0 : { *(.rela.ldata) }
+ .rela.lbss 0 : { *(.rela.lbss) }
+ .rela.lrodata 0 : { *(.rela.lrodata) }
+ .rela.ifunc 0 : { *(.rela.ifunc) }
+ .rela.plt 0 :
+ {
+ *(.rela.plt)
+ }
+ .init 0 :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt 0 : { *(.plt) *(.iplt) }
+ .text 0 :
+ {
+ *(.text .stub)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini 0 :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ .rodata 0 : { *(.rodata) }
+ .rodata1 0 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) } /* NOTE(review): no explicit 0 address, unlike its neighbours -- confirm intended */
+ .eh_frame 0 : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table 0 : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges 0 : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* No data-segment address adjustment is made in this relocatable script;
+ the statements below go straight to the writable exception-handling sections. */
+ /* Exception handling (writable variants of the ONLY_IF_RO sections above) */
+ .eh_frame 0 : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table 0 : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges 0 : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata 0 : { *(.tdata) }
+ .tbss 0 : { *(.tbss) }
+ /* Align the location counter before .preinit_array. Inherited rationale:
+ keep the __preinit_array_start label aligned without defining it inside
+ the section, which would create the section even when it is empty.
+ NOTE(review): no such label is defined in this script -- confirm. */
+ . = ALIGN(32 / 8);
+ .preinit_array 0 :
+ {
+ KEEP (*(.preinit_array))
+ }
+ .jcr 0 : { KEEP (*(.jcr)) }
+ .dynamic 0 : { *(.dynamic) }
+ .got 0 : { *(.got) *(.igot) }
+ .got.plt 0 : { *(.got.plt) *(.igot.plt) }
+ .data 0 :
+ {
+ *(.data)
+ }
+ .data1 0 : { *(.data1) }
+ .bss 0 :
+ {
+ *(.dynbss)
+ *(.bss)
+ *(COMMON)
+ /* No alignment statement is emitted here in this relocatable
+ script, and no _end symbol is defined after the data sections;
+ .bss simply collects .dynbss, .bss and COMMON. */
+ }
+ .lbss 0 :
+ {
+ *(.dynlbss)
+ *(.lbss)
+ *(LARGE_COMMON)
+ }
+ .lrodata 0 :
+ {
+ *(.lrodata)
+ }
+ .ldata 0 :
+ {
+ *(.ldata)
+ }
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_x86_64.xs b/x86_64-linux-android/lib/ldscripts/elf_x86_64.xs
new file mode 100644
index 0000000..d75a7f8
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_x86_64.xs
@@ -0,0 +1,217 @@
+/* Script for ld --shared: link shared library */
+OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64",
+ "elf64-x86-64")
+OUTPUT_ARCH(i386:x86-64)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ . = 0 + SIZEOF_HEADERS; /* base address 0; sections are placed after the output file's headers */
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.init : { *(.rela.init) }
+ .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
+ .rela.fini : { *(.rela.fini) }
+ .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
+ .rela.data.rel.ro : { *(.rela.data.rel.ro .rela.data.rel.ro.* .rela.gnu.linkonce.d.rel.ro.*) }
+ .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
+ .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
+ .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
+ .rela.ctors : { *(.rela.ctors) }
+ .rela.dtors : { *(.rela.dtors) }
+ .rela.got : { *(.rela.got) }
+ .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
+ .rela.ldata : { *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*) }
+ .rela.lbss : { *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*) }
+ .rela.lrodata : { *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*) }
+ .rela.ifunc : { *(.rela.ifunc) }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ *(.rela.iplt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align at
+ exactly a page boundary to make life easier for apriori. NOTE(review): 64-bit script -- confirm wording. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling (writable variants of the ONLY_IF_RO sections above) */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Align the location counter before .preinit_array. Inherited rationale:
+ keep the __preinit_array_start label aligned without defining it inside
+ the section, which would create the section even when it is empty.
+ NOTE(review): no such label is defined here, and other ALIGNs use 64 / 8. */
+ . = ALIGN(32 / 8);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array)) /* entries from crtbegin objects are placed first */
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array)) /* entries from crtbegin objects are placed first */
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end-of-ctors marker, so it must come last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors : /* same ordering scheme as .ctors above, applied to the .dtors input sections */
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .);
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_x86_64.xsc b/x86_64-linux-android/lib/ldscripts/elf_x86_64.xsc
new file mode 100644
index 0000000..379c1cd
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_x86_64.xsc
@@ -0,0 +1,220 @@
+/* Script for --shared -z combreloc: shared library, combine & sort relocs */
+/* Modified for Android. */
+OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64",
+ "elf64-x86-64")
+OUTPUT_ARCH(i386:x86-64)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ . = 0 + SIZEOF_HEADERS; /* base address 0; sections are placed after the output file's headers */
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.dyn : /* combreloc: the per-section .rela.* inputs listed below are gathered into one output section */
+ {
+ *(.rela.init)
+ *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
+ *(.rela.fini)
+ *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
+ *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
+ *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
+ *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
+ *(.rela.ctors)
+ *(.rela.dtors)
+ *(.rela.got)
+ *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
+ *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*)
+ *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*)
+ *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*)
+ *(.rela.ifunc)
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ *(.rela.iplt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align at
+ exactly a page boundary to make life easier for apriori. NOTE(review): 64-bit script -- confirm wording. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling (writable variants of the ONLY_IF_RO sections above) */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Align the location counter before .preinit_array. Inherited rationale:
+ keep the __preinit_array_start label aligned without defining it inside
+ the section, which would create the section even when it is empty.
+ NOTE(review): no such label is defined here, and other ALIGNs use 64 / 8. */
+ . = ALIGN(32 / 8);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array)) /* entries from crtbegin objects are placed first */
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array)) /* entries from crtbegin objects are placed first */
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end-of-ctors marker, so it must come last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors : /* same ordering scheme as .ctors above, applied to the .dtors input sections */
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .);
+ .got.plt : { *(.got.plt) *(.igot.plt) }
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_x86_64.xsw b/x86_64-linux-android/lib/ldscripts/elf_x86_64.xsw
new file mode 100644
index 0000000..ada2e50
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_x86_64.xsw
@@ -0,0 +1,218 @@
+/* Script for --shared -z combreloc -z now -z relro: shared library, combine & sort relocs */
+OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64",
+ "elf64-x86-64")
+OUTPUT_ARCH(i386:x86-64)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ . = 0 + SIZEOF_HEADERS; /* base address 0; sections are placed after the output file's headers */
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.dyn : /* combreloc: the per-section .rela.* inputs listed below are gathered into one output section */
+ {
+ *(.rela.init)
+ *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
+ *(.rela.fini)
+ *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
+ *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
+ *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
+ *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
+ *(.rela.ctors)
+ *(.rela.dtors)
+ *(.rela.got)
+ *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
+ *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*)
+ *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*)
+ *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*)
+ *(.rela.ifunc)
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ *(.rela.iplt)
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align at
+ exactly a page boundary to make life easier for apriori. NOTE(review): 64-bit script -- confirm wording. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling (writable variants of the ONLY_IF_RO sections above) */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Align the location counter before .preinit_array. Inherited rationale:
+ keep the __preinit_array_start label aligned without defining it inside
+ the section, which would create the section even when it is empty.
+ NOTE(review): no such label is defined here, and other ALIGNs use 64 / 8. */
+ . = ALIGN(32 / 8);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array))
+ }
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array)) /* entries from crtbegin objects are placed first */
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors))
+ }
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array)) /* entries from crtbegin objects are placed first */
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors))
+ }
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctors section from
+ the crtend.o file until after the sorted ctors.
+ The .ctors section from the crtend file contains the
+ end-of-ctors marker, so it must come last. */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors : /* same ordering scheme as .ctors above, applied to the .dtors input sections */
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got.plt) *(.igot.plt) *(.got) *(.igot) } /* .got.plt/.igot.plt are folded into .got, so the whole GOT precedes the RELRO end marker below */
+ . = DATA_SEGMENT_RELRO_END (0, .);
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_x86_64.xu b/x86_64-linux-android/lib/ldscripts/elf_x86_64.xu
new file mode 100644
index 0000000..b1d4b5b
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_x86_64.xu
@@ -0,0 +1,155 @@
+/* Script for ld -Ur: link w/out relocation, do create constructors */
+OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64",
+ "elf64-x86-64")
+OUTPUT_ARCH(i386:x86-64)
+ /* For some reason, the Solaris linker makes bad executables
+ if gld -r is used and the intermediate file has sections starting
+ at non-zero addresses. Could be a Solaris ld bug, could be a GNU ld
+ bug. But for now assigning the zero vmas works. */
+SECTIONS
+{
+ /* Read-only sections, merged into text segment. Per the note above, output sections are given start address 0. */
+ .interp 0 : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) } /* NOTE(review): no explicit 0 address, unlike its neighbours -- confirm intended */
+ .hash 0 : { *(.hash) }
+ .gnu.hash 0 : { *(.gnu.hash) }
+ .dynsym 0 : { *(.dynsym) }
+ .dynstr 0 : { *(.dynstr) }
+ .gnu.version 0 : { *(.gnu.version) }
+ .gnu.version_d 0: { *(.gnu.version_d) }
+ .gnu.version_r 0: { *(.gnu.version_r) }
+ .rela.init 0 : { *(.rela.init) }
+ .rela.text 0 : { *(.rela.text) }
+ .rela.fini 0 : { *(.rela.fini) }
+ .rela.rodata 0 : { *(.rela.rodata) }
+ .rela.data.rel.ro 0 : { *(.rela.data.rel.ro) }
+ .rela.data 0 : { *(.rela.data) }
+ .rela.tdata 0 : { *(.rela.tdata) }
+ .rela.tbss 0 : { *(.rela.tbss) }
+ .rela.ctors 0 : { *(.rela.ctors) }
+ .rela.dtors 0 : { *(.rela.dtors) }
+ .rela.got 0 : { *(.rela.got) }
+ .rela.bss 0 : { *(.rela.bss) }
+ .rela.ldata 0 : { *(.rela.ldata) }
+ .rela.lbss 0 : { *(.rela.lbss) }
+ .rela.lrodata 0 : { *(.rela.lrodata) }
+ .rela.ifunc 0 : { *(.rela.ifunc) }
+ .rela.plt 0 :
+ {
+ *(.rela.plt)
+ }
+ .init 0 :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt 0 : { *(.plt) *(.iplt) }
+ .text 0 :
+ {
+ *(.text .stub)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini 0 :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ .rodata 0 : { *(.rodata) }
+ .rodata1 0 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) } /* NOTE(review): no explicit 0 address, unlike its neighbours -- confirm intended */
+ .eh_frame 0 : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table 0 : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges 0 : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* No data-segment address adjustment is made in this relocatable script;
+ the statements below go straight to the writable exception-handling sections. */
+ /* Exception handling (writable variants of the ONLY_IF_RO sections above) */
+ .eh_frame 0 : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table 0 : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges 0 : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata 0 : { *(.tdata) }
+ .tbss 0 : { *(.tbss) }
+ /* Align the location counter before .preinit_array. Inherited rationale:
+ keep the __preinit_array_start label aligned without defining it inside
+ the section, which would create the section even when it is empty.
+ NOTE(review): no such label is defined in this script -- confirm. */
+ . = ALIGN(32 / 8);
+ .preinit_array 0 :
+ {
+ KEEP (*(.preinit_array))
+ }
+ .jcr 0 : { KEEP (*(.jcr)) }
+ .dynamic 0 : { *(.dynamic) }
+ .got 0 : { *(.got) *(.igot) }
+ .got.plt 0 : { *(.got.plt) *(.igot.plt) }
+ .data 0 :
+ {
+ *(.data)
+ SORT(CONSTRUCTORS) /* constructor tables requested by -Ur (see the header comment of this script) */
+ }
+ .data1 0 : { *(.data1) }
+ .bss 0 :
+ {
+ *(.dynbss)
+ *(.bss)
+ *(COMMON)
+ /* No alignment statement is emitted here in this relocatable
+ script, and no _end symbol is defined after the data sections;
+ .bss simply collects .dynbss, .bss and COMMON. */
+ }
+ .lbss 0 :
+ {
+ *(.dynlbss)
+ *(.lbss)
+ *(LARGE_COMMON)
+ }
+ .lrodata 0 :
+ {
+ *(.lrodata)
+ }
+ .ldata 0 :
+ {
+ *(.ldata)
+ }
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/elf_x86_64.xw b/x86_64-linux-android/lib/ldscripts/elf_x86_64.xw
new file mode 100644
index 0000000..21309d9
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/elf_x86_64.xw
@@ -0,0 +1,227 @@
+/* Script for -z combreloc -z now -z relro: combine and sort reloc sections */
+OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64",
+ "elf64-x86-64")
+OUTPUT_ARCH(i386:x86-64)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0x400000); . = 0x400000 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rela.dyn :
+ {
+ *(.rela.init)
+ *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
+ *(.rela.fini)
+ *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
+ *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
+ *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
+ *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
+ *(.rela.ctors)
+ *(.rela.dtors)
+ *(.rela.got)
+ *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
+ *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*)
+ *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*)
+ *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*)
+ *(.rela.ifunc)
+ }
+ .rela.plt :
+ {
+ *(.rela.plt)
+ PROVIDE_HIDDEN (__rela_iplt_start = .);
+ *(.rela.iplt)
+ PROVIDE_HIDDEN (__rela_iplt_end = .);
+ }
+ .init :
+ {
+ KEEP (*(SORT_NONE(.init)))
+ }
+ .plt : { *(.plt) *(.iplt) }
+ .text :
+ {
+ *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+ *(.text.exit .text.exit.*)
+ *(.text.startup .text.startup.*)
+ *(.text.hot .text.hot.*)
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ }
+ .fini :
+ {
+ KEEP (*(SORT_NONE(.fini)))
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
+ .gcc_except_table.*) }
+ /* These sections are generated by the Sun/Oracle C++ compiler. */
+ .exception_ranges : ONLY_IF_RO { *(.exception_ranges
+ .exception_ranges*) }
+ /* Adjust the address for the data segment. For 32 bits we want to align
+ at exactly a page boundary to make life easier for apriori. */
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ /* Exception handling */
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
+ .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
+ /* Thread Local Storage sections */
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ /* Ensure the __preinit_array_start label is properly aligned. We could
+ instead move the label definition inside the section, but the linker would
+ then create the section even if it turns out to be empty, which isn't pretty.
+ Align to 64 / 8 like the other ALIGNs in this elf64 script: with 32 / 8, padding added to align an 8-byte-aligned .preinit_array could separate the label from the first entry. */
+ . = ALIGN(64 / 8);
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ .preinit_array :
+ {
+ KEEP (*(.preinit_array)) /* KEEP: the entries must survive section garbage collection. */
+ }
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ PROVIDE_HIDDEN (__init_array_start = .); /* Location counter just before .init_array. */
+ .init_array :
+ {
+ KEEP (*crtbegin*.o(.init_array)) /* .init_array from files matching *crtbegin*.o is placed first. */
+ KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*))) /* Then suffixed .init_array.* and .ctors.* sections, ordered by the priority in the section name. */
+ KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .ctors)) /* Then the remaining .init_array sections, plus .ctors from files other than crtbegin/crtend; EXCLUDE_FILE applies to the pattern that follows it. */
+ }
+ PROVIDE_HIDDEN (__init_array_end = .); /* Location counter just after .init_array. */
+ PROVIDE_HIDDEN (__fini_array_start = .); /* Location counter just before .fini_array. */
+ .fini_array :
+ {
+ KEEP (*crtbegin*.o(.fini_array)) /* .fini_array from files matching *crtbegin*.o is placed first. */
+ KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*))) /* Then suffixed .fini_array.* and .dtors.* sections, ordered by the priority in the section name. */
+ KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin*.o *crtend.o *crtend*.o ) .dtors)) /* Then the remaining .fini_array sections, plus .dtors from files other than crtbegin/crtend; EXCLUDE_FILE applies to the pattern that follows it. */
+ }
+ PROVIDE_HIDDEN (__fini_array_end = .); /* Location counter just after .fini_array. */
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctor section from
+ the crtend.o file until after the sorted ctors.
+ The .ctor section from the crtend file contains the
+ end of ctors marker and it must be last */
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
+ .dynamic : { *(.dynamic) }
+ .got : { *(.got.plt) *(.igot.plt) *(.got) *(.igot) }
+ . = DATA_SEGMENT_RELRO_END (0, .);
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ _edata = .; PROVIDE (edata = .);
+ . = .;
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ .lbss :
+ {
+ *(.dynlbss)
+ *(.lbss .lbss.* .gnu.linkonce.lb.*)
+ *(LARGE_COMMON)
+ }
+ . = ALIGN(64 / 8);
+ . = SEGMENT_START("ldata-segment", .);
+ .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
+ }
+ .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
+ {
+ *(.ldata .ldata.* .gnu.linkonce.l.*)
+ . = ALIGN(. != 0 ? 64 / 8 : 1);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ _bss_end__ = . ; __bss_end__ = . ; __end__ = . ;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ /* DWARF Extension. */
+ .debug_macro 0 : { *(.debug_macro) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) *(.mdebug.*) }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/i386linux.x b/x86_64-linux-android/lib/ldscripts/i386linux.x
new file mode 100644
index 0000000..68bc5a0
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/i386linux.x
@@ -0,0 +1,47 @@
+/* Default linker script, for normal executables */
+/* Modified for Android. */
+OUTPUT_FORMAT("a.out-i386-linux", "a.out-i386-linux",
+ "a.out-i386-linux")
+OUTPUT_ARCH(i386)
+PROVIDE (__stack = 0);
+SECTIONS /* Default a.out layout: .text at 0x1020, .data on the next 0x1000 boundary, .bss directly after .data. */
+{
+ . = 0x1020; /* Start address of .text. NOTE(review): presumably one 0x1000 page plus a 0x20-byte a.out header - confirm. */
+ .text :
+ {
+ CREATE_OBJECT_SYMBOLS /* ld emits one symbol per input file at this point. */
+ *(.text)
+ /* The next six sections are for SunOS dynamic linking. The order
+ is important. */
+ *(.dynrel)
+ *(.hash)
+ *(.dynsym)
+ *(.dynstr)
+ *(.rules)
+ *(.need)
+ _etext = .; /* Both symbols mark the end of the .text contents. */
+ __etext = .;
+ }
+ . = ALIGN(0x1000); /* Advance to the next 0x1000-byte boundary before .data. */
+ .data :
+ {
+ /* The first three sections are for SunOS dynamic linking. */
+ *(.dynamic)
+ *(.got)
+ *(.plt)
+ *(.data)
+ *(.linux-dynamic) /* For Linux dynamic linking. */
+ CONSTRUCTORS /* a.out constructor/destructor sets are placed here. */
+ _edata = .; /* Both symbols mark the end of the .data contents. */
+ __edata = .;
+ }
+ .bss :
+ {
+ __bss_start = .; /* Start of the .bss contents. */
+ *(.bss)
+ *(COMMON)
+ . = ALIGN(4); /* Pad so that _end and __end are 4-byte aligned. */
+ _end = . ;
+ __end = . ;
+ }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/i386linux.xbn b/x86_64-linux-android/lib/ldscripts/i386linux.xbn
new file mode 100644
index 0000000..91b1e91
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/i386linux.xbn
@@ -0,0 +1,46 @@
+/* Script for -N: mix text and data on same page; don't align data */
+OUTPUT_FORMAT("a.out-i386-linux", "a.out-i386-linux",
+ "a.out-i386-linux")
+OUTPUT_ARCH(i386)
+PROVIDE (__stack = 0);
+SECTIONS /* -N layout: .text at 0, .data immediately after it with no alignment step, then .bss. */
+{
+ . = 0; /* Start address of .text. */
+ .text :
+ {
+ CREATE_OBJECT_SYMBOLS /* ld emits one symbol per input file at this point. */
+ *(.text)
+ /* The next six sections are for SunOS dynamic linking. The order
+ is important. */
+ *(.dynrel)
+ *(.hash)
+ *(.dynsym)
+ *(.dynstr)
+ *(.rules)
+ *(.need)
+ _etext = .; /* Both symbols mark the end of the .text contents. */
+ __etext = .;
+ }
+ . = .; /* Location counter left unchanged: .data is not aligned in this variant. */
+ .data :
+ {
+ /* The first three sections are for SunOS dynamic linking. */
+ *(.dynamic)
+ *(.got)
+ *(.plt)
+ *(.data)
+ *(.linux-dynamic) /* For Linux dynamic linking. */
+ CONSTRUCTORS /* a.out constructor/destructor sets are placed here. */
+ _edata = .; /* Both symbols mark the end of the .data contents. */
+ __edata = .;
+ }
+ .bss :
+ {
+ __bss_start = .; /* Start of the .bss contents. */
+ *(.bss)
+ *(COMMON)
+ . = ALIGN(4); /* Pad so that _end and __end are 4-byte aligned. */
+ _end = . ;
+ __end = . ;
+ }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/i386linux.xn b/x86_64-linux-android/lib/ldscripts/i386linux.xn
new file mode 100644
index 0000000..6185656
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/i386linux.xn
@@ -0,0 +1,46 @@
+/* Script for -n: mix text and data on same page */
+OUTPUT_FORMAT("a.out-i386-linux", "a.out-i386-linux",
+ "a.out-i386-linux")
+OUTPUT_ARCH(i386)
+PROVIDE (__stack = 0);
+SECTIONS /* -n layout: .text at 0, .data on the next 0x1000 boundary, .bss directly after .data. */
+{
+ . = 0; /* Start address of .text. */
+ .text :
+ {
+ CREATE_OBJECT_SYMBOLS /* ld emits one symbol per input file at this point. */
+ *(.text)
+ /* The next six sections are for SunOS dynamic linking. The order
+ is important. */
+ *(.dynrel)
+ *(.hash)
+ *(.dynsym)
+ *(.dynstr)
+ *(.rules)
+ *(.need)
+ _etext = .; /* Both symbols mark the end of the .text contents. */
+ __etext = .;
+ }
+ . = ALIGN(0x1000); /* Advance to the next 0x1000-byte boundary before .data. */
+ .data :
+ {
+ /* The first three sections are for SunOS dynamic linking. */
+ *(.dynamic)
+ *(.got)
+ *(.plt)
+ *(.data)
+ *(.linux-dynamic) /* For Linux dynamic linking. */
+ CONSTRUCTORS /* a.out constructor/destructor sets are placed here. */
+ _edata = .; /* Both symbols mark the end of the .data contents. */
+ __edata = .;
+ }
+ .bss :
+ {
+ __bss_start = .; /* Start of the .bss contents. */
+ *(.bss)
+ *(COMMON)
+ . = ALIGN(4); /* Pad so that _end and __end are 4-byte aligned. */
+ _end = . ;
+ __end = . ;
+ }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/i386linux.xr b/x86_64-linux-android/lib/ldscripts/i386linux.xr
new file mode 100644
index 0000000..8a33f28
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/i386linux.xr
@@ -0,0 +1,37 @@
+/* Script for ld -r: link without relocation */
+OUTPUT_FORMAT("a.out-i386-linux", "a.out-i386-linux",
+ "a.out-i386-linux")
+OUTPUT_ARCH(i386)
+SECTIONS /* ld -r layout: no addresses are assigned and no boundary symbols are defined. */
+{
+ .text :
+ {
+ CREATE_OBJECT_SYMBOLS /* ld emits one symbol per input file at this point. */
+ *(.text)
+ /* The next six sections are for SunOS dynamic linking. The order
+ is important. */
+ *(.dynrel)
+ *(.hash)
+ *(.dynsym)
+ *(.dynstr)
+ *(.rules)
+ *(.need)
+ }
+ .data :
+ {
+ /* The first three sections are for SunOS dynamic linking. */
+ *(.dynamic)
+ *(.got)
+ *(.plt)
+ *(.data)
+ *(.linux-dynamic) /* For Linux dynamic linking. */
+ }
+ .bss :
+ {
+ ; /* Null statement. The final-link script i386linux.x assigns __bss_start, _end and __end inside .bss; this script defines none of them. */
+ *(.bss)
+ *(COMMON)
+ ; /* Null statement. */
+ ; /* Null statement. */
+ }
+}
diff --git a/x86_64-linux-android/lib/ldscripts/i386linux.xu b/x86_64-linux-android/lib/ldscripts/i386linux.xu
new file mode 100644
index 0000000..6847100
--- /dev/null
+++ b/x86_64-linux-android/lib/ldscripts/i386linux.xu
@@ -0,0 +1,38 @@
+/* Script for ld -Ur: link w/out relocation, do create constructors */
+OUTPUT_FORMAT("a.out-i386-linux", "a.out-i386-linux",
+ "a.out-i386-linux")
+OUTPUT_ARCH(i386)
+SECTIONS /* ld -Ur layout: like the ld -r script (no addresses, no boundary symbols) but with CONSTRUCTORS in .data. */
+{
+ .text :
+ {
+ CREATE_OBJECT_SYMBOLS /* ld emits one symbol per input file at this point. */
+ *(.text)
+ /* The next six sections are for SunOS dynamic linking. The order
+ is important. */
+ *(.dynrel)
+ *(.hash)
+ *(.dynsym)
+ *(.dynstr)
+ *(.rules)
+ *(.need)
+ }
+ .data :
+ {
+ /* The first three sections are for SunOS dynamic linking. */
+ *(.dynamic)
+ *(.got)
+ *(.plt)
+ *(.data)
+ *(.linux-dynamic) /* For Linux dynamic linking. */
+ CONSTRUCTORS /* a.out constructor/destructor sets are placed here. */
+ }
+ .bss :
+ {
+ ; /* Null statement. The final-link script i386linux.x assigns __bss_start, _end and __end inside .bss; this script defines none of them. */
+ *(.bss)
+ *(COMMON)
+ ; /* Null statement. */
+ ; /* Null statement. */
+ }
+}
diff --git a/x86_64-linux-android/lib/libatomic.a b/x86_64-linux-android/lib/libatomic.a
new file mode 100644
index 0000000..2e7be55
--- /dev/null
+++ b/x86_64-linux-android/lib/libatomic.a
Binary files differ
diff --git a/x86_64-linux-android/lib/libgomp.a b/x86_64-linux-android/lib/libgomp.a
new file mode 100644
index 0000000..ddf6331
--- /dev/null
+++ b/x86_64-linux-android/lib/libgomp.a
Binary files differ
diff --git a/x86_64-linux-android/lib/libgomp.spec b/x86_64-linux-android/lib/libgomp.spec
new file mode 100644
index 0000000..2fd7721
--- /dev/null
+++ b/x86_64-linux-android/lib/libgomp.spec
@@ -0,0 +1,3 @@
+# This spec file is read by gcc when linking. It is used to specify the
+# standard libraries we need in order to link with libgomp.
+*link_gomp: -lgomp
diff --git a/x86_64-linux-android/lib64/libatomic.a b/x86_64-linux-android/lib64/libatomic.a
new file mode 100644
index 0000000..6098208
--- /dev/null
+++ b/x86_64-linux-android/lib64/libatomic.a
Binary files differ
diff --git a/x86_64-linux-android/lib64/libgomp.a b/x86_64-linux-android/lib64/libgomp.a
new file mode 100644
index 0000000..d04ed56
--- /dev/null
+++ b/x86_64-linux-android/lib64/libgomp.a
Binary files differ
diff --git a/x86_64-linux-android/lib64/libgomp.spec b/x86_64-linux-android/lib64/libgomp.spec
new file mode 100644
index 0000000..2fd7721
--- /dev/null
+++ b/x86_64-linux-android/lib64/libgomp.spec
@@ -0,0 +1,3 @@
+# This spec file is read by gcc when linking. It is used to specify the
+# standard libraries we need in order to link with libgomp.
+*link_gomp: -lgomp
diff --git a/x86_64-linux-android/libx32/libatomic.a b/x86_64-linux-android/libx32/libatomic.a
new file mode 100644
index 0000000..a0561dd
--- /dev/null
+++ b/x86_64-linux-android/libx32/libatomic.a
Binary files differ
diff --git a/x86_64-linux-android/libx32/libgomp.a b/x86_64-linux-android/libx32/libgomp.a
new file mode 100644
index 0000000..a63d951
--- /dev/null
+++ b/x86_64-linux-android/libx32/libgomp.a
Binary files differ
diff --git a/x86_64-linux-android/libx32/libgomp.spec b/x86_64-linux-android/libx32/libgomp.spec
new file mode 100644
index 0000000..2fd7721
--- /dev/null
+++ b/x86_64-linux-android/libx32/libgomp.spec
@@ -0,0 +1,3 @@
+# This spec file is read by gcc when linking. It is used to specify the
+# standard libraries we need in order to link with libgomp.
+*link_gomp: -lgomp