diff options
author | Elliott Hughes <enh@google.com> | 2016-07-15 16:50:09 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2016-07-15 16:50:09 +0000 |
commit | ae1136b8c5614a69a26fa68fade725fbfb0c2571 (patch) | |
tree | 6087083c35131860fa72456a56e20c03ecd8f881 | |
parent | 55acd713d02f61c984e10ff2d4cef775a7fb597c (diff) | |
parent | 0653ca33e7ffbb54f09f97c7924bac17aca8d770 (diff) | |
download | jpeg-ae1136b8c5614a69a26fa68fade725fbfb0c2571.tar.gz |
Merge "Fix AArch64 ABI conformance issue in SIMD code."
Refs: HEAD, android-n-mr2-preview-2, android-n-mr2-preview-1, android-n-mr1-preview-2, android-n-mr1-preview-1, master, main
-rw-r--r-- | jsimd_arm64_neon.S | 36 |
1 files changed, 28 insertions, 8 deletions
```diff
diff --git a/jsimd_arm64_neon.S b/jsimd_arm64_neon.S
index 099d4b5..26a8b11 100644
--- a/jsimd_arm64_neon.S
+++ b/jsimd_arm64_neon.S
@@ -237,6 +237,11 @@ asm_function jsimd_idct_islow_neon
     TMP3 .req x2
     TMP4 .req x15
 
+    /* OUTPUT_COL is a JDIMENSION (unsigned int) argument, so the ABI doesn't
+       guarantee that the upper (unused) 32 bits of x3 are valid. This
+       instruction ensures that those bits are set to zero. */
+    uxtw x3, w3
+
     ROW0L .req v16
     ROW0R .req v17
     ROW1L .req v18
@@ -794,6 +799,11 @@ asm_function jsimd_idct_ifast_neon
     TMP4 .req x22
     TMP5 .req x23
 
+    /* OUTPUT_COL is a JDIMENSION (unsigned int) argument, so the ABI doesn't
+       guarantee that the upper (unused) 32 bits of x3 are valid. This
+       instruction ensures that those bits are set to zero. */
+    uxtw x3, w3
+
     /* Load and dequantize coefficients into NEON registers
      * with the following allocation:
      * 0 1 2 3 | 4 5 6 7
@@ -1167,6 +1177,11 @@ asm_function jsimd_idct_4x4_neon
     TMP3 .req x2
     TMP4 .req x15
 
+    /* OUTPUT_COL is a JDIMENSION (unsigned int) argument, so the ABI doesn't
+       guarantee that the upper (unused) 32 bits of x3 are valid. This
+       instruction ensures that those bits are set to zero. */
+    uxtw x3, w3
+
     /* Save all used NEON registers */
     sub sp, sp, 272
     str x15, [sp], 16
@@ -1362,6 +1377,12 @@ asm_function jsimd_idct_2x2_neon
     TMP1 .req x0
     TMP2 .req x15
 
+    /* OUTPUT_COL is a JDIMENSION (unsigned int) argument, so the ABI doesn't
+       guarantee that the upper (unused) 32 bits of x3 are valid. This
+       instruction ensures that those bits are set to zero. */
+    uxtw x3, w3
+
+
     /* vpush {v8.4h - v15.4h} ; not available */
     sub sp, sp, 208
     str x15, [sp], 16
@@ -1709,11 +1730,11 @@ Ljsimd_ycc_\colorid\()_neon_consts:
     .short -128, -128, -128, -128
 
 asm_function jsimd_ycc_\colorid\()_convert_neon
-    OUTPUT_WIDTH .req x0
+    OUTPUT_WIDTH .req w0
     INPUT_BUF .req x1
-    INPUT_ROW .req x2
+    INPUT_ROW .req w2
     OUTPUT_BUF .req x3
-    NUM_ROWS .req x4
+    NUM_ROWS .req w4
 
     INPUT_BUF0 .req x5
     INPUT_BUF1 .req x6
@@ -1723,7 +1744,7 @@ asm_function jsimd_ycc_\colorid\()_convert_neon
     Y .req x8
     U .req x9
     V .req x10
-    N .req x15
+    N .req w15
 
     sub sp, sp, 336
     str x15, [sp], 16
@@ -1760,11 +1781,10 @@ asm_function jsimd_ycc_\colorid\()_convert_neon
     cmp NUM_ROWS, #1
     b.lt 9f
 0:
-    lsl x16, INPUT_ROW, #3
-    ldr Y, [INPUT_BUF0, x16]
-    ldr U, [INPUT_BUF1, x16]
+    ldr Y, [INPUT_BUF0, INPUT_ROW, uxtw #3]
+    ldr U, [INPUT_BUF1, INPUT_ROW, uxtw #3]
     mov N, OUTPUT_WIDTH
-    ldr V, [INPUT_BUF2, x16]
+    ldr V, [INPUT_BUF2, INPUT_ROW, uxtw #3]
     add INPUT_ROW, INPUT_ROW, #1
     ldr RGB, [OUTPUT_BUF], #8
 
```