summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Victoria Zhislina <niva213@gmail.com> 2018-06-15 16:38:16 +0300
committer: Victoria Zhislina <niva213@gmail.com> 2018-06-15 16:38:16 +0300
commit: 7c6fd8a6ba548bc2a5499b2ef078f1ada023fb44 (patch)
tree: c72800d0d19d08affb2a65ed22775831b02a2eea
parent: e19d71749ab5060b31d8187107a000450aa9b205 (diff)
download: neon_2_sse-7c6fd8a6ba548bc2a5499b2ef078f1ada023fb44.tar.gz
vabd(q)_u8 and vabd(q)_u16 optimization
-rw-r--r-- NEON_2_SSE.h 18
1 files changed, 6 insertions, 12 deletions
diff --git a/NEON_2_SSE.h b/NEON_2_SSE.h
index 26ea519..fea109a 100644
--- a/NEON_2_SSE.h
+++ b/NEON_2_SSE.h
@@ -5826,24 +5826,18 @@ _NEON2SSE_INLINE int32x4_t vabdq_s32(int32x4_t a, int32x4_t b) // VABD.S32 q0,q0
uint8x16_t vabdq_u8(uint8x16_t a, uint8x16_t b); // VABD.U8 q0,q0,q0
_NEON2SSE_INLINE uint8x16_t vabdq_u8(uint8x16_t a, uint8x16_t b) //no abs for unsigned
{
- __m128i cmp, difab, difba;
- cmp = vcgtq_u8(a,b);
- difab = _mm_sub_epi8(a,b);
- difba = _mm_sub_epi8 (b,a);
- difab = _mm_and_si128(cmp, difab);
- difba = _mm_andnot_si128(cmp, difba);
+ __m128i difab, difba;
+ difab = _mm_subs_epu8(a,b);
+ difba = _mm_subs_epu8 (b,a);
return _mm_or_si128(difab, difba);
}
uint16x8_t vabdq_u16(uint16x8_t a, uint16x8_t b); // VABD.s16 q0,q0,q0
_NEON2SSE_INLINE uint16x8_t vabdq_u16(uint16x8_t a, uint16x8_t b)
{
- __m128i cmp, difab, difba;
- cmp = vcgtq_u16(a,b);
- difab = _mm_sub_epi16(a,b);
- difba = _mm_sub_epi16 (b,a);
- difab = _mm_and_si128(cmp, difab);
- difba = _mm_andnot_si128(cmp, difba);
+ __m128i difab, difba;
+ difab = _mm_subs_epu16(a,b);
+ difba = _mm_subs_epu16 (b,a);
return _mm_or_si128(difab, difba);
}