diff options
Diffstat (limited to 'src/dsp/dec_sse2.c')
-rw-r--r-- | src/dsp/dec_sse2.c | 37 |
1 files changed, 21 insertions, 16 deletions
diff --git a/src/dsp/dec_sse2.c b/src/dsp/dec_sse2.c index 472b68ec..1cac1b84 100644 --- a/src/dsp/dec_sse2.c +++ b/src/dsp/dec_sse2.c @@ -12,15 +12,15 @@ #include "./dsp.h" +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + #if defined(WEBP_USE_SSE2) #include <emmintrin.h> #include "../dec/vp8i.h" -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - //------------------------------------------------------------------------------ // Transforms (Paragraph 14.4) @@ -194,7 +194,7 @@ static void TransformSSE2(const int16_t* in, uint8_t* dst, int do_two) { // Add inverse transform to 'dst' and store. { - const __m128i zero = _mm_set1_epi16(0); + const __m128i zero = _mm_setzero_si128(); // Load the reference(s). __m128i dst0, dst1, dst2, dst3; if (do_two) { @@ -278,14 +278,14 @@ static void TransformSSE2(const int16_t* in, uint8_t* dst, int do_two) { #define GET_NOTHEV(p1, p0, q0, q1, hev_thresh, not_hev) { \ const __m128i zero = _mm_setzero_si128(); \ - const __m128i t1 = MM_ABS(p1, p0); \ - const __m128i t2 = MM_ABS(q1, q0); \ + const __m128i t_1 = MM_ABS(p1, p0); \ + const __m128i t_2 = MM_ABS(q1, q0); \ \ const __m128i h = _mm_set1_epi8(hev_thresh); \ - const __m128i t3 = _mm_subs_epu8(t1, h); /* abs(p1 - p0) - hev_tresh */ \ - const __m128i t4 = _mm_subs_epu8(t2, h); /* abs(q1 - q0) - hev_tresh */ \ + const __m128i t_3 = _mm_subs_epu8(t_1, h); /* abs(p1 - p0) - hev_tresh */ \ + const __m128i t_4 = _mm_subs_epu8(t_2, h); /* abs(q1 - q0) - hev_tresh */ \ \ - not_hev = _mm_or_si128(t3, t4); \ + not_hev = _mm_or_si128(t_3, t_4); \ not_hev = _mm_cmpeq_epi8(not_hev, zero); /* not_hev <= t1 && not_hev <= t2 */\ } @@ -314,13 +314,13 @@ static void TransformSSE2(const int16_t* in, uint8_t* dst, int do_two) { // Updates values of 2 pixels at MB edge during complex filtering. // Update operations: -// q = q - a and p = p + a; where a = [(a_hi >> 7), (a_lo >> 7)] +// q = q - delta and p = p + delta; where delta = [(a_hi >> 7), (a_lo >> 7)] #define UPDATE_2PIXELS(pi, qi, a_lo, a_hi) { \ const __m128i a_lo7 = _mm_srai_epi16(a_lo, 7); \ const __m128i a_hi7 = _mm_srai_epi16(a_hi, 7); \ - const __m128i a = _mm_packs_epi16(a_lo7, a_hi7); \ - pi = _mm_adds_epi8(pi, a); \ - qi = _mm_subs_epi8(qi, a); \ + const __m128i delta = _mm_packs_epi16(a_lo7, a_hi7); \ + pi = _mm_adds_epi8(pi, delta); \ + qi = _mm_subs_epi8(qi, delta); \ } static void NeedsFilter(const __m128i* p1, const __m128i* p0, const __m128i* q0, @@ -876,9 +876,15 @@ static void HFilter8iSSE2(uint8_t* u, uint8_t* v, int stride, Store16x4(u, v, stride, &p1, &p0, &q0, &q1); } +#endif // WEBP_USE_SSE2 + +//------------------------------------------------------------------------------ +// Entry point + extern void VP8DspInitSSE2(void); void VP8DspInitSSE2(void) { +#if defined(WEBP_USE_SSE2) VP8Transform = TransformSSE2; VP8VFilter16 = VFilter16SSE2; @@ -894,10 +900,9 @@ void VP8DspInitSSE2(void) { VP8SimpleHFilter16 = SimpleHFilter16SSE2; VP8SimpleVFilter16i = SimpleVFilter16iSSE2; VP8SimpleHFilter16i = SimpleHFilter16iSSE2; +#endif // WEBP_USE_SSE2 } #if defined(__cplusplus) || defined(c_plusplus) } // extern "C" #endif - -#endif // WEBP_USE_SSE2 |