aboutsummaryrefslogtreecommitdiff
path: root/src/dsp/upsampling_sse2.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/dsp/upsampling_sse2.c')
-rw-r--r--src/dsp/upsampling_sse2.c38
1 files changed, 16 insertions, 22 deletions
diff --git a/src/dsp/upsampling_sse2.c b/src/dsp/upsampling_sse2.c
index ba075d11..8cb275a0 100644
--- a/src/dsp/upsampling_sse2.c
+++ b/src/dsp/upsampling_sse2.c
@@ -11,10 +11,6 @@
#include "./dsp.h"
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
#if defined(WEBP_USE_SSE2)
#include <assert.h>
@@ -22,6 +18,10 @@ extern "C" {
#include <string.h>
#include "./yuv.h"
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
#ifdef FANCY_UPSAMPLING
// We compute (9*a + 3*b + 3*c + d + 8) / 16 as follows
@@ -51,12 +51,12 @@ extern "C" {
// pack and store two alterning pixel rows
#define PACK_AND_STORE(a, b, da, db, out) do { \
- const __m128i t_a = _mm_avg_epu8(a, da); /* (9a + 3b + 3c + d + 8) / 16 */ \
- const __m128i t_b = _mm_avg_epu8(b, db); /* (3a + 9b + c + 3d + 8) / 16 */ \
- const __m128i t_1 = _mm_unpacklo_epi8(t_a, t_b); \
- const __m128i t_2 = _mm_unpackhi_epi8(t_a, t_b); \
- _mm_store_si128(((__m128i*)(out)) + 0, t_1); \
- _mm_store_si128(((__m128i*)(out)) + 1, t_2); \
+ const __m128i ta = _mm_avg_epu8(a, da); /* (9a + 3b + 3c + d + 8) / 16 */ \
+ const __m128i tb = _mm_avg_epu8(b, db); /* (3a + 9b + c + 3d + 8) / 16 */ \
+ const __m128i t1 = _mm_unpacklo_epi8(ta, tb); \
+ const __m128i t2 = _mm_unpackhi_epi8(ta, tb); \
+ _mm_store_si128(((__m128i*)(out)) + 0, t1); \
+ _mm_store_si128(((__m128i*)(out)) + 1, t2); \
} while (0)
// Loads 17 pixels each from rows r1 and r2 and generates 32 pixels.
@@ -128,7 +128,7 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
const uint8_t* top_u, const uint8_t* top_v, \
const uint8_t* cur_u, const uint8_t* cur_v, \
uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
- int block; \
+ int b; \
/* 16 byte aligned array to cache reconstructed u and v */ \
uint8_t uv_buf[4 * 32 + 15]; \
uint8_t* const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \
@@ -154,11 +154,11 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
FUNC(bottom_y[0], u0, v0, bottom_dst); \
} \
\
- for (block = 0; block < num_blocks; ++block) { \
+ for (b = 0; b < num_blocks; ++b) { \
UPSAMPLE_32PIXELS(top_u, cur_u, r_uv + 0 * 32); \
UPSAMPLE_32PIXELS(top_v, cur_v, r_uv + 1 * 32); \
CONVERT2RGB(FUNC, XSTEP, top_y, bottom_y, r_uv, top_dst, bottom_dst, \
- 32 * block + 1, 32) \
+ 32 * b + 1, 32) \
top_u += 16; \
cur_u += 16; \
top_v += 16; \
@@ -184,32 +184,26 @@ SSE2_UPSAMPLE_FUNC(UpsampleBgraLinePairSSE2, VP8YuvToBgra, 4)
#undef CONVERT2RGB
#undef SSE2_UPSAMPLE_FUNC
-#endif // FANCY_UPSAMPLING
-
-#endif // WEBP_USE_SSE2
-
//------------------------------------------------------------------------------
extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
void WebPInitUpsamplersSSE2(void) {
-#if defined(WEBP_USE_SSE2)
WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePairSSE2;
WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePairSSE2;
WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePairSSE2;
WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePairSSE2;
-#endif // WEBP_USE_SSE2
}
void WebPInitPremultiplySSE2(void) {
-#if defined(WEBP_USE_SSE2)
WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePairSSE2;
WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePairSSE2;
-#endif // WEBP_USE_SSE2
}
+#endif // FANCY_UPSAMPLING
+
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
-
+#endif // WEBP_USE_SSE2