diff options
Diffstat (limited to 'files/source/row_common.cc')
-rw-r--r-- | files/source/row_common.cc | 826 |
1 files changed, 521 insertions, 305 deletions
diff --git a/files/source/row_common.cc b/files/source/row_common.cc index 83442496..8be37fb5 100644 --- a/files/source/row_common.cc +++ b/files/source/row_common.cc @@ -21,6 +21,12 @@ namespace libyuv { extern "C" { #endif +#ifdef __cplusplus +#define STATIC_CAST(type, expr) static_cast<type>(expr) +#else +#define STATIC_CAST(type, expr) (type)(expr) +#endif + // This macro controls YUV to RGB using unsigned math to extend range of // YUV to RGB coefficients to 0 to 4 instead of 0 to 2 for more accuracy on B: // LIBYUV_UNLIMITED_DATA @@ -182,12 +188,13 @@ void RGB565ToARGBRow_C(const uint8_t* src_rgb565, int width) { int x; for (x = 0; x < width; ++x) { - uint8_t b = src_rgb565[0] & 0x1f; - uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); - uint8_t r = src_rgb565[1] >> 3; - dst_argb[0] = (b << 3) | (b >> 2); - dst_argb[1] = (g << 2) | (g >> 4); - dst_argb[2] = (r << 3) | (r >> 2); + uint8_t b = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f); + uint8_t g = STATIC_CAST( + uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3)); + uint8_t r = STATIC_CAST(uint8_t, src_rgb565[1] >> 3); + dst_argb[0] = STATIC_CAST(uint8_t, (b << 3) | (b >> 2)); + dst_argb[1] = STATIC_CAST(uint8_t, (g << 2) | (g >> 4)); + dst_argb[2] = STATIC_CAST(uint8_t, (r << 3) | (r >> 2)); dst_argb[3] = 255u; dst_argb += 4; src_rgb565 += 2; @@ -199,13 +206,14 @@ void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555, int width) { int x; for (x = 0; x < width; ++x) { - uint8_t b = src_argb1555[0] & 0x1f; - uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); - uint8_t r = (src_argb1555[1] & 0x7c) >> 2; - uint8_t a = src_argb1555[1] >> 7; - dst_argb[0] = (b << 3) | (b >> 2); - dst_argb[1] = (g << 3) | (g >> 2); - dst_argb[2] = (r << 3) | (r >> 2); + uint8_t b = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f); + uint8_t g = STATIC_CAST( + uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3)); + uint8_t r = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2); + uint8_t a = STATIC_CAST(uint8_t, src_argb1555[1] >> 7); + dst_argb[0] = STATIC_CAST(uint8_t, (b << 3) | (b >> 2)); + dst_argb[1] = STATIC_CAST(uint8_t, (g << 3) | (g >> 2)); + dst_argb[2] = STATIC_CAST(uint8_t, (r << 3) | (r >> 2)); dst_argb[3] = -a; dst_argb += 4; src_argb1555 += 2; @@ -217,14 +225,14 @@ void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444, int width) { int x; for (x = 0; x < width; ++x) { - uint8_t b = src_argb4444[0] & 0x0f; - uint8_t g = src_argb4444[0] >> 4; - uint8_t r = src_argb4444[1] & 0x0f; - uint8_t a = src_argb4444[1] >> 4; - dst_argb[0] = (b << 4) | b; - dst_argb[1] = (g << 4) | g; - dst_argb[2] = (r << 4) | r; - dst_argb[3] = (a << 4) | a; + uint8_t b = STATIC_CAST(uint8_t, src_argb4444[0] & 0x0f); + uint8_t g = STATIC_CAST(uint8_t, src_argb4444[0] >> 4); + uint8_t r = STATIC_CAST(uint8_t, src_argb4444[1] & 0x0f); + uint8_t a = STATIC_CAST(uint8_t, src_argb4444[1] >> 4); + dst_argb[0] = STATIC_CAST(uint8_t, (b << 4) | b); + dst_argb[1] = STATIC_CAST(uint8_t, (g << 4) | g); + dst_argb[2] = STATIC_CAST(uint8_t, (r << 4) | r); + dst_argb[3] = STATIC_CAST(uint8_t, (a << 4) | a); dst_argb += 4; src_argb4444 += 2; } @@ -320,7 +328,7 @@ void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { uint8_t b0 = src_argb[0] >> 3; uint8_t g0 = src_argb[1] >> 2; uint8_t r0 = src_argb[2] >> 3; - *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11); + *(uint16_t*)(dst_rgb) = STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11)); } } @@ -334,29 +342,31 @@ void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { // or the upper byte for big endian. void ARGBToRGB565DitherRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, - const uint32_t dither4, + uint32_t dither4, int width) { int x; for (x = 0; x < width - 1; x += 2) { int dither0 = ((const unsigned char*)(&dither4))[x & 3]; int dither1 = ((const unsigned char*)(&dither4))[(x + 1) & 3]; - uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3; - uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2; - uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3; - uint8_t b1 = clamp255(src_argb[4] + dither1) >> 3; - uint8_t g1 = clamp255(src_argb[5] + dither1) >> 2; - uint8_t r1 = clamp255(src_argb[6] + dither1) >> 3; - *(uint16_t*)(dst_rgb + 0) = b0 | (g0 << 5) | (r0 << 11); - *(uint16_t*)(dst_rgb + 2) = b1 | (g1 << 5) | (r1 << 11); + uint8_t b0 = STATIC_CAST(uint8_t, clamp255(src_argb[0] + dither0) >> 3); + uint8_t g0 = STATIC_CAST(uint8_t, clamp255(src_argb[1] + dither0) >> 2); + uint8_t r0 = STATIC_CAST(uint8_t, clamp255(src_argb[2] + dither0) >> 3); + uint8_t b1 = STATIC_CAST(uint8_t, clamp255(src_argb[4] + dither1) >> 3); + uint8_t g1 = STATIC_CAST(uint8_t, clamp255(src_argb[5] + dither1) >> 2); + uint8_t r1 = STATIC_CAST(uint8_t, clamp255(src_argb[6] + dither1) >> 3); + *(uint16_t*)(dst_rgb + 0) = + STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11)); + *(uint16_t*)(dst_rgb + 2) = + STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 11)); dst_rgb += 4; src_argb += 8; } if (width & 1) { int dither0 = ((const unsigned char*)(&dither4))[(width - 1) & 3]; - uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3; - uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2; - uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3; - *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11); + uint8_t b0 = STATIC_CAST(uint8_t, clamp255(src_argb[0] + dither0) >> 3); + uint8_t g0 = STATIC_CAST(uint8_t, clamp255(src_argb[1] + dither0) >> 2); + uint8_t r0 = STATIC_CAST(uint8_t, clamp255(src_argb[2] + dither0) >> 3); + *(uint16_t*)(dst_rgb) = STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11)); } } @@ -371,8 +381,10 @@ void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { uint8_t g1 = src_argb[5] >> 3; uint8_t r1 = src_argb[6] >> 3; uint8_t a1 = src_argb[7] >> 7; - *(uint16_t*)(dst_rgb + 0) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15); - *(uint16_t*)(dst_rgb + 2) = b1 | (g1 << 5) | (r1 << 10) | (a1 << 15); + *(uint16_t*)(dst_rgb + 0) = + STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | (a0 << 15)); + *(uint16_t*)(dst_rgb + 2) = + STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 10) | (a1 << 15)); dst_rgb += 4; src_argb += 8; } @@ -381,7 +393,8 @@ void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { uint8_t g0 = src_argb[1] >> 3; uint8_t r0 = src_argb[2] >> 3; uint8_t a0 = src_argb[3] >> 7; - *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15); + *(uint16_t*)(dst_rgb) = + STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | (a0 << 15)); } } @@ -396,8 +409,10 @@ void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { uint8_t g1 = src_argb[5] >> 4; uint8_t r1 = src_argb[6] >> 4; uint8_t a1 = src_argb[7] >> 4; - *(uint16_t*)(dst_rgb + 0) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12); - *(uint16_t*)(dst_rgb + 2) = b1 | (g1 << 4) | (r1 << 8) | (a1 << 12); + *(uint16_t*)(dst_rgb + 0) = + STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | (a0 << 12)); + *(uint16_t*)(dst_rgb + 2) = + STATIC_CAST(uint16_t, b1 | (g1 << 4) | (r1 << 8) | (a1 << 12)); dst_rgb += 4; src_argb += 8; } @@ -406,18 +421,20 @@ void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { uint8_t g0 = src_argb[1] >> 4; uint8_t r0 = src_argb[2] >> 4; uint8_t a0 = src_argb[3] >> 4; - *(uint16_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12); + *(uint16_t*)(dst_rgb) = + STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | (a0 << 12)); } } void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width) { int x; for (x = 0; x < width; ++x) { - uint32_t b0 = (src_abgr[0] >> 6) | ((uint32_t)(src_abgr[0]) << 2); + uint32_t r0 = (src_abgr[0] >> 6) | ((uint32_t)(src_abgr[0]) << 2); uint32_t g0 = (src_abgr[1] >> 6) | ((uint32_t)(src_abgr[1]) << 2); - uint32_t r0 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2); + uint32_t b0 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2); uint32_t a0 = (src_abgr[3] >> 6); - *(uint32_t*)(dst_ar30) = r0 | (g0 << 10) | (b0 << 20) | (a0 << 30); + *(uint32_t*)(dst_ar30) = + STATIC_CAST(uint32_t, b0 | (g0 << 10) | (r0 << 20) | (a0 << 30)); dst_ar30 += 4; src_abgr += 4; } @@ -430,7 +447,8 @@ void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) { uint32_t g0 = (src_argb[1] >> 6) | ((uint32_t)(src_argb[1]) << 2); uint32_t r0 = (src_argb[2] >> 6) | ((uint32_t)(src_argb[2]) << 2); uint32_t a0 = (src_argb[3] >> 6); - *(uint32_t*)(dst_ar30) = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30); + *(uint32_t*)(dst_ar30) = + STATIC_CAST(uint32_t, b0 | (g0 << 10) | (r0 << 20) | (a0 << 30)); dst_ar30 += 4; src_argb += 4; } @@ -439,10 +457,14 @@ void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) { void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width) { int x; for (x = 0; x < width; ++x) { - dst_ar64[0] = src_argb[0] * 0x0101; - dst_ar64[1] = src_argb[1] * 0x0101; - dst_ar64[2] = src_argb[2] * 0x0101; - dst_ar64[3] = src_argb[3] * 0x0101; + uint16_t b = src_argb[0] * 0x0101; + uint16_t g = src_argb[1] * 0x0101; + uint16_t r = src_argb[2] * 0x0101; + uint16_t a = src_argb[3] * 0x0101; + dst_ar64[0] = b; + dst_ar64[1] = g; + dst_ar64[2] = r; + dst_ar64[3] = a; dst_ar64 += 4; src_argb += 4; } @@ -451,10 +473,14 @@ void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width) { void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width) { int x; for (x = 0; x < width; ++x) { - dst_ab64[0] = src_argb[2] * 0x0101; - dst_ab64[1] = src_argb[1] * 0x0101; - dst_ab64[2] = src_argb[0] * 0x0101; - dst_ab64[3] = src_argb[3] * 0x0101; + uint16_t b = src_argb[0] * 0x0101; + uint16_t g = src_argb[1] * 0x0101; + uint16_t r = src_argb[2] * 0x0101; + uint16_t a = src_argb[3] * 0x0101; + dst_ab64[0] = r; + dst_ab64[1] = g; + dst_ab64[2] = b; + dst_ab64[3] = a; dst_ab64 += 4; src_argb += 4; } @@ -463,10 +489,14 @@ void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width) { void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width) { int x; for (x = 0; x < width; ++x) { - dst_argb[0] = src_ar64[0] >> 8; - dst_argb[1] = src_ar64[1] >> 8; - dst_argb[2] = src_ar64[2] >> 8; - dst_argb[3] = src_ar64[3] >> 8; + uint8_t b = src_ar64[0] >> 8; + uint8_t g = src_ar64[1] >> 8; + uint8_t r = src_ar64[2] >> 8; + uint8_t a = src_ar64[3] >> 8; + dst_argb[0] = b; + dst_argb[1] = g; + dst_argb[2] = r; + dst_argb[3] = a; dst_argb += 4; src_ar64 += 4; } @@ -475,10 +505,14 @@ void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width) { void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width) { int x; for (x = 0; x < width; ++x) { - dst_argb[0] = src_ab64[2] >> 8; - dst_argb[1] = src_ab64[1] >> 8; - dst_argb[2] = src_ab64[0] >> 8; - dst_argb[3] = src_ab64[3] >> 8; + uint8_t r = src_ab64[0] >> 8; + uint8_t g = src_ab64[1] >> 8; + uint8_t b = src_ab64[2] >> 8; + uint8_t a = src_ab64[3] >> 8; + dst_argb[0] = b; + dst_argb[1] = g; + dst_argb[2] = r; + dst_argb[3] = a; dst_argb += 4; src_ab64 += 4; } @@ -514,8 +548,8 @@ void AR64ShuffleRow_C(const uint8_t* src_ar64, #ifdef LIBYUV_RGB7 // Old 7 bit math for compatibility on unsupported platforms. -static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) { - return ((33 * r + 65 * g + 13 * b) >> 7) + 16; +static __inline uint8_t RGBToY(uint8_t r, uint8_t g, uint8_t b) { + return STATIC_CAST(uint8_t, ((33 * r + 65 * g + 13 * b) >> 7) + 16); } #else // 8 bit @@ -524,8 +558,8 @@ static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) { // return (66 * ((int)r - 128) + 129 * ((int)g - 128) + 25 * ((int)b - 128) + // 0x7e80) >> 8; -static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) { - return (66 * r + 129 * g + 25 * b + 0x1080) >> 8; +static __inline uint8_t RGBToY(uint8_t r, uint8_t g, uint8_t b) { + return STATIC_CAST(uint8_t, (66 * r + 129 * g + 25 * b + 0x1080) >> 8); } #endif @@ -533,29 +567,31 @@ static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) { // LIBYUV_RGBTOU_TRUNCATE mimics x86 code that does not round. #ifdef LIBYUV_RGBTOU_TRUNCATE -static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) { - return (112 * b - 74 * g - 38 * r + 0x8000) >> 8; +static __inline uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) { + return STATIC_CAST(uint8_t, (112 * b - 74 * g - 38 * r + 0x8000) >> 8); } -static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) { - return (112 * r - 94 * g - 18 * b + 0x8000) >> 8; +static __inline uint8_t RGBToV(uint8_t r, uint8_t g, uint8_t b) { + return STATIC_CAST(uint8_t, (112 * r - 94 * g - 18 * b + 0x8000) >> 8); } #else // TODO(fbarchard): Add rounding to x86 SIMD and use this -static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) { - return (112 * b - 74 * g - 38 * r + 0x8080) >> 8; +static __inline uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) { + return STATIC_CAST(uint8_t, (112 * b - 74 * g - 38 * r + 0x8080) >> 8); } -static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) { - return (112 * r - 94 * g - 18 * b + 0x8080) >> 8; +static __inline uint8_t RGBToV(uint8_t r, uint8_t g, uint8_t b) { + return STATIC_CAST(uint8_t, (112 * r - 94 * g - 18 * b + 0x8080) >> 8); } #endif // LIBYUV_ARGBTOUV_PAVGB mimics x86 code that subsamples with 2 pavgb. #if !defined(LIBYUV_ARGBTOUV_PAVGB) static __inline int RGB2xToU(uint16_t r, uint16_t g, uint16_t b) { - return ((112 / 2) * b - (74 / 2) * g - (38 / 2) * r + 0x8080) >> 8; + return STATIC_CAST( + uint8_t, ((112 / 2) * b - (74 / 2) * g - (38 / 2) * r + 0x8080) >> 8); } static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) { - return ((112 / 2) * r - (94 / 2) * g - (18 / 2) * b + 0x8080) >> 8; + return STATIC_CAST( + uint8_t, ((112 / 2) * r - (94 / 2) * g - (18 / 2) * b + 0x8080) >> 8); } #endif @@ -674,28 +710,28 @@ MAKEROWY(RAW, 0, 1, 2, 3) #ifdef LIBYUV_RGB7 // Old 7 bit math for compatibility on unsupported platforms. -static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) { +static __inline uint8_t RGBToYJ(uint8_t r, uint8_t g, uint8_t b) { return (38 * r + 75 * g + 15 * b + 64) >> 7; } #else // 8 bit -static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) { +static __inline uint8_t RGBToYJ(uint8_t r, uint8_t g, uint8_t b) { return (77 * r + 150 * g + 29 * b + 128) >> 8; } #endif #if defined(LIBYUV_ARGBTOUV_PAVGB) -static __inline int RGBToUJ(uint8_t r, uint8_t g, uint8_t b) { +static __inline uint8_t RGBToUJ(uint8_t r, uint8_t g, uint8_t b) { return (127 * b - 84 * g - 43 * r + 0x8080) >> 8; } -static __inline int RGBToVJ(uint8_t r, uint8_t g, uint8_t b) { +static __inline uint8_t RGBToVJ(uint8_t r, uint8_t g, uint8_t b) { return (127 * r - 107 * g - 20 * b + 0x8080) >> 8; } #else -static __inline int RGB2xToUJ(uint16_t r, uint16_t g, uint16_t b) { +static __inline uint8_t RGB2xToUJ(uint16_t r, uint16_t g, uint16_t b) { return ((127 / 2) * b - (84 / 2) * g - (43 / 2) * r + 0x8080) >> 8; } -static __inline int RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) { +static __inline uint8_t RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) { return ((127 / 2) * r - (107 / 2) * g - (20 / 2) * b + 0x8080) >> 8; } #endif @@ -782,6 +818,7 @@ static __inline int RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) { #endif MAKEROWYJ(ARGB, 2, 1, 0, 4) +MAKEROWYJ(ABGR, 0, 1, 2, 4) MAKEROWYJ(RGBA, 3, 2, 1, 4) MAKEROWYJ(RGB24, 2, 1, 0, 3) MAKEROWYJ(RAW, 0, 1, 2, 3) @@ -791,11 +828,12 @@ void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width) { int x; for (x = 0; x < width; ++x) { uint8_t b = src_rgb565[0] & 0x1f; - uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); + uint8_t g = STATIC_CAST( + uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3)); uint8_t r = src_rgb565[1] >> 3; - b = (b << 3) | (b >> 2); - g = (g << 2) | (g >> 4); - r = (r << 3) | (r >> 2); + b = STATIC_CAST(uint8_t, (b << 3) | (b >> 2)); + g = STATIC_CAST(uint8_t, (g << 2) | (g >> 4)); + r = STATIC_CAST(uint8_t, (r << 3) | (r >> 2)); dst_y[0] = RGBToY(r, g, b); src_rgb565 += 2; dst_y += 1; @@ -806,11 +844,12 @@ void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width) { int x; for (x = 0; x < width; ++x) { uint8_t b = src_argb1555[0] & 0x1f; - uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); + uint8_t g = STATIC_CAST( + uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3)); uint8_t r = (src_argb1555[1] & 0x7c) >> 2; - b = (b << 3) | (b >> 2); - g = (g << 3) | (g >> 2); - r = (r << 3) | (r >> 2); + b = STATIC_CAST(uint8_t, (b << 3) | (b >> 2)); + g = STATIC_CAST(uint8_t, (g << 3) | (g >> 2)); + r = STATIC_CAST(uint8_t, (r << 3) | (r >> 2)); dst_y[0] = RGBToY(r, g, b); src_argb1555 += 2; dst_y += 1; @@ -823,9 +862,9 @@ void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width) { uint8_t b = src_argb4444[0] & 0x0f; uint8_t g = src_argb4444[0] >> 4; uint8_t r = src_argb4444[1] & 0x0f; - b = (b << 4) | b; - g = (g << 4) | g; - r = (r << 4) | r; + b = STATIC_CAST(uint8_t, (b << 4) | b); + g = STATIC_CAST(uint8_t, (g << 4) | g); + r = STATIC_CAST(uint8_t, (r << 4) | r); dst_y[0] = RGBToY(r, g, b); src_argb4444 += 2; dst_y += 1; @@ -840,31 +879,35 @@ void RGB565ToUVRow_C(const uint8_t* src_rgb565, const uint8_t* next_rgb565 = src_rgb565 + src_stride_rgb565; int x; for (x = 0; x < width - 1; x += 2) { - uint8_t b0 = src_rgb565[0] & 0x1f; - uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); - uint8_t r0 = src_rgb565[1] >> 3; - uint8_t b1 = src_rgb565[2] & 0x1f; - uint8_t g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3); - uint8_t r1 = src_rgb565[3] >> 3; - uint8_t b2 = next_rgb565[0] & 0x1f; - uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3); - uint8_t r2 = next_rgb565[1] >> 3; - uint8_t b3 = next_rgb565[2] & 0x1f; - uint8_t g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3); - uint8_t r3 = next_rgb565[3] >> 3; - - b0 = (b0 << 3) | (b0 >> 2); - g0 = (g0 << 2) | (g0 >> 4); - r0 = (r0 << 3) | (r0 >> 2); - b1 = (b1 << 3) | (b1 >> 2); - g1 = (g1 << 2) | (g1 >> 4); - r1 = (r1 << 3) | (r1 >> 2); - b2 = (b2 << 3) | (b2 >> 2); - g2 = (g2 << 2) | (g2 >> 4); - r2 = (r2 << 3) | (r2 >> 2); - b3 = (b3 << 3) | (b3 >> 2); - g3 = (g3 << 2) | (g3 >> 4); - r3 = (r3 << 3) | (r3 >> 2); + uint8_t b0 = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f); + uint8_t g0 = STATIC_CAST( + uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3)); + uint8_t r0 = STATIC_CAST(uint8_t, src_rgb565[1] >> 3); + uint8_t b1 = STATIC_CAST(uint8_t, src_rgb565[2] & 0x1f); + uint8_t g1 = STATIC_CAST( + uint8_t, (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3)); + uint8_t r1 = STATIC_CAST(uint8_t, src_rgb565[3] >> 3); + uint8_t b2 = STATIC_CAST(uint8_t, next_rgb565[0] & 0x1f); + uint8_t g2 = STATIC_CAST( + uint8_t, (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3)); + uint8_t r2 = STATIC_CAST(uint8_t, next_rgb565[1] >> 3); + uint8_t b3 = STATIC_CAST(uint8_t, next_rgb565[2] & 0x1f); + uint8_t g3 = STATIC_CAST( + uint8_t, (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3)); + uint8_t r3 = STATIC_CAST(uint8_t, next_rgb565[3] >> 3); + + b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2)); + g0 = STATIC_CAST(uint8_t, (g0 << 2) | (g0 >> 4)); + r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2)); + b1 = STATIC_CAST(uint8_t, (b1 << 3) | (b1 >> 2)); + g1 = STATIC_CAST(uint8_t, (g1 << 2) | (g1 >> 4)); + r1 = STATIC_CAST(uint8_t, (r1 << 3) | (r1 >> 2)); + b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2)); + g2 = STATIC_CAST(uint8_t, (g2 << 2) | (g2 >> 4)); + r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2)); + b3 = STATIC_CAST(uint8_t, (b3 << 3) | (b3 >> 2)); + g3 = STATIC_CAST(uint8_t, (g3 << 2) | (g3 >> 4)); + r3 = STATIC_CAST(uint8_t, (r3 << 3) | (r3 >> 2)); #if LIBYUV_ARGBTOUV_PAVGB uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3)); @@ -886,19 +929,20 @@ void RGB565ToUVRow_C(const uint8_t* src_rgb565, dst_v += 1; } if (width & 1) { - uint8_t b0 = src_rgb565[0] & 0x1f; - uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); - uint8_t r0 = src_rgb565[1] >> 3; - uint8_t b2 = next_rgb565[0] & 0x1f; - uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3); - uint8_t r2 = next_rgb565[1] >> 3; - - b0 = (b0 << 3) | (b0 >> 2); - g0 = (g0 << 2) | (g0 >> 4); - r0 = (r0 << 3) | (r0 >> 2); - b2 = (b2 << 3) | (b2 >> 2); - g2 = (g2 << 2) | (g2 >> 4); - r2 = (r2 << 3) | (r2 >> 2); + uint8_t b0 = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f); + uint8_t g0 = STATIC_CAST( + uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3)); + uint8_t r0 = STATIC_CAST(uint8_t, src_rgb565[1] >> 3); + uint8_t b2 = STATIC_CAST(uint8_t, next_rgb565[0] & 0x1f); + uint8_t g2 = STATIC_CAST( + uint8_t, (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3)); + uint8_t r2 = STATIC_CAST(uint8_t, next_rgb565[1] >> 3); + b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2)); + g0 = STATIC_CAST(uint8_t, (g0 << 2) | (g0 >> 4)); + r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2)); + b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2)); + g2 = STATIC_CAST(uint8_t, (g2 << 2) | (g2 >> 4)); + r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2)); #if LIBYUV_ARGBTOUV_PAVGB uint8_t ab = AVGB(b0, b2); @@ -924,31 +968,35 @@ void ARGB1555ToUVRow_C(const uint8_t* src_argb1555, const uint8_t* next_argb1555 = src_argb1555 + src_stride_argb1555; int x; for (x = 0; x < width - 1; x += 2) { - uint8_t b0 = src_argb1555[0] & 0x1f; - uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); - uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2; - uint8_t b1 = src_argb1555[2] & 0x1f; - uint8_t g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3); - uint8_t r1 = (src_argb1555[3] & 0x7c) >> 2; - uint8_t b2 = next_argb1555[0] & 0x1f; - uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3); - uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2; - uint8_t b3 = next_argb1555[2] & 0x1f; - uint8_t g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3); - uint8_t r3 = (next_argb1555[3] & 0x7c) >> 2; - - b0 = (b0 << 3) | (b0 >> 2); - g0 = (g0 << 3) | (g0 >> 2); - r0 = (r0 << 3) | (r0 >> 2); - b1 = (b1 << 3) | (b1 >> 2); - g1 = (g1 << 3) | (g1 >> 2); - r1 = (r1 << 3) | (r1 >> 2); - b2 = (b2 << 3) | (b2 >> 2); - g2 = (g2 << 3) | (g2 >> 2); - r2 = (r2 << 3) | (r2 >> 2); - b3 = (b3 << 3) | (b3 >> 2); - g3 = (g3 << 3) | (g3 >> 2); - r3 = (r3 << 3) | (r3 >> 2); + uint8_t b0 = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f); + uint8_t g0 = STATIC_CAST( + uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3)); + uint8_t r0 = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2); + uint8_t b1 = STATIC_CAST(uint8_t, src_argb1555[2] & 0x1f); + uint8_t g1 = STATIC_CAST( + uint8_t, (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3)); + uint8_t r1 = STATIC_CAST(uint8_t, (src_argb1555[3] & 0x7c) >> 2); + uint8_t b2 = STATIC_CAST(uint8_t, next_argb1555[0] & 0x1f); + uint8_t g2 = STATIC_CAST( + uint8_t, (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3)); + uint8_t r2 = STATIC_CAST(uint8_t, (next_argb1555[1] & 0x7c) >> 2); + uint8_t b3 = STATIC_CAST(uint8_t, next_argb1555[2] & 0x1f); + uint8_t g3 = STATIC_CAST( + uint8_t, (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3)); + uint8_t r3 = STATIC_CAST(uint8_t, (next_argb1555[3] & 0x7c) >> 2); + + b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2)); + g0 = STATIC_CAST(uint8_t, (g0 << 3) | (g0 >> 2)); + r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2)); + b1 = STATIC_CAST(uint8_t, (b1 << 3) | (b1 >> 2)); + g1 = STATIC_CAST(uint8_t, (g1 << 3) | (g1 >> 2)); + r1 = STATIC_CAST(uint8_t, (r1 << 3) | (r1 >> 2)); + b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2)); + g2 = STATIC_CAST(uint8_t, (g2 << 3) | (g2 >> 2)); + r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2)); + b3 = STATIC_CAST(uint8_t, (b3 << 3) | (b3 >> 2)); + g3 = STATIC_CAST(uint8_t, (g3 << 3) | (g3 >> 2)); + r3 = STATIC_CAST(uint8_t, (r3 << 3) | (r3 >> 2)); #if LIBYUV_ARGBTOUV_PAVGB uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3)); @@ -970,19 +1018,21 @@ void ARGB1555ToUVRow_C(const uint8_t* src_argb1555, dst_v += 1; } if (width & 1) { - uint8_t b0 = src_argb1555[0] & 0x1f; - uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); - uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2; - uint8_t b2 = next_argb1555[0] & 0x1f; - uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3); - uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2; - - b0 = (b0 << 3) | (b0 >> 2); - g0 = (g0 << 3) | (g0 >> 2); - r0 = (r0 << 3) | (r0 >> 2); - b2 = (b2 << 3) | (b2 >> 2); - g2 = (g2 << 3) | (g2 >> 2); - r2 = (r2 << 3) | (r2 >> 2); + uint8_t b0 = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f); + uint8_t g0 = STATIC_CAST( + uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3)); + uint8_t r0 = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2); + uint8_t b2 = STATIC_CAST(uint8_t, next_argb1555[0] & 0x1f); + uint8_t g2 = STATIC_CAST( + uint8_t, (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3)); + uint8_t r2 = STATIC_CAST(uint8_t, (next_argb1555[1] & 0x7c) >> 2); + + b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2)); + g0 = STATIC_CAST(uint8_t, (g0 << 3) | (g0 >> 2)); + r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2)); + b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2)); + g2 = STATIC_CAST(uint8_t, (g2 << 3) | (g2 >> 2)); + r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2)); #if LIBYUV_ARGBTOUV_PAVGB uint8_t ab = AVGB(b0, b2); @@ -1021,18 +1071,18 @@ void ARGB4444ToUVRow_C(const uint8_t* src_argb4444, uint8_t g3 = next_argb4444[2] >> 4; uint8_t r3 = next_argb4444[3] & 0x0f; - b0 = (b0 << 4) | b0; - g0 = (g0 << 4) | g0; - r0 = (r0 << 4) | r0; - b1 = (b1 << 4) | b1; - g1 = (g1 << 4) | g1; - r1 = (r1 << 4) | r1; - b2 = (b2 << 4) | b2; - g2 = (g2 << 4) | g2; - r2 = (r2 << 4) | r2; - b3 = (b3 << 4) | b3; - g3 = (g3 << 4) | g3; - r3 = (r3 << 4) | r3; + b0 = STATIC_CAST(uint8_t, (b0 << 4) | b0); + g0 = STATIC_CAST(uint8_t, (g0 << 4) | g0); + r0 = STATIC_CAST(uint8_t, (r0 << 4) | r0); + b1 = STATIC_CAST(uint8_t, (b1 << 4) | b1); + g1 = STATIC_CAST(uint8_t, (g1 << 4) | g1); + r1 = STATIC_CAST(uint8_t, (r1 << 4) | r1); + b2 = STATIC_CAST(uint8_t, (b2 << 4) | b2); + g2 = STATIC_CAST(uint8_t, (g2 << 4) | g2); + r2 = STATIC_CAST(uint8_t, (r2 << 4) | r2); + b3 = STATIC_CAST(uint8_t, (b3 << 4) | b3); + g3 = STATIC_CAST(uint8_t, (g3 << 4) | g3); + r3 = STATIC_CAST(uint8_t, (r3 << 4) | r3); #if LIBYUV_ARGBTOUV_PAVGB uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3)); @@ -1061,12 +1111,12 @@ void ARGB4444ToUVRow_C(const uint8_t* src_argb4444, uint8_t g2 = next_argb4444[0] >> 4; uint8_t r2 = next_argb4444[1] & 0x0f; - b0 = (b0 << 4) | b0; - g0 = (g0 << 4) | g0; - r0 = (r0 << 4) | r0; - b2 = (b2 << 4) | b2; - g2 = (g2 << 4) | g2; - r2 = (r2 << 4) | r2; + b0 = STATIC_CAST(uint8_t, (b0 << 4) | b0); + g0 = STATIC_CAST(uint8_t, (g0 << 4) | g0); + r0 = STATIC_CAST(uint8_t, (r0 << 4) | r0); + b2 = STATIC_CAST(uint8_t, (b2 << 4) | b2); + g2 = STATIC_CAST(uint8_t, (g2 << 4) | g2); + r2 = STATIC_CAST(uint8_t, (r2 << 4) | r2); #if LIBYUV_ARGBTOUV_PAVGB uint8_t ab = AVGB(b0, b2); @@ -1123,9 +1173,9 @@ void ARGBSepiaRow_C(uint8_t* dst_argb, int width) { int sg = (b * 22 + g * 88 + r * 45) >> 7; int sr = (b * 24 + g * 98 + r * 50) >> 7; // b does not over flow. a is preserved from original. - dst_argb[0] = sb; - dst_argb[1] = clamp255(sg); - dst_argb[2] = clamp255(sr); + dst_argb[0] = STATIC_CAST(uint8_t, sb); + dst_argb[1] = STATIC_CAST(uint8_t, clamp255(sg)); + dst_argb[2] = STATIC_CAST(uint8_t, clamp255(sr)); dst_argb += 4; } } @@ -1154,10 +1204,10 @@ void ARGBColorMatrixRow_C(const uint8_t* src_argb, int sa = (b * matrix_argb[12] + g * matrix_argb[13] + r * matrix_argb[14] + a * matrix_argb[15]) >> 6; - dst_argb[0] = Clamp(sb); - dst_argb[1] = Clamp(sg); - dst_argb[2] = Clamp(sr); - dst_argb[3] = Clamp(sa); + dst_argb[0] = STATIC_CAST(uint8_t, Clamp(sb)); + dst_argb[1] = STATIC_CAST(uint8_t, Clamp(sg)); + dst_argb[2] = STATIC_CAST(uint8_t, Clamp(sr)); + dst_argb[3] = STATIC_CAST(uint8_t, Clamp(sa)); src_argb += 4; dst_argb += 4; } @@ -1207,9 +1257,12 @@ void ARGBQuantizeRow_C(uint8_t* dst_argb, int b = dst_argb[0]; int g = dst_argb[1]; int r = dst_argb[2]; - dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset; - dst_argb[1] = (g * scale >> 16) * interval_size + interval_offset; - dst_argb[2] = (r * scale >> 16) * interval_size + interval_offset; + dst_argb[0] = STATIC_CAST( + uint8_t, (b * scale >> 16) * interval_size + interval_offset); + dst_argb[1] = STATIC_CAST( + uint8_t, (g * scale >> 16) * interval_size + interval_offset); + dst_argb[2] = STATIC_CAST( + uint8_t, (r * scale >> 16) * interval_size + interval_offset); dst_argb += 4; } } @@ -1260,10 +1313,10 @@ void ARGBMultiplyRow_C(const uint8_t* src_argb, const uint32_t g_scale = src_argb1[1]; const uint32_t r_scale = src_argb1[2]; const uint32_t a_scale = src_argb1[3]; - dst_argb[0] = SHADE(b, b_scale); - dst_argb[1] = SHADE(g, g_scale); - dst_argb[2] = SHADE(r, r_scale); - dst_argb[3] = SHADE(a, a_scale); + dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_scale)); + dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_scale)); + dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_scale)); + dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_scale)); src_argb += 4; src_argb1 += 4; dst_argb += 4; @@ -1288,10 +1341,10 @@ void ARGBAddRow_C(const uint8_t* src_argb, const int g_add = src_argb1[1]; const int r_add = src_argb1[2]; const int a_add = src_argb1[3]; - dst_argb[0] = SHADE(b, b_add); - dst_argb[1] = SHADE(g, g_add); - dst_argb[2] = SHADE(r, r_add); - dst_argb[3] = SHADE(a, a_add); + dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_add)); + dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_add)); + dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_add)); + dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_add)); src_argb += 4; src_argb1 += 4; dst_argb += 4; @@ -1315,10 +1368,10 @@ void ARGBSubtractRow_C(const uint8_t* src_argb, const int g_sub = src_argb1[1]; const int r_sub = src_argb1[2]; const int a_sub = src_argb1[3]; - dst_argb[0] = SHADE(b, b_sub); - dst_argb[1] = SHADE(g, g_sub); - dst_argb[2] = SHADE(r, r_sub); - dst_argb[3] = SHADE(a, a_sub); + dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_sub)); + dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_sub)); + dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_sub)); + dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_sub)); src_argb += 4; src_argb1 += 4; dst_argb += 4; @@ -1431,7 +1484,7 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) { // clang-format off -#if defined(__aarch64__) || defined(__arm__) +#if defined(__aarch64__) || defined(__arm__) || defined(__riscv) // Bias values include subtract 128 from U and V, bias from Y and rounding. // For B and R bias is negative. For G bias is positive. #define YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR) \ @@ -1627,7 +1680,7 @@ MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR) #undef MAKEYUVCONSTANTS -#if defined(__aarch64__) || defined(__arm__) +#if defined(__aarch64__) || defined(__arm__) || defined(__riscv) #define LOAD_YUV_CONSTANTS \ int ub = yuvconstants->kUVCoeff[0]; \ int vr = yuvconstants->kUVCoeff[1]; \ @@ -1675,9 +1728,9 @@ static __inline void YuvPixel(uint8_t y, LOAD_YUV_CONSTANTS; uint32_t y32 = y * 0x0101; CALC_RGB16; - *b = Clamp((int32_t)(b16) >> 6); - *g = Clamp((int32_t)(g16) >> 6); - *r = Clamp((int32_t)(r16) >> 6); + *b = STATIC_CAST(uint8_t, Clamp((int32_t)(b16) >> 6)); + *g = STATIC_CAST(uint8_t, Clamp((int32_t)(g16) >> 6)); + *r = STATIC_CAST(uint8_t, Clamp((int32_t)(r16) >> 6)); } // Reads 8 bit YUV and leaves result as 16 bit. @@ -1706,9 +1759,9 @@ static __inline void YuvPixel10_16(uint16_t y, int* r, const struct YuvConstants* yuvconstants) { LOAD_YUV_CONSTANTS; - uint32_t y32 = y << 6; - u = clamp255(u >> 2); - v = clamp255(v >> 2); + uint32_t y32 = (y << 6) | (y >> 4); + u = STATIC_CAST(uint8_t, clamp255(u >> 2)); + v = STATIC_CAST(uint8_t, clamp255(v >> 2)); CALC_RGB16; *b = b16; *g = g16; @@ -1725,9 +1778,9 @@ static __inline void YuvPixel12_16(int16_t y, int* r, const struct YuvConstants* yuvconstants) { LOAD_YUV_CONSTANTS; - uint32_t y32 = y << 4; - u = clamp255(u >> 4); - v = clamp255(v >> 4); + uint32_t y32 = (y << 4) | (y >> 8); + u = STATIC_CAST(uint8_t, clamp255(u >> 4)); + v = STATIC_CAST(uint8_t, clamp255(v >> 4)); CALC_RGB16; *b = b16; *g = g16; @@ -1747,9 +1800,9 @@ static __inline void YuvPixel10(uint16_t y, int g16; int r16; YuvPixel10_16(y, u, v, &b16, &g16, &r16, yuvconstants); - *b = Clamp(b16 >> 6); - *g = Clamp(g16 >> 6); - *r = Clamp(r16 >> 6); + *b = STATIC_CAST(uint8_t, Clamp(b16 >> 6)); + *g = STATIC_CAST(uint8_t, Clamp(g16 >> 6)); + *r = STATIC_CAST(uint8_t, Clamp(r16 >> 6)); } // C reference code that mimics the YUV 12 bit assembly. @@ -1765,9 +1818,9 @@ static __inline void YuvPixel12(uint16_t y, int g16; int r16; YuvPixel12_16(y, u, v, &b16, &g16, &r16, yuvconstants); - *b = Clamp(b16 >> 6); - *g = Clamp(g16 >> 6); - *r = Clamp(r16 >> 6); + *b = STATIC_CAST(uint8_t, Clamp(b16 >> 6)); + *g = STATIC_CAST(uint8_t, Clamp(g16 >> 6)); + *r = STATIC_CAST(uint8_t, Clamp(r16 >> 6)); } // C reference code that mimics the YUV 16 bit assembly. @@ -1781,12 +1834,12 @@ static __inline void YuvPixel16_8(uint16_t y, const struct YuvConstants* yuvconstants) { LOAD_YUV_CONSTANTS; uint32_t y32 = y; - u = clamp255(u >> 8); - v = clamp255(v >> 8); + u = STATIC_CAST(uint16_t, clamp255(u >> 8)); + v = STATIC_CAST(uint16_t, clamp255(v >> 8)); CALC_RGB16; - *b = Clamp((int32_t)(b16) >> 6); - *g = Clamp((int32_t)(g16) >> 6); - *r = Clamp((int32_t)(r16) >> 6); + *b = STATIC_CAST(uint8_t, Clamp((int32_t)(b16) >> 6)); + *g = STATIC_CAST(uint8_t, Clamp((int32_t)(g16) >> 6)); + *r = STATIC_CAST(uint8_t, Clamp((int32_t)(r16) >> 6)); } // C reference code that mimics the YUV 16 bit assembly. @@ -1800,8 +1853,8 @@ static __inline void YuvPixel16_16(uint16_t y, const struct YuvConstants* yuvconstants) { LOAD_YUV_CONSTANTS; uint32_t y32 = y; - u = clamp255(u >> 8); - v = clamp255(v >> 8); + u = STATIC_CAST(uint16_t, clamp255(u >> 8)); + v = STATIC_CAST(uint16_t, clamp255(v >> 8)); CALC_RGB16; *b = b16; *g = g16; @@ -1815,7 +1868,7 @@ static __inline void YPixel(uint8_t y, uint8_t* g, uint8_t* r, const struct YuvConstants* yuvconstants) { -#if defined(__aarch64__) || defined(__arm__) +#if defined(__aarch64__) || defined(__arm__) || defined(__riscv) int yg = yuvconstants->kRGBCoeffBias[0]; int ygb = yuvconstants->kRGBCoeffBias[4]; #else @@ -1823,9 +1876,9 @@ static __inline void YPixel(uint8_t y, int yg = yuvconstants->kYToRgb[0]; #endif uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16; - *b = Clamp(((int32_t)(y1) + ygb) >> 6); - *g = Clamp(((int32_t)(y1) + ygb) >> 6); - *r = Clamp(((int32_t)(y1) + ygb) >> 6); + *b = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6)); + *g = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6)); + *r = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6)); } void I444ToARGBRow_C(const uint8_t* src_y, @@ -1846,6 +1899,23 @@ void I444ToARGBRow_C(const uint8_t* src_y, } } +void I444ToRGB24Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width) { + int x; + for (x = 0; x < width; ++x) { + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); + src_y += 1; + src_u += 1; + src_v += 1; + rgb_buf += 3; // Advance 1 pixel. + } +} + // Also used for 420 void I422ToARGBRow_C(const uint8_t* src_y, const uint8_t* src_u, @@ -1929,10 +1999,10 @@ void I210AlphaToARGBRow_C(const uint16_t* src_y, for (x = 0; x < width - 1; x += 2) { YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); - rgb_buf[3] = clamp255(src_a[0] >> 2); + rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2)); YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); - rgb_buf[7] = clamp255(src_a[1] >> 2); + rgb_buf[7] = STATIC_CAST(uint8_t, clamp255(src_a[1] >> 2)); src_y += 2; src_u += 1; src_v += 1; @@ -1942,7 +2012,7 @@ void I210AlphaToARGBRow_C(const uint16_t* src_y, if (width & 1) { YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); - rgb_buf[3] = clamp255(src_a[0] >> 2); + rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2)); } } @@ -1957,7 +2027,7 @@ void I410AlphaToARGBRow_C(const uint16_t* src_y, for (x = 0; x < width; ++x) { YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); - rgb_buf[3] = clamp255(src_a[0] >> 2); + rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2)); src_y += 1; src_u += 1; src_v += 1; @@ -2283,8 +2353,10 @@ void I422ToARGB4444Row_C(const uint8_t* src_y, b1 = b1 >> 4; g1 = g1 >> 4; r1 = r1 >> 4; - *(uint16_t*)(dst_argb4444 + 0) = b0 | (g0 << 4) | (r0 << 8) | 0xf000; - *(uint16_t*)(dst_argb4444 + 2) = b1 | (g1 << 4) | (r1 << 8) | 0xf000; + *(uint16_t*)(dst_argb4444 + 0) = + STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | 0xf000); + *(uint16_t*)(dst_argb4444 + 2) = + STATIC_CAST(uint16_t, b1 | (g1 << 4) | (r1 << 8) | 0xf000); src_y += 2; src_u += 1; src_v += 1; @@ -2295,7 +2367,8 @@ void I422ToARGB4444Row_C(const uint8_t* src_y, b0 = b0 >> 4; g0 = g0 >> 4; r0 = r0 >> 4; - *(uint16_t*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | 0xf000; + *(uint16_t*)(dst_argb4444) = + STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | 0xf000); } } @@ -2321,8 +2394,10 @@ void I422ToARGB1555Row_C(const uint8_t* src_y, b1 = b1 >> 3; g1 = g1 >> 3; r1 = r1 >> 3; - *(uint16_t*)(dst_argb1555 + 0) = b0 | (g0 << 5) | (r0 << 10) | 0x8000; - *(uint16_t*)(dst_argb1555 + 2) = b1 | (g1 << 5) | (r1 << 10) | 0x8000; + *(uint16_t*)(dst_argb1555 + 0) = + STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | 0x8000); + *(uint16_t*)(dst_argb1555 + 2) = + STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 10) | 0x8000); src_y += 2; src_u += 1; src_v += 1; @@ -2333,7 +2408,8 @@ void I422ToARGB1555Row_C(const uint8_t* src_y, b0 = b0 >> 3; g0 = g0 >> 3; r0 = r0 >> 3; - *(uint16_t*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | 0x8000; + *(uint16_t*)(dst_argb1555) = + STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | 0x8000); } } @@ -2359,8 +2435,10 @@ void I422ToRGB565Row_C(const uint8_t* src_y, b1 = b1 >> 3; g1 = g1 >> 2; r1 = r1 >> 3; - *(uint16_t*)(dst_rgb565 + 0) = b0 | (g0 << 5) | (r0 << 11); // for ubsan - *(uint16_t*)(dst_rgb565 + 2) = b1 | (g1 << 5) | (r1 << 11); + *(uint16_t*)(dst_rgb565 + 0) = + STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11)); + *(uint16_t*)(dst_rgb565 + 2) = + STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 11)); src_y += 2; src_u += 1; src_v += 1; @@ -2371,7 +2449,8 @@ void I422ToRGB565Row_C(const uint8_t* src_y, b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; - *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); + *(uint16_t*)(dst_rgb565 + 0) = + STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11)); } } @@ -2486,8 +2565,12 @@ void NV12ToRGB565Row_C(const uint8_t* src_y, b1 = b1 >> 3; g1 = g1 >> 2; r1 = r1 >> 3; - *(uint16_t*)(dst_rgb565 + 0) = b0 | (g0 << 5) | (r0 << 11); - *(uint16_t*)(dst_rgb565 + 2) = b1 | (g1 << 5) | (r1 << 11); + *(uint16_t*)(dst_rgb565 + 0) = STATIC_CAST(uint16_t, b0) | + STATIC_CAST(uint16_t, g0 << 5) | + STATIC_CAST(uint16_t, r0 << 11); + *(uint16_t*)(dst_rgb565 + 2) = STATIC_CAST(uint16_t, b1) | + STATIC_CAST(uint16_t, g1 << 5) | + STATIC_CAST(uint16_t, r1 << 11); src_y += 2; src_uv += 2; dst_rgb565 += 4; // Advance 2 pixels. @@ -2497,7 +2580,9 @@ void NV12ToRGB565Row_C(const uint8_t* src_y, b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; - *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); + *(uint16_t*)(dst_rgb565) = STATIC_CAST(uint16_t, b0) | + STATIC_CAST(uint16_t, g0 << 5) | + STATIC_CAST(uint16_t, r0 << 11); } } @@ -2603,6 +2688,19 @@ void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width) { } } +void MirrorRow_16_C(const uint16_t* src, uint16_t* dst, int width) { + int x; + src += width - 1; + for (x = 0; x < width - 1; x += 2) { + dst[x] = src[0]; + dst[x + 1] = src[-1]; + src -= 2; + } + if (width & 1) { + dst[width - 1] = src[0]; + } +} + void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_uv, int width) { int x; src_uv += (width - 1) << 1; @@ -2714,6 +2812,21 @@ void DetileRow_C(const uint8_t* src, } } +void DetileRow_16_C(const uint16_t* src, + ptrdiff_t src_tile_stride, + uint16_t* dst, + int width) { + int x; + for (x = 0; x < width - 15; x += 16) { + memcpy(dst, src, 16 * sizeof(uint16_t)); + dst += 16; + src += src_tile_stride; + } + if (width & 15) { + memcpy(dst, src, (width & 15) * sizeof(uint16_t)); + } +} + void DetileSplitUVRow_C(const uint8_t* src_uv, ptrdiff_t src_tile_stride, uint8_t* dst_u, @@ -2731,6 +2844,51 @@ void DetileSplitUVRow_C(const uint8_t* src_uv, } } +void DetileToYUY2_C(const uint8_t* src_y, + ptrdiff_t src_y_tile_stride, + const uint8_t* src_uv, + ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, + int width) { + for (int x = 0; x < width - 15; x += 16) { + for (int i = 0; i < 8; i++) { + dst_yuy2[0] = src_y[0]; + dst_yuy2[1] = src_uv[0]; + dst_yuy2[2] = src_y[1]; + dst_yuy2[3] = src_uv[1]; + dst_yuy2 += 4; + src_y += 2; + src_uv += 2; + } + src_y += src_y_tile_stride - 16; + src_uv += src_uv_tile_stride - 16; + } +} + +// Unpack MT2T into tiled P010 64 pixels at a time. MT2T's bitstream is encoded +// in 80 byte blocks representing 64 pixels each. The first 16 bytes of the +// block contain all of the lower 2 bits of each pixel packed together, and the +// next 64 bytes represent all the upper 8 bits of the pixel. The lower bits are +// packed into 1x4 blocks, whereas the upper bits are packed in normal raster +// order. +void UnpackMT2T_C(const uint8_t* src, uint16_t* dst, size_t size) { + for (size_t i = 0; i < size; i += 80) { + const uint8_t* src_lower_bits = src; + const uint8_t* src_upper_bits = src + 16; + + for (int j = 0; j < 4; j++) { + for (int k = 0; k < 16; k++) { + *dst++ = ((src_lower_bits[k] >> (j * 2)) & 0x3) << 6 | + (uint16_t)*src_upper_bits << 8 | + (uint16_t)*src_upper_bits >> 2; + src_upper_bits++; + } + } + + src += 80; + } +} + void SplitRGBRow_C(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, @@ -2823,10 +2981,10 @@ void MergeAR64Row_C(const uint16_t* src_r, int shift = 16 - depth; int max = (1 << depth) - 1; for (x = 0; x < width; ++x) { - dst_ar64[0] = ClampMax(src_b[x], max) << shift; - dst_ar64[1] = ClampMax(src_g[x], max) << shift; - dst_ar64[2] = ClampMax(src_r[x], max) << shift; - dst_ar64[3] = ClampMax(src_a[x], max) << shift; + dst_ar64[0] = STATIC_CAST(uint16_t, ClampMax(src_b[x], max) << shift); + dst_ar64[1] = STATIC_CAST(uint16_t, ClampMax(src_g[x], max) << shift); + dst_ar64[2] = STATIC_CAST(uint16_t, ClampMax(src_r[x], max) << shift); + dst_ar64[3] = STATIC_CAST(uint16_t, ClampMax(src_a[x], max) << shift); dst_ar64 += 4; } } @@ -2843,10 +3001,10 @@ void MergeARGB16To8Row_C(const uint16_t* src_r, int x; int shift = depth - 8; for (x = 0; x < width; ++x) { - dst_argb[0] = clamp255(src_b[x] >> shift); - dst_argb[1] = clamp255(src_g[x] >> shift); - dst_argb[2] = clamp255(src_r[x] >> shift); - dst_argb[3] = clamp255(src_a[x] >> shift); + dst_argb[0] = STATIC_CAST(uint8_t, clamp255(src_b[x] >> shift)); + dst_argb[1] = STATIC_CAST(uint8_t, clamp255(src_g[x] >> shift)); + dst_argb[2] = STATIC_CAST(uint8_t, clamp255(src_r[x] >> shift)); + dst_argb[3] = STATIC_CAST(uint8_t, clamp255(src_a[x] >> shift)); dst_argb += 4; } } @@ -2863,9 +3021,9 @@ void MergeXR64Row_C(const uint16_t* src_r, int shift = 16 - depth; int max = (1 << depth) - 1; for (x = 0; x < width; ++x) { - dst_ar64[0] = ClampMax(src_b[x], max) << shift; - dst_ar64[1] = ClampMax(src_g[x], max) << shift; - dst_ar64[2] = ClampMax(src_r[x], max) << shift; + dst_ar64[0] = STATIC_CAST(uint16_t, ClampMax(src_b[x], max) << shift); + dst_ar64[1] = STATIC_CAST(uint16_t, ClampMax(src_g[x], max) << shift); + dst_ar64[2] = STATIC_CAST(uint16_t, ClampMax(src_r[x], max) << shift); dst_ar64[3] = 0xffff; dst_ar64 += 4; } @@ -2882,9 +3040,9 @@ void MergeXRGB16To8Row_C(const uint16_t* src_r, int x; int shift = depth - 8; for (x = 0; x < width; ++x) { - dst_argb[0] = clamp255(src_b[x] >> shift); - dst_argb[1] = clamp255(src_g[x] >> shift); - dst_argb[2] = clamp255(src_r[x] >> shift); + dst_argb[0] = STATIC_CAST(uint8_t, clamp255(src_b[x] >> shift)); + dst_argb[1] = STATIC_CAST(uint8_t, clamp255(src_g[x] >> shift)); + dst_argb[2] = STATIC_CAST(uint8_t, clamp255(src_r[x] >> shift)); dst_argb[3] = 0xff; dst_argb += 4; } @@ -2930,8 +3088,8 @@ void MergeUVRow_16_C(const uint16_t* src_u, assert(depth <= 16); int x; for (x = 0; x < width; ++x) { - dst_uv[0] = src_u[x] << shift; - dst_uv[1] = src_v[x] << shift; + dst_uv[0] = STATIC_CAST(uint16_t, src_u[x] << shift); + dst_uv[1] = STATIC_CAST(uint16_t, src_v[x] << shift); dst_uv += 2; } } @@ -2959,7 +3117,7 @@ void MultiplyRow_16_C(const uint16_t* src_y, int width) { int x; for (x = 0; x < width; ++x) { - dst_y[x] = src_y[x] * scale; + dst_y[x] = STATIC_CAST(uint16_t, src_y[x] * scale); } } @@ -2990,7 +3148,7 @@ void Convert16To8Row_C(const uint16_t* src_y, assert(scale <= 32768); for (x = 0; x < width; ++x) { - dst_y[x] = C16TO8(src_y[x], scale); + dst_y[x] = STATIC_CAST(uint8_t, C16TO8(src_y[x], scale)); } } @@ -3043,6 +3201,21 @@ void YUY2ToUVRow_C(const uint8_t* src_yuy2, } } +// Filter 2 rows of YUY2 UV's (422) into UV (NV12). +void YUY2ToNVUVRow_C(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_uv, + int width) { + // Output a row of UV values, filtering 2 rows of YUY2. + int x; + for (x = 0; x < width; x += 2) { + dst_uv[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1; + dst_uv[1] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1; + src_yuy2 += 4; + dst_uv += 2; + } +} + // Copy row of YUY2 UV's (422) into U and V (422). void YUY2ToUV422Row_C(const uint8_t* src_yuy2, uint8_t* dst_u, @@ -3138,9 +3311,9 @@ void ARGBBlendRow_C(const uint8_t* src_argb, uint32_t bb = src_argb1[0]; uint32_t bg = src_argb1[1]; uint32_t br = src_argb1[2]; - dst_argb[0] = BLEND(fb, bb, a); - dst_argb[1] = BLEND(fg, bg, a); - dst_argb[2] = BLEND(fr, br, a); + dst_argb[0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a)); + dst_argb[1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a)); + dst_argb[2] = STATIC_CAST(uint8_t, BLEND(fr, br, a)); dst_argb[3] = 255u; fb = src_argb[4 + 0]; @@ -3150,9 +3323,9 @@ void ARGBBlendRow_C(const uint8_t* src_argb, bb = src_argb1[4 + 0]; bg = src_argb1[4 + 1]; br = src_argb1[4 + 2]; - dst_argb[4 + 0] = BLEND(fb, bb, a); - dst_argb[4 + 1] = BLEND(fg, bg, a); - dst_argb[4 + 2] = BLEND(fr, br, a); + dst_argb[4 + 0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a)); + dst_argb[4 + 1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a)); + dst_argb[4 + 2] = STATIC_CAST(uint8_t, BLEND(fr, br, a)); dst_argb[4 + 3] = 255u; src_argb += 8; src_argb1 += 8; @@ -3167,9 +3340,9 @@ void ARGBBlendRow_C(const uint8_t* src_argb, uint32_t bb = src_argb1[0]; uint32_t bg = src_argb1[1]; uint32_t br = src_argb1[2]; - dst_argb[0] = BLEND(fb, bb, a); - dst_argb[1] = BLEND(fg, bg, a); - dst_argb[2] = BLEND(fr, br, a); + dst_argb[0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a)); + dst_argb[1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a)); + dst_argb[2] = STATIC_CAST(uint8_t, BLEND(fr, br, a)); dst_argb[3] = 255u; } } @@ -3214,7 +3387,7 @@ void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) { dst_argb[0] = ATTENUATE(b, a); dst_argb[1] = ATTENUATE(g, a); dst_argb[2] = ATTENUATE(r, a); - dst_argb[3] = a; + dst_argb[3] = STATIC_CAST(uint8_t, a); b = src_argb[4]; g = src_argb[5]; r = src_argb[6]; @@ -3222,7 +3395,7 @@ void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) { dst_argb[4] = ATTENUATE(b, a); dst_argb[5] = ATTENUATE(g, a); dst_argb[6] = ATTENUATE(r, a); - dst_argb[7] = a; + dst_argb[7] = STATIC_CAST(uint8_t, a); src_argb += 8; dst_argb += 8; } @@ -3235,7 +3408,7 @@ void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) { dst_argb[0] = ATTENUATE(b, a); dst_argb[1] = ATTENUATE(g, a); dst_argb[2] = ATTENUATE(r, a); - dst_argb[3] = a; + dst_argb[3] = STATIC_CAST(uint8_t, a); } } #undef ATTENUATE @@ -3307,10 +3480,10 @@ void ARGBUnattenuateRow_C(const uint8_t* src_argb, const uint32_t ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point // Clamping should not be necessary but is free in assembly. - dst_argb[0] = UNATTENUATE(b, ia); - dst_argb[1] = UNATTENUATE(g, ia); - dst_argb[2] = UNATTENUATE(r, ia); - dst_argb[3] = a; + dst_argb[0] = STATIC_CAST(uint8_t, UNATTENUATE(b, ia)); + dst_argb[1] = STATIC_CAST(uint8_t, UNATTENUATE(g, ia)); + dst_argb[2] = STATIC_CAST(uint8_t, UNATTENUATE(r, ia)); + dst_argb[3] = STATIC_CAST(uint8_t, a); src_argb += 4; dst_argb += 4; } @@ -3344,12 +3517,20 @@ void CumulativeSumToAverageRow_C(const int32_t* tl, int i; assert(area != 0); - ooa = 1.0f / area; + ooa = 1.0f / STATIC_CAST(float, area); for (i = 0; i < count; ++i) { - dst[0] = (uint8_t)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa); - dst[1] = (uint8_t)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa); - dst[2] = (uint8_t)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa); - dst[3] = (uint8_t)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa); + dst[0] = + (uint8_t)(STATIC_CAST(float, bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * + ooa); + dst[1] = + (uint8_t)(STATIC_CAST(float, bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * + ooa); + dst[2] = + (uint8_t)(STATIC_CAST(float, bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * + ooa); + dst[3] = + (uint8_t)(STATIC_CAST(float, bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * + ooa); dst += 4; tl += 4; bl += 4; @@ -3407,7 +3588,9 @@ static void HalfRow_16To8_C(const uint16_t* src_uv, int width) { int x; for (x = 0; x < width; ++x) { - dst_uv[x] = C16TO8((src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1, scale); + dst_uv[x] = STATIC_CAST( + uint8_t, + C16TO8((src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1, scale)); } } @@ -3433,8 +3616,9 @@ void InterpolateRow_C(uint8_t* dst_ptr, return; } for (x = 0; x < width; ++x) { - dst_ptr[0] = - (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8; + dst_ptr[0] = STATIC_CAST( + uint8_t, + (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8); ++src_ptr; ++src_ptr1; ++dst_ptr; @@ -3463,8 +3647,9 @@ void InterpolateRow_16_C(uint16_t* dst_ptr, return; } for (x = 0; x < width; ++x) { - dst_ptr[0] = - (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8; + dst_ptr[0] = STATIC_CAST( + uint16_t, + (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8); ++src_ptr; ++src_ptr1; ++dst_ptr; @@ -3501,9 +3686,11 @@ void InterpolateRow_16To8_C(uint8_t* dst_ptr, return; } for (x = 0; x < width; ++x) { - dst_ptr[0] = C16TO8( - (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8, - scale); + dst_ptr[0] = STATIC_CAST( + uint8_t, + C16TO8( + (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8, + scale)); src_ptr += 1; src_ptr1 += 1; dst_ptr += 1; @@ -3615,10 +3802,10 @@ void ARGBPolynomialRow_C(const uint8_t* src_argb, dr += poly[14] * r3; da += poly[15] * a3; - dst_argb[0] = Clamp((int32_t)(db)); - dst_argb[1] = Clamp((int32_t)(dg)); - dst_argb[2] = Clamp((int32_t)(dr)); - dst_argb[3] = Clamp((int32_t)(da)); + dst_argb[0] = STATIC_CAST(uint8_t, Clamp((int32_t)(db))); + dst_argb[1] = STATIC_CAST(uint8_t, Clamp((int32_t)(dg))); + dst_argb[2] = STATIC_CAST(uint8_t, Clamp((int32_t)(dr))); + dst_argb[3] = STATIC_CAST(uint8_t, Clamp((int32_t)(da))); src_argb += 4; dst_argb += 4; } @@ -4023,6 +4210,32 @@ void I422ToRGB24Row_AVX2(const uint8_t* src_y, } #endif +#if defined(HAS_I444TORGB24ROW_AVX2) +void I444ToRGB24Row_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { + // Row buffer for intermediate ARGB pixels. + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + I444ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); +#if defined(HAS_ARGBTORGB24ROW_AVX2) + ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth); +#else + ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); +#endif + src_y += twidth; + src_u += twidth; + src_v += twidth; + dst_rgb24 += twidth * 3; + width -= twidth; + } +} +#endif + #if defined(HAS_NV12TORGB565ROW_AVX2) void NV12ToRGB565Row_AVX2(const uint8_t* src_y, const uint8_t* src_uv, @@ -4164,8 +4377,9 @@ void ScaleSamples_C(const float* src, float* dst, float scale, int width) { void GaussRow_C(const uint32_t* src, uint16_t* dst, int width) { int i; for (i = 0; i < width; ++i) { - *dst++ = - (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4] + 128) >> 8; + *dst++ = STATIC_CAST( + uint16_t, + (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4] + 128) >> 8); ++src; } } @@ -4325,6 +4539,8 @@ void HalfMergeUVRow_C(const uint8_t* src_u, } } +#undef STATIC_CAST + #ifdef __cplusplus } // extern "C" } // namespace libyuv |