Diffstat (limited to 'third_party/libyuv/source/row_common.cc')
-rw-r--r-- | third_party/libyuv/source/row_common.cc | 1230 |
1 file changed, 793 insertions, 437 deletions
diff --git a/third_party/libyuv/source/row_common.cc b/third_party/libyuv/source/row_common.cc
index a941c3f5fc..c6e412414e 100644
--- a/third_party/libyuv/source/row_common.cc
+++ b/third_party/libyuv/source/row_common.cc
@@ -10,6 +10,7 @@
 
 #include "libyuv/row.h"
 
+#include <assert.h>
 #include <stdio.h>
 #include <string.h>  // For memcpy and memset.
 
@@ -21,10 +22,14 @@ namespace libyuv {
 extern "C" {
 #endif
 
-// The following ifdef from row_win makes the C code match the row_win code,
-// which is 7 bit fixed point.
+// This macro controls YUV to RGB using unsigned math to extend range of
+// YUV to RGB coefficients to 0 to 4 instead of 0 to 2 for more accuracy on B:
+// LIBYUV_UNLIMITED_DATA
+
+// The following macro from row_win makes the C code match the row_win code,
+// which is 7 bit fixed point for ARGBToI420:
 #if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
-    (defined(_M_IX86) || (defined(_M_X64) && !defined(__clang__)))
+    !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64))
 #define LIBYUV_RGB7 1
 #endif
 
@@ -50,6 +55,11 @@ static __inline int32_t clamp1023(int32_t v) {
   return (-(v >= 1023) | v) & 1023;
 }
 
+// clamp to max
+static __inline int32_t ClampMax(int32_t v, int32_t max) {
+  return (-(v >= max) | v) & max;
+}
+
 static __inline uint32_t Abs(int32_t v) {
   int m = -(v < 0);
   return (v + m) ^ m;
@@ -67,6 +77,10 @@ static __inline int32_t clamp1023(int32_t v) {
   return (v > 1023) ? 1023 : v;
 }
 
+static __inline int32_t ClampMax(int32_t v, int32_t max) {
+  return (v > max) ? max : v;
+}
+
 static __inline uint32_t Abs(int32_t v) {
   return (v < 0) ? -v : v;
 }
@@ -413,6 +427,82 @@ void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) {
   }
 }
 
+void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    dst_ar64[0] = src_argb[0] * 0x0101;
+    dst_ar64[1] = src_argb[1] * 0x0101;
+    dst_ar64[2] = src_argb[2] * 0x0101;
+    dst_ar64[3] = src_argb[3] * 0x0101;
+    dst_ar64 += 4;
+    src_argb += 4;
+  }
+}
+
+void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    dst_ab64[0] = src_argb[2] * 0x0101;
+    dst_ab64[1] = src_argb[1] * 0x0101;
+    dst_ab64[2] = src_argb[0] * 0x0101;
+    dst_ab64[3] = src_argb[3] * 0x0101;
+    dst_ab64 += 4;
+    src_argb += 4;
+  }
+}
+
+void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    dst_argb[0] = src_ar64[0] >> 8;
+    dst_argb[1] = src_ar64[1] >> 8;
+    dst_argb[2] = src_ar64[2] >> 8;
+    dst_argb[3] = src_ar64[3] >> 8;
+    dst_argb += 4;
+    src_ar64 += 4;
+  }
+}
+
+void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    dst_argb[0] = src_ab64[2] >> 8;
+    dst_argb[1] = src_ab64[1] >> 8;
+    dst_argb[2] = src_ab64[0] >> 8;
+    dst_argb[3] = src_ab64[3] >> 8;
+    dst_argb += 4;
+    src_ab64 += 4;
+  }
+}
+
+// TODO(fbarchard): Make shuffle compatible with SIMD versions
+void AR64ShuffleRow_C(const uint8_t* src_ar64,
+                      uint8_t* dst_ar64,
+                      const uint8_t* shuffler,
+                      int width) {
+  const uint16_t* src_ar64_16 = (const uint16_t*)src_ar64;
+  uint16_t* dst_ar64_16 = (uint16_t*)dst_ar64;
+  int index0 = shuffler[0] / 2;
+  int index1 = shuffler[2] / 2;
+  int index2 = shuffler[4] / 2;
+  int index3 = shuffler[6] / 2;
+  // Shuffle a row of AR64.
+  int x;
+  for (x = 0; x < width / 2; ++x) {
+    // To support in-place conversion.
+    uint16_t b = src_ar64_16[index0];
+    uint16_t g = src_ar64_16[index1];
+    uint16_t r = src_ar64_16[index2];
+    uint16_t a = src_ar64_16[index3];
+    dst_ar64_16[0] = b;
+    dst_ar64_16[1] = g;
+    dst_ar64_16[2] = r;
+    dst_ar64_16[3] = a;
+    src_ar64_16 += 4;
+    dst_ar64_16 += 4;
+  }
+}
+
 #ifdef LIBYUV_RGB7
 // Old 7 bit math for compatibility on unsupported platforms.
 static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
@@ -462,80 +552,80 @@ static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) {
 
 // Intel version mimic SSE/AVX which does 2 pavgb
 #if LIBYUV_ARGBTOUV_PAVGB
-#define MAKEROWY(NAME, R, G, B, BPP) \
-  void NAME##ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
-    int x; \
-    for (x = 0; x < width; ++x) { \
-      dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \
-      src_argb0 += BPP; \
-      dst_y += 1; \
-    } \
-  } \
-  void NAME##ToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
-                       uint8_t* dst_u, uint8_t* dst_v, int width) { \
-    const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
-    int x; \
-    for (x = 0; x < width - 1; x += 2) { \
-      uint8_t ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \
-                        AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \
-      uint8_t ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \
-                        AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \
-      uint8_t ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \
-                        AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \
-      dst_u[0] = RGBToU(ar, ag, ab); \
-      dst_v[0] = RGBToV(ar, ag, ab); \
-      src_rgb0 += BPP * 2; \
-      src_rgb1 += BPP * 2; \
-      dst_u += 1; \
-      dst_v += 1; \
-    } \
-    if (width & 1) { \
-      uint8_t ab = AVGB(src_rgb0[B], src_rgb1[B]); \
-      uint8_t ag = AVGB(src_rgb0[G], src_rgb1[G]); \
-      uint8_t ar = AVGB(src_rgb0[R], src_rgb1[R]); \
-      dst_u[0] = RGBToU(ar, ag, ab); \
-      dst_v[0] = RGBToV(ar, ag, ab); \
-    } \
+#define MAKEROWY(NAME, R, G, B, BPP) \
+  void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
+    int x; \
+    for (x = 0; x < width; ++x) { \
+      dst_y[0] = RGBToY(src_rgb[R], src_rgb[G], src_rgb[B]); \
+      src_rgb += BPP; \
+      dst_y += 1; \
+    } \
+  } \
+  void NAME##ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
+                       uint8_t* dst_u, uint8_t* dst_v, int width) { \
+    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
+    int x; \
+    for (x = 0; x < width - 1; x += 2) { \
+      uint8_t ab = AVGB(AVGB(src_rgb[B], src_rgb1[B]), \
+                        AVGB(src_rgb[B + BPP], src_rgb1[B + BPP])); \
+      uint8_t ag = AVGB(AVGB(src_rgb[G], src_rgb1[G]), \
+                        AVGB(src_rgb[G + BPP], src_rgb1[G + BPP])); \
+      uint8_t ar = AVGB(AVGB(src_rgb[R], src_rgb1[R]), \
+                        AVGB(src_rgb[R + BPP], src_rgb1[R + BPP])); \
+      dst_u[0] = RGBToU(ar, ag, ab); \
+      dst_v[0] = RGBToV(ar, ag, ab); \
+      src_rgb += BPP * 2; \
+      src_rgb1 += BPP * 2; \
+      dst_u += 1; \
+      dst_v += 1; \
+    } \
+    if (width & 1) { \
+      uint8_t ab = AVGB(src_rgb[B], src_rgb1[B]); \
+      uint8_t ag = AVGB(src_rgb[G], src_rgb1[G]); \
+      uint8_t ar = AVGB(src_rgb[R], src_rgb1[R]); \
+      dst_u[0] = RGBToU(ar, ag, ab); \
+      dst_v[0] = RGBToV(ar, ag, ab); \
+    } \
   }
 #else
 // ARM version does sum / 2 then multiply by 2x smaller coefficients
-#define MAKEROWY(NAME, R, G, B, BPP) \
-  void NAME##ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
-    int x; \
-    for (x = 0; x < width; ++x) { \
-      dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \
-      src_argb0 += BPP; \
-      dst_y += 1; \
-    } \
-  } \
-  void NAME##ToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
-                       uint8_t* dst_u, uint8_t* dst_v, int width) { \
-    const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
-    int x; \
-    for (x = 0; x < width - 1; x += 2) { \
-      uint16_t ab = (src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] + \
-                     src_rgb1[B + BPP] + 1) >> \
-                    1; \
-      uint16_t ag = (src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] + \
-                     src_rgb1[G + BPP] + 1) >> \
-                    1; \
-      uint16_t ar = (src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] + \
-                     src_rgb1[R + BPP] + 1) >> \
-                    1; \
-      dst_u[0] = RGB2xToU(ar, ag, ab); \
-      dst_v[0] = RGB2xToV(ar, ag, ab); \
-      src_rgb0 += BPP * 2; \
-      src_rgb1 += BPP * 2; \
-      dst_u += 1; \
-      dst_v += 1; \
-    } \
-    if (width & 1) { \
-      uint16_t ab = src_rgb0[B] + src_rgb1[B]; \
-      uint16_t ag = src_rgb0[G] + src_rgb1[G]; \
-      uint16_t ar = src_rgb0[R] + src_rgb1[R]; \
-      dst_u[0] = RGB2xToU(ar, ag, ab); \
-      dst_v[0] = RGB2xToV(ar, ag, ab); \
-    } \
+#define MAKEROWY(NAME, R, G, B, BPP) \
+  void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
+    int x; \
+    for (x = 0; x < width; ++x) { \
+      dst_y[0] = RGBToY(src_rgb[R], src_rgb[G], src_rgb[B]); \
+      src_rgb += BPP; \
+      dst_y += 1; \
+    } \
+  } \
+  void NAME##ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
+                       uint8_t* dst_u, uint8_t* dst_v, int width) { \
+    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
+    int x; \
+    for (x = 0; x < width - 1; x += 2) { \
+      uint16_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] + \
+                     src_rgb1[B + BPP] + 1) >> \
+                    1; \
+      uint16_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] + \
+                     src_rgb1[G + BPP] + 1) >> \
+                    1; \
+      uint16_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] + \
+                     src_rgb1[R + BPP] + 1) >> \
+                    1; \
+      dst_u[0] = RGB2xToU(ar, ag, ab); \
+      dst_v[0] = RGB2xToV(ar, ag, ab); \
+      src_rgb += BPP * 2; \
+      src_rgb1 += BPP * 2; \
+      dst_u += 1; \
+      dst_v += 1; \
+    } \
+    if (width & 1) { \
+      uint16_t ab = src_rgb[B] + src_rgb1[B]; \
+      uint16_t ag = src_rgb[G] + src_rgb1[G]; \
+      uint16_t ar = src_rgb[R] + src_rgb1[R]; \
+      dst_u[0] = RGB2xToU(ar, ag, ab); \
+      dst_v[0] = RGB2xToV(ar, ag, ab); \
+    } \
   }
 #endif
 
@@ -603,80 +693,80 @@ static __inline int RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
 // ARGBToYJ_C and ARGBToUVJ_C
 // Intel version mimic SSE/AVX which does 2 pavgb
 #if LIBYUV_ARGBTOUV_PAVGB
-#define MAKEROWYJ(NAME, R, G, B, BPP) \
-  void NAME##ToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
-    int x; \
-    for (x = 0; x < width; ++x) { \
-      dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \
-      src_argb0 += BPP; \
-      dst_y += 1; \
-    } \
-  } \
-  void NAME##ToUVJRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
-                        uint8_t* dst_u, uint8_t* dst_v, int width) { \
-    const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
-    int x; \
-    for (x = 0; x < width - 1; x += 2) { \
-      uint8_t ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \
-                        AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \
-      uint8_t ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \
-                        AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \
-      uint8_t ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \
-                        AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \
-      dst_u[0] = RGBToUJ(ar, ag, ab); \
-      dst_v[0] = RGBToVJ(ar, ag, ab); \
-      src_rgb0 += BPP * 2; \
-      src_rgb1 += BPP * 2; \
-      dst_u += 1; \
-      dst_v += 1; \
-    } \
-    if (width & 1) { \
-      uint8_t ab = AVGB(src_rgb0[B], src_rgb1[B]); \
-      uint8_t ag = AVGB(src_rgb0[G], src_rgb1[G]); \
-      uint8_t ar = AVGB(src_rgb0[R], src_rgb1[R]); \
-      dst_u[0] = RGBToUJ(ar, ag, ab); \
-      dst_v[0] = RGBToVJ(ar, ag, ab); \
-    } \
+#define MAKEROWYJ(NAME, R, G, B, BPP) \
+  void NAME##ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
+    int x; \
+    for (x = 0; x < width; ++x) { \
+      dst_y[0] = RGBToYJ(src_rgb[R], src_rgb[G], src_rgb[B]); \
+      src_rgb += BPP; \
+      dst_y += 1; \
+    } \
+  } \
+  void NAME##ToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
+                        uint8_t* dst_u, uint8_t* dst_v, int width) { \
+    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
+    int x; \
+    for (x = 0; x < width - 1; x += 2) { \
+      uint8_t ab = AVGB(AVGB(src_rgb[B], src_rgb1[B]), \
+                        AVGB(src_rgb[B + BPP], src_rgb1[B + BPP])); \
+      uint8_t ag = AVGB(AVGB(src_rgb[G], src_rgb1[G]), \
+                        AVGB(src_rgb[G + BPP], src_rgb1[G + BPP])); \
+      uint8_t ar = AVGB(AVGB(src_rgb[R], src_rgb1[R]), \
+                        AVGB(src_rgb[R + BPP], src_rgb1[R + BPP])); \
+      dst_u[0] = RGBToUJ(ar, ag, ab); \
+      dst_v[0] = RGBToVJ(ar, ag, ab); \
+      src_rgb += BPP * 2; \
+      src_rgb1 += BPP * 2; \
+      dst_u += 1; \
+      dst_v += 1; \
+    } \
+    if (width & 1) { \
+      uint8_t ab = AVGB(src_rgb[B], src_rgb1[B]); \
+      uint8_t ag = AVGB(src_rgb[G], src_rgb1[G]); \
+      uint8_t ar = AVGB(src_rgb[R], src_rgb1[R]); \
+      dst_u[0] = RGBToUJ(ar, ag, ab); \
+      dst_v[0] = RGBToVJ(ar, ag, ab); \
+    } \
   }
 #else
 // ARM version does sum / 2 then multiply by 2x smaller coefficients
-#define MAKEROWYJ(NAME, R, G, B, BPP) \
-  void NAME##ToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
-    int x; \
-    for (x = 0; x < width; ++x) { \
-      dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \
-      src_argb0 += BPP; \
-      dst_y += 1; \
-    } \
-  } \
-  void NAME##ToUVJRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
-                        uint8_t* dst_u, uint8_t* dst_v, int width) { \
-    const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
-    int x; \
-    for (x = 0; x < width - 1; x += 2) { \
-      uint16_t ab = (src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] + \
-                     src_rgb1[B + BPP] + 1) >> \
-                    1; \
-      uint16_t ag = (src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] + \
-                     src_rgb1[G + BPP] + 1) >> \
-                    1; \
-      uint16_t ar = (src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] + \
-                     src_rgb1[R + BPP] + 1) >> \
-                    1; \
-      dst_u[0] = RGB2xToUJ(ar, ag, ab); \
-      dst_v[0] = RGB2xToVJ(ar, ag, ab); \
-      src_rgb0 += BPP * 2; \
-      src_rgb1 += BPP * 2; \
-      dst_u += 1; \
-      dst_v += 1; \
-    } \
-    if (width & 1) { \
-      uint16_t ab = (src_rgb0[B] + src_rgb1[B]); \
-      uint16_t ag = (src_rgb0[G] + src_rgb1[G]); \
-      uint16_t ar = (src_rgb0[R] + src_rgb1[R]); \
-      dst_u[0] = RGB2xToUJ(ar, ag, ab); \
-      dst_v[0] = RGB2xToVJ(ar, ag, ab); \
-    } \
+#define MAKEROWYJ(NAME, R, G, B, BPP) \
+  void NAME##ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
+    int x; \
+    for (x = 0; x < width; ++x) { \
+      dst_y[0] = RGBToYJ(src_rgb[R], src_rgb[G], src_rgb[B]); \
+      src_rgb += BPP; \
+      dst_y += 1; \
+    } \
+  } \
+  void NAME##ToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
+                        uint8_t* dst_u, uint8_t* dst_v, int width) { \
+    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
+    int x; \
+    for (x = 0; x < width - 1; x += 2) { \
+      uint16_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] + \
+                     src_rgb1[B + BPP] + 1) >> \
+                    1; \
+      uint16_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] + \
+                     src_rgb1[G + BPP] + 1) >> \
+                    1; \
+      uint16_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] + \
+                     src_rgb1[R + BPP] + 1) >> \
+                    1; \
+      dst_u[0] = RGB2xToUJ(ar, ag, ab); \
+      dst_v[0] = RGB2xToVJ(ar, ag, ab); \
+      src_rgb += BPP * 2; \
+      src_rgb1 += BPP * 2; \
+      dst_u += 1; \
+      dst_v += 1; \
+    } \
+    if (width & 1) { \
+      uint16_t ab = (src_rgb[B] + src_rgb1[B]); \
+      uint16_t ag = (src_rgb[G] + src_rgb1[G]); \
+      uint16_t ar = (src_rgb[R] + src_rgb1[R]); \
+      dst_u[0] = RGB2xToUJ(ar, ag, ab); \
+      dst_v[0] = RGB2xToVJ(ar, ag, ab); \
+    } \
   }
 #endif
@@ -1146,16 +1236,16 @@ void ARGBShadeRow_C(const uint8_t* src_argb,
 #define REPEAT8(v) (v) | ((v) << 8)
 #define SHADE(f, v) v* f >> 16
 
-void ARGBMultiplyRow_C(const uint8_t* src_argb0,
+void ARGBMultiplyRow_C(const uint8_t* src_argb,
                        const uint8_t* src_argb1,
                        uint8_t* dst_argb,
                        int width) {
   int i;
   for (i = 0; i < width; ++i) {
-    const uint32_t b = REPEAT8(src_argb0[0]);
-    const uint32_t g = REPEAT8(src_argb0[1]);
-    const uint32_t r = REPEAT8(src_argb0[2]);
-    const uint32_t a = REPEAT8(src_argb0[3]);
+    const uint32_t b = REPEAT8(src_argb[0]);
+    const uint32_t g = REPEAT8(src_argb[1]);
+    const uint32_t r = REPEAT8(src_argb[2]);
+    const uint32_t a = REPEAT8(src_argb[3]);
     const uint32_t b_scale = src_argb1[0];
     const uint32_t g_scale = src_argb1[1];
     const uint32_t r_scale = src_argb1[2];
@@ -1164,7 +1254,7 @@ void ARGBMultiplyRow_C(const uint8_t* src_argb0,
     dst_argb[1] = SHADE(g, g_scale);
     dst_argb[2] = SHADE(r, r_scale);
     dst_argb[3] = SHADE(a, a_scale);
-    src_argb0 += 4;
+    src_argb += 4;
     src_argb1 += 4;
     dst_argb += 4;
   }
@@ -1174,16 +1264,16 @@ void ARGBMultiplyRow_C(const uint8_t* src_argb0,
 
 #define SHADE(f, v) clamp255(v + f)
 
-void ARGBAddRow_C(const uint8_t* src_argb0,
+void ARGBAddRow_C(const uint8_t* src_argb,
                   const uint8_t* src_argb1,
                   uint8_t* dst_argb,
                   int width) {
   int i;
   for (i = 0; i < width; ++i) {
-    const int b = src_argb0[0];
-    const int g = src_argb0[1];
-    const int r = src_argb0[2];
-    const int a = src_argb0[3];
+    const int b = src_argb[0];
+    const int g = src_argb[1];
+    const int r = src_argb[2];
+    const int a = src_argb[3];
     const int b_add = src_argb1[0];
     const int g_add = src_argb1[1];
     const int r_add = src_argb1[2];
@@ -1192,7 +1282,7 @@ void ARGBAddRow_C(const uint8_t* src_argb0,
     dst_argb[1] = SHADE(g, g_add);
     dst_argb[2] = SHADE(r, r_add);
     dst_argb[3] = SHADE(a, a_add);
-    src_argb0 += 4;
+    src_argb += 4;
     src_argb1 += 4;
     dst_argb += 4;
   }
@@ -1201,16 +1291,16 @@ void ARGBAddRow_C(const uint8_t* src_argb0,
 
 #define SHADE(f, v) clamp0(f - v)
 
-void ARGBSubtractRow_C(const uint8_t* src_argb0,
+void ARGBSubtractRow_C(const uint8_t* src_argb,
                        const uint8_t* src_argb1,
                        uint8_t* dst_argb,
                        int width) {
   int i;
   for (i = 0; i < width; ++i) {
-    const int b = src_argb0[0];
-    const int g = src_argb0[1];
-    const int r = src_argb0[2];
-    const int a = src_argb0[3];
+    const int b = src_argb[0];
+    const int g = src_argb[1];
+    const int r = src_argb[2];
+    const int a = src_argb[3];
     const int b_sub = src_argb1[0];
     const int g_sub = src_argb1[1];
     const int r_sub = src_argb1[2];
@@ -1219,7 +1309,7 @@ void ARGBSubtractRow_C(const uint8_t* src_argb0,
     dst_argb[1] = SHADE(g, g_sub);
     dst_argb[2] = SHADE(r, r_sub);
     dst_argb[3] = SHADE(a, a_sub);
-    src_argb0 += 4;
+    src_argb += 4;
     src_argb1 += 4;
     dst_argb += 4;
   }
@@ -1329,64 +1419,36 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
 
 // Macros to create SIMD specific yuv to rgb conversion constants.
 
-#if defined(__aarch64__)
-#define MAKEYUVCONSTANTS(name, YG, YB, UB, UG, VG, VR, BB, BG, BR) \
-  const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = { \
-      {UB, VR, UB, VR, UB, VR, UB, VR}, {UB, VR, UB, VR, UB, VR, UB, VR}, \
-      {UG, VG, UG, VG, UG, VG, UG, VG}, {UG, VG, UG, VG, UG, VG, UG, VG}, \
-      {BB, BG, BR, YB, 0, 0, 0, 0}, {0x0101 * YG, YG, 0, 0}}; \
-  const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = { \
-      {VR, UB, VR, UB, VR, UB, VR, UB}, {VR, UB, VR, UB, VR, UB, VR, UB}, \
-      {VG, UG, VG, UG, VG, UG, VG, UG}, {VG, UG, VG, UG, VG, UG, VG, UG}, \
-      {BR, BG, BB, YB, 0, 0, 0, 0}, {0x0101 * YG, YG, 0, 0}};
-
-#elif defined(__arm__)
-#define MAKEYUVCONSTANTS(name, YG, YB, UB, UG, VG, VR, BB, BG, BR) \
-  const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = { \
-      {UB, UB, UB, UB, VR, VR, VR, VR, 0, 0, 0, 0, 0, 0, 0, 0}, \
-      {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0}, \
-      {BB, BG, BR, YB, 0, 0, 0, 0}, \
-      {0x0101 * YG, YG, 0, 0}}; \
-  const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = { \
-      {VR, VR, VR, VR, UB, UB, UB, UB, 0, 0, 0, 0, 0, 0, 0, 0}, \
-      {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0}, \
-      {BR, BG, BB, YB, 0, 0, 0, 0}, \
-      {0x0101 * YG, YG, 0, 0}};
+// clang-format off
+#if defined(__aarch64__) || defined(__arm__)
+// Bias values include subtract 128 from U and V, bias from Y and rounding.
+// For B and R bias is negative. For G bias is positive.
+#define YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR) \
+  {{UB, VR, UG, VG, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, \
+   {YG, (UB * 128 - YB), (UG * 128 + VG * 128 + YB), (VR * 128 - YB), YB, 0, \
+    0, 0}}
 #else
-#define MAKEYUVCONSTANTS(name, YG, YB, UB, UG, VG, VR, BB, BG, BR) \
-  const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = { \
-      {-UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, \
-       -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0}, \
-      {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, \
-       UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, \
-      {0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, \
-       0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR}, \
-      {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, \
-      {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, \
-      {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, \
-      {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, \
-      {YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB}}; \
-  const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = { \
-      {-VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, \
-       -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0}, \
-      {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, \
-       VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG}, \
-      {0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, \
-       0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB}, \
-      {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, \
-      {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, \
-      {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, \
-      {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, \
-      {YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB}};
+#define YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR) \
+  {{UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, \
+    UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0}, \
+   {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, \
+    UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, \
+   {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, \
+    0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR}, \
+   {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, \
+   {YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB}}
 #endif
 
-// TODO(fbarchard): Generate SIMD structures from float matrix.
+// clang-format on
 
-// Bias values to round, and subtract 128 from U and V.
-#define BB (-UB * 128 + YB)
-#define BG (UG * 128 + VG * 128 + YB)
-#define BR (-VR * 128 + YB)
+#define MAKEYUVCONSTANTS(name, YG, YB, UB, UG, VG, VR) \
+  const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = \
+      YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR); \
+  const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = \
+      YUVCONSTANTSBODY(YG, YB, VR, VG, UG, UB);
+
+// TODO(fbarchard): Generate SIMD structures from float matrix.
 
 // BT.601 limited range YUV to RGB reference
 // R = (Y - 16) * 1.164 + V * 1.596
@@ -1395,7 +1457,11 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
 // KR = 0.299; KB = 0.114
 
 // U and V contributions to R,G,B.
+#ifdef LIBYUV_UNLIMITED_DATA
+#define UB 129 /* round(2.018 * 64) */
+#else
 #define UB 128 /* max(128, round(2.018 * 64)) */
+#endif
 #define UG 25 /* round(0.391 * 64) */
 #define VG 52 /* round(0.813 * 64) */
 #define VR 102 /* round(1.596 * 64) */
@@ -1404,7 +1470,7 @@
 #define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
 #define YB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
 
-MAKEYUVCONSTANTS(I601, YG, YB, UB, UG, VG, VR, BB, BG, BR)
+MAKEYUVCONSTANTS(I601, YG, YB, UB, UG, VG, VR)
 
 #undef YG
 #undef YB
@@ -1429,7 +1495,7 @@ MAKEYUVCONSTANTS(I601, YG, YB, UB, UG, VG, VR, BB, BG, BR)
 #define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
 #define YB 32 /* 64 / 2 */
 
-MAKEYUVCONSTANTS(JPEG, YG, YB, UB, UG, VG, VR, BB, BG, BR)
+MAKEYUVCONSTANTS(JPEG, YG, YB, UB, UG, VG, VR)
 
 #undef YG
 #undef YB
@@ -1444,9 +1510,12 @@ MAKEYUVCONSTANTS(JPEG, YG, YB, UB, UG, VG, VR, BB, BG, BR)
 // B = (Y - 16) * 1.164 + U * 2.112
 // KR = 0.2126, KB = 0.0722
 
-// TODO(fbarchard): Find way to express 2.112 instead of 2.0.
 // U and V contributions to R,G,B.
+#ifdef LIBYUV_UNLIMITED_DATA
+#define UB 135 /* round(2.112 * 64) */
+#else
 #define UB 128 /* max(128, round(2.112 * 64)) */
+#endif
 #define UG 14 /* round(0.213 * 64) */
 #define VG 34 /* round(0.533 * 64) */
 #define VR 115 /* round(1.793 * 64) */
@@ -1455,7 +1524,7 @@ MAKEYUVCONSTANTS(JPEG, YG, YB, UB, UG, VG, VR, BB, BG, BR)
 #define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
 #define YB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
 
-MAKEYUVCONSTANTS(H709, YG, YB, UB, UG, VG, VR, BB, BG, BR)
+MAKEYUVCONSTANTS(H709, YG, YB, UB, UG, VG, VR)
 
 #undef YG
 #undef YB
@@ -1480,7 +1549,7 @@ MAKEYUVCONSTANTS(H709, YG, YB, UB, UG, VG, VR, BB, BG, BR)
 #define YG 16320 /* round(1 * 64 * 256 * 256 / 257) */
 #define YB 32 /* 64 / 2 */
 
-MAKEYUVCONSTANTS(F709, YG, YB, UB, UG, VG, VR, BB, BG, BR)
+MAKEYUVCONSTANTS(F709, YG, YB, UB, UG, VG, VR)
 
 #undef YG
 #undef YB
@@ -1495,9 +1564,12 @@ MAKEYUVCONSTANTS(F709, YG, YB, UB, UG, VG, VR, BB, BG, BR)
 // B = (Y - 16) * 1.164384 + U * 2.14177
 // KR = 0.2627; KB = 0.0593
 
-// TODO(fbarchard): Improve accuracy; the B channel is off by 7%.
 // U and V contributions to R,G,B.
+#ifdef LIBYUV_UNLIMITED_DATA
+#define UB 137 /* round(2.142 * 64) */
+#else
 #define UB 128 /* max(128, round(2.142 * 64)) */
+#endif
 #define UG 12 /* round(0.187326 * 64) */
 #define VG 42 /* round(0.65042 * 64) */
 #define VR 107 /* round(1.67867 * 64) */
@@ -1506,7 +1578,7 @@ MAKEYUVCONSTANTS(F709, YG, YB, UB, UG, VG, VR, BB, BG, BR)
 #define YG 19003 /* round(1.164384 * 64 * 256 * 256 / 257) */
 #define YB -1160 /* 1.164384 * 64 * -16 + 64 / 2 */
 
-MAKEYUVCONSTANTS(2020, YG, YB, UB, UG, VG, VR, BB, BG, BR)
+MAKEYUVCONSTANTS(2020, YG, YB, UB, UG, VG, VR)
 
 #undef YG
 #undef YB
@@ -1530,7 +1602,7 @@ MAKEYUVCONSTANTS(2020, YG, YB, UB, UG, VG, VR, BB, BG, BR)
 #define YG 16320 /* round(1 * 64 * 256 * 256 / 257) */
 #define YB 32 /* 64 / 2 */
 
-MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR, BB, BG, BR)
+MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR)
 
 #undef YG
 #undef YB
@@ -1545,6 +1617,42 @@ MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR, BB, BG, BR)
 
 #undef MAKEYUVCONSTANTS
 
+#if defined(__aarch64__) || defined(__arm__)
+#define LOAD_YUV_CONSTANTS \
+  int ub = yuvconstants->kUVCoeff[0]; \
+  int vr = yuvconstants->kUVCoeff[1]; \
+  int ug = yuvconstants->kUVCoeff[2]; \
+  int vg = yuvconstants->kUVCoeff[3]; \
+  int yg = yuvconstants->kRGBCoeffBias[0]; \
+  int bb = yuvconstants->kRGBCoeffBias[1]; \
+  int bg = yuvconstants->kRGBCoeffBias[2]; \
+  int br = yuvconstants->kRGBCoeffBias[3]
+
+#define CALC_RGB16 \
+  int32_t y1 = (uint32_t)(y32 * yg) >> 16; \
+  int b16 = y1 + (u * ub) - bb; \
+  int g16 = y1 + bg - (u * ug + v * vg); \
+  int r16 = y1 + (v * vr) - br
+#else
+#define LOAD_YUV_CONSTANTS \
+  int ub = yuvconstants->kUVToB[0]; \
+  int ug = yuvconstants->kUVToG[0]; \
+  int vg = yuvconstants->kUVToG[1]; \
+  int vr = yuvconstants->kUVToR[1]; \
+  int yg = yuvconstants->kYToRgb[0]; \
+  int yb = yuvconstants->kYBiasToRgb[0]
+
+#define CALC_RGB16 \
+  int32_t y1 = ((uint32_t)(y32 * yg) >> 16) + yb; \
+  int8_t ui = u; \
+  int8_t vi = v; \
+  ui -= 0x80; \
+  vi -= 0x80; \
+  int b16 = y1 + (ui * ub); \
+  int g16 = y1 - (ui * ug + vi * vg); \
+  int r16 = y1 + (vi * vr)
+#endif
+
 // C reference code that mimics the YUV assembly.
 // Reads 8 bit YUV and leaves result as 16 bit.
 static __inline void YuvPixel(uint8_t y,
@@ -1554,39 +1662,12 @@ static __inline void YuvPixel(uint8_t y,
                               uint8_t* g,
                               uint8_t* r,
                               const struct YuvConstants* yuvconstants) {
-#if defined(__aarch64__)
-  int ub = -yuvconstants->kUVToRB[0];
-  int ug = yuvconstants->kUVToG[0];
-  int vg = yuvconstants->kUVToG[1];
-  int vr = -yuvconstants->kUVToRB[1];
-  int bb = yuvconstants->kUVBiasBGR[0];
-  int bg = yuvconstants->kUVBiasBGR[1];
-  int br = yuvconstants->kUVBiasBGR[2];
-  int yg = yuvconstants->kYToRgb[1];
-#elif defined(__arm__)
-  int ub = -yuvconstants->kUVToRB[0];
-  int ug = yuvconstants->kUVToG[0];
-  int vg = yuvconstants->kUVToG[4];
-  int vr = -yuvconstants->kUVToRB[4];
-  int bb = yuvconstants->kUVBiasBGR[0];
-  int bg = yuvconstants->kUVBiasBGR[1];
-  int br = yuvconstants->kUVBiasBGR[2];
-  int yg = yuvconstants->kYToRgb[1];
-#else
-  int ub = yuvconstants->kUVToB[0];
-  int ug = yuvconstants->kUVToG[0];
-  int vg = yuvconstants->kUVToG[1];
-  int vr = yuvconstants->kUVToR[1];
-  int bb = yuvconstants->kUVBiasB[0];
-  int bg = yuvconstants->kUVBiasG[0];
-  int br = yuvconstants->kUVBiasR[0];
-  int yg = yuvconstants->kYToRgb[0];
-#endif
-
-  uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
-  *b = Clamp((int32_t)(y1 + -(u * ub) + bb) >> 6);
-  *g = Clamp((int32_t)(y1 + -(u * ug + v * vg) + bg) >> 6);
-  *r = Clamp((int32_t)(y1 + -(v * vr) + br) >> 6);
+  LOAD_YUV_CONSTANTS;
+  uint32_t y32 = y * 0x0101;
+  CALC_RGB16;
+  *b = Clamp((int32_t)(b16) >> 6);
+  *g = Clamp((int32_t)(g16) >> 6);
+  *r = Clamp((int32_t)(r16) >> 6);
 }
 
 // Reads 8 bit YUV and leaves result as 16 bit.
@@ -1597,85 +1678,50 @@ static __inline void YuvPixel8_16(uint8_t y,
                                   int* g,
                                   int* r,
                                   const struct YuvConstants* yuvconstants) {
-#if defined(__aarch64__)
-  int ub = -yuvconstants->kUVToRB[0];
-  int ug = yuvconstants->kUVToG[0];
-  int vg = yuvconstants->kUVToG[1];
-  int vr = -yuvconstants->kUVToRB[1];
-  int bb = yuvconstants->kUVBiasBGR[0];
-  int bg = yuvconstants->kUVBiasBGR[1];
-  int br = yuvconstants->kUVBiasBGR[2];
-  int yg = yuvconstants->kYToRgb[1];
-#elif defined(__arm__)
-  int ub = -yuvconstants->kUVToRB[0];
-  int ug = yuvconstants->kUVToG[0];
-  int vg = yuvconstants->kUVToG[4];
-  int vr = -yuvconstants->kUVToRB[4];
-  int bb = yuvconstants->kUVBiasBGR[0];
-  int bg = yuvconstants->kUVBiasBGR[1];
-  int br = yuvconstants->kUVBiasBGR[2];
-  int yg = yuvconstants->kYToRgb[1];
-#else
-  int ub = yuvconstants->kUVToB[0];
-  int ug = yuvconstants->kUVToG[0];
-  int vg = yuvconstants->kUVToG[1];
-  int vr = yuvconstants->kUVToR[1];
-  int bb = yuvconstants->kUVBiasB[0];
-  int bg = yuvconstants->kUVBiasG[0];
-  int br = yuvconstants->kUVBiasR[0];
-  int yg = yuvconstants->kYToRgb[0];
-#endif
-
-  uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
-  *b = (int)(-(u * ub) + y1 + bb);
-  *g = (int)(-(u * ug + v * vg) + y1 + bg);
-  *r = (int)(-(v * vr) + y1 + br);
+  LOAD_YUV_CONSTANTS;
+  uint32_t y32 = y * 0x0101;
+  CALC_RGB16;
+  *b = b16;
+  *g = g16;
+  *r = r16;
 }
 
 // C reference code that mimics the YUV 16 bit assembly.
 // Reads 10 bit YUV and leaves result as 16 bit.
-static __inline void YuvPixel16(int16_t y,
-                                int16_t u,
-                                int16_t v,
-                                int* b,
-                                int* g,
-                                int* r,
-                                const struct YuvConstants* yuvconstants) {
-#if defined(__aarch64__)
-  int ub = -yuvconstants->kUVToRB[0];
-  int ug = yuvconstants->kUVToG[0];
-  int vg = yuvconstants->kUVToG[1];
-  int vr = -yuvconstants->kUVToRB[1];
-  int bb = yuvconstants->kUVBiasBGR[0];
-  int bg = yuvconstants->kUVBiasBGR[1];
-  int br = yuvconstants->kUVBiasBGR[2];
-  int yg = yuvconstants->kYToRgb[1];
-#elif defined(__arm__)
-  int ub = -yuvconstants->kUVToRB[0];
-  int ug = yuvconstants->kUVToG[0];
-  int vg = yuvconstants->kUVToG[4];
-  int vr = -yuvconstants->kUVToRB[4];
-  int bb = yuvconstants->kUVBiasBGR[0];
-  int bg = yuvconstants->kUVBiasBGR[1];
-  int br = yuvconstants->kUVBiasBGR[2];
-  int yg = yuvconstants->kYToRgb[1];
-#else
-  int ub = yuvconstants->kUVToB[0];
-  int ug = yuvconstants->kUVToG[0];
-  int vg = yuvconstants->kUVToG[1];
-  int vr = yuvconstants->kUVToR[1];
-  int bb = yuvconstants->kUVBiasB[0];
-  int bg = yuvconstants->kUVBiasG[0];
-  int br = yuvconstants->kUVBiasR[0];
-  int yg = yuvconstants->kYToRgb[0];
-#endif
-
-  uint32_t y1 = (uint32_t)((y << 6) * yg) >> 16;
+static __inline void YuvPixel10_16(uint16_t y,
+                                   uint16_t u,
+                                   uint16_t v,
+                                   int* b,
+                                   int* g,
+                                   int* r,
+                                   const struct YuvConstants* yuvconstants) {
+  LOAD_YUV_CONSTANTS;
+  uint32_t y32 = y << 6;
   u = clamp255(u >> 2);
   v = clamp255(v >> 2);
-  *b = (int)(-(u * ub) + y1 + bb);
-  *g = (int)(-(u * ug + v * vg) + y1 + bg);
-  *r = (int)(-(v * vr) + y1 + br);
+  CALC_RGB16;
+  *b = b16;
+  *g = g16;
+  *r = r16;
+}
+
+// C reference code that mimics the YUV 16 bit assembly.
+// Reads 12 bit YUV and leaves result as 16 bit.
+static __inline void YuvPixel12_16(int16_t y,
+                                   int16_t u,
+                                   int16_t v,
+                                   int* b,
+                                   int* g,
+                                   int* r,
+                                   const struct YuvConstants* yuvconstants) {
+  LOAD_YUV_CONSTANTS;
+  uint32_t y32 = y << 4;
+  u = clamp255(u >> 4);
+  v = clamp255(v >> 4);
+  CALC_RGB16;
+  *b = b16;
+  *g = g16;
+  *r = r16;
 }
 
 // C reference code that mimics the YUV 10 bit assembly.
@@ -1690,22 +1736,78 @@ static __inline void YuvPixel10(uint16_t y,
   int b16;
   int g16;
   int r16;
-  YuvPixel16(y, u, v, &b16, &g16, &r16, yuvconstants);
+  YuvPixel10_16(y, u, v, &b16, &g16, &r16, yuvconstants);
+  *b = Clamp(b16 >> 6);
+  *g = Clamp(g16 >> 6);
+  *r = Clamp(r16 >> 6);
+}
+
+// C reference code that mimics the YUV 12 bit assembly.
+// Reads 12 bit YUV and clamps down to 8 bit RGB.
+static __inline void YuvPixel12(uint16_t y,
+                                uint16_t u,
+                                uint16_t v,
+                                uint8_t* b,
+                                uint8_t* g,
+                                uint8_t* r,
+                                const struct YuvConstants* yuvconstants) {
+  int b16;
+  int g16;
+  int r16;
+  YuvPixel12_16(y, u, v, &b16, &g16, &r16, yuvconstants);
   *b = Clamp(b16 >> 6);
   *g = Clamp(g16 >> 6);
   *r = Clamp(r16 >> 6);
 }
 
+// C reference code that mimics the YUV 16 bit assembly.
+// Reads 16 bit YUV and leaves result as 8 bit.
+static __inline void YuvPixel16_8(uint16_t y,
+                                  uint16_t u,
+                                  uint16_t v,
+                                  uint8_t* b,
+                                  uint8_t* g,
+                                  uint8_t* r,
+                                  const struct YuvConstants* yuvconstants) {
+  LOAD_YUV_CONSTANTS;
+  uint32_t y32 = y;
+  u = clamp255(u >> 8);
+  v = clamp255(v >> 8);
+  CALC_RGB16;
+  *b = Clamp((int32_t)(b16) >> 6);
+  *g = Clamp((int32_t)(g16) >> 6);
+  *r = Clamp((int32_t)(r16) >> 6);
+}
+
+// C reference code that mimics the YUV 16 bit assembly.
+// Reads 16 bit YUV and leaves result as 16 bit.
+static __inline void YuvPixel16_16(uint16_t y,
+                                   uint16_t u,
+                                   uint16_t v,
+                                   int* b,
+                                   int* g,
+                                   int* r,
+                                   const struct YuvConstants* yuvconstants) {
+  LOAD_YUV_CONSTANTS;
+  uint32_t y32 = y;
+  u = clamp255(u >> 8);
+  v = clamp255(v >> 8);
+  CALC_RGB16;
+  *b = b16;
+  *g = g16;
+  *r = r16;
+}
+
 // C reference code that mimics the YUV assembly.
-// Reads 8 bit YUV and leaves result as 16 bit.
+// Reads 8 bit YUV and leaves result as 8 bit.
 static __inline void YPixel(uint8_t y,
                             uint8_t* b,
                             uint8_t* g,
                             uint8_t* r,
                             const struct YuvConstants* yuvconstants) {
 #if defined(__aarch64__) || defined(__arm__)
-  int ygb = yuvconstants->kUVBiasBGR[3];
-  int yg = yuvconstants->kYToRgb[1];
+  int yg = yuvconstants->kRGBCoeffBias[0];
+  int ygb = yuvconstants->kRGBCoeffBias[4];
 #else
   int ygb = yuvconstants->kYBiasToRgb[0];
   int yg = yuvconstants->kYToRgb[0];
@@ -1716,38 +1818,6 @@ static __inline void YPixel(uint8_t y,
   *r = Clamp(((int32_t)(y1) + ygb) >> 6);
 }
 
-#if !defined(LIBYUV_DISABLE_NEON) && \
-    (defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON))
-// C mimic assembly.
-// TODO(fbarchard): Remove subsampling from Neon.
-void I444ToARGBRow_C(const uint8_t* src_y,
-                     const uint8_t* src_u,
-                     const uint8_t* src_v,
-                     uint8_t* rgb_buf,
-                     const struct YuvConstants* yuvconstants,
-                     int width) {
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    uint8_t u = (src_u[0] + src_u[1] + 1) >> 1;
-    uint8_t v = (src_v[0] + src_v[1] + 1) >> 1;
-    YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
-             yuvconstants);
-    rgb_buf[3] = 255;
-    YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
-             yuvconstants);
-    rgb_buf[7] = 255;
-    src_y += 2;
-    src_u += 2;
-    src_v += 2;
-    rgb_buf += 8;  // Advance 2 pixels.
-  }
-  if (width & 1) {
-    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
-             rgb_buf + 2, yuvconstants);
-    rgb_buf[3] = 255;
-  }
-}
-#else
 void I444ToARGBRow_C(const uint8_t* src_y,
                      const uint8_t* src_u,
                      const uint8_t* src_v,
@@ -1765,7 +1835,6 @@ void I444ToARGBRow_C(const uint8_t* src_y,
     rgb_buf += 4;  // Advance 1 pixel.
   }
 }
-#endif
 
 // Also used for 420
 void I422ToARGBRow_C(const uint8_t* src_y,
@@ -1821,9 +1890,102 @@ void I210ToARGBRow_C(const uint16_t* src_y,
   }
 }
 
+void I410ToARGBRow_C(const uint16_t* src_y,
+                     const uint16_t* src_u,
+                     const uint16_t* src_v,
+                     uint8_t* rgb_buf,
+                     const struct YuvConstants* yuvconstants,
+                     int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
+               rgb_buf + 2, yuvconstants);
+    rgb_buf[3] = 255;
+    src_y += 1;
+    src_u += 1;
+    src_v += 1;
+    rgb_buf += 4;  // Advance 1 pixels.
+  }
+}
+
+void I210AlphaToARGBRow_C(const uint16_t* src_y,
+                          const uint16_t* src_u,
+                          const uint16_t* src_v,
+                          const uint16_t* src_a,
+                          uint8_t* rgb_buf,
+                          const struct YuvConstants* yuvconstants,
+                          int width) {
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
+               rgb_buf + 2, yuvconstants);
+    rgb_buf[3] = clamp255(src_a[0] >> 2);
+    YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
+               rgb_buf + 6, yuvconstants);
+    rgb_buf[7] = clamp255(src_a[1] >> 2);
+    src_y += 2;
+    src_u += 1;
+    src_v += 1;
+    src_a += 2;
+    rgb_buf += 8;  // Advance 2 pixels.
+  }
+  if (width & 1) {
+    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
+               rgb_buf + 2, yuvconstants);
+    rgb_buf[3] = clamp255(src_a[0] >> 2);
+  }
+}
+
+void I410AlphaToARGBRow_C(const uint16_t* src_y,
+                          const uint16_t* src_u,
+                          const uint16_t* src_v,
+                          const uint16_t* src_a,
+                          uint8_t* rgb_buf,
+                          const struct YuvConstants* yuvconstants,
+                          int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
+               rgb_buf + 2, yuvconstants);
+    rgb_buf[3] = clamp255(src_a[0] >> 2);
+    src_y += 1;
+    src_u += 1;
+    src_v += 1;
+    src_a += 1;
+    rgb_buf += 4;  // Advance 1 pixels.
+  }
+}
+
+// 12 bit YUV to ARGB
+void I212ToARGBRow_C(const uint16_t* src_y,
+                     const uint16_t* src_u,
+                     const uint16_t* src_v,
+                     uint8_t* rgb_buf,
+                     const struct YuvConstants* yuvconstants,
+                     int width) {
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    YuvPixel12(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
+               rgb_buf + 2, yuvconstants);
+    rgb_buf[3] = 255;
+    YuvPixel12(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
+               rgb_buf + 6, yuvconstants);
+    rgb_buf[7] = 255;
+    src_y += 2;
+    src_u += 1;
+    src_v += 1;
+    rgb_buf += 8;  // Advance 2 pixels.
+  }
+  if (width & 1) {
+    YuvPixel12(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
+               rgb_buf + 2, yuvconstants);
+    rgb_buf[3] = 255;
+  }
+}
+
 static void StoreAR30(uint8_t* rgb_buf, int b, int g, int r) {
   uint32_t ar30;
-  b = b >> 4;  // convert 10.6 to 10 bit.
+  b = b >> 4;  // convert 8 bit 10.6 to 10 bit.
   g = g >> 4;
   r = r >> 4;
   b = Clamp10(b);
@@ -1845,9 +2007,9 @@ void I210ToAR30Row_C(const uint16_t* src_y,
   int g;
   int r;
   for (x = 0; x < width - 1; x += 2) {
-    YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+    YuvPixel10_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
     StoreAR30(rgb_buf, b, g, r);
-    YuvPixel16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+    YuvPixel10_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
     StoreAR30(rgb_buf + 4, b, g, r);
    src_y += 2;
     src_u += 1;
@@ -1855,16 +2017,15 @@ void I210ToAR30Row_C(const uint16_t* src_y,
     rgb_buf += 8;  // Advance 2 pixels.
   }
   if (width & 1) {
-    YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+    YuvPixel10_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
     StoreAR30(rgb_buf, b, g, r);
   }
 }
 
-// 8 bit YUV to 10 bit AR30
-// Uses same code as 10 bit YUV bit shifts the 8 bit values up to 10 bits.
-void I422ToAR30Row_C(const uint8_t* src_y,
-                     const uint8_t* src_u,
-                     const uint8_t* src_v,
+// 12 bit YUV to 10 bit AR30
+void I212ToAR30Row_C(const uint16_t* src_y,
+                     const uint16_t* src_u,
+                     const uint16_t* src_v,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
@@ -1873,9 +2034,9 @@ void I422ToAR30Row_C(const uint8_t* src_y,
   int g;
   int r;
   for (x = 0; x < width - 1; x += 2) {
-    YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+    YuvPixel12_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
     StoreAR30(rgb_buf, b, g, r);
-    YuvPixel8_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+    YuvPixel12_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
     StoreAR30(rgb_buf + 4, b, g, r);
     src_y += 2;
     src_u += 1;
@@ -1883,45 +2044,142 @@ void I422ToAR30Row_C(const uint8_t* src_y,
     rgb_buf += 8;  // Advance 2 pixels.
   }
   if (width & 1) {
-    YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+    YuvPixel12_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
     StoreAR30(rgb_buf, b, g, r);
   }
 }
 
-#if !defined(LIBYUV_DISABLE_NEON) && \
-    (defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON))
-// C mimic assembly.
-// TODO(fbarchard): Remove subsampling from Neon.
-void I444AlphaToARGBRow_C(const uint8_t* src_y,
-                          const uint8_t* src_u,
-                          const uint8_t* src_v,
-                          const uint8_t* src_a,
-                          uint8_t* rgb_buf,
-                          const struct YuvConstants* yuvconstants,
-                          int width) {
+void I410ToAR30Row_C(const uint16_t* src_y,
+                     const uint16_t* src_u,
+                     const uint16_t* src_v,
+                     uint8_t* rgb_buf,
+                     const struct YuvConstants* yuvconstants,
+                     int width) {
+  int x;
+  int b;
+  int g;
+  int r;
+  for (x = 0; x < width; ++x) {
+    YuvPixel10_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+    StoreAR30(rgb_buf, b, g, r);
+    src_y += 1;
+    src_u += 1;
+    src_v += 1;
+    rgb_buf += 4;  // Advance 1 pixel.
+  }
+}
+
+// P210 has 10 bits in msb of 16 bit NV12 style layout.
+void P210ToARGBRow_C(const uint16_t* src_y,
+                     const uint16_t* src_uv,
+                     uint8_t* dst_argb,
+                     const struct YuvConstants* yuvconstants,
+                     int width) {
   int x;
   for (x = 0; x < width - 1; x += 2) {
-    uint8_t u = (src_u[0] + src_u[1] + 1) >> 1;
-    uint8_t v = (src_v[0] + src_v[1] + 1) >> 1;
-    YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
-             yuvconstants);
-    rgb_buf[3] = src_a[0];
-    YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
-             yuvconstants);
-    rgb_buf[7] = src_a[1];
+    YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], dst_argb + 0, dst_argb + 1,
+                 dst_argb + 2, yuvconstants);
+    dst_argb[3] = 255;
+    YuvPixel16_8(src_y[1], src_uv[0], src_uv[1], dst_argb + 4, dst_argb + 5,
+                 dst_argb + 6, yuvconstants);
+    dst_argb[7] = 255;
     src_y += 2;
-    src_u += 2;
-    src_v += 2;
-    src_a += 2;
+    src_uv += 2;
+    dst_argb += 8;  // Advance 2 pixels.
+  }
+  if (width & 1) {
+    YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], dst_argb + 0, dst_argb + 1,
+                 dst_argb + 2, yuvconstants);
+    dst_argb[3] = 255;
+  }
+}
+
+void P410ToARGBRow_C(const uint16_t* src_y,
+                     const uint16_t* src_uv,
+                     uint8_t* dst_argb,
+                     const struct YuvConstants* yuvconstants,
+                     int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], dst_argb + 0, dst_argb + 1,
+                 dst_argb + 2, yuvconstants);
+    dst_argb[3] = 255;
+    src_y += 1;
+    src_uv += 2;
+    dst_argb += 4;  // Advance 1 pixels.
+  }
+}
+
+void P210ToAR30Row_C(const uint16_t* src_y,
+                     const uint16_t* src_uv,
+                     uint8_t* dst_ar30,
+                     const struct YuvConstants* yuvconstants,
+                     int width) {
+  int x;
+  int b;
+  int g;
+  int r;
+  for (x = 0; x < width - 1; x += 2) {
+    YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
+    StoreAR30(dst_ar30, b, g, r);
+    YuvPixel16_16(src_y[1], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
+    StoreAR30(dst_ar30 + 4, b, g, r);
+    src_y += 2;
+    src_uv += 2;
+    dst_ar30 += 8;  // Advance 2 pixels.
+  }
+  if (width & 1) {
+    YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
+    StoreAR30(dst_ar30, b, g, r);
+  }
+}
+
+void P410ToAR30Row_C(const uint16_t* src_y,
+                     const uint16_t* src_uv,
+                     uint8_t* dst_ar30,
+                     const struct YuvConstants* yuvconstants,
+                     int width) {
+  int x;
+  int b;
+  int g;
+  int r;
+  for (x = 0; x < width; ++x) {
+    YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
+    StoreAR30(dst_ar30, b, g, r);
+    src_y += 1;
+    src_uv += 2;
+    dst_ar30 += 4;  // Advance 1 pixel.
+  }
+}
+
+// 8 bit YUV to 10 bit AR30
+// Uses same code as 10 bit YUV bit shifts the 8 bit values up to 10 bits.
+void I422ToAR30Row_C(const uint8_t* src_y,
+                     const uint8_t* src_u,
+                     const uint8_t* src_v,
+                     uint8_t* rgb_buf,
+                     const struct YuvConstants* yuvconstants,
+                     int width) {
+  int x;
+  int b;
+  int g;
+  int r;
+  for (x = 0; x < width - 1; x += 2) {
+    YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+    StoreAR30(rgb_buf, b, g, r);
+    YuvPixel8_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+    StoreAR30(rgb_buf + 4, b, g, r);
+    src_y += 2;
+    src_u += 1;
+    src_v += 1;
     rgb_buf += 8;  // Advance 2 pixels.
   }
   if (width & 1) {
-    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
-             rgb_buf + 2, yuvconstants);
-    rgb_buf[3] = src_a[0];
+    YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+    StoreAR30(rgb_buf, b, g, r);
   }
 }
-#else
+
 void I444AlphaToARGBRow_C(const uint8_t* src_y,
                           const uint8_t* src_u,
                           const uint8_t* src_v,
@@ -1941,7 +2199,6 @@ void I444AlphaToARGBRow_C(const uint8_t* src_y,
     rgb_buf += 4;  // Advance 1 pixel.
   }
 }
-#endif
 
 void I422AlphaToARGBRow_C(const uint8_t* src_y,
                           const uint8_t* src_u,
@@ -2492,6 +2749,105 @@ void MergeARGBRow_C(const uint8_t* src_r,
   }
 }
 
+void MergeXR30Row_C(const uint16_t* src_r,
+                    const uint16_t* src_g,
+                    const uint16_t* src_b,
+                    uint8_t* dst_ar30,
+                    int depth,
+                    int width) {
+  assert(depth >= 10);
+  assert(depth <= 16);
+  int x;
+  int shift = depth - 10;
+  uint32_t* dst_ar30_32 = (uint32_t*)dst_ar30;
+  for (x = 0; x < width; ++x) {
+    uint32_t r = clamp1023(src_r[x] >> shift);
+    uint32_t g = clamp1023(src_g[x] >> shift);
+    uint32_t b = clamp1023(src_b[x] >> shift);
+    dst_ar30_32[x] = b | (g << 10) | (r << 20) | 0xc0000000;
+  }
+}
+
+void MergeAR64Row_C(const uint16_t* src_r,
+                    const uint16_t* src_g,
+                    const uint16_t* src_b,
+                    const uint16_t* src_a,
+                    uint16_t* dst_ar64,
+                    int depth,
+                    int width) {
+  assert(depth >= 1);
+  assert(depth <= 16);
+  int x;
+  int shift = 16 - depth;
+  int max = (1 << depth) - 1;
+  for (x = 0; x < width; ++x) {
+    dst_ar64[0] = ClampMax(src_b[x], max) << shift;
+    dst_ar64[1] = ClampMax(src_g[x], max) << shift;
+    dst_ar64[2] = ClampMax(src_r[x], max) << shift;
+    dst_ar64[3] = ClampMax(src_a[x], max) << shift;
+    dst_ar64 += 4;
+  }
+}
+
+void MergeARGB16To8Row_C(const uint16_t* src_r,
+                         const uint16_t* src_g,
+                         const uint16_t* src_b,
+                         const uint16_t* src_a,
+                         uint8_t* dst_argb,
+                         int depth,
+                         int width) {
+  assert(depth >= 8);
+  assert(depth <= 16);
+  int x;
+  int shift = depth - 8;
+  for (x = 0; x < width; ++x) {
+    dst_argb[0] = clamp255(src_b[x] >> shift);
+    dst_argb[1] = clamp255(src_g[x] >> shift);
+    dst_argb[2] = clamp255(src_r[x] >> shift);
+    dst_argb[3] = clamp255(src_a[x] >> shift);
+    dst_argb += 4;
+  }
+}
+
+void MergeXR64Row_C(const uint16_t* src_r,
+                    const uint16_t* src_g,
+                    const uint16_t* src_b,
+                    uint16_t* dst_ar64,
+                    int depth,
+                    int width) {
+  assert(depth >= 1);
+  assert(depth <= 16);
+  int x;
+  int shift = 16 - depth;
+  int max = (1 << depth) - 1;
+  for (x = 0; x < width; ++x) {
+    dst_ar64[0] = ClampMax(src_b[x], max) << shift;
+    dst_ar64[1] = ClampMax(src_g[x], max) << shift;
+    dst_ar64[2] = ClampMax(src_r[x], max) << shift;
+    dst_ar64[3] = 0xffff;
+    dst_ar64 += 4;
+  }
+}
+
+void MergeXRGB16To8Row_C(const uint16_t* src_r,
+                         const uint16_t* src_g,
+                         const uint16_t* src_b,
+                         uint8_t* dst_argb,
+                         int depth,
+                         int width) {
+  assert(depth >= 8);
+  assert(depth <= 16);
+  int x;
+  int shift = depth - 8;
+  for (x = 0; x < width; ++x) {
+    dst_argb[0] = clamp255(src_b[x] >> shift);
+    dst_argb[1] = clamp255(src_g[x] >> shift);
+    dst_argb[2] = clamp255(src_r[x] >> shift);
+    dst_argb[3] = 0xff;
+    dst_argb += 4;
+  }
+}
+
 void SplitXRGBRow_C(const uint8_t* src_argb,
                     uint8_t* dst_r,
                     uint8_t* dst_g,
@@ -2528,6 +2884,8 @@ void MergeUVRow_16_C(const uint16_t* src_u,
                      int depth,
                      int width) {
   int shift = 16 - depth;
+  assert(depth >= 8);
+  assert(depth <= 16);
   int x;
   for (x = 0; x < width; ++x) {
     dst_uv[0] = src_u[x] << shift;
@@ -2544,6 +2902,8 @@ void SplitUVRow_16_C(const uint16_t* src_uv,
                      int width) {
   int shift = 16 - depth;
   int x;
+  assert(depth >= 8);
+  assert(depth <= 16);
   for (x = 0; x < width; ++x) {
     dst_u[x] = src_uv[0] >> shift;
     dst_v[x] = src_uv[1] >> shift;
@@ -2581,6 +2941,9 @@ void Convert16To8Row_C(const uint16_t* src_y,
                        int scale,
                        int width) {
   int x;
+  assert(scale >= 256);
+  assert(scale <= 32768);
+
   for (x = 0; x < width; ++x) {
     dst_y[x] = clamp255((src_y[x] * scale) >> 16);
   }
@@ -2714,19 +3077,19 @@ void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
 
 #define BLEND(f, b, a) clamp255((((256 - a) * b) >> 8) + f)
 
-// Blend src_argb0 over src_argb1 and store to dst_argb.
-// dst_argb may be src_argb0 or src_argb1.
+// Blend src_argb over src_argb1 and store to dst_argb.
+// dst_argb may be src_argb or src_argb1.
 // This code mimics the SSSE3 version for better testability.
-void ARGBBlendRow_C(const uint8_t* src_argb0,
+void ARGBBlendRow_C(const uint8_t* src_argb,
                     const uint8_t* src_argb1,
                     uint8_t* dst_argb,
                     int width) {
   int x;
   for (x = 0; x < width - 1; x += 2) {
-    uint32_t fb = src_argb0[0];
-    uint32_t fg = src_argb0[1];
-    uint32_t fr = src_argb0[2];
-    uint32_t a = src_argb0[3];
+    uint32_t fb = src_argb[0];
+    uint32_t fg = src_argb[1];
+    uint32_t fr = src_argb[2];
+    uint32_t a = src_argb[3];
     uint32_t bb = src_argb1[0];
     uint32_t bg = src_argb1[1];
     uint32_t br = src_argb1[2];
@@ -2735,10 +3098,10 @@ void ARGBBlendRow_C(const uint8_t* src_argb0,
     dst_argb[2] = BLEND(fr, br, a);
     dst_argb[3] = 255u;
 
-    fb = src_argb0[4 + 0];
-    fg = src_argb0[4 + 1];
-    fr = src_argb0[4 + 2];
-    a = src_argb0[4 + 3];
+    fb = src_argb[4 + 0];
+    fg = src_argb[4 + 1];
+    fr = src_argb[4 + 2];
+    a = src_argb[4 + 3];
     bb = src_argb1[4 + 0];
     bg = src_argb1[4 + 1];
     br = src_argb1[4 + 2];
@@ -2746,16 +3109,16 @@ void ARGBBlendRow_C(const uint8_t* src_argb0,
     dst_argb[4 + 1] = BLEND(fg, bg, a);
     dst_argb[4 + 2] = BLEND(fr, br, a);
     dst_argb[4 + 3] = 255u;
-    src_argb0 += 8;
+    src_argb += 8;
     src_argb1 += 8;
     dst_argb += 8;
   }
 
   if (width & 1) {
-    uint32_t fb = src_argb0[0];
-    uint32_t fg = src_argb0[1];
-    uint32_t fr = src_argb0[2];
-    uint32_t a = src_argb0[3];
+    uint32_t fb = src_argb[0];
+    uint32_t fg = src_argb[1];
+    uint32_t fr = src_argb[2];
+    uint32_t a = src_argb[3];
     uint32_t bb = src_argb1[0];
     uint32_t bg = src_argb1[1];
     uint32_t br = src_argb1[2];
@@ -3280,7 +3643,7 @@ void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
 // Maximum temporary width for wrappers to process at a time, in pixels.
 #define MAXTWIDTH 2048
 
-#if !(defined(_MSC_VER) && defined(_M_IX86)) && \
+#if !(defined(_MSC_VER) && !defined(__clang__) && defined(_M_IX86)) && \
     defined(HAS_I422TORGB565ROW_SSSE3)
 // row_win.cc has asm version, but GCC uses 2 step wrapper.
 void I422ToRGB565Row_SSSE3(const uint8_t* src_y,
@@ -3747,13 +4110,14 @@ void NV21ToYUV24Row_C(const uint8_t* src_y,
 }
 
 // Filter 2 rows of AYUV UV's (444) into UV (420).
+// AYUV is VUYA in memory. UV for NV12 is UV order in memory.
 void AYUVToUVRow_C(const uint8_t* src_ayuv,
                    int src_stride_ayuv,
                    uint8_t* dst_uv,
                    int width) {
   // Output a row of UV values, filtering 2x2 rows of AYUV.
   int x;
-  for (x = 0; x < width; x += 2) {
+  for (x = 0; x < width - 1; x += 2) {
     dst_uv[0] = (src_ayuv[1] + src_ayuv[5] + src_ayuv[src_stride_ayuv + 1] +
                  src_ayuv[src_stride_ayuv + 5] + 2) >>
                 2;
@@ -3764,12 +4128,8 @@ void AYUVToUVRow_C(const uint8_t* src_ayuv,
     dst_uv += 2;
   }
   if (width & 1) {
-    dst_uv[0] = (src_ayuv[0] + src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] +
-                 src_ayuv[src_stride_ayuv + 0] + 2) >>
-                2;
-    dst_uv[1] = (src_ayuv[1] + src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] +
-                 src_ayuv[src_stride_ayuv + 1] + 2) >>
-                2;
+    dst_uv[0] = (src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] + 1) >> 1;
+    dst_uv[1] = (src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] + 1) >> 1;
   }
 }
 
@@ -3780,7 +4140,7 @@ void AYUVToVURow_C(const uint8_t* src_ayuv,
                    int width) {
   // Output a row of VU values, filtering 2x2 rows of AYUV.
   int x;
-  for (x = 0; x < width; x += 2) {
+  for (x = 0; x < width - 1; x += 2) {
     dst_vu[0] = (src_ayuv[0] + src_ayuv[4] + src_ayuv[src_stride_ayuv + 0] +
                  src_ayuv[src_stride_ayuv + 4] + 2) >>
                 2;
@@ -3791,12 +4151,8 @@ void AYUVToVURow_C(const uint8_t* src_ayuv,
     dst_vu += 2;
   }
   if (width & 1) {
-    dst_vu[0] = (src_ayuv[0] + src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] +
-                 src_ayuv[src_stride_ayuv + 0] + 2) >>
-                2;
-    dst_vu[1] = (src_ayuv[1] + src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] +
-                 src_ayuv[src_stride_ayuv + 1] + 2) >>
-                2;
+    dst_vu[0] = (src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] + 1) >> 1;
+    dst_vu[1] = (src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] + 1) >> 1;
   }
 }
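The new ARGBToAR64Row_C/ARGBToAB64Row_C rows above widen each 8 bit channel to 16 bits by multiplying by 0x0101, which replicates the byte into both halves of the result so that 0x00 maps to 0x0000 and 0xFF maps to 0xFFFF, preserving full range. The sketch below is a minimal standalone illustration of that identity; Widen8To16 and the main() driver are illustrative names, not libyuv APIs.

#include <assert.h>
#include <stdint.h>

// Widen one 8 bit channel to 16 bits: v * 0x0101 == (v << 8) | v.
static uint16_t Widen8To16(uint8_t v) {
  return (uint16_t)(v * 0x0101);
}

int main(void) {
  assert(Widen8To16(0x00) == 0x0000);  // black stays black
  assert(Widen8To16(0xff) == 0xffff);  // white maps to full 16 bit white
  assert(Widen8To16(0x12) == 0x1212);  // byte replicated into both halves
  // Narrowing back (as in AR64ToARGBRow_C) is a plain >> 8 and recovers
  // the original byte exactly.
  assert((Widen8To16(0x12) >> 8) == 0x12);
  return 0;
}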
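The new LOAD_YUV_CONSTANTS/CALC_RGB16 macros centralize the 10.6 fixed point YUV-to-RGB math that the removed per-function #if blocks used to duplicate: Y is replicated to 16 bits, scaled by YG and shifted down, the centered U/V contributions are added, and 6 fractional bits are kept until the final Clamp(x >> 6). Below is a worked sketch of the non-ARM path using the BT.601 limited range constants defined in this diff (UB 128, UG 25, VG 52, VR 102, YG 18997, YB -1160), assuming LIBYUV_UNLIMITED_DATA is not defined; YuvToRgbRef and Clamp8 are hypothetical helper names, not libyuv functions.

#include <assert.h>
#include <stdint.h>

// BT.601 limited-range constants from MAKEYUVCONSTANTS(I601, ...) above.
enum { UB = 128, UG = 25, VG = 52, VR = 102, YG = 18997, YB = -1160 };

static uint8_t Clamp8(int32_t v) {
  return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
}

// Mirrors the non-ARM LOAD_YUV_CONSTANTS/CALC_RGB16 path in 10.6 fixed point.
static void YuvToRgbRef(uint8_t y, uint8_t u, uint8_t v,
                        uint8_t* b, uint8_t* g, uint8_t* r) {
  uint32_t y32 = y * 0x0101;                      // replicate Y to 16 bits
  int32_t y1 = (int32_t)((y32 * YG) >> 16) + YB;  // Y contribution plus bias
  int ui = (int)u - 128;                          // center U and V
  int vi = (int)v - 128;
  *b = Clamp8((y1 + ui * UB) >> 6);               // drop 6 fraction bits
  *g = Clamp8((y1 - (ui * UG + vi * VG)) >> 6);
  *r = Clamp8((y1 + vi * VR) >> 6);
}

int main(void) {
  uint8_t b, g, r;
  YuvToRgbRef(235, 128, 128, &b, &g, &r);  // limited-range white
  assert(b == 255 && g == 255 && r == 255);
  YuvToRgbRef(16, 128, 128, &b, &g, &r);   // limited-range black
  assert(b == 0 && g == 0 && r == 0);
  return 0;
}

With these constants, Y=235 yields y1 = 16346, and 16346 >> 6 = 255, so limited-range white saturates exactly; the LIBYUV_UNLIMITED_DATA variants only change UB (129/135/137) to recover the B coefficient that the signed path clips at 128.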
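The two MAKEROWY/MAKEROWYJ variants in this diff subsample a 2x2 block differently: the Intel path mimics two rounds of pavgb (pairwise rounding averages, staying 8 bit), while the ARM path computes a rounded sum / 2, which is roughly twice the average and is therefore paired with 2x smaller RGB2xTo* coefficients. A small sketch of the arithmetic difference, with made-up pixel values; the AVGB macro matches the one used in this file.

#include <stdint.h>
#include <stdio.h>

#define AVGB(a, b) (((a) + (b) + 1) >> 1)  // same rounding average as pavgb

int main(void) {
  uint8_t p00 = 10, p01 = 11, p10 = 12, p11 = 14;
  // Intel path: two pavgb rounds; result stays 8 bit but can round up twice.
  uint8_t intel = AVGB(AVGB(p00, p10), AVGB(p01, p11));
  // ARM path: one rounded sum / 2, kept at 9 bits for the halved coefficients.
  uint16_t arm2x = (p00 + p01 + p10 + p11 + 1) >> 1;
  printf("intel avg = %d, arm sum/2 = %d (about 2x the average)\n",
         intel, arm2x);
  return 0;
}

Here the true average is 11.75: the Intel path produces 12, and the ARM path produces 24, which the halved coefficients scale back to the same magnitude. The two paths can differ by a least significant bit because of where rounding happens, which is why the C code keeps both variants rather than one reference.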