aboutsummaryrefslogtreecommitdiff
path: root/third_party/libyuv/source/row_common.cc
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/libyuv/source/row_common.cc')
-rw-r--r--third_party/libyuv/source/row_common.cc1230
1 files changed, 793 insertions, 437 deletions
diff --git a/third_party/libyuv/source/row_common.cc b/third_party/libyuv/source/row_common.cc
index a941c3f5fc..c6e412414e 100644
--- a/third_party/libyuv/source/row_common.cc
+++ b/third_party/libyuv/source/row_common.cc
@@ -10,6 +10,7 @@
#include "libyuv/row.h"
+#include <assert.h>
#include <stdio.h>
#include <string.h> // For memcpy and memset.
@@ -21,10 +22,14 @@ namespace libyuv {
extern "C" {
#endif
-// The following ifdef from row_win makes the C code match the row_win code,
-// which is 7 bit fixed point.
+// This macro control YUV to RGB using unsigned math to extend range of
+// YUV to RGB coefficients to 0 to 4 instead of 0 to 2 for more accuracy on B:
+// LIBYUV_UNLIMITED_DATA
+
+// The following macro from row_win makes the C code match the row_win code,
+// which is 7 bit fixed point for ARGBToI420:
#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
- (defined(_M_IX86) || (defined(_M_X64) && !defined(__clang__)))
+ !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64))
#define LIBYUV_RGB7 1
#endif
@@ -50,6 +55,11 @@ static __inline int32_t clamp1023(int32_t v) {
return (-(v >= 1023) | v) & 1023;
}
+// clamp to max
+static __inline int32_t ClampMax(int32_t v, int32_t max) {
+ return (-(v >= max) | v) & max;
+}
+
static __inline uint32_t Abs(int32_t v) {
int m = -(v < 0);
return (v + m) ^ m;
@@ -67,6 +77,10 @@ static __inline int32_t clamp1023(int32_t v) {
return (v > 1023) ? 1023 : v;
}
+static __inline int32_t ClampMax(int32_t v, int32_t max) {
+ return (v > max) ? max : v;
+}
+
static __inline uint32_t Abs(int32_t v) {
return (v < 0) ? -v : v;
}
@@ -413,6 +427,82 @@ void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) {
}
}
+void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ dst_ar64[0] = src_argb[0] * 0x0101;
+ dst_ar64[1] = src_argb[1] * 0x0101;
+ dst_ar64[2] = src_argb[2] * 0x0101;
+ dst_ar64[3] = src_argb[3] * 0x0101;
+ dst_ar64 += 4;
+ src_argb += 4;
+ }
+}
+
+void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ dst_ab64[0] = src_argb[2] * 0x0101;
+ dst_ab64[1] = src_argb[1] * 0x0101;
+ dst_ab64[2] = src_argb[0] * 0x0101;
+ dst_ab64[3] = src_argb[3] * 0x0101;
+ dst_ab64 += 4;
+ src_argb += 4;
+ }
+}
+
+void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ dst_argb[0] = src_ar64[0] >> 8;
+ dst_argb[1] = src_ar64[1] >> 8;
+ dst_argb[2] = src_ar64[2] >> 8;
+ dst_argb[3] = src_ar64[3] >> 8;
+ dst_argb += 4;
+ src_ar64 += 4;
+ }
+}
+
+void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ dst_argb[0] = src_ab64[2] >> 8;
+ dst_argb[1] = src_ab64[1] >> 8;
+ dst_argb[2] = src_ab64[0] >> 8;
+ dst_argb[3] = src_ab64[3] >> 8;
+ dst_argb += 4;
+ src_ab64 += 4;
+ }
+}
+
+// TODO(fbarchard): Make shuffle compatible with SIMD versions
+void AR64ShuffleRow_C(const uint8_t* src_ar64,
+ uint8_t* dst_ar64,
+ const uint8_t* shuffler,
+ int width) {
+ const uint16_t* src_ar64_16 = (const uint16_t*)src_ar64;
+ uint16_t* dst_ar64_16 = (uint16_t*)dst_ar64;
+ int index0 = shuffler[0] / 2;
+ int index1 = shuffler[2] / 2;
+ int index2 = shuffler[4] / 2;
+ int index3 = shuffler[6] / 2;
+ // Shuffle a row of AR64.
+ int x;
+ for (x = 0; x < width / 2; ++x) {
+ // To support in-place conversion.
+ uint16_t b = src_ar64_16[index0];
+ uint16_t g = src_ar64_16[index1];
+ uint16_t r = src_ar64_16[index2];
+ uint16_t a = src_ar64_16[index3];
+ dst_ar64_16[0] = b;
+ dst_ar64_16[1] = g;
+ dst_ar64_16[2] = r;
+ dst_ar64_16[3] = a;
+ src_ar64_16 += 4;
+ dst_ar64_16 += 4;
+ }
+}
+
#ifdef LIBYUV_RGB7
// Old 7 bit math for compatibility on unsupported platforms.
static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
@@ -462,80 +552,80 @@ static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) {
// Intel version mimic SSE/AVX which does 2 pavgb
#if LIBYUV_ARGBTOUV_PAVGB
-#define MAKEROWY(NAME, R, G, B, BPP) \
- void NAME##ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
- int x; \
- for (x = 0; x < width; ++x) { \
- dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \
- src_argb0 += BPP; \
- dst_y += 1; \
- } \
- } \
- void NAME##ToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
- uint8_t* dst_u, uint8_t* dst_v, int width) { \
- const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
- int x; \
- for (x = 0; x < width - 1; x += 2) { \
- uint8_t ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \
- AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \
- uint8_t ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \
- AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \
- uint8_t ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \
- AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \
- dst_u[0] = RGBToU(ar, ag, ab); \
- dst_v[0] = RGBToV(ar, ag, ab); \
- src_rgb0 += BPP * 2; \
- src_rgb1 += BPP * 2; \
- dst_u += 1; \
- dst_v += 1; \
- } \
- if (width & 1) { \
- uint8_t ab = AVGB(src_rgb0[B], src_rgb1[B]); \
- uint8_t ag = AVGB(src_rgb0[G], src_rgb1[G]); \
- uint8_t ar = AVGB(src_rgb0[R], src_rgb1[R]); \
- dst_u[0] = RGBToU(ar, ag, ab); \
- dst_v[0] = RGBToV(ar, ag, ab); \
- } \
+#define MAKEROWY(NAME, R, G, B, BPP) \
+ void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
+ int x; \
+ for (x = 0; x < width; ++x) { \
+ dst_y[0] = RGBToY(src_rgb[R], src_rgb[G], src_rgb[B]); \
+ src_rgb += BPP; \
+ dst_y += 1; \
+ } \
+ } \
+ void NAME##ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
+ uint8_t* dst_u, uint8_t* dst_v, int width) { \
+ const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
+ int x; \
+ for (x = 0; x < width - 1; x += 2) { \
+ uint8_t ab = AVGB(AVGB(src_rgb[B], src_rgb1[B]), \
+ AVGB(src_rgb[B + BPP], src_rgb1[B + BPP])); \
+ uint8_t ag = AVGB(AVGB(src_rgb[G], src_rgb1[G]), \
+ AVGB(src_rgb[G + BPP], src_rgb1[G + BPP])); \
+ uint8_t ar = AVGB(AVGB(src_rgb[R], src_rgb1[R]), \
+ AVGB(src_rgb[R + BPP], src_rgb1[R + BPP])); \
+ dst_u[0] = RGBToU(ar, ag, ab); \
+ dst_v[0] = RGBToV(ar, ag, ab); \
+ src_rgb += BPP * 2; \
+ src_rgb1 += BPP * 2; \
+ dst_u += 1; \
+ dst_v += 1; \
+ } \
+ if (width & 1) { \
+ uint8_t ab = AVGB(src_rgb[B], src_rgb1[B]); \
+ uint8_t ag = AVGB(src_rgb[G], src_rgb1[G]); \
+ uint8_t ar = AVGB(src_rgb[R], src_rgb1[R]); \
+ dst_u[0] = RGBToU(ar, ag, ab); \
+ dst_v[0] = RGBToV(ar, ag, ab); \
+ } \
}
#else
// ARM version does sum / 2 then multiply by 2x smaller coefficients
-#define MAKEROWY(NAME, R, G, B, BPP) \
- void NAME##ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
- int x; \
- for (x = 0; x < width; ++x) { \
- dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \
- src_argb0 += BPP; \
- dst_y += 1; \
- } \
- } \
- void NAME##ToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
- uint8_t* dst_u, uint8_t* dst_v, int width) { \
- const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
- int x; \
- for (x = 0; x < width - 1; x += 2) { \
- uint16_t ab = (src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] + \
- src_rgb1[B + BPP] + 1) >> \
- 1; \
- uint16_t ag = (src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] + \
- src_rgb1[G + BPP] + 1) >> \
- 1; \
- uint16_t ar = (src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] + \
- src_rgb1[R + BPP] + 1) >> \
- 1; \
- dst_u[0] = RGB2xToU(ar, ag, ab); \
- dst_v[0] = RGB2xToV(ar, ag, ab); \
- src_rgb0 += BPP * 2; \
- src_rgb1 += BPP * 2; \
- dst_u += 1; \
- dst_v += 1; \
- } \
- if (width & 1) { \
- uint16_t ab = src_rgb0[B] + src_rgb1[B]; \
- uint16_t ag = src_rgb0[G] + src_rgb1[G]; \
- uint16_t ar = src_rgb0[R] + src_rgb1[R]; \
- dst_u[0] = RGB2xToU(ar, ag, ab); \
- dst_v[0] = RGB2xToV(ar, ag, ab); \
- } \
+#define MAKEROWY(NAME, R, G, B, BPP) \
+ void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
+ int x; \
+ for (x = 0; x < width; ++x) { \
+ dst_y[0] = RGBToY(src_rgb[R], src_rgb[G], src_rgb[B]); \
+ src_rgb += BPP; \
+ dst_y += 1; \
+ } \
+ } \
+ void NAME##ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
+ uint8_t* dst_u, uint8_t* dst_v, int width) { \
+ const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
+ int x; \
+ for (x = 0; x < width - 1; x += 2) { \
+ uint16_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] + \
+ src_rgb1[B + BPP] + 1) >> \
+ 1; \
+ uint16_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] + \
+ src_rgb1[G + BPP] + 1) >> \
+ 1; \
+ uint16_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] + \
+ src_rgb1[R + BPP] + 1) >> \
+ 1; \
+ dst_u[0] = RGB2xToU(ar, ag, ab); \
+ dst_v[0] = RGB2xToV(ar, ag, ab); \
+ src_rgb += BPP * 2; \
+ src_rgb1 += BPP * 2; \
+ dst_u += 1; \
+ dst_v += 1; \
+ } \
+ if (width & 1) { \
+ uint16_t ab = src_rgb[B] + src_rgb1[B]; \
+ uint16_t ag = src_rgb[G] + src_rgb1[G]; \
+ uint16_t ar = src_rgb[R] + src_rgb1[R]; \
+ dst_u[0] = RGB2xToU(ar, ag, ab); \
+ dst_v[0] = RGB2xToV(ar, ag, ab); \
+ } \
}
#endif
@@ -603,80 +693,80 @@ static __inline int RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
// ARGBToYJ_C and ARGBToUVJ_C
// Intel version mimic SSE/AVX which does 2 pavgb
#if LIBYUV_ARGBTOUV_PAVGB
-#define MAKEROWYJ(NAME, R, G, B, BPP) \
- void NAME##ToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
- int x; \
- for (x = 0; x < width; ++x) { \
- dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \
- src_argb0 += BPP; \
- dst_y += 1; \
- } \
- } \
- void NAME##ToUVJRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
- uint8_t* dst_u, uint8_t* dst_v, int width) { \
- const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
- int x; \
- for (x = 0; x < width - 1; x += 2) { \
- uint8_t ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \
- AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \
- uint8_t ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \
- AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \
- uint8_t ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \
- AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \
- dst_u[0] = RGBToUJ(ar, ag, ab); \
- dst_v[0] = RGBToVJ(ar, ag, ab); \
- src_rgb0 += BPP * 2; \
- src_rgb1 += BPP * 2; \
- dst_u += 1; \
- dst_v += 1; \
- } \
- if (width & 1) { \
- uint8_t ab = AVGB(src_rgb0[B], src_rgb1[B]); \
- uint8_t ag = AVGB(src_rgb0[G], src_rgb1[G]); \
- uint8_t ar = AVGB(src_rgb0[R], src_rgb1[R]); \
- dst_u[0] = RGBToUJ(ar, ag, ab); \
- dst_v[0] = RGBToVJ(ar, ag, ab); \
- } \
+#define MAKEROWYJ(NAME, R, G, B, BPP) \
+ void NAME##ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
+ int x; \
+ for (x = 0; x < width; ++x) { \
+ dst_y[0] = RGBToYJ(src_rgb[R], src_rgb[G], src_rgb[B]); \
+ src_rgb += BPP; \
+ dst_y += 1; \
+ } \
+ } \
+ void NAME##ToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
+ uint8_t* dst_u, uint8_t* dst_v, int width) { \
+ const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
+ int x; \
+ for (x = 0; x < width - 1; x += 2) { \
+ uint8_t ab = AVGB(AVGB(src_rgb[B], src_rgb1[B]), \
+ AVGB(src_rgb[B + BPP], src_rgb1[B + BPP])); \
+ uint8_t ag = AVGB(AVGB(src_rgb[G], src_rgb1[G]), \
+ AVGB(src_rgb[G + BPP], src_rgb1[G + BPP])); \
+ uint8_t ar = AVGB(AVGB(src_rgb[R], src_rgb1[R]), \
+ AVGB(src_rgb[R + BPP], src_rgb1[R + BPP])); \
+ dst_u[0] = RGBToUJ(ar, ag, ab); \
+ dst_v[0] = RGBToVJ(ar, ag, ab); \
+ src_rgb += BPP * 2; \
+ src_rgb1 += BPP * 2; \
+ dst_u += 1; \
+ dst_v += 1; \
+ } \
+ if (width & 1) { \
+ uint8_t ab = AVGB(src_rgb[B], src_rgb1[B]); \
+ uint8_t ag = AVGB(src_rgb[G], src_rgb1[G]); \
+ uint8_t ar = AVGB(src_rgb[R], src_rgb1[R]); \
+ dst_u[0] = RGBToUJ(ar, ag, ab); \
+ dst_v[0] = RGBToVJ(ar, ag, ab); \
+ } \
}
#else
// ARM version does sum / 2 then multiply by 2x smaller coefficients
-#define MAKEROWYJ(NAME, R, G, B, BPP) \
- void NAME##ToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
- int x; \
- for (x = 0; x < width; ++x) { \
- dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \
- src_argb0 += BPP; \
- dst_y += 1; \
- } \
- } \
- void NAME##ToUVJRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
- uint8_t* dst_u, uint8_t* dst_v, int width) { \
- const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
- int x; \
- for (x = 0; x < width - 1; x += 2) { \
- uint16_t ab = (src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] + \
- src_rgb1[B + BPP] + 1) >> \
- 1; \
- uint16_t ag = (src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] + \
- src_rgb1[G + BPP] + 1) >> \
- 1; \
- uint16_t ar = (src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] + \
- src_rgb1[R + BPP] + 1) >> \
- 1; \
- dst_u[0] = RGB2xToUJ(ar, ag, ab); \
- dst_v[0] = RGB2xToVJ(ar, ag, ab); \
- src_rgb0 += BPP * 2; \
- src_rgb1 += BPP * 2; \
- dst_u += 1; \
- dst_v += 1; \
- } \
- if (width & 1) { \
- uint16_t ab = (src_rgb0[B] + src_rgb1[B]); \
- uint16_t ag = (src_rgb0[G] + src_rgb1[G]); \
- uint16_t ar = (src_rgb0[R] + src_rgb1[R]); \
- dst_u[0] = RGB2xToUJ(ar, ag, ab); \
- dst_v[0] = RGB2xToVJ(ar, ag, ab); \
- } \
+#define MAKEROWYJ(NAME, R, G, B, BPP) \
+ void NAME##ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
+ int x; \
+ for (x = 0; x < width; ++x) { \
+ dst_y[0] = RGBToYJ(src_rgb[R], src_rgb[G], src_rgb[B]); \
+ src_rgb += BPP; \
+ dst_y += 1; \
+ } \
+ } \
+ void NAME##ToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
+ uint8_t* dst_u, uint8_t* dst_v, int width) { \
+ const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
+ int x; \
+ for (x = 0; x < width - 1; x += 2) { \
+ uint16_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] + \
+ src_rgb1[B + BPP] + 1) >> \
+ 1; \
+ uint16_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] + \
+ src_rgb1[G + BPP] + 1) >> \
+ 1; \
+ uint16_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] + \
+ src_rgb1[R + BPP] + 1) >> \
+ 1; \
+ dst_u[0] = RGB2xToUJ(ar, ag, ab); \
+ dst_v[0] = RGB2xToVJ(ar, ag, ab); \
+ src_rgb += BPP * 2; \
+ src_rgb1 += BPP * 2; \
+ dst_u += 1; \
+ dst_v += 1; \
+ } \
+ if (width & 1) { \
+ uint16_t ab = (src_rgb[B] + src_rgb1[B]); \
+ uint16_t ag = (src_rgb[G] + src_rgb1[G]); \
+ uint16_t ar = (src_rgb[R] + src_rgb1[R]); \
+ dst_u[0] = RGB2xToUJ(ar, ag, ab); \
+ dst_v[0] = RGB2xToVJ(ar, ag, ab); \
+ } \
}
#endif
@@ -1146,16 +1236,16 @@ void ARGBShadeRow_C(const uint8_t* src_argb,
#define REPEAT8(v) (v) | ((v) << 8)
#define SHADE(f, v) v* f >> 16
-void ARGBMultiplyRow_C(const uint8_t* src_argb0,
+void ARGBMultiplyRow_C(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
int i;
for (i = 0; i < width; ++i) {
- const uint32_t b = REPEAT8(src_argb0[0]);
- const uint32_t g = REPEAT8(src_argb0[1]);
- const uint32_t r = REPEAT8(src_argb0[2]);
- const uint32_t a = REPEAT8(src_argb0[3]);
+ const uint32_t b = REPEAT8(src_argb[0]);
+ const uint32_t g = REPEAT8(src_argb[1]);
+ const uint32_t r = REPEAT8(src_argb[2]);
+ const uint32_t a = REPEAT8(src_argb[3]);
const uint32_t b_scale = src_argb1[0];
const uint32_t g_scale = src_argb1[1];
const uint32_t r_scale = src_argb1[2];
@@ -1164,7 +1254,7 @@ void ARGBMultiplyRow_C(const uint8_t* src_argb0,
dst_argb[1] = SHADE(g, g_scale);
dst_argb[2] = SHADE(r, r_scale);
dst_argb[3] = SHADE(a, a_scale);
- src_argb0 += 4;
+ src_argb += 4;
src_argb1 += 4;
dst_argb += 4;
}
@@ -1174,16 +1264,16 @@ void ARGBMultiplyRow_C(const uint8_t* src_argb0,
#define SHADE(f, v) clamp255(v + f)
-void ARGBAddRow_C(const uint8_t* src_argb0,
+void ARGBAddRow_C(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
int i;
for (i = 0; i < width; ++i) {
- const int b = src_argb0[0];
- const int g = src_argb0[1];
- const int r = src_argb0[2];
- const int a = src_argb0[3];
+ const int b = src_argb[0];
+ const int g = src_argb[1];
+ const int r = src_argb[2];
+ const int a = src_argb[3];
const int b_add = src_argb1[0];
const int g_add = src_argb1[1];
const int r_add = src_argb1[2];
@@ -1192,7 +1282,7 @@ void ARGBAddRow_C(const uint8_t* src_argb0,
dst_argb[1] = SHADE(g, g_add);
dst_argb[2] = SHADE(r, r_add);
dst_argb[3] = SHADE(a, a_add);
- src_argb0 += 4;
+ src_argb += 4;
src_argb1 += 4;
dst_argb += 4;
}
@@ -1201,16 +1291,16 @@ void ARGBAddRow_C(const uint8_t* src_argb0,
#define SHADE(f, v) clamp0(f - v)
-void ARGBSubtractRow_C(const uint8_t* src_argb0,
+void ARGBSubtractRow_C(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
int i;
for (i = 0; i < width; ++i) {
- const int b = src_argb0[0];
- const int g = src_argb0[1];
- const int r = src_argb0[2];
- const int a = src_argb0[3];
+ const int b = src_argb[0];
+ const int g = src_argb[1];
+ const int r = src_argb[2];
+ const int a = src_argb[3];
const int b_sub = src_argb1[0];
const int g_sub = src_argb1[1];
const int r_sub = src_argb1[2];
@@ -1219,7 +1309,7 @@ void ARGBSubtractRow_C(const uint8_t* src_argb0,
dst_argb[1] = SHADE(g, g_sub);
dst_argb[2] = SHADE(r, r_sub);
dst_argb[3] = SHADE(a, a_sub);
- src_argb0 += 4;
+ src_argb += 4;
src_argb1 += 4;
dst_argb += 4;
}
@@ -1329,64 +1419,36 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
// Macros to create SIMD specific yuv to rgb conversion constants.
-#if defined(__aarch64__)
-#define MAKEYUVCONSTANTS(name, YG, YB, UB, UG, VG, VR, BB, BG, BR) \
- const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = { \
- {UB, VR, UB, VR, UB, VR, UB, VR}, {UB, VR, UB, VR, UB, VR, UB, VR}, \
- {UG, VG, UG, VG, UG, VG, UG, VG}, {UG, VG, UG, VG, UG, VG, UG, VG}, \
- {BB, BG, BR, YB, 0, 0, 0, 0}, {0x0101 * YG, YG, 0, 0}}; \
- const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = { \
- {VR, UB, VR, UB, VR, UB, VR, UB}, {VR, UB, VR, UB, VR, UB, VR, UB}, \
- {VG, UG, VG, UG, VG, UG, VG, UG}, {VG, UG, VG, UG, VG, UG, VG, UG}, \
- {BR, BG, BB, YB, 0, 0, 0, 0}, {0x0101 * YG, YG, 0, 0}};
-
-#elif defined(__arm__)
-#define MAKEYUVCONSTANTS(name, YG, YB, UB, UG, VG, VR, BB, BG, BR) \
- const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = { \
- {UB, UB, UB, UB, VR, VR, VR, VR, 0, 0, 0, 0, 0, 0, 0, 0}, \
- {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0}, \
- {BB, BG, BR, YB, 0, 0, 0, 0}, \
- {0x0101 * YG, YG, 0, 0}}; \
- const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = { \
- {VR, VR, VR, VR, UB, UB, UB, UB, 0, 0, 0, 0, 0, 0, 0, 0}, \
- {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0}, \
- {BR, BG, BB, YB, 0, 0, 0, 0}, \
- {0x0101 * YG, YG, 0, 0}};
+// clang-format off
+#if defined(__aarch64__) || defined(__arm__)
+// Bias values include subtract 128 from U and V, bias from Y and rounding.
+// For B and R bias is negative. For G bias is positive.
+#define YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR) \
+ {{UB, VR, UG, VG, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, \
+ {YG, (UB * 128 - YB), (UG * 128 + VG * 128 + YB), (VR * 128 - YB), YB, 0, \
+ 0, 0}}
#else
-#define MAKEYUVCONSTANTS(name, YG, YB, UB, UG, VG, VR, BB, BG, BR) \
- const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = { \
- {-UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, \
- -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0}, \
- {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, \
- UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, \
- {0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, \
- 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR}, \
- {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, \
- {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, \
- {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, \
- {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, \
- {YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB}}; \
- const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = { \
- {-VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, \
- -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0}, \
- {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, \
- VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG}, \
- {0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, \
- 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB}, \
- {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, \
- {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, \
- {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, \
- {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, \
- {YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB}};
+#define YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR) \
+ {{UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, \
+ UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0}, \
+ {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, \
+ UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, \
+ {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, \
+ 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR}, \
+ {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, \
+ {YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB}}
#endif
-// TODO(fbarchard): Generate SIMD structures from float matrix.
+// clang-format on
-// Bias values to round, and subtract 128 from U and V.
-#define BB (-UB * 128 + YB)
-#define BG (UG * 128 + VG * 128 + YB)
-#define BR (-VR * 128 + YB)
+#define MAKEYUVCONSTANTS(name, YG, YB, UB, UG, VG, VR) \
+ const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = \
+ YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR); \
+ const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = \
+ YUVCONSTANTSBODY(YG, YB, VR, VG, UG, UB);
+
+// TODO(fbarchard): Generate SIMD structures from float matrix.
// BT.601 limited range YUV to RGB reference
// R = (Y - 16) * 1.164 + V * 1.596
@@ -1395,7 +1457,11 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
// KR = 0.299; KB = 0.114
// U and V contributions to R,G,B.
+#ifdef LIBYUV_UNLIMITED_DATA
+#define UB 129 /* round(2.018 * 64) */
+#else
#define UB 128 /* max(128, round(2.018 * 64)) */
+#endif
#define UG 25 /* round(0.391 * 64) */
#define VG 52 /* round(0.813 * 64) */
#define VR 102 /* round(1.596 * 64) */
@@ -1404,7 +1470,7 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
#define YB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
-MAKEYUVCONSTANTS(I601, YG, YB, UB, UG, VG, VR, BB, BG, BR)
+MAKEYUVCONSTANTS(I601, YG, YB, UB, UG, VG, VR)
#undef YG
#undef YB
@@ -1429,7 +1495,7 @@ MAKEYUVCONSTANTS(I601, YG, YB, UB, UG, VG, VR, BB, BG, BR)
#define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
#define YB 32 /* 64 / 2 */
-MAKEYUVCONSTANTS(JPEG, YG, YB, UB, UG, VG, VR, BB, BG, BR)
+MAKEYUVCONSTANTS(JPEG, YG, YB, UB, UG, VG, VR)
#undef YG
#undef YB
@@ -1444,9 +1510,12 @@ MAKEYUVCONSTANTS(JPEG, YG, YB, UB, UG, VG, VR, BB, BG, BR)
// B = (Y - 16) * 1.164 + U * 2.112
// KR = 0.2126, KB = 0.0722
-// TODO(fbarchard): Find way to express 2.112 instead of 2.0.
// U and V contributions to R,G,B.
+#ifdef LIBYUV_UNLIMITED_DATA
+#define UB 135 /* round(2.112 * 64) */
+#else
#define UB 128 /* max(128, round(2.112 * 64)) */
+#endif
#define UG 14 /* round(0.213 * 64) */
#define VG 34 /* round(0.533 * 64) */
#define VR 115 /* round(1.793 * 64) */
@@ -1455,7 +1524,7 @@ MAKEYUVCONSTANTS(JPEG, YG, YB, UB, UG, VG, VR, BB, BG, BR)
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
#define YB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
-MAKEYUVCONSTANTS(H709, YG, YB, UB, UG, VG, VR, BB, BG, BR)
+MAKEYUVCONSTANTS(H709, YG, YB, UB, UG, VG, VR)
#undef YG
#undef YB
@@ -1480,7 +1549,7 @@ MAKEYUVCONSTANTS(H709, YG, YB, UB, UG, VG, VR, BB, BG, BR)
#define YG 16320 /* round(1 * 64 * 256 * 256 / 257) */
#define YB 32 /* 64 / 2 */
-MAKEYUVCONSTANTS(F709, YG, YB, UB, UG, VG, VR, BB, BG, BR)
+MAKEYUVCONSTANTS(F709, YG, YB, UB, UG, VG, VR)
#undef YG
#undef YB
@@ -1495,9 +1564,12 @@ MAKEYUVCONSTANTS(F709, YG, YB, UB, UG, VG, VR, BB, BG, BR)
// B = (Y - 16) * 1.164384 + U * 2.14177
// KR = 0.2627; KB = 0.0593
-// TODO(fbarchard): Improve accuracy; the B channel is off by 7%.
// U and V contributions to R,G,B.
+#ifdef LIBYUV_UNLIMITED_DATA
+#define UB 137 /* round(2.142 * 64) */
+#else
#define UB 128 /* max(128, round(2.142 * 64)) */
+#endif
#define UG 12 /* round(0.187326 * 64) */
#define VG 42 /* round(0.65042 * 64) */
#define VR 107 /* round(1.67867 * 64) */
@@ -1506,7 +1578,7 @@ MAKEYUVCONSTANTS(F709, YG, YB, UB, UG, VG, VR, BB, BG, BR)
#define YG 19003 /* round(1.164384 * 64 * 256 * 256 / 257) */
#define YB -1160 /* 1.164384 * 64 * -16 + 64 / 2 */
-MAKEYUVCONSTANTS(2020, YG, YB, UB, UG, VG, VR, BB, BG, BR)
+MAKEYUVCONSTANTS(2020, YG, YB, UB, UG, VG, VR)
#undef YG
#undef YB
@@ -1530,7 +1602,7 @@ MAKEYUVCONSTANTS(2020, YG, YB, UB, UG, VG, VR, BB, BG, BR)
#define YG 16320 /* round(1 * 64 * 256 * 256 / 257) */
#define YB 32 /* 64 / 2 */
-MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR, BB, BG, BR)
+MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR)
#undef YG
#undef YB
@@ -1545,6 +1617,42 @@ MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR, BB, BG, BR)
#undef MAKEYUVCONSTANTS
+#if defined(__aarch64__) || defined(__arm__)
+#define LOAD_YUV_CONSTANTS \
+ int ub = yuvconstants->kUVCoeff[0]; \
+ int vr = yuvconstants->kUVCoeff[1]; \
+ int ug = yuvconstants->kUVCoeff[2]; \
+ int vg = yuvconstants->kUVCoeff[3]; \
+ int yg = yuvconstants->kRGBCoeffBias[0]; \
+ int bb = yuvconstants->kRGBCoeffBias[1]; \
+ int bg = yuvconstants->kRGBCoeffBias[2]; \
+ int br = yuvconstants->kRGBCoeffBias[3]
+
+#define CALC_RGB16 \
+ int32_t y1 = (uint32_t)(y32 * yg) >> 16; \
+ int b16 = y1 + (u * ub) - bb; \
+ int g16 = y1 + bg - (u * ug + v * vg); \
+ int r16 = y1 + (v * vr) - br
+#else
+#define LOAD_YUV_CONSTANTS \
+ int ub = yuvconstants->kUVToB[0]; \
+ int ug = yuvconstants->kUVToG[0]; \
+ int vg = yuvconstants->kUVToG[1]; \
+ int vr = yuvconstants->kUVToR[1]; \
+ int yg = yuvconstants->kYToRgb[0]; \
+ int yb = yuvconstants->kYBiasToRgb[0]
+
+#define CALC_RGB16 \
+ int32_t y1 = ((uint32_t)(y32 * yg) >> 16) + yb; \
+ int8_t ui = u; \
+ int8_t vi = v; \
+ ui -= 0x80; \
+ vi -= 0x80; \
+ int b16 = y1 + (ui * ub); \
+ int g16 = y1 - (ui * ug + vi * vg); \
+ int r16 = y1 + (vi * vr)
+#endif
+
// C reference code that mimics the YUV assembly.
// Reads 8 bit YUV and leaves result as 16 bit.
static __inline void YuvPixel(uint8_t y,
@@ -1554,39 +1662,12 @@ static __inline void YuvPixel(uint8_t y,
uint8_t* g,
uint8_t* r,
const struct YuvConstants* yuvconstants) {
-#if defined(__aarch64__)
- int ub = -yuvconstants->kUVToRB[0];
- int ug = yuvconstants->kUVToG[0];
- int vg = yuvconstants->kUVToG[1];
- int vr = -yuvconstants->kUVToRB[1];
- int bb = yuvconstants->kUVBiasBGR[0];
- int bg = yuvconstants->kUVBiasBGR[1];
- int br = yuvconstants->kUVBiasBGR[2];
- int yg = yuvconstants->kYToRgb[1];
-#elif defined(__arm__)
- int ub = -yuvconstants->kUVToRB[0];
- int ug = yuvconstants->kUVToG[0];
- int vg = yuvconstants->kUVToG[4];
- int vr = -yuvconstants->kUVToRB[4];
- int bb = yuvconstants->kUVBiasBGR[0];
- int bg = yuvconstants->kUVBiasBGR[1];
- int br = yuvconstants->kUVBiasBGR[2];
- int yg = yuvconstants->kYToRgb[1];
-#else
- int ub = yuvconstants->kUVToB[0];
- int ug = yuvconstants->kUVToG[0];
- int vg = yuvconstants->kUVToG[1];
- int vr = yuvconstants->kUVToR[1];
- int bb = yuvconstants->kUVBiasB[0];
- int bg = yuvconstants->kUVBiasG[0];
- int br = yuvconstants->kUVBiasR[0];
- int yg = yuvconstants->kYToRgb[0];
-#endif
-
- uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
- *b = Clamp((int32_t)(y1 + -(u * ub) + bb) >> 6);
- *g = Clamp((int32_t)(y1 + -(u * ug + v * vg) + bg) >> 6);
- *r = Clamp((int32_t)(y1 + -(v * vr) + br) >> 6);
+ LOAD_YUV_CONSTANTS;
+ uint32_t y32 = y * 0x0101;
+ CALC_RGB16;
+ *b = Clamp((int32_t)(b16) >> 6);
+ *g = Clamp((int32_t)(g16) >> 6);
+ *r = Clamp((int32_t)(r16) >> 6);
}
// Reads 8 bit YUV and leaves result as 16 bit.
@@ -1597,85 +1678,50 @@ static __inline void YuvPixel8_16(uint8_t y,
int* g,
int* r,
const struct YuvConstants* yuvconstants) {
-#if defined(__aarch64__)
- int ub = -yuvconstants->kUVToRB[0];
- int ug = yuvconstants->kUVToG[0];
- int vg = yuvconstants->kUVToG[1];
- int vr = -yuvconstants->kUVToRB[1];
- int bb = yuvconstants->kUVBiasBGR[0];
- int bg = yuvconstants->kUVBiasBGR[1];
- int br = yuvconstants->kUVBiasBGR[2];
- int yg = yuvconstants->kYToRgb[1];
-#elif defined(__arm__)
- int ub = -yuvconstants->kUVToRB[0];
- int ug = yuvconstants->kUVToG[0];
- int vg = yuvconstants->kUVToG[4];
- int vr = -yuvconstants->kUVToRB[4];
- int bb = yuvconstants->kUVBiasBGR[0];
- int bg = yuvconstants->kUVBiasBGR[1];
- int br = yuvconstants->kUVBiasBGR[2];
- int yg = yuvconstants->kYToRgb[1];
-#else
- int ub = yuvconstants->kUVToB[0];
- int ug = yuvconstants->kUVToG[0];
- int vg = yuvconstants->kUVToG[1];
- int vr = yuvconstants->kUVToR[1];
- int bb = yuvconstants->kUVBiasB[0];
- int bg = yuvconstants->kUVBiasG[0];
- int br = yuvconstants->kUVBiasR[0];
- int yg = yuvconstants->kYToRgb[0];
-#endif
-
- uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
- *b = (int)(-(u * ub) + y1 + bb);
- *g = (int)(-(u * ug + v * vg) + y1 + bg);
- *r = (int)(-(v * vr) + y1 + br);
+ LOAD_YUV_CONSTANTS;
+ uint32_t y32 = y * 0x0101;
+ CALC_RGB16;
+ *b = b16;
+ *g = g16;
+ *r = r16;
}
// C reference code that mimics the YUV 16 bit assembly.
// Reads 10 bit YUV and leaves result as 16 bit.
-static __inline void YuvPixel16(int16_t y,
- int16_t u,
- int16_t v,
- int* b,
- int* g,
- int* r,
- const struct YuvConstants* yuvconstants) {
-#if defined(__aarch64__)
- int ub = -yuvconstants->kUVToRB[0];
- int ug = yuvconstants->kUVToG[0];
- int vg = yuvconstants->kUVToG[1];
- int vr = -yuvconstants->kUVToRB[1];
- int bb = yuvconstants->kUVBiasBGR[0];
- int bg = yuvconstants->kUVBiasBGR[1];
- int br = yuvconstants->kUVBiasBGR[2];
- int yg = yuvconstants->kYToRgb[1];
-#elif defined(__arm__)
- int ub = -yuvconstants->kUVToRB[0];
- int ug = yuvconstants->kUVToG[0];
- int vg = yuvconstants->kUVToG[4];
- int vr = -yuvconstants->kUVToRB[4];
- int bb = yuvconstants->kUVBiasBGR[0];
- int bg = yuvconstants->kUVBiasBGR[1];
- int br = yuvconstants->kUVBiasBGR[2];
- int yg = yuvconstants->kYToRgb[1];
-#else
- int ub = yuvconstants->kUVToB[0];
- int ug = yuvconstants->kUVToG[0];
- int vg = yuvconstants->kUVToG[1];
- int vr = yuvconstants->kUVToR[1];
- int bb = yuvconstants->kUVBiasB[0];
- int bg = yuvconstants->kUVBiasG[0];
- int br = yuvconstants->kUVBiasR[0];
- int yg = yuvconstants->kYToRgb[0];
-#endif
-
- uint32_t y1 = (uint32_t)((y << 6) * yg) >> 16;
+static __inline void YuvPixel10_16(uint16_t y,
+ uint16_t u,
+ uint16_t v,
+ int* b,
+ int* g,
+ int* r,
+ const struct YuvConstants* yuvconstants) {
+ LOAD_YUV_CONSTANTS;
+ uint32_t y32 = y << 6;
u = clamp255(u >> 2);
v = clamp255(v >> 2);
- *b = (int)(-(u * ub) + y1 + bb);
- *g = (int)(-(u * ug + v * vg) + y1 + bg);
- *r = (int)(-(v * vr) + y1 + br);
+ CALC_RGB16;
+ *b = b16;
+ *g = g16;
+ *r = r16;
+}
+
+// C reference code that mimics the YUV 16 bit assembly.
+// Reads 12 bit YUV and leaves result as 16 bit.
+static __inline void YuvPixel12_16(int16_t y,
+ int16_t u,
+ int16_t v,
+ int* b,
+ int* g,
+ int* r,
+ const struct YuvConstants* yuvconstants) {
+ LOAD_YUV_CONSTANTS;
+ uint32_t y32 = y << 4;
+ u = clamp255(u >> 4);
+ v = clamp255(v >> 4);
+ CALC_RGB16;
+ *b = b16;
+ *g = g16;
+ *r = r16;
}
// C reference code that mimics the YUV 10 bit assembly.
@@ -1690,22 +1736,78 @@ static __inline void YuvPixel10(uint16_t y,
int b16;
int g16;
int r16;
- YuvPixel16(y, u, v, &b16, &g16, &r16, yuvconstants);
+ YuvPixel10_16(y, u, v, &b16, &g16, &r16, yuvconstants);
+ *b = Clamp(b16 >> 6);
+ *g = Clamp(g16 >> 6);
+ *r = Clamp(r16 >> 6);
+}
+
+// C reference code that mimics the YUV 12 bit assembly.
+// Reads 12 bit YUV and clamps down to 8 bit RGB.
+static __inline void YuvPixel12(uint16_t y,
+ uint16_t u,
+ uint16_t v,
+ uint8_t* b,
+ uint8_t* g,
+ uint8_t* r,
+ const struct YuvConstants* yuvconstants) {
+ int b16;
+ int g16;
+ int r16;
+ YuvPixel12_16(y, u, v, &b16, &g16, &r16, yuvconstants);
*b = Clamp(b16 >> 6);
*g = Clamp(g16 >> 6);
*r = Clamp(r16 >> 6);
}
+// C reference code that mimics the YUV 16 bit assembly.
+// Reads 16 bit YUV and leaves result as 8 bit.
+static __inline void YuvPixel16_8(uint16_t y,
+ uint16_t u,
+ uint16_t v,
+ uint8_t* b,
+ uint8_t* g,
+ uint8_t* r,
+ const struct YuvConstants* yuvconstants) {
+ LOAD_YUV_CONSTANTS;
+ uint32_t y32 = y;
+ u = clamp255(u >> 8);
+ v = clamp255(v >> 8);
+ CALC_RGB16;
+ *b = Clamp((int32_t)(b16) >> 6);
+ *g = Clamp((int32_t)(g16) >> 6);
+ *r = Clamp((int32_t)(r16) >> 6);
+}
+
+// C reference code that mimics the YUV 16 bit assembly.
+// Reads 16 bit YUV and leaves result as 16 bit.
+static __inline void YuvPixel16_16(uint16_t y,
+ uint16_t u,
+ uint16_t v,
+ int* b,
+ int* g,
+ int* r,
+ const struct YuvConstants* yuvconstants) {
+ LOAD_YUV_CONSTANTS;
+ uint32_t y32 = y;
+ u = clamp255(u >> 8);
+ v = clamp255(v >> 8);
+ CALC_RGB16;
+ *b = b16;
+ *g = g16;
+ *r = r16;
+}
+
// C reference code that mimics the YUV assembly.
-// Reads 8 bit YUV and leaves result as 16 bit.
+// Reads 8 bit YUV and leaves result as 8 bit.
static __inline void YPixel(uint8_t y,
uint8_t* b,
uint8_t* g,
uint8_t* r,
const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__) || defined(__arm__)
- int ygb = yuvconstants->kUVBiasBGR[3];
- int yg = yuvconstants->kYToRgb[1];
+ int yg = yuvconstants->kRGBCoeffBias[0];
+ int ygb = yuvconstants->kRGBCoeffBias[4];
#else
int ygb = yuvconstants->kYBiasToRgb[0];
int yg = yuvconstants->kYToRgb[0];
@@ -1716,38 +1818,6 @@ static __inline void YPixel(uint8_t y,
*r = Clamp(((int32_t)(y1) + ygb) >> 6);
}
-#if !defined(LIBYUV_DISABLE_NEON) && \
- (defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON))
-// C mimic assembly.
-// TODO(fbarchard): Remove subsampling from Neon.
-void I444ToARGBRow_C(const uint8_t* src_y,
- const uint8_t* src_u,
- const uint8_t* src_v,
- uint8_t* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- uint8_t u = (src_u[0] + src_u[1] + 1) >> 1;
- uint8_t v = (src_v[0] + src_v[1] + 1) >> 1;
- YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
- yuvconstants);
- rgb_buf[3] = 255;
- YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
- yuvconstants);
- rgb_buf[7] = 255;
- src_y += 2;
- src_u += 2;
- src_v += 2;
- rgb_buf += 8; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
- rgb_buf + 2, yuvconstants);
- rgb_buf[3] = 255;
- }
-}
-#else
void I444ToARGBRow_C(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -1765,7 +1835,6 @@ void I444ToARGBRow_C(const uint8_t* src_y,
rgb_buf += 4; // Advance 1 pixel.
}
}
-#endif
// Also used for 420
void I422ToARGBRow_C(const uint8_t* src_y,
@@ -1821,9 +1890,102 @@ void I210ToARGBRow_C(const uint16_t* src_y,
}
}
+void I410ToARGBRow_C(const uint16_t* src_y,
+ const uint16_t* src_u,
+ const uint16_t* src_v,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ rgb_buf[3] = 255;
+ src_y += 1;
+ src_u += 1;
+ src_v += 1;
+ rgb_buf += 4; // Advance 1 pixels.
+ }
+}
+
+void I210AlphaToARGBRow_C(const uint16_t* src_y,
+ const uint16_t* src_u,
+ const uint16_t* src_v,
+ const uint16_t* src_a,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ rgb_buf[3] = clamp255(src_a[0] >> 2);
+ YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
+ rgb_buf + 6, yuvconstants);
+ rgb_buf[7] = clamp255(src_a[1] >> 2);
+ src_y += 2;
+ src_u += 1;
+ src_v += 1;
+ src_a += 2;
+ rgb_buf += 8; // Advance 2 pixels.
+ }
+ if (width & 1) {
+ YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ rgb_buf[3] = clamp255(src_a[0] >> 2);
+ }
+}
+
+void I410AlphaToARGBRow_C(const uint16_t* src_y,
+ const uint16_t* src_u,
+ const uint16_t* src_v,
+ const uint16_t* src_a,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ rgb_buf[3] = clamp255(src_a[0] >> 2);
+ src_y += 1;
+ src_u += 1;
+ src_v += 1;
+ src_a += 1;
+ rgb_buf += 4; // Advance 1 pixels.
+ }
+}
+
+// 12 bit YUV to ARGB
+void I212ToARGBRow_C(const uint16_t* src_y,
+ const uint16_t* src_u,
+ const uint16_t* src_v,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ YuvPixel12(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ rgb_buf[3] = 255;
+ YuvPixel12(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
+ rgb_buf + 6, yuvconstants);
+ rgb_buf[7] = 255;
+ src_y += 2;
+ src_u += 1;
+ src_v += 1;
+ rgb_buf += 8; // Advance 2 pixels.
+ }
+ if (width & 1) {
+ YuvPixel12(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ rgb_buf[3] = 255;
+ }
+}
+
static void StoreAR30(uint8_t* rgb_buf, int b, int g, int r) {
uint32_t ar30;
- b = b >> 4; // convert 10.6 to 10 bit.
+ b = b >> 4; // convert 8 bit 10.6 to 10 bit.
g = g >> 4;
r = r >> 4;
b = Clamp10(b);
@@ -1845,9 +2007,9 @@ void I210ToAR30Row_C(const uint16_t* src_y,
int g;
int r;
for (x = 0; x < width - 1; x += 2) {
- YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+ YuvPixel10_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
StoreAR30(rgb_buf, b, g, r);
- YuvPixel16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+ YuvPixel10_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
StoreAR30(rgb_buf + 4, b, g, r);
src_y += 2;
src_u += 1;
@@ -1855,16 +2017,15 @@ void I210ToAR30Row_C(const uint16_t* src_y,
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
- YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+ YuvPixel10_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
StoreAR30(rgb_buf, b, g, r);
}
}
-// 8 bit YUV to 10 bit AR30
-// Uses same code as 10 bit YUV bit shifts the 8 bit values up to 10 bits.
-void I422ToAR30Row_C(const uint8_t* src_y,
- const uint8_t* src_u,
- const uint8_t* src_v,
+// 12 bit YUV to 10 bit AR30
+void I212ToAR30Row_C(const uint16_t* src_y,
+ const uint16_t* src_u,
+ const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
@@ -1873,9 +2034,9 @@ void I422ToAR30Row_C(const uint8_t* src_y,
int g;
int r;
for (x = 0; x < width - 1; x += 2) {
- YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+ YuvPixel12_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
StoreAR30(rgb_buf, b, g, r);
- YuvPixel8_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+ YuvPixel12_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
StoreAR30(rgb_buf + 4, b, g, r);
src_y += 2;
src_u += 1;
@@ -1883,45 +2044,142 @@ void I422ToAR30Row_C(const uint8_t* src_y,
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
- YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+ YuvPixel12_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
StoreAR30(rgb_buf, b, g, r);
}
}
-#if !defined(LIBYUV_DISABLE_NEON) && \
- (defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON))
-// C mimic assembly.
-// TODO(fbarchard): Remove subsampling from Neon.
-void I444AlphaToARGBRow_C(const uint8_t* src_y,
- const uint8_t* src_u,
- const uint8_t* src_v,
- const uint8_t* src_a,
- uint8_t* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
+void I410ToAR30Row_C(const uint16_t* src_y,
+ const uint16_t* src_u,
+ const uint16_t* src_v,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ int b;
+ int g;
+ int r;
+ for (x = 0; x < width; ++x) {
+ YuvPixel10_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+ StoreAR30(rgb_buf, b, g, r);
+ src_y += 1;
+ src_u += 1;
+ src_v += 1;
+ rgb_buf += 4; // Advance 1 pixel.
+ }
+}
+
+// P210 has 10 bits in msb of 16 bit NV12 style layout.
+void P210ToARGBRow_C(const uint16_t* src_y,
+ const uint16_t* src_uv,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
- uint8_t u = (src_u[0] + src_u[1] + 1) >> 1;
- uint8_t v = (src_v[0] + src_v[1] + 1) >> 1;
- YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
- yuvconstants);
- rgb_buf[3] = src_a[0];
- YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
- yuvconstants);
- rgb_buf[7] = src_a[1];
+ YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], dst_argb + 0, dst_argb + 1,
+ dst_argb + 2, yuvconstants);
+ dst_argb[3] = 255;
+ YuvPixel16_8(src_y[1], src_uv[0], src_uv[1], dst_argb + 4, dst_argb + 5,
+ dst_argb + 6, yuvconstants);
+ dst_argb[7] = 255;
src_y += 2;
- src_u += 2;
- src_v += 2;
- src_a += 2;
+ src_uv += 2;
+ dst_argb += 8; // Advance 2 pixels.
+ }
+ if (width & 1) {
+ YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], dst_argb + 0, dst_argb + 1,
+ dst_argb + 2, yuvconstants);
+ dst_argb[3] = 255;
+ }
+}
+
+void P410ToARGBRow_C(const uint16_t* src_y,
+ const uint16_t* src_uv,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], dst_argb + 0, dst_argb + 1,
+ dst_argb + 2, yuvconstants);
+ dst_argb[3] = 255;
+ src_y += 1;
+ src_uv += 2;
+ dst_argb += 4; // Advance 1 pixels.
+ }
+}
+
+void P210ToAR30Row_C(const uint16_t* src_y,
+ const uint16_t* src_uv,
+ uint8_t* dst_ar30,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ int b;
+ int g;
+ int r;
+ for (x = 0; x < width - 1; x += 2) {
+ YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
+ StoreAR30(dst_ar30, b, g, r);
+ YuvPixel16_16(src_y[1], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
+ StoreAR30(dst_ar30 + 4, b, g, r);
+ src_y += 2;
+ src_uv += 2;
+ dst_ar30 += 8; // Advance 2 pixels.
+ }
+ if (width & 1) {
+ YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
+ StoreAR30(dst_ar30, b, g, r);
+ }
+}
+
+void P410ToAR30Row_C(const uint16_t* src_y,
+ const uint16_t* src_uv,
+ uint8_t* dst_ar30,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ int b;
+ int g;
+ int r;
+ for (x = 0; x < width; ++x) {
+ YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
+ StoreAR30(dst_ar30, b, g, r);
+ src_y += 1;
+ src_uv += 2;
+ dst_ar30 += 4; // Advance 1 pixel.
+ }
+}
+
+// 8 bit YUV to 10 bit AR30
+// Uses same code as 10 bit YUV bit shifts the 8 bit values up to 10 bits.
+void I422ToAR30Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ int b;
+ int g;
+ int r;
+ for (x = 0; x < width - 1; x += 2) {
+ YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+ StoreAR30(rgb_buf, b, g, r);
+ YuvPixel8_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+ StoreAR30(rgb_buf + 4, b, g, r);
+ src_y += 2;
+ src_u += 1;
+ src_v += 1;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
- YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
- rgb_buf + 2, yuvconstants);
- rgb_buf[3] = src_a[0];
+ YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+ StoreAR30(rgb_buf, b, g, r);
}
}
-#else
+
void I444AlphaToARGBRow_C(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -1941,7 +2199,6 @@ void I444AlphaToARGBRow_C(const uint8_t* src_y,
rgb_buf += 4; // Advance 1 pixel.
}
}
-#endif
void I422AlphaToARGBRow_C(const uint8_t* src_y,
const uint8_t* src_u,
@@ -2492,6 +2749,105 @@ void MergeARGBRow_C(const uint8_t* src_r,
}
}
+void MergeXR30Row_C(const uint16_t* src_r,
+ const uint16_t* src_g,
+ const uint16_t* src_b,
+ uint8_t* dst_ar30,
+ int depth,
+ int width) {
+ assert(depth >= 10);
+ assert(depth <= 16);
+ int x;
+ int shift = depth - 10;
+ uint32_t* dst_ar30_32 = (uint32_t*)dst_ar30;
+ for (x = 0; x < width; ++x) {
+ uint32_t r = clamp1023(src_r[x] >> shift);
+ uint32_t g = clamp1023(src_g[x] >> shift);
+ uint32_t b = clamp1023(src_b[x] >> shift);
+ dst_ar30_32[x] = b | (g << 10) | (r << 20) | 0xc0000000;
+ }
+}
+
+void MergeAR64Row_C(const uint16_t* src_r,
+ const uint16_t* src_g,
+ const uint16_t* src_b,
+ const uint16_t* src_a,
+ uint16_t* dst_ar64,
+ int depth,
+ int width) {
+ assert(depth >= 1);
+ assert(depth <= 16);
+ int x;
+ int shift = 16 - depth;
+ int max = (1 << depth) - 1;
+ for (x = 0; x < width; ++x) {
+ dst_ar64[0] = ClampMax(src_b[x], max) << shift;
+ dst_ar64[1] = ClampMax(src_g[x], max) << shift;
+ dst_ar64[2] = ClampMax(src_r[x], max) << shift;
+ dst_ar64[3] = ClampMax(src_a[x], max) << shift;
+ dst_ar64 += 4;
+ }
+}
+
+void MergeARGB16To8Row_C(const uint16_t* src_r,
+ const uint16_t* src_g,
+ const uint16_t* src_b,
+ const uint16_t* src_a,
+ uint8_t* dst_argb,
+ int depth,
+ int width) {
+ assert(depth >= 8);
+ assert(depth <= 16);
+ int x;
+ int shift = depth - 8;
+ for (x = 0; x < width; ++x) {
+ dst_argb[0] = clamp255(src_b[x] >> shift);
+ dst_argb[1] = clamp255(src_g[x] >> shift);
+ dst_argb[2] = clamp255(src_r[x] >> shift);
+ dst_argb[3] = clamp255(src_a[x] >> shift);
+ dst_argb += 4;
+ }
+}
+
+void MergeXR64Row_C(const uint16_t* src_r,
+ const uint16_t* src_g,
+ const uint16_t* src_b,
+ uint16_t* dst_ar64,
+ int depth,
+ int width) {
+ assert(depth >= 1);
+ assert(depth <= 16);
+ int x;
+ int shift = 16 - depth;
+ int max = (1 << depth) - 1;
+ for (x = 0; x < width; ++x) {
+ dst_ar64[0] = ClampMax(src_b[x], max) << shift;
+ dst_ar64[1] = ClampMax(src_g[x], max) << shift;
+ dst_ar64[2] = ClampMax(src_r[x], max) << shift;
+ dst_ar64[3] = 0xffff;
+ dst_ar64 += 4;
+ }
+}
+
+void MergeXRGB16To8Row_C(const uint16_t* src_r,
+ const uint16_t* src_g,
+ const uint16_t* src_b,
+ uint8_t* dst_argb,
+ int depth,
+ int width) {
+ assert(depth >= 8);
+ assert(depth <= 16);
+ int x;
+ int shift = depth - 8;
+ for (x = 0; x < width; ++x) {
+ dst_argb[0] = clamp255(src_b[x] >> shift);
+ dst_argb[1] = clamp255(src_g[x] >> shift);
+ dst_argb[2] = clamp255(src_r[x] >> shift);
+ dst_argb[3] = 0xff;
+ dst_argb += 4;
+ }
+}
+
void SplitXRGBRow_C(const uint8_t* src_argb,
uint8_t* dst_r,
uint8_t* dst_g,
@@ -2528,6 +2884,8 @@ void MergeUVRow_16_C(const uint16_t* src_u,
int depth,
int width) {
int shift = 16 - depth;
+ assert(depth >= 8);
+ assert(depth <= 16);
int x;
for (x = 0; x < width; ++x) {
dst_uv[0] = src_u[x] << shift;
@@ -2544,6 +2902,8 @@ void SplitUVRow_16_C(const uint16_t* src_uv,
int width) {
int shift = 16 - depth;
int x;
+ assert(depth >= 8);
+ assert(depth <= 16);
for (x = 0; x < width; ++x) {
dst_u[x] = src_uv[0] >> shift;
dst_v[x] = src_uv[1] >> shift;
@@ -2581,6 +2941,9 @@ void Convert16To8Row_C(const uint16_t* src_y,
int scale,
int width) {
int x;
+ assert(scale >= 256);
+ assert(scale <= 32768);
+
for (x = 0; x < width; ++x) {
dst_y[x] = clamp255((src_y[x] * scale) >> 16);
}
@@ -2714,19 +3077,19 @@ void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
#define BLEND(f, b, a) clamp255((((256 - a) * b) >> 8) + f)
-// Blend src_argb0 over src_argb1 and store to dst_argb.
-// dst_argb may be src_argb0 or src_argb1.
+// Blend src_argb over src_argb1 and store to dst_argb.
+// dst_argb may be src_argb or src_argb1.
// This code mimics the SSSE3 version for better testability.
-void ARGBBlendRow_C(const uint8_t* src_argb0,
+void ARGBBlendRow_C(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
- uint32_t fb = src_argb0[0];
- uint32_t fg = src_argb0[1];
- uint32_t fr = src_argb0[2];
- uint32_t a = src_argb0[3];
+ uint32_t fb = src_argb[0];
+ uint32_t fg = src_argb[1];
+ uint32_t fr = src_argb[2];
+ uint32_t a = src_argb[3];
uint32_t bb = src_argb1[0];
uint32_t bg = src_argb1[1];
uint32_t br = src_argb1[2];
@@ -2735,10 +3098,10 @@ void ARGBBlendRow_C(const uint8_t* src_argb0,
dst_argb[2] = BLEND(fr, br, a);
dst_argb[3] = 255u;
- fb = src_argb0[4 + 0];
- fg = src_argb0[4 + 1];
- fr = src_argb0[4 + 2];
- a = src_argb0[4 + 3];
+ fb = src_argb[4 + 0];
+ fg = src_argb[4 + 1];
+ fr = src_argb[4 + 2];
+ a = src_argb[4 + 3];
bb = src_argb1[4 + 0];
bg = src_argb1[4 + 1];
br = src_argb1[4 + 2];
@@ -2746,16 +3109,16 @@ void ARGBBlendRow_C(const uint8_t* src_argb0,
dst_argb[4 + 1] = BLEND(fg, bg, a);
dst_argb[4 + 2] = BLEND(fr, br, a);
dst_argb[4 + 3] = 255u;
- src_argb0 += 8;
+ src_argb += 8;
src_argb1 += 8;
dst_argb += 8;
}
if (width & 1) {
- uint32_t fb = src_argb0[0];
- uint32_t fg = src_argb0[1];
- uint32_t fr = src_argb0[2];
- uint32_t a = src_argb0[3];
+ uint32_t fb = src_argb[0];
+ uint32_t fg = src_argb[1];
+ uint32_t fr = src_argb[2];
+ uint32_t a = src_argb[3];
uint32_t bb = src_argb1[0];
uint32_t bg = src_argb1[1];
uint32_t br = src_argb1[2];
@@ -3280,7 +3643,7 @@ void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
// Maximum temporary width for wrappers to process at a time, in pixels.
#define MAXTWIDTH 2048
-#if !(defined(_MSC_VER) && defined(_M_IX86)) && \
+#if !(defined(_MSC_VER) && !defined(__clang__) && defined(_M_IX86)) && \
defined(HAS_I422TORGB565ROW_SSSE3)
// row_win.cc has asm version, but GCC uses 2 step wrapper.
void I422ToRGB565Row_SSSE3(const uint8_t* src_y,
@@ -3747,13 +4110,14 @@ void NV21ToYUV24Row_C(const uint8_t* src_y,
}
// Filter 2 rows of AYUV UV's (444) into UV (420).
+// AYUV is VUYA in memory. UV for NV12 is UV order in memory.
void AYUVToUVRow_C(const uint8_t* src_ayuv,
int src_stride_ayuv,
uint8_t* dst_uv,
int width) {
// Output a row of UV values, filtering 2x2 rows of AYUV.
int x;
- for (x = 0; x < width; x += 2) {
+ for (x = 0; x < width - 1; x += 2) {
dst_uv[0] = (src_ayuv[1] + src_ayuv[5] + src_ayuv[src_stride_ayuv + 1] +
src_ayuv[src_stride_ayuv + 5] + 2) >>
2;
@@ -3764,12 +4128,8 @@ void AYUVToUVRow_C(const uint8_t* src_ayuv,
dst_uv += 2;
}
if (width & 1) {
- dst_uv[0] = (src_ayuv[0] + src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] +
- src_ayuv[src_stride_ayuv + 0] + 2) >>
- 2;
- dst_uv[1] = (src_ayuv[1] + src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] +
- src_ayuv[src_stride_ayuv + 1] + 2) >>
- 2;
+ dst_uv[0] = (src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] + 1) >> 1;
+ dst_uv[1] = (src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] + 1) >> 1;
}
}
@@ -3780,7 +4140,7 @@ void AYUVToVURow_C(const uint8_t* src_ayuv,
int width) {
// Output a row of VU values, filtering 2x2 rows of AYUV.
int x;
- for (x = 0; x < width; x += 2) {
+ for (x = 0; x < width - 1; x += 2) {
dst_vu[0] = (src_ayuv[0] + src_ayuv[4] + src_ayuv[src_stride_ayuv + 0] +
src_ayuv[src_stride_ayuv + 4] + 2) >>
2;
@@ -3791,12 +4151,8 @@ void AYUVToVURow_C(const uint8_t* src_ayuv,
dst_vu += 2;
}
if (width & 1) {
- dst_vu[0] = (src_ayuv[0] + src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] +
- src_ayuv[src_stride_ayuv + 0] + 2) >>
- 2;
- dst_vu[1] = (src_ayuv[1] + src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] +
- src_ayuv[src_stride_ayuv + 1] + 2) >>
- 2;
+ dst_vu[0] = (src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] + 1) >> 1;
+ dst_vu[1] = (src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] + 1) >> 1;
}
}