about summary refs log tree commit diff
path: root/files/source/row_mmi.cc
diff options
context:
space:
mode:
author Android Build Coastguard Worker <android-build-coastguard-worker@google.com> 2023-07-07 04:46:39 +0000
committer Android Build Coastguard Worker <android-build-coastguard-worker@google.com> 2023-07-07 04:46:39 +0000
commit 61206b6b77e25faa5a07ffc7822a437487c3b996 (patch)
tree 25cbeae94c26cad28a13bb089a662291b4b5d905 /files/source/row_mmi.cc
parent 2618a02a89c99046741140d41b2cfda18c6dc9dd (diff)
parent 3c4c137522d3a6759f2d96aee67149410ca2c1b9 (diff)
download libyuv-61206b6b77e25faa5a07ffc7822a437487c3b996.tar.gz
Snap for 10453563 from 3c4c137522d3a6759f2d96aee67149410ca2c1b9 to mainline-conscrypt-release
Change-Id: Ie22a992a771a7de60db49aa6825555946cace106
Diffstat (limited to 'files/source/row_mmi.cc')
-rw-r--r-- files/source/row_mmi.cc 2450
1 files changed, 2125 insertions, 325 deletions
diff --git a/files/source/row_mmi.cc b/files/source/row_mmi.cc
index d8726d09..362fd1cf 100644
--- a/files/source/row_mmi.cc
+++ b/files/source/row_mmi.cc
@@ -21,6 +21,8 @@ extern "C" {
// This module is for Mips MMI.
#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
+// clang-format off
+
void RGB24ToARGBRow_MMI(const uint8_t* src_rgb24,
uint8_t* dst_argb,
int width) {
@@ -603,7 +605,7 @@ void ARGBToARGB4444Row_MMI(const uint8_t* src_argb,
: "memory");
}
-void ARGBToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
+void ARGBToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
uint64_t src, src_hi, src_lo;
uint64_t dest0, dest1, dest2, dest3;
const uint64_t value = 0x1080;
@@ -611,8 +613,8 @@ void ARGBToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
__asm__ volatile(
"1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t"
+ "gsldlc1 %[src], 0x07(%[src_argb]) \n\t"
+ "gsldrc1 %[src], 0x00(%[src_argb]) \n\t"
"punpcklbh %[src_lo], %[src], %[zero] \n\t"
"pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
"pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
@@ -624,8 +626,8 @@ void ARGBToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
"paddw %[dest0], %[dest0], %[src] \n\t"
"psrlw %[dest0], %[dest0], %[eight] \n\t"
- "gsldlc1 %[src], 0x0f(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x08(%[src_argb0]) \n\t"
+ "gsldlc1 %[src], 0x0f(%[src_argb]) \n\t"
+ "gsldrc1 %[src], 0x08(%[src_argb]) \n\t"
"punpcklbh %[src_lo], %[src], %[zero] \n\t"
"pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
"pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
@@ -637,8 +639,8 @@ void ARGBToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
"paddw %[dest1], %[dest1], %[src] \n\t"
"psrlw %[dest1], %[dest1], %[eight] \n\t"
- "gsldlc1 %[src], 0x17(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x10(%[src_argb0]) \n\t"
+ "gsldlc1 %[src], 0x17(%[src_argb]) \n\t"
+ "gsldrc1 %[src], 0x10(%[src_argb]) \n\t"
"punpcklbh %[src_lo], %[src], %[zero] \n\t"
"pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
"pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
@@ -650,8 +652,8 @@ void ARGBToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
"paddw %[dest2], %[dest2], %[src] \n\t"
"psrlw %[dest2], %[dest2], %[eight] \n\t"
- "gsldlc1 %[src], 0x1f(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x18(%[src_argb0]) \n\t"
+ "gsldlc1 %[src], 0x1f(%[src_argb]) \n\t"
+ "gsldrc1 %[src], 0x18(%[src_argb]) \n\t"
"punpcklbh %[src_lo], %[src], %[zero] \n\t"
"pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
"pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
@@ -669,35 +671,38 @@ void ARGBToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
"gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
"gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
- "daddiu %[src_argb0], %[src_argb0], 0x20 \n\t"
+ "daddiu %[src_argb], %[src_argb], 0x20 \n\t"
"daddiu %[dst_y], %[dst_y], 0x08 \n\t"
"daddi %[width], %[width], -0x08 \n\t"
"bnez %[width], 1b \n\t"
: [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
[dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
[dest3] "=&f"(dest3)
- : [src_argb0] "r"(src_argb0), [dst_y] "r"(dst_y), [width] "r"(width),
+ : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width),
[mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
[zero] "f"(0x00)
: "memory");
}
-void ARGBToUVRow_MMI(const uint8_t* src_rgb0,
+void ARGBToUVRow_MMI(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
uint64_t src_rgb1;
- uint64_t ftmp[12];
+ uint64_t ftmp[13];
+ uint64_t tmp[1];
const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x0026004a00700002;
- const uint64_t mask_v = 0x00020070005e0012;
+ const uint64_t mask_u = 0x0013002500380002;
+ const uint64_t mask_v = 0x00020038002f0009;
__asm__ volatile(
+ "dli %[tmp0], 0x0001000100010001 \n\t"
+ "dmtc1 %[tmp0], %[ftmp12] \n\t"
"1: \n\t"
- "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t"
+ "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
+ "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -707,15 +712,16 @@ void ARGBToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"dsll %[dest0_u], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"
"pinsrh_3 %[dest0_v], %[src0], %[value] \n\t"
"pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
"pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x08(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -725,7 +731,8 @@ void ARGBToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"dsll %[src_lo], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
"pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
@@ -741,8 +748,8 @@ void ARGBToUVRow_MMI(const uint8_t* src_rgb0,
"psubw %[dest0_v], %[src1], %[src0] \n\t"
"psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x10(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -752,15 +759,16 @@ void ARGBToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"dsll %[dest1_u], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
"pinsrh_3 %[dest1_v], %[src0], %[value] \n\t"
"pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
"pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -770,7 +778,8 @@ void ARGBToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"dsll %[src_lo], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
"pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
@@ -786,8 +795,8 @@ void ARGBToUVRow_MMI(const uint8_t* src_rgb0,
"psubw %[dest1_v], %[src1], %[src0] \n\t"
"psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x20(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x27(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -797,15 +806,16 @@ void ARGBToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"dsll %[dest2_u], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
"pinsrh_3 %[dest2_v], %[src0], %[value] \n\t"
"pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
"pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x28(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x2f(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -815,7 +825,8 @@ void ARGBToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"dsll %[src_lo], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
"pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
@@ -831,8 +842,8 @@ void ARGBToUVRow_MMI(const uint8_t* src_rgb0,
"psubw %[dest2_v], %[src1], %[src0] \n\t"
"psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x30(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x37(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -842,15 +853,16 @@ void ARGBToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"dsll %[dest3_u], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
"pinsrh_3 %[dest3_v], %[src0], %[value] \n\t"
"pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
"pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x38(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x3f(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -860,7 +872,8 @@ void ARGBToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"dsll %[src_lo], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
"pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
@@ -888,7 +901,7 @@ void ARGBToUVRow_MMI(const uint8_t* src_rgb0,
"gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
"gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
- "daddiu %[src_rgb0], %[src_rgb0], 0x40 \n\t"
+ "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t"
"daddiu %[dst_u], %[dst_u], 0x08 \n\t"
"daddiu %[dst_v], %[dst_v], 0x08 \n\t"
"daddi %[width], %[width], -0x10 \n\t"
@@ -898,16 +911,17 @@ void ARGBToUVRow_MMI(const uint8_t* src_rgb0,
[dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
[dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
[dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
- [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
- : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
+ [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
+ [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
+ : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
[dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
[mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
- [zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02),
+ [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
[sixteen] "f"(0x10)
: "memory");
}
-void BGRAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
+void BGRAToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
uint64_t src, src_hi, src_lo;
uint64_t dest0, dest1, dest2, dest3;
const uint64_t value = 0x1080;
@@ -915,8 +929,8 @@ void BGRAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
__asm__ volatile(
"1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t"
+ "gsldlc1 %[src], 0x07(%[src_argb]) \n\t"
+ "gsldrc1 %[src], 0x00(%[src_argb]) \n\t"
"punpcklbh %[src_lo], %[src], %[zero] \n\t"
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
"pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
@@ -928,8 +942,8 @@ void BGRAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
"paddw %[dest0], %[dest0], %[src] \n\t"
"psrlw %[dest0], %[dest0], %[eight] \n\t"
- "gsldlc1 %[src], 0x0f(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x08(%[src_argb0]) \n\t"
+ "gsldlc1 %[src], 0x0f(%[src_argb]) \n\t"
+ "gsldrc1 %[src], 0x08(%[src_argb]) \n\t"
"punpcklbh %[src_lo], %[src], %[zero] \n\t"
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
"pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
@@ -941,8 +955,8 @@ void BGRAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
"paddw %[dest1], %[dest1], %[src] \n\t"
"psrlw %[dest1], %[dest1], %[eight] \n\t"
- "gsldlc1 %[src], 0x17(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x10(%[src_argb0]) \n\t"
+ "gsldlc1 %[src], 0x17(%[src_argb]) \n\t"
+ "gsldrc1 %[src], 0x10(%[src_argb]) \n\t"
"punpcklbh %[src_lo], %[src], %[zero] \n\t"
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
"pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
@@ -954,8 +968,8 @@ void BGRAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
"paddw %[dest2], %[dest2], %[src] \n\t"
"psrlw %[dest2], %[dest2], %[eight] \n\t"
- "gsldlc1 %[src], 0x1f(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x18(%[src_argb0]) \n\t"
+ "gsldlc1 %[src], 0x1f(%[src_argb]) \n\t"
+ "gsldrc1 %[src], 0x18(%[src_argb]) \n\t"
"punpcklbh %[src_lo], %[src], %[zero] \n\t"
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
"pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
@@ -973,35 +987,38 @@ void BGRAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
"gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
"gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
- "daddiu %[src_argb0], %[src_argb0], 0x20 \n\t"
+ "daddiu %[src_argb], %[src_argb], 0x20 \n\t"
"daddiu %[dst_y], %[dst_y], 0x08 \n\t"
"daddi %[width], %[width], -0x08 \n\t"
"bnez %[width], 1b \n\t"
: [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
[dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
[dest3] "=&f"(dest3)
- : [src_argb0] "r"(src_argb0), [dst_y] "r"(dst_y), [width] "r"(width),
+ : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width),
[mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
[zero] "f"(0x00)
: "memory");
}
-void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
+void BGRAToUVRow_MMI(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
uint64_t src_rgb1;
- uint64_t ftmp[12];
+ uint64_t ftmp[13];
+ uint64_t tmp[1];
const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x00020070004a0026;
- const uint64_t mask_v = 0x0012005e00700002;
+ const uint64_t mask_u = 0x0002003800250013;
+ const uint64_t mask_v = 0x0009002f00380002;
__asm__ volatile(
+ "dli %[tmp0], 0x0001000100010001 \n\t"
+ "dmtc1 %[tmp0], %[ftmp12] \n\t"
"1: \n\t"
- "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t"
+ "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
+ "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -1011,15 +1028,16 @@ void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"dsrl %[dest0_u], %[src0], %[sixteen] \n\t"
"pinsrh_3 %[dest0_u], %[dest0_u], %[value] \n\t"
"pinsrh_0 %[dest0_v], %[src0], %[value] \n\t"
"pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
"pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x08(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -1029,7 +1047,8 @@ void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"dsrl %[src_lo], %[src0], %[sixteen] \n\t"
"pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
"pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
@@ -1045,8 +1064,8 @@ void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
"psubw %[dest0_v], %[src0], %[src1] \n\t"
"psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x10(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -1056,15 +1075,16 @@ void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"dsrl %[dest1_u], %[src0], %[sixteen] \n\t"
"pinsrh_3 %[dest1_u], %[dest1_u], %[value] \n\t"
"pinsrh_0 %[dest1_v], %[src0], %[value] \n\t"
"pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
"pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -1074,7 +1094,8 @@ void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"dsrl %[src_lo], %[src0], %[sixteen] \n\t"
"pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
"pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
@@ -1090,8 +1111,8 @@ void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
"psubw %[dest1_v], %[src0], %[src1] \n\t"
"psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x20(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x27(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -1101,15 +1122,16 @@ void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"dsrl %[dest2_u], %[src0], %[sixteen] \n\t"
"pinsrh_3 %[dest2_u], %[dest2_u], %[value] \n\t"
"pinsrh_0 %[dest2_v], %[src0], %[value] \n\t"
"pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
"pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x28(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x2f(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -1119,7 +1141,8 @@ void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"dsrl %[src_lo], %[src0], %[sixteen] \n\t"
"pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
"pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
@@ -1135,8 +1158,8 @@ void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
"psubw %[dest2_v], %[src0], %[src1] \n\t"
"psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x30(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x37(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -1146,15 +1169,16 @@ void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"dsrl %[dest3_u], %[src0], %[sixteen] \n\t"
"pinsrh_3 %[dest3_u], %[dest3_u], %[value] \n\t"
"pinsrh_0 %[dest3_v], %[src0], %[value] \n\t"
"pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
"pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x38(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x3f(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -1164,7 +1188,8 @@ void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"dsrl %[src_lo], %[src0], %[sixteen] \n\t"
"pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
"pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
@@ -1192,7 +1217,7 @@ void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
"gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
"gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
- "daddiu %[src_rgb0], %[src_rgb0], 0x40 \n\t"
+ "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t"
"daddiu %[dst_u], %[dst_u], 0x08 \n\t"
"daddiu %[dst_v], %[dst_v], 0x08 \n\t"
"daddi %[width], %[width], -0x10 \n\t"
@@ -1202,16 +1227,17 @@ void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
[dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
[dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
[dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
- [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
- : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
+ [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
+ [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
+ : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
[dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
[mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
- [zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02),
+ [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
[sixteen] "f"(0x10)
: "memory");
}
-void ABGRToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
+void ABGRToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
uint64_t src, src_hi, src_lo;
uint64_t dest0, dest1, dest2, dest3;
const uint64_t value = 0x1080;
@@ -1219,8 +1245,8 @@ void ABGRToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
__asm__ volatile(
"1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t"
+ "gsldlc1 %[src], 0x07(%[src_argb]) \n\t"
+ "gsldrc1 %[src], 0x00(%[src_argb]) \n\t"
"punpcklbh %[src_lo], %[src], %[zero] \n\t"
"pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
"pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
@@ -1232,8 +1258,8 @@ void ABGRToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
"paddw %[dest0], %[dest0], %[src] \n\t"
"psrlw %[dest0], %[dest0], %[eight] \n\t"
- "gsldlc1 %[src], 0x0f(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x08(%[src_argb0]) \n\t"
+ "gsldlc1 %[src], 0x0f(%[src_argb]) \n\t"
+ "gsldrc1 %[src], 0x08(%[src_argb]) \n\t"
"punpcklbh %[src_lo], %[src], %[zero] \n\t"
"pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
"pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
@@ -1245,8 +1271,8 @@ void ABGRToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
"paddw %[dest1], %[dest1], %[src] \n\t"
"psrlw %[dest1], %[dest1], %[eight] \n\t"
- "gsldlc1 %[src], 0x17(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x10(%[src_argb0]) \n\t"
+ "gsldlc1 %[src], 0x17(%[src_argb]) \n\t"
+ "gsldrc1 %[src], 0x10(%[src_argb]) \n\t"
"punpcklbh %[src_lo], %[src], %[zero] \n\t"
"pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
"pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
@@ -1258,8 +1284,8 @@ void ABGRToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
"paddw %[dest2], %[dest2], %[src] \n\t"
"psrlw %[dest2], %[dest2], %[eight] \n\t"
- "gsldlc1 %[src], 0x1f(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x18(%[src_argb0]) \n\t"
+ "gsldlc1 %[src], 0x1f(%[src_argb]) \n\t"
+ "gsldrc1 %[src], 0x18(%[src_argb]) \n\t"
"punpcklbh %[src_lo], %[src], %[zero] \n\t"
"pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
"pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
@@ -1277,35 +1303,38 @@ void ABGRToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
"gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
"gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
- "daddiu %[src_argb0], %[src_argb0], 0x20 \n\t"
+ "daddiu %[src_argb], %[src_argb], 0x20 \n\t"
"daddiu %[dst_y], %[dst_y], 0x08 \n\t"
"daddi %[width], %[width], -0x08 \n\t"
"bnez %[width], 1b \n\t"
: [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
[dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
[dest3] "=&f"(dest3)
- : [src_argb0] "r"(src_argb0), [dst_y] "r"(dst_y), [width] "r"(width),
+ : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width),
[mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
[zero] "f"(0x00)
: "memory");
}
-void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
+void ABGRToUVRow_MMI(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
uint64_t src_rgb1;
- uint64_t ftmp[12];
+ uint64_t ftmp[13];
+ uint64_t tmp[1];
const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x00020070004a0026;
- const uint64_t mask_v = 0x0012005e00700002;
+ const uint64_t mask_u = 0x0002003800250013;
+ const uint64_t mask_v = 0x0009002F00380002;
__asm__ volatile(
+ "dli %[tmp0], 0x0001000100010001 \n\t"
+ "dmtc1 %[tmp0], %[ftmp12] \n\t"
"1: \n\t"
- "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t"
+ "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
+ "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -1315,15 +1344,16 @@ void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"pinsrh_3 %[dest0_u], %[src0], %[value] \n\t"
"dsll %[dest0_v], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[dest0_v], %[dest0_v], %[value] \n\t"
"pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
"pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x08(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -1333,7 +1363,8 @@ void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
"dsll %[src_hi], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
@@ -1349,8 +1380,8 @@ void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
"psubw %[dest0_v], %[src0], %[src1] \n\t"
"psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x10(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -1360,15 +1391,16 @@ void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"pinsrh_3 %[dest1_u], %[src0], %[value] \n\t"
"dsll %[dest1_v], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[dest1_v], %[dest1_v], %[value] \n\t"
"pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
"pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -1378,7 +1410,8 @@ void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
"dsll %[src_hi], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
@@ -1394,8 +1427,8 @@ void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
"psubw %[dest1_v], %[src0], %[src1] \n\t"
"psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x20(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x27(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -1405,15 +1438,16 @@ void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"pinsrh_3 %[dest2_u], %[src0], %[value] \n\t"
"dsll %[dest2_v], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[dest2_v], %[dest2_v], %[value] \n\t"
"pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
"pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x28(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x2f(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -1423,7 +1457,8 @@ void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
"dsll %[src_hi], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
@@ -1439,8 +1474,8 @@ void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
"psubw %[dest2_v], %[src0], %[src1] \n\t"
"psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x30(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x37(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -1450,15 +1485,16 @@ void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"pinsrh_3 %[dest3_u], %[src0], %[value] \n\t"
"dsll %[dest3_v], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[dest3_v], %[dest3_v], %[value] \n\t"
"pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
"pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x38(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x3f(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -1468,7 +1504,8 @@ void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
"dsll %[src_hi], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
@@ -1496,7 +1533,7 @@ void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
"gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
"gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
- "daddiu %[src_rgb0], %[src_rgb0], 0x40 \n\t"
+ "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t"
"daddiu %[dst_u], %[dst_u], 0x08 \n\t"
"daddiu %[dst_v], %[dst_v], 0x08 \n\t"
"daddi %[width], %[width], -0x10 \n\t"
@@ -1506,16 +1543,17 @@ void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
[dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
[dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
[dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
- [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
- : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
+ [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
+ [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
+ : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
[dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
[mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
- [zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02),
+ [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
[sixteen] "f"(0x10)
: "memory");
}
-void RGBAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
+void RGBAToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
uint64_t src, src_hi, src_lo;
uint64_t dest0, dest1, dest2, dest3;
const uint64_t value = 0x1080;
@@ -1523,8 +1561,8 @@ void RGBAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
__asm__ volatile(
"1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t"
+ "gsldlc1 %[src], 0x07(%[src_argb]) \n\t"
+ "gsldrc1 %[src], 0x00(%[src_argb]) \n\t"
"punpcklbh %[src_lo], %[src], %[zero] \n\t"
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
"pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
@@ -1536,8 +1574,8 @@ void RGBAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
"paddw %[dest0], %[dest0], %[src] \n\t"
"psrlw %[dest0], %[dest0], %[eight] \n\t"
- "gsldlc1 %[src], 0x0f(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x08(%[src_argb0]) \n\t"
+ "gsldlc1 %[src], 0x0f(%[src_argb]) \n\t"
+ "gsldrc1 %[src], 0x08(%[src_argb]) \n\t"
"punpcklbh %[src_lo], %[src], %[zero] \n\t"
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
"pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
@@ -1549,8 +1587,8 @@ void RGBAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
"paddw %[dest1], %[dest1], %[src] \n\t"
"psrlw %[dest1], %[dest1], %[eight] \n\t"
- "gsldlc1 %[src], 0x17(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x10(%[src_argb0]) \n\t"
+ "gsldlc1 %[src], 0x17(%[src_argb]) \n\t"
+ "gsldrc1 %[src], 0x10(%[src_argb]) \n\t"
"punpcklbh %[src_lo], %[src], %[zero] \n\t"
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
"pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
@@ -1562,8 +1600,8 @@ void RGBAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
"paddw %[dest2], %[dest2], %[src] \n\t"
"psrlw %[dest2], %[dest2], %[eight] \n\t"
- "gsldlc1 %[src], 0x1f(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x18(%[src_argb0]) \n\t"
+ "gsldlc1 %[src], 0x1f(%[src_argb]) \n\t"
+ "gsldrc1 %[src], 0x18(%[src_argb]) \n\t"
"punpcklbh %[src_lo], %[src], %[zero] \n\t"
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
"pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
@@ -1581,35 +1619,38 @@ void RGBAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
"gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
"gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
- "daddiu %[src_argb0], %[src_argb0], 0x20 \n\t"
+ "daddiu %[src_argb], %[src_argb], 0x20 \n\t"
"daddiu %[dst_y], %[dst_y], 0x08 \n\t"
"daddi %[width], %[width], -0x08 \n\t"
"bnez %[width], 1b \n\t"
: [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
[dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
[dest3] "=&f"(dest3)
- : [src_argb0] "r"(src_argb0), [dst_y] "r"(dst_y), [width] "r"(width),
+ : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width),
[mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
[zero] "f"(0x00)
: "memory");
}
-void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
+void RGBAToUVRow_MMI(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
uint64_t src_rgb1;
- uint64_t ftmp[12];
+ uint64_t ftmp[13];
+ uint64_t tmp[1];
const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x0026004a00700002;
- const uint64_t mask_v = 0x00020070005e0012;
+ const uint64_t mask_u = 0x0013002500380002;
+ const uint64_t mask_v = 0x00020038002f0009;
__asm__ volatile(
+ "dli %[tmp0], 0x0001000100010001 \n\t"
+ "dmtc1 %[tmp0], %[ftmp12] \n\t"
"1: \n\t"
- "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t"
+ "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
+ "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -1619,15 +1660,16 @@ void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"pinsrh_0 %[dest0_u], %[src0], %[value] \n\t"
"dsrl %[dest0_v], %[src0], %[sixteen] \n\t"
"pinsrh_3 %[dest0_v], %[dest0_v], %[value] \n\t"
"pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
"pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x08(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -1637,7 +1679,8 @@ void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
"dsrl %[src_hi], %[src0], %[sixteen] \n\t"
"pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
@@ -1653,8 +1696,8 @@ void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
"psubw %[dest0_v], %[src1], %[src0] \n\t"
"psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x10(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -1664,15 +1707,16 @@ void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"pinsrh_0 %[dest1_u], %[src0], %[value] \n\t"
"dsrl %[dest1_v], %[src0], %[sixteen] \n\t"
"pinsrh_3 %[dest1_v], %[dest1_v], %[value] \n\t"
"pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
"pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -1682,7 +1726,8 @@ void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
"dsrl %[src_hi], %[src0], %[sixteen] \n\t"
"pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
@@ -1698,8 +1743,8 @@ void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
"psubw %[dest1_v], %[src1], %[src0] \n\t"
"psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x20(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x27(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -1709,15 +1754,16 @@ void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"pinsrh_0 %[dest2_u], %[src0], %[value] \n\t"
"dsrl %[dest2_v], %[src0], %[sixteen] \n\t"
"pinsrh_3 %[dest2_v], %[dest2_v], %[value] \n\t"
"pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
"pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x28(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x2f(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -1727,7 +1773,8 @@ void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
"dsrl %[src_hi], %[src0], %[sixteen] \n\t"
"pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
@@ -1743,8 +1790,8 @@ void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
"psubw %[dest2_v], %[src1], %[src0] \n\t"
"psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x30(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x37(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -1754,15 +1801,16 @@ void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"pinsrh_0 %[dest3_u], %[src0], %[value] \n\t"
"dsrl %[dest3_v], %[src0], %[sixteen] \n\t"
"pinsrh_3 %[dest3_v], %[dest3_v], %[value] \n\t"
"pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
"pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x38(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x3f(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -1772,7 +1820,8 @@ void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
"paddh %[src0], %[src0], %[src_lo] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
"dsrl %[src_hi], %[src0], %[sixteen] \n\t"
"pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
@@ -1800,7 +1849,7 @@ void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
"gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
"gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
- "daddiu %[src_rgb0], %[src_rgb0], 0x40 \n\t"
+ "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t"
"daddiu %[dst_u], %[dst_u], 0x08 \n\t"
"daddiu %[dst_v], %[dst_v], 0x08 \n\t"
"daddi %[width], %[width], -0x10 \n\t"
@@ -1810,16 +1859,17 @@ void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
[dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
[dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
[dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
- [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
- : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
+ [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
+ [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
+ : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
[dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
[mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
- [zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02),
+ [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
[sixteen] "f"(0x10)
: "memory");
}
-void RGB24ToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
+void RGB24ToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
uint64_t src, src_hi, src_lo;
uint64_t dest0, dest1, dest2, dest3;
const uint64_t value = 0x1080;
@@ -1827,8 +1877,8 @@ void RGB24ToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
__asm__ volatile(
"1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t"
+ "gsldlc1 %[src], 0x07(%[src_argb]) \n\t"
+ "gsldrc1 %[src], 0x00(%[src_argb]) \n\t"
"punpcklbh %[src_lo], %[src], %[zero] \n\t"
"pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
"pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
@@ -1841,8 +1891,8 @@ void RGB24ToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
"paddw %[dest0], %[dest0], %[src] \n\t"
"psrlw %[dest0], %[dest0], %[eight] \n\t"
- "gsldlc1 %[src], 0x0d(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x06(%[src_argb0]) \n\t"
+ "gsldlc1 %[src], 0x0d(%[src_argb]) \n\t"
+ "gsldrc1 %[src], 0x06(%[src_argb]) \n\t"
"punpcklbh %[src_lo], %[src], %[zero] \n\t"
"pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
"pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
@@ -1855,8 +1905,8 @@ void RGB24ToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
"paddw %[dest1], %[dest1], %[src] \n\t"
"psrlw %[dest1], %[dest1], %[eight] \n\t"
- "gsldlc1 %[src], 0x13(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x0c(%[src_argb0]) \n\t"
+ "gsldlc1 %[src], 0x13(%[src_argb]) \n\t"
+ "gsldrc1 %[src], 0x0c(%[src_argb]) \n\t"
"punpcklbh %[src_lo], %[src], %[zero] \n\t"
"pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
"pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
@@ -1869,8 +1919,8 @@ void RGB24ToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
"paddw %[dest2], %[dest2], %[src] \n\t"
"psrlw %[dest2], %[dest2], %[eight] \n\t"
- "gsldlc1 %[src], 0x19(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x12(%[src_argb0]) \n\t"
+ "gsldlc1 %[src], 0x19(%[src_argb]) \n\t"
+ "gsldrc1 %[src], 0x12(%[src_argb]) \n\t"
"punpcklbh %[src_lo], %[src], %[zero] \n\t"
"pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
"pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
@@ -1889,35 +1939,38 @@ void RGB24ToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
"gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
"gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
- "daddiu %[src_argb0], %[src_argb0], 0x18 \n\t"
+ "daddiu %[src_argb], %[src_argb], 0x18 \n\t"
"daddiu %[dst_y], %[dst_y], 0x08 \n\t"
"daddi %[width], %[width], -0x08 \n\t"
"bnez %[width], 1b \n\t"
: [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
[dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
[dest3] "=&f"(dest3)
- : [src_argb0] "r"(src_argb0), [dst_y] "r"(dst_y), [width] "r"(width),
+ : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width),
[mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
[zero] "f"(0x00)
: "memory");
}
-void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
+void RGB24ToUVRow_MMI(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
uint64_t src_rgb1;
- uint64_t ftmp[12];
+ uint64_t ftmp[13];
+ uint64_t tmp[1];
const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x0026004a00700002;
- const uint64_t mask_v = 0x00020070005e0012;
+ const uint64_t mask_u = 0x0013002500380002;
+ const uint64_t mask_v = 0x00020038002f0009;
__asm__ volatile(
+ "dli %[tmp0], 0x0001000100010001 \n\t"
+ "dmtc1 %[tmp0], %[ftmp12] \n\t"
"1: \n\t"
- "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t"
+ "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
+ "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -1929,15 +1982,16 @@ void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
"dsll %[src1], %[src1], %[eight] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"dsll %[dest0_u], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"
"pinsrh_3 %[dest0_v], %[src0], %[value] \n\t"
"pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
"pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x06(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x0d(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x06(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x0d(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x06(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x0d(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -1949,7 +2003,8 @@ void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
"dsll %[src1], %[src1], %[eight] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"dsll %[src_lo], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
"pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
@@ -1965,8 +2020,8 @@ void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
"psubw %[dest0_v], %[src1], %[src0] \n\t"
"psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x0c(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x13(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x0c(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x13(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x0c(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x13(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -1978,15 +2033,16 @@ void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
"dsll %[src1], %[src1], %[eight] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"dsll %[dest1_u], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
"pinsrh_3 %[dest1_v], %[src0], %[value] \n\t"
"pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
"pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x12(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x19(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x12(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x19(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x12(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x19(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -1998,7 +2054,8 @@ void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
"dsll %[src1], %[src1], %[eight] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"dsll %[src_lo], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
"pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
@@ -2014,8 +2071,8 @@ void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
"psubw %[dest1_v], %[src1], %[src0] \n\t"
"psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -2027,15 +2084,16 @@ void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
"dsll %[src1], %[src1], %[eight] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"dsll %[dest2_u], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
"pinsrh_3 %[dest2_v], %[src0], %[value] \n\t"
"pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
"pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x1e(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x25(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x1e(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x25(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x1e(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x25(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -2047,7 +2105,8 @@ void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
"dsll %[src1], %[src1], %[eight] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"dsll %[src_lo], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
"pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
@@ -2063,8 +2122,8 @@ void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
"psubw %[dest2_v], %[src1], %[src0] \n\t"
"psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x24(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x2b(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x24(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x2b(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x24(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x2b(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -2076,15 +2135,16 @@ void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
"dsll %[src1], %[src1], %[eight] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"dsll %[dest3_u], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
"pinsrh_3 %[dest3_v], %[src0], %[value] \n\t"
"pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
"pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x2a(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x31(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x2a(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x31(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x2a(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x31(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -2096,7 +2156,8 @@ void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
"dsll %[src1], %[src1], %[eight] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"dsll %[src_lo], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
"pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
@@ -2124,7 +2185,7 @@ void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
"gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
"gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
- "daddiu %[src_rgb0], %[src_rgb0], 0x30 \n\t"
+ "daddiu %[src_rgb], %[src_rgb], 0x30 \n\t"
"daddiu %[dst_u], %[dst_u], 0x08 \n\t"
"daddiu %[dst_v], %[dst_v], 0x08 \n\t"
"daddi %[width], %[width], -0x10 \n\t"
@@ -2134,16 +2195,17 @@ void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
[dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
[dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
[dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
- [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
- : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
+ [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
+ [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
+ : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
[dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
[mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
- [zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02),
+ [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
[sixteen] "f"(0x10)
: "memory");
}
-void RAWToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
+void RAWToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
uint64_t src, src_hi, src_lo;
uint64_t dest0, dest1, dest2, dest3;
const uint64_t value = 0x1080;
@@ -2151,8 +2213,8 @@ void RAWToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
__asm__ volatile(
"1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t"
+ "gsldlc1 %[src], 0x07(%[src_argb]) \n\t"
+ "gsldrc1 %[src], 0x00(%[src_argb]) \n\t"
"punpcklbh %[src_lo], %[src], %[zero] \n\t"
"pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
"pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
@@ -2165,8 +2227,8 @@ void RAWToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
"paddw %[dest0], %[dest0], %[src] \n\t"
"psrlw %[dest0], %[dest0], %[eight] \n\t"
- "gsldlc1 %[src], 0x0d(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x06(%[src_argb0]) \n\t"
+ "gsldlc1 %[src], 0x0d(%[src_argb]) \n\t"
+ "gsldrc1 %[src], 0x06(%[src_argb]) \n\t"
"punpcklbh %[src_lo], %[src], %[zero] \n\t"
"pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
"pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
@@ -2179,8 +2241,8 @@ void RAWToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
"paddw %[dest1], %[dest1], %[src] \n\t"
"psrlw %[dest1], %[dest1], %[eight] \n\t"
- "gsldlc1 %[src], 0x13(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x0c(%[src_argb0]) \n\t"
+ "gsldlc1 %[src], 0x13(%[src_argb]) \n\t"
+ "gsldrc1 %[src], 0x0c(%[src_argb]) \n\t"
"punpcklbh %[src_lo], %[src], %[zero] \n\t"
"pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
"pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
@@ -2193,8 +2255,8 @@ void RAWToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
"paddw %[dest2], %[dest2], %[src] \n\t"
"psrlw %[dest2], %[dest2], %[eight] \n\t"
- "gsldlc1 %[src], 0x19(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x12(%[src_argb0]) \n\t"
+ "gsldlc1 %[src], 0x19(%[src_argb]) \n\t"
+ "gsldrc1 %[src], 0x12(%[src_argb]) \n\t"
"punpcklbh %[src_lo], %[src], %[zero] \n\t"
"pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
"pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
@@ -2213,35 +2275,38 @@ void RAWToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
"gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
"gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
- "daddiu %[src_argb0], %[src_argb0], 0x18 \n\t"
+ "daddiu %[src_argb], %[src_argb], 0x18 \n\t"
"daddiu %[dst_y], %[dst_y], 0x08 \n\t"
"daddi %[width], %[width], -0x08 \n\t"
"bnez %[width], 1b \n\t"
: [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
[dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
[dest3] "=&f"(dest3)
- : [src_argb0] "r"(src_argb0), [dst_y] "r"(dst_y), [width] "r"(width),
+ : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width),
[mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
[zero] "f"(0x00)
: "memory");
}
-void RAWToUVRow_MMI(const uint8_t* src_rgb0,
+void RAWToUVRow_MMI(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
uint64_t src_rgb1;
- uint64_t ftmp[12];
+ uint64_t ftmp[13];
+ uint64_t tmp[1];
const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x00020070004a0026;
- const uint64_t mask_v = 0x0012005e00700002;
+ const uint64_t mask_u = 0x0002003800250013;
+ const uint64_t mask_v = 0x0009002f00380002;
__asm__ volatile(
+ "dli %[tmp0], 0x0001000100010001 \n\t"
+ "dmtc1 %[tmp0], %[ftmp12] \n\t"
"1: \n\t"
- "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t"
+ "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
+ "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -2253,15 +2318,16 @@ void RAWToUVRow_MMI(const uint8_t* src_rgb0,
"dsll %[src1], %[src1], %[eight] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"pinsrh_3 %[dest0_u], %[src0], %[value] \n\t"
"dsll %[dest0_v], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[dest0_v], %[dest0_v], %[value] \n\t"
"pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
"pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x06(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x0d(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x06(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x0d(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x06(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x0d(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -2273,7 +2339,8 @@ void RAWToUVRow_MMI(const uint8_t* src_rgb0,
"dsll %[src1], %[src1], %[eight] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
"dsll %[src_hi], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
@@ -2289,8 +2356,8 @@ void RAWToUVRow_MMI(const uint8_t* src_rgb0,
"psubw %[dest0_v], %[src0], %[src1] \n\t"
"psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x0c(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x13(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x0c(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x13(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x0c(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x13(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -2302,15 +2369,16 @@ void RAWToUVRow_MMI(const uint8_t* src_rgb0,
"dsll %[src1], %[src1], %[eight] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"pinsrh_3 %[dest1_u], %[src0], %[value] \n\t"
"dsll %[dest1_v], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[dest1_v], %[dest1_v], %[value] \n\t"
"pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
"pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x12(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x19(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x12(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x19(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x12(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x19(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -2322,7 +2390,8 @@ void RAWToUVRow_MMI(const uint8_t* src_rgb0,
"dsll %[src1], %[src1], %[eight] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
"dsll %[src_hi], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
@@ -2338,8 +2407,8 @@ void RAWToUVRow_MMI(const uint8_t* src_rgb0,
"psubw %[dest1_v], %[src0], %[src1] \n\t"
"psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -2351,15 +2420,16 @@ void RAWToUVRow_MMI(const uint8_t* src_rgb0,
"dsll %[src1], %[src1], %[eight] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"pinsrh_3 %[dest2_u], %[src0], %[value] \n\t"
"dsll %[dest2_v], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[dest2_v], %[dest2_v], %[value] \n\t"
"pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
"pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x1e(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x25(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x1e(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x25(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x1e(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x25(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -2371,7 +2441,8 @@ void RAWToUVRow_MMI(const uint8_t* src_rgb0,
"dsll %[src1], %[src1], %[eight] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
"dsll %[src_hi], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
@@ -2387,8 +2458,8 @@ void RAWToUVRow_MMI(const uint8_t* src_rgb0,
"psubw %[dest2_v], %[src0], %[src1] \n\t"
"psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x24(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x2b(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x24(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x2b(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x24(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x2b(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -2400,15 +2471,16 @@ void RAWToUVRow_MMI(const uint8_t* src_rgb0,
"dsll %[src1], %[src1], %[eight] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"pinsrh_3 %[dest3_u], %[src0], %[value] \n\t"
"dsll %[dest3_v], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[dest3_v], %[dest3_v], %[value] \n\t"
"pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
"pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x2a(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x31(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x2a(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x31(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x2a(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x31(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
@@ -2420,7 +2492,8 @@ void RAWToUVRow_MMI(const uint8_t* src_rgb0,
"dsll %[src1], %[src1], %[eight] \n\t"
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
"paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
+ "paddh %[src0], %[src0], %[ftmp12] \n\t"
+ "psrlh %[src0], %[src0], %[one] \n\t"
"pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
"dsll %[src_hi], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
@@ -2448,7 +2521,7 @@ void RAWToUVRow_MMI(const uint8_t* src_rgb0,
"gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
"gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
- "daddiu %[src_rgb0], %[src_rgb0], 0x30 \n\t"
+ "daddiu %[src_rgb], %[src_rgb], 0x30 \n\t"
"daddiu %[dst_u], %[dst_u], 0x08 \n\t"
"daddiu %[dst_v], %[dst_v], 0x08 \n\t"
"daddi %[width], %[width], -0x10 \n\t"
@@ -2458,23 +2531,24 @@ void RAWToUVRow_MMI(const uint8_t* src_rgb0,
[dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
[dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
[dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
- [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
- : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
+ [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
+ [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
+ : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
[dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
[mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
- [zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02),
+ [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
[sixteen] "f"(0x10)
: "memory");
}
-void ARGBToYJRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
+void ARGBToYJRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
uint64_t src, src_hi, src_lo;
uint64_t dest, dest0, dest1, dest2, dest3;
uint64_t tmp0, tmp1;
- const uint64_t shift = 0x07;
- const uint64_t value = 0x0040;
+ const uint64_t shift = 0x08;
+ const uint64_t value = 0x80;
const uint64_t mask0 = 0x0;
- const uint64_t mask1 = 0x00010026004B000FULL;
+ const uint64_t mask1 = 0x0001004D0096001DULL;
__asm__ volatile(
"1: \n\t"
@@ -2544,13 +2618,13 @@ void ARGBToYJRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
[src_lo] "=&f"(src_lo), [dest0] "=&f"(dest0), [dest1] "=&f"(dest1),
[dest2] "=&f"(dest2), [dest3] "=&f"(dest3), [tmp0] "=&f"(tmp0),
[tmp1] "=&f"(tmp1)
- : [src_ptr] "r"(src_argb0), [dst_ptr] "r"(dst_y), [mask0] "f"(mask0),
+ : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_y), [mask0] "f"(mask0),
[mask1] "f"(mask1), [shift] "f"(shift), [value] "f"(value),
[width] "r"(width)
: "memory");
}
-void ARGBToUVJRow_MMI(const uint8_t* src_rgb0,
+void ARGBToUVJRow_MMI(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -2558,22 +2632,22 @@ void ARGBToUVJRow_MMI(const uint8_t* src_rgb0,
uint64_t src_rgb1;
uint64_t ftmp[12];
const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x002b0054007f0002;
- const uint64_t mask_v = 0x0002007f006b0014;
+ const uint64_t mask_u = 0x0015002a003f0002;
+ const uint64_t mask_v = 0x0002003f0035000a;
__asm__ volatile(
"1: \n\t"
- "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t"
+ "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
+ "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
"punpckhbh %[src_hi], %[src0], %[zero] \n\t"
"punpcklbh %[src0], %[src1], %[zero] \n\t"
"punpckhbh %[src1], %[src1], %[zero] \n\t"
- "pavgh %[src0], %[src_lo], %[src0] \n\t"
- "pavgh %[src1], %[src_hi], %[src1] \n\t"
+ "paddh %[src0], %[src_lo], %[src0] \n\t"
+ "paddh %[src1], %[src_hi], %[src1] \n\t"
"pavgh %[src0], %[src0], %[src1] \n\t"
"dsll %[dest0_u], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"
@@ -2581,16 +2655,16 @@ void ARGBToUVJRow_MMI(const uint8_t* src_rgb0,
"pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
"pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x08(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
"punpckhbh %[src_hi], %[src0], %[zero] \n\t"
"punpcklbh %[src0], %[src1], %[zero] \n\t"
"punpckhbh %[src1], %[src1], %[zero] \n\t"
- "pavgh %[src0], %[src_lo], %[src0] \n\t"
- "pavgh %[src1], %[src_hi], %[src1] \n\t"
+ "paddh %[src0], %[src_lo], %[src0] \n\t"
+ "paddh %[src1], %[src_hi], %[src1] \n\t"
"pavgh %[src0], %[src0], %[src1] \n\t"
"dsll %[src_lo], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
@@ -2607,16 +2681,16 @@ void ARGBToUVJRow_MMI(const uint8_t* src_rgb0,
"psubw %[dest0_v], %[src1], %[src0] \n\t"
"psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x10(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
"punpckhbh %[src_hi], %[src0], %[zero] \n\t"
"punpcklbh %[src0], %[src1], %[zero] \n\t"
"punpckhbh %[src1], %[src1], %[zero] \n\t"
- "pavgh %[src0], %[src_lo], %[src0] \n\t"
- "pavgh %[src1], %[src_hi], %[src1] \n\t"
+ "paddh %[src0], %[src_lo], %[src0] \n\t"
+ "paddh %[src1], %[src_hi], %[src1] \n\t"
"pavgh %[src0], %[src0], %[src1] \n\t"
"dsll %[dest1_u], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
@@ -2624,16 +2698,16 @@ void ARGBToUVJRow_MMI(const uint8_t* src_rgb0,
"pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
"pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
"punpckhbh %[src_hi], %[src0], %[zero] \n\t"
"punpcklbh %[src0], %[src1], %[zero] \n\t"
"punpckhbh %[src1], %[src1], %[zero] \n\t"
- "pavgh %[src0], %[src_lo], %[src0] \n\t"
- "pavgh %[src1], %[src_hi], %[src1] \n\t"
+ "paddh %[src0], %[src_lo], %[src0] \n\t"
+ "paddh %[src1], %[src_hi], %[src1] \n\t"
"pavgh %[src0], %[src0], %[src1] \n\t"
"dsll %[src_lo], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
@@ -2650,16 +2724,16 @@ void ARGBToUVJRow_MMI(const uint8_t* src_rgb0,
"psubw %[dest1_v], %[src1], %[src0] \n\t"
"psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x20(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x27(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
"punpckhbh %[src_hi], %[src0], %[zero] \n\t"
"punpcklbh %[src0], %[src1], %[zero] \n\t"
"punpckhbh %[src1], %[src1], %[zero] \n\t"
- "pavgh %[src0], %[src_lo], %[src0] \n\t"
- "pavgh %[src1], %[src_hi], %[src1] \n\t"
+ "paddh %[src0], %[src_lo], %[src0] \n\t"
+ "paddh %[src1], %[src_hi], %[src1] \n\t"
"pavgh %[src0], %[src0], %[src1] \n\t"
"dsll %[dest2_u], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
@@ -2667,16 +2741,16 @@ void ARGBToUVJRow_MMI(const uint8_t* src_rgb0,
"pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
"pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x28(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x2f(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
"punpckhbh %[src_hi], %[src0], %[zero] \n\t"
"punpcklbh %[src0], %[src1], %[zero] \n\t"
"punpckhbh %[src1], %[src1], %[zero] \n\t"
- "pavgh %[src0], %[src_lo], %[src0] \n\t"
- "pavgh %[src1], %[src_hi], %[src1] \n\t"
+ "paddh %[src0], %[src_lo], %[src0] \n\t"
+ "paddh %[src1], %[src_hi], %[src1] \n\t"
"pavgh %[src0], %[src0], %[src1] \n\t"
"dsll %[src_lo], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
@@ -2693,16 +2767,16 @@ void ARGBToUVJRow_MMI(const uint8_t* src_rgb0,
"psubw %[dest2_v], %[src1], %[src0] \n\t"
"psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x30(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x37(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
"punpckhbh %[src_hi], %[src0], %[zero] \n\t"
"punpcklbh %[src0], %[src1], %[zero] \n\t"
"punpckhbh %[src1], %[src1], %[zero] \n\t"
- "pavgh %[src0], %[src_lo], %[src0] \n\t"
- "pavgh %[src1], %[src_hi], %[src1] \n\t"
+ "paddh %[src0], %[src_lo], %[src0] \n\t"
+ "paddh %[src1], %[src_hi], %[src1] \n\t"
"pavgh %[src0], %[src0], %[src1] \n\t"
"dsll %[dest3_u], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
@@ -2710,16 +2784,16 @@ void ARGBToUVJRow_MMI(const uint8_t* src_rgb0,
"pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
"pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x38(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x3f(%[src_rgb0]) \n\t"
+ "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t"
+ "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t"
"gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
"gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
"punpcklbh %[src_lo], %[src0], %[zero] \n\t"
"punpckhbh %[src_hi], %[src0], %[zero] \n\t"
"punpcklbh %[src0], %[src1], %[zero] \n\t"
"punpckhbh %[src1], %[src1], %[zero] \n\t"
- "pavgh %[src0], %[src_lo], %[src0] \n\t"
- "pavgh %[src1], %[src_hi], %[src1] \n\t"
+ "paddh %[src0], %[src_lo], %[src0] \n\t"
+ "paddh %[src1], %[src_hi], %[src1] \n\t"
"pavgh %[src0], %[src0], %[src1] \n\t"
"dsll %[src_lo], %[src0], %[sixteen] \n\t"
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
@@ -2748,7 +2822,7 @@ void ARGBToUVJRow_MMI(const uint8_t* src_rgb0,
"gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
"gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
- "daddiu %[src_rgb0], %[src_rgb0], 0x40 \n\t"
+ "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t"
"daddiu %[dst_u], %[dst_u], 0x08 \n\t"
"daddiu %[dst_v], %[dst_v], 0x08 \n\t"
"daddi %[width], %[width], -0x10 \n\t"
@@ -2759,10 +2833,10 @@ void ARGBToUVJRow_MMI(const uint8_t* src_rgb0,
[dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
[dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
[dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
- : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
+ : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
[dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
[mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
- [zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02),
+ [zero] "f"(0x00), [eight] "f"(0x08),
[sixteen] "f"(0x10)
: "memory");
}
@@ -4052,10 +4126,10 @@ void ARGBGrayRow_MMI(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
uint64_t tmp0, tmp1;
const uint64_t mask0 = 0x0;
const uint64_t mask1 = 0x01;
- const uint64_t mask2 = 0x00400026004B000FULL;
+ const uint64_t mask2 = 0x0080004D0096001DULL;
const uint64_t mask3 = 0xFF000000FF000000ULL;
const uint64_t mask4 = ~mask3;
- const uint64_t shift = 0x07;
+ const uint64_t shift = 0x08;
__asm__ volatile(
"1: \n\t"
@@ -4312,7 +4386,7 @@ void ARGBShadeRow_MMI(const uint8_t* src_argb,
: "memory");
}
-void ARGBMultiplyRow_MMI(const uint8_t* src_argb0,
+void ARGBMultiplyRow_MMI(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
@@ -4348,12 +4422,12 @@ void ARGBMultiplyRow_MMI(const uint8_t* src_argb0,
[src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo),
[dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), [src0] "=&f"(src0),
[src1] "=&f"(src1), [dest] "=&f"(dest)
- : [src0_ptr] "r"(src_argb0), [src1_ptr] "r"(src_argb1),
+ : [src0_ptr] "r"(src_argb), [src1_ptr] "r"(src_argb1),
[dst_ptr] "r"(dst_argb), [width] "r"(width), [mask] "f"(mask)
: "memory");
}
-void ARGBAddRow_MMI(const uint8_t* src_argb0,
+void ARGBAddRow_MMI(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
@@ -4375,12 +4449,12 @@ void ARGBAddRow_MMI(const uint8_t* src_argb0,
"daddi %[width], %[width], -0x02 \n\t"
"bnez %[width], 1b \n\t"
: [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
- : [src0_ptr] "r"(src_argb0), [src1_ptr] "r"(src_argb1),
+ : [src0_ptr] "r"(src_argb), [src1_ptr] "r"(src_argb1),
[dst_ptr] "r"(dst_argb), [width] "r"(width)
: "memory");
}
-void ARGBSubtractRow_MMI(const uint8_t* src_argb0,
+void ARGBSubtractRow_MMI(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
@@ -4402,7 +4476,7 @@ void ARGBSubtractRow_MMI(const uint8_t* src_argb0,
"daddi %[width], %[width], -0x02 \n\t"
"bnez %[width], 1b \n\t"
: [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
- : [src0_ptr] "r"(src_argb0), [src1_ptr] "r"(src_argb1),
+ : [src0_ptr] "r"(src_argb), [src1_ptr] "r"(src_argb1),
[dst_ptr] "r"(dst_argb), [width] "r"(width)
: "memory");
}
@@ -4778,7 +4852,9 @@ void J400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* dst_argb, int width) {
: "memory");
}
-void I400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* rgb_buf, int width) {
+// TODO: respect YuvConstants (the parameter is unnamed below, so it is currently ignored).
+void I400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* rgb_buf,
+ const struct YuvConstants*, int width) {
uint64_t src, src_lo, src_hi, dest, dest_lo, dest_hi;
const uint64_t mask0 = 0x0;
const uint64_t mask1 = 0x55;
@@ -4912,10 +4988,10 @@ void MirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
: "memory");
}
-void MirrorUVRow_MMI(const uint8_t* src_uv,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
+void MirrorSplitUVRow_MMI(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
uint64_t src0, src1, dest0, dest1;
const uint64_t mask0 = 0x00ff00ff00ff00ffULL;
const uint64_t mask1 = 0x1b;
@@ -5476,10 +5552,10 @@ void UYVYToYRow_MMI(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
: "memory");
}
-// Blend src_argb0 over src_argb1 and store to dst_argb.
-// dst_argb may be src_argb0 or src_argb1.
+// Blend src_argb over src_argb1 and store to dst_argb.
+// dst_argb may be src_argb or src_argb1.
// This code mimics the SSSE3 version for better testability.
-void ARGBBlendRow_MMI(const uint8_t* src_argb0,
+void ARGBBlendRow_MMI(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
@@ -5532,7 +5608,7 @@ void ARGBBlendRow_MMI(const uint8_t* src_argb0,
[dest] "=&f"(dest), [src0_hi] "=&f"(src0_hi), [src0_lo] "=&f"(src0_lo),
[src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo),
[dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo)
- : [src0_ptr] "r"(src_argb0), [src1_ptr] "r"(src_argb1),
+ : [src0_ptr] "r"(src_argb), [src1_ptr] "r"(src_argb1),
[dst_ptr] "r"(dst_argb), [mask0] "f"(mask0), [mask1] "f"(mask1),
[mask2] "f"(mask2), [mask3] "f"(mask3), [mask4] "f"(mask4),
[shift] "f"(shift), [width] "r"(width)
@@ -6034,6 +6110,1730 @@ void ARGBCopyYToAlphaRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
: "memory");
}
+void I444ToARGBRow_MMI(const uint8_t* src_y,  // Planar Y/U/V (one U and V byte per pixel) -> 4-byte pixels, 4 pixels per loop pass.
+                       const uint8_t* src_u,  // U row; pointer advances 4 bytes per pass
+                       const uint8_t* src_v,  // V row; pointer advances 4 bytes per pass
+                       uint8_t* rgb_buf,  // output; 16 bytes written per pass
+                       const struct YuvConstants* yuvconstants,  // table read at the fixed byte offsets below
+                       int width) {  // pixel count; 4 is subtracted per pass and the loop ends only at exactly 0
+  uint64_t y,u,v;  // per-pass sample lanes
+  uint64_t b_vec[2],g_vec[2],r_vec[2];  // [0]: running channel value; [1]: scratch (products; g_vec[1] also holds output pixels 2-3)
+  uint64_t mask = 0xff00ff00ff00ff00ULL;  // high byte of every 16-bit lane
+  uint64_t ub,ug,vg,vr,bb,bg,br,yg;  // constants loaded once, before the loop
+  __asm__ volatile (
+    "ldc1       %[yg],         0xc0(%[yuvcons_ptr])          \n\t"// yg: multiplier applied to Y (offset 0xc0)
+    "ldc1       %[bb],         0x60(%[yuvcons_ptr])          \n\t"// bb: term added to Y for blue (offset 0x60)
+    "ldc1       %[ub],         0x00(%[yuvcons_ptr])          \n\t"// ub: U multiplier for blue (offset 0x00)
+    "or         %[ub],         %[ub],             %[mask]    \n\t"// set high byte of each lane (sign extension; presumably ub is stored as zero-padded bytes - confirm)
+    "ldc1       %[bg],         0x80(%[yuvcons_ptr])          \n\t"// bg: term added to Y for green (offset 0x80)
+    "ldc1       %[ug],         0x20(%[yuvcons_ptr])          \n\t"// ug: taken from the 8 bytes at offset 0x20
+    "punpcklbh  %[ug],         %[ug],             %[zero]    \n\t"  // zero-extend low 4 bytes to 16-bit lanes
+    "pshufh     %[ug],         %[ug],             %[zero]    \n\t"  // selector 0x00: replicate lane 0 into all lanes
+    "ldc1       %[vg],         0x20(%[yuvcons_ptr])          \n\t"// vg: same 8 bytes at offset 0x20, different lane
+    "punpcklbh  %[vg],         %[vg],             %[zero]    \n\t"  // zero-extend low 4 bytes to 16-bit lanes
+    "pshufh     %[vg],         %[vg],             %[five]    \n\t"  // selector 0x55: replicate lane 1 into all lanes
+    "ldc1       %[br],         0xa0(%[yuvcons_ptr])          \n\t"// br: term added to Y for red (offset 0xa0)
+    "ldc1       %[vr],         0x40(%[yuvcons_ptr])          \n\t"// vr: V multiplier for red (offset 0x40)
+    "punpcklbh  %[vr],         %[vr],             %[zero]    \n\t"  // zero-extend low 4 bytes to 16-bit lanes
+    "pshufh     %[vr],         %[vr],             %[five]    \n\t"  // selector 0x55: replicate lane 1 into all lanes
+    "or         %[vr],         %[vr],             %[mask]    \n\t"// set high byte of each lane (sign extension)
+
+    "1:                                                      \n\t"
+    "gslwlc1    %[y],          0x03(%[y_ptr])                \n\t"  // unaligned 32-bit load: 4 Y bytes
+    "gslwrc1    %[y],          0x00(%[y_ptr])                \n\t"
+    "gslwlc1    %[u],          0x03(%[u_ptr])                \n\t"  // unaligned 32-bit load: 4 U bytes
+    "gslwrc1    %[u],          0x00(%[u_ptr])                \n\t"
+    "gslwlc1    %[v],          0x03(%[v_ptr])                \n\t"  // unaligned 32-bit load: 4 V bytes
+    "gslwrc1    %[v],          0x00(%[v_ptr])                \n\t"
+
+    "punpcklbh  %[y],          %[y],              %[y]       \n\t"// duplicate each byte into a 16-bit lane: y * 0x0101
+    "pmulhuh    %[y],          %[y],              %[yg]      \n\t"// y1 = high 16 bits of the unsigned product with yg
+
+    "punpcklbh  %[u],          %[u],              %[zero]    \n\t"// u: zero-extend 4 bytes to 16-bit lanes
+    "paddsh     %[b_vec0],     %[y],              %[bb]      \n\t"  // b = y1 + bb (signed saturating)
+    "pmullh     %[b_vec1],     %[u],              %[ub]      \n\t"  // low 16 bits of u * ub
+    "psubsh     %[b_vec0],     %[b_vec0],         %[b_vec1]  \n\t"  // b -= u * ub (signed saturating)
+    "psrah      %[b_vec0],     %[b_vec0],         %[six]     \n\t"  // b >>= 6 (arithmetic)
+
+    "punpcklbh  %[v],          %[v],              %[zero]    \n\t"// v: zero-extend 4 bytes to 16-bit lanes
+    "paddsh     %[g_vec0],     %[y],              %[bg]      \n\t"  // g = y1 + bg (signed saturating)
+    "pmullh     %[g_vec1],     %[u],              %[ug]      \n\t"// low 16 bits of u * ug
+    "psubsh     %[g_vec0],     %[g_vec0],         %[g_vec1]  \n\t"  // g -= u * ug
+    "pmullh     %[g_vec1],     %[v],              %[vg]      \n\t"// low 16 bits of v * vg
+    "psubsh     %[g_vec0],     %[g_vec0],         %[g_vec1]  \n\t"  // g -= v * vg
+    "psrah      %[g_vec0],     %[g_vec0],         %[six]     \n\t"  // g >>= 6 (arithmetic)
+
+    "paddsh     %[r_vec0],     %[y],              %[br]      \n\t"  // r = y1 + br (signed saturating)
+    "pmullh     %[r_vec1],     %[v],              %[vr]      \n\t"// low 16 bits of v * vr
+    "psubsh     %[r_vec0],     %[r_vec0],         %[r_vec1]  \n\t"  // r -= v * vr
+    "psrah      %[r_vec0],     %[r_vec0],         %[six]     \n\t"  // r >>= 6 (arithmetic)
+
+    "packushb   %[r_vec0],     %[b_vec0],         %[r_vec0]  \n\t"// saturate lanes to bytes: rrrrbbbb (high..low)
+    "packushb   %[g_vec0],     %[g_vec0],         %[alpha]   \n\t"// low 4 bytes = gggg; the upper half is overwritten by the next instruction
+    "punpcklwd  %[g_vec0],     %[g_vec0],         %[alpha]   \n\t"  // ffffgggg: upper word taken from the low word of alpha (all ones)
+    "punpcklbh  %[b_vec0],     %[r_vec0],         %[g_vec0]  \n\t"// gbgbgbgb: interleave blue and green bytes
+    "punpckhbh  %[r_vec0],     %[r_vec0],         %[g_vec0]  \n\t"// frfrfrfr: interleave red and alpha bytes
+    "punpcklhw  %[g_vec0],     %[b_vec0],         %[r_vec0]  \n\t"// frgbfrgb: pixels 0-1
+    "punpckhhw  %[g_vec1],     %[b_vec0],         %[r_vec0]  \n\t"// frgbfrgb: pixels 2-3
+    "gssdlc1    %[g_vec0],     0x07(%[rgbbuf_ptr])           \n\t"  // unaligned 64-bit store of pixels 0-1
+    "gssdrc1    %[g_vec0],     0x00(%[rgbbuf_ptr])           \n\t"
+    "gssdlc1    %[g_vec1],     0x0f(%[rgbbuf_ptr])           \n\t"  // unaligned 64-bit store of pixels 2-3
+    "gssdrc1    %[g_vec1],     0x08(%[rgbbuf_ptr])           \n\t"
+
+    "daddiu     %[y_ptr],      %[y_ptr],          0x04       \n\t"  // 4 Y bytes consumed
+    "daddiu     %[u_ptr],      %[u_ptr],          0x04       \n\t"  // 4 U bytes consumed
+    "daddiu     %[v_ptr],      %[v_ptr],          0x04       \n\t"  // 4 V bytes consumed
+    "daddiu     %[rgbbuf_ptr], %[rgbbuf_ptr],     0x10       \n\t"  // 16 output bytes produced
+    "daddi      %[width],      %[width],          -0x04      \n\t"  // 4 pixels done
+    "bnez       %[width],      1b                            \n\t"  // repeat until width == 0 (body always runs at least once)
+    : [y]"=&f"(y),  // outputs: early-clobber FP registers used as scratch
+      [u]"=&f"(u),                         [v]"=&f"(v),
+      [b_vec0]"=&f"(b_vec[0]),             [b_vec1]"=&f"(b_vec[1]),
+      [g_vec0]"=&f"(g_vec[0]),             [g_vec1]"=&f"(g_vec[1]),
+      [r_vec0]"=&f"(r_vec[0]),             [r_vec1]"=&f"(r_vec[1]),
+      [ub]"=&f"(ub),                       [ug]"=&f"(ug),
+      [vg]"=&f"(vg),                       [vr]"=&f"(vr),
+      [bb]"=&f"(bb),                       [bg]"=&f"(bg),
+      [br]"=&f"(br),                       [yg]"=&f"(yg)
+    : [y_ptr]"r"(src_y),                   [u_ptr]"r"(src_u),  // NOTE(review): pointers and width are input operands but the asm modifies them - confirm this is safe
+      [v_ptr]"r"(src_v),                   [rgbbuf_ptr]"r"(rgb_buf),
+      [yuvcons_ptr]"r"(yuvconstants),      [width]"r"(width),
+      [zero]"f"(0x00),                     [alpha]"f"(-1),  // alpha = -1: its all-ones low word supplies the 0xff alpha bytes
+      [six]"f"(0x6),                       [five]"f"(0x55),  // six: shift count; five: pshufh selector 0x55
+      [mask]"f"(mask)
+    : "memory"
+  );
+}
+
+// Planar YUV with one U,V pair per two pixels -> 4-byte pixels, 4 pixels per loop pass. Also used for 420.
+void I422ToARGBRow_MMI(const uint8_t* src_y,  // Y row; pointer advances 4 bytes per pass
+                       const uint8_t* src_u,  // U row; pointer advances 2 bytes per pass
+                       const uint8_t* src_v,  // V row; pointer advances 2 bytes per pass
+                       uint8_t* rgb_buf,  // output; 16 bytes written per pass
+                       const struct YuvConstants* yuvconstants,  // table read at the fixed byte offsets below
+                       int width) {  // pixel count; 4 is subtracted per pass and the loop ends only at exactly 0
+  uint64_t y,u,v;  // per-pass sample lanes
+  uint64_t b_vec[2],g_vec[2],r_vec[2];  // [0]: running channel value; [1]: scratch (products; g_vec[1] also holds output pixels 2-3)
+  uint64_t mask = 0xff00ff00ff00ff00ULL;  // high byte of every 16-bit lane
+  uint64_t ub,ug,vg,vr,bb,bg,br,yg;  // constants loaded once, before the loop
+
+  __asm__ volatile(
+    "ldc1       %[yg],         0xc0(%[yuvcons_ptr])          \n\t"// yg: multiplier applied to Y (offset 0xc0)
+    "ldc1       %[bb],         0x60(%[yuvcons_ptr])          \n\t"// bb: term added to Y for blue (offset 0x60)
+    "ldc1       %[ub],         0x00(%[yuvcons_ptr])          \n\t"// ub: U multiplier for blue (offset 0x00)
+    "or         %[ub],         %[ub],             %[mask]    \n\t"// set high byte of each lane (sign extension; presumably ub is stored as zero-padded bytes - confirm)
+    "ldc1       %[bg],         0x80(%[yuvcons_ptr])          \n\t"// bg: term added to Y for green (offset 0x80)
+    "ldc1       %[ug],         0x20(%[yuvcons_ptr])          \n\t"// ug: taken from the 8 bytes at offset 0x20
+    "punpcklbh  %[ug],         %[ug],             %[zero]    \n\t"  // zero-extend low 4 bytes to 16-bit lanes
+    "pshufh     %[ug],         %[ug],             %[zero]    \n\t"  // selector 0x00: replicate lane 0 into all lanes
+    "ldc1       %[vg],         0x20(%[yuvcons_ptr])          \n\t"// vg: same 8 bytes at offset 0x20, different lane
+    "punpcklbh  %[vg],         %[vg],             %[zero]    \n\t"  // zero-extend low 4 bytes to 16-bit lanes
+    "pshufh     %[vg],         %[vg],             %[five]    \n\t"  // selector 0x55: replicate lane 1 into all lanes
+    "ldc1       %[br],         0xa0(%[yuvcons_ptr])          \n\t"// br: term added to Y for red (offset 0xa0)
+    "ldc1       %[vr],         0x40(%[yuvcons_ptr])          \n\t"// vr: V multiplier for red (offset 0x40)
+    "punpcklbh  %[vr],         %[vr],             %[zero]    \n\t"  // zero-extend low 4 bytes to 16-bit lanes
+    "pshufh     %[vr],         %[vr],             %[five]    \n\t"  // selector 0x55: replicate lane 1 into all lanes
+    "or         %[vr],         %[vr],             %[mask]    \n\t"// set high byte of each lane (sign extension)
+
+    "1:                                                      \n\t"
+    "gslwlc1    %[y],          0x03(%[y_ptr])                \n\t"  // unaligned 32-bit load: 4 Y bytes
+    "gslwrc1    %[y],          0x00(%[y_ptr])                \n\t"
+    "gslwlc1    %[u],          0x03(%[u_ptr])                \n\t"  // loads 4 U bytes although only the low 2 are used below
+    "gslwrc1    %[u],          0x00(%[u_ptr])                \n\t"
+    "gslwlc1    %[v],          0x03(%[v_ptr])                \n\t"  // loads 4 V bytes although only the low 2 are used below
+    "gslwrc1    %[v],          0x00(%[v_ptr])                \n\t"
+
+    "punpcklbh  %[y],          %[y],              %[y]       \n\t"// duplicate each byte into a 16-bit lane: y * 0x0101
+    "pmulhuh    %[y],          %[y],              %[yg]      \n\t"// y1 = high 16 bits of the unsigned product with yg
+
+    // u3|u2|u1|u0 --> u1|u1|u0|u0 (each chroma sample serves two pixels)
+    "punpcklbh  %[u],          %[u],              %[u]       \n\t"// duplicate each U byte
+    "punpcklbh  %[u],          %[u],              %[zero]    \n\t"  // zero-extend the low 4 bytes to 16-bit lanes
+    "paddsh     %[b_vec0],     %[y],              %[bb]      \n\t"  // b = y1 + bb (signed saturating)
+    "pmullh     %[b_vec1],     %[u],              %[ub]      \n\t"  // low 16 bits of u * ub
+    "psubsh     %[b_vec0],     %[b_vec0],         %[b_vec1]  \n\t"  // b -= u * ub (signed saturating)
+    "psrah      %[b_vec0],     %[b_vec0],         %[six]     \n\t"  // b >>= 6 (arithmetic)
+
+    // v3|v2|v1|v0 --> v1|v1|v0|v0 (each chroma sample serves two pixels)
+    "punpcklbh  %[v],          %[v],              %[v]       \n\t"// duplicate each V byte
+    "punpcklbh  %[v],          %[v],              %[zero]    \n\t"  // zero-extend the low 4 bytes to 16-bit lanes
+    "paddsh     %[g_vec0],     %[y],              %[bg]      \n\t"  // g = y1 + bg (signed saturating)
+    "pmullh     %[g_vec1],     %[u],              %[ug]      \n\t"// low 16 bits of u * ug
+    "psubsh     %[g_vec0],     %[g_vec0],         %[g_vec1]  \n\t"  // g -= u * ug
+    "pmullh     %[g_vec1],     %[v],              %[vg]      \n\t"// low 16 bits of v * vg
+    "psubsh     %[g_vec0],     %[g_vec0],         %[g_vec1]  \n\t"  // g -= v * vg
+    "psrah      %[g_vec0],     %[g_vec0],         %[six]     \n\t"  // g >>= 6 (arithmetic)
+
+    "paddsh     %[r_vec0],     %[y],              %[br]      \n\t"  // r = y1 + br (signed saturating)
+    "pmullh     %[r_vec1],     %[v],              %[vr]      \n\t"// low 16 bits of v * vr
+    "psubsh     %[r_vec0],     %[r_vec0],         %[r_vec1]  \n\t"  // r -= v * vr
+    "psrah      %[r_vec0],     %[r_vec0],         %[six]     \n\t"  // r >>= 6 (arithmetic)
+
+    "packushb   %[r_vec0],     %[b_vec0],         %[r_vec0]  \n\t"// saturate lanes to bytes: rrrrbbbb (high..low)
+    "packushb   %[g_vec0],     %[g_vec0],         %[alpha]   \n\t"// low 4 bytes = gggg; the upper half is overwritten by the next instruction
+    "punpcklwd  %[g_vec0],     %[g_vec0],         %[alpha]   \n\t"  // ffffgggg: upper word taken from the low word of alpha (all ones)
+    "punpcklbh  %[b_vec0],     %[r_vec0],         %[g_vec0]  \n\t"// gbgbgbgb: interleave blue and green bytes
+    "punpckhbh  %[r_vec0],     %[r_vec0],         %[g_vec0]  \n\t"// frfrfrfr: interleave red and alpha bytes
+    "punpcklhw  %[g_vec0],     %[b_vec0],         %[r_vec0]  \n\t"// frgbfrgb: pixels 0-1
+    "punpckhhw  %[g_vec1],     %[b_vec0],         %[r_vec0]  \n\t"// frgbfrgb: pixels 2-3
+    "gssdlc1    %[g_vec0],     0x07(%[rgbbuf_ptr])           \n\t"  // unaligned 64-bit store of pixels 0-1
+    "gssdrc1    %[g_vec0],     0x00(%[rgbbuf_ptr])           \n\t"
+    "gssdlc1    %[g_vec1],     0x0f(%[rgbbuf_ptr])           \n\t"  // unaligned 64-bit store of pixels 2-3
+    "gssdrc1    %[g_vec1],     0x08(%[rgbbuf_ptr])           \n\t"
+
+    "daddiu     %[y_ptr],      %[y_ptr],          0x04       \n\t"  // 4 Y bytes consumed
+    "daddiu     %[u_ptr],      %[u_ptr],          0x02       \n\t"  // 2 U bytes consumed (4 were loaded)
+    "daddiu     %[v_ptr],      %[v_ptr],          0x02       \n\t"  // 2 V bytes consumed (4 were loaded)
+    "daddiu     %[rgbbuf_ptr], %[rgbbuf_ptr],     0x10       \n\t"  // 16 output bytes produced
+    "daddi      %[width],      %[width],          -0x04      \n\t"  // 4 pixels done
+    "bnez       %[width],      1b                            \n\t"  // repeat until width == 0 (body always runs at least once)
+
+    : [y]"=&f"(y),  // outputs: early-clobber FP registers used as scratch
+      [u]"=&f"(u),                         [v]"=&f"(v),
+      [b_vec0]"=&f"(b_vec[0]),             [b_vec1]"=&f"(b_vec[1]),
+      [g_vec0]"=&f"(g_vec[0]),             [g_vec1]"=&f"(g_vec[1]),
+      [r_vec0]"=&f"(r_vec[0]),             [r_vec1]"=&f"(r_vec[1]),
+      [ub]"=&f"(ub),                       [ug]"=&f"(ug),
+      [vg]"=&f"(vg),                       [vr]"=&f"(vr),
+      [bb]"=&f"(bb),                       [bg]"=&f"(bg),
+      [br]"=&f"(br),                       [yg]"=&f"(yg)
+    : [y_ptr]"r"(src_y),                   [u_ptr]"r"(src_u),  // NOTE(review): pointers and width are input operands but the asm modifies them - confirm this is safe
+      [v_ptr]"r"(src_v),                   [rgbbuf_ptr]"r"(rgb_buf),
+      [yuvcons_ptr]"r"(yuvconstants),      [width]"r"(width),
+      [zero]"f"(0x00),                     [alpha]"f"(-1),  // alpha = -1: its all-ones low word supplies the 0xff alpha bytes
+      [six]"f"(0x6),                       [five]"f"(0x55),  // six: shift count; five: pshufh selector 0x55
+      [mask]"f"(mask)
+    : "memory"
+  );
+}
+}
+
+// 10 bit YUV (16-bit samples, one U,V pair per two pixels) to ARGB, 4 pixels per loop pass.
+void I210ToARGBRow_MMI(const uint16_t* src_y,  // Y row; pointer advances 8 bytes (4 samples) per pass
+                       const uint16_t* src_u,  // U row; pointer advances 4 bytes (2 samples) per pass
+                       const uint16_t* src_v,  // V row; pointer advances 4 bytes (2 samples) per pass
+                       uint8_t* rgb_buf,  // output; 16 bytes written per pass
+                       const struct YuvConstants* yuvconstants,  // table read at the fixed byte offsets below
+                       int width) {  // pixel count; 4 is subtracted per pass and the loop ends only at exactly 0
+  uint64_t y,u,v;  // per-pass sample lanes
+  uint64_t b_vec[2],g_vec[2],r_vec[2];  // [0]: running channel value; [1]: scratch (products; g_vec[1] also holds output pixels 2-3)
+  uint64_t mask = 0xff00ff00ff00ff00ULL;  // high byte of every 16-bit lane
+  uint64_t ub,ug,vg,vr,bb,bg,br,yg;  // constants loaded once, before the loop
+
+  __asm__ volatile(
+    "ldc1       %[yg],         0xc0(%[yuvcons_ptr])          \n\t"  // yg: multiplier applied to Y (offset 0xc0)
+    "ldc1       %[bb],         0x60(%[yuvcons_ptr])          \n\t"  // bb: term added to Y for blue (offset 0x60)
+    "ldc1       %[ub],         0x00(%[yuvcons_ptr])          \n\t"  // ub: U multiplier for blue (offset 0x00)
+    "or         %[ub],         %[ub],             %[mask]    \n\t"  // set high byte of each lane (presumably sign extension of zero-padded bytes - confirm)
+    "ldc1       %[bg],         0x80(%[yuvcons_ptr])          \n\t"  // bg: term added to Y for green (offset 0x80)
+    "ldc1       %[ug],         0x20(%[yuvcons_ptr])          \n\t"  // ug: taken from the 8 bytes at offset 0x20
+    "punpcklbh  %[ug],         %[ug],             %[zero]    \n\t"  // zero-extend low 4 bytes to 16-bit lanes
+    "pshufh     %[ug],         %[ug],             %[zero]    \n\t"  // selector 0x00: replicate lane 0 into all lanes
+    "ldc1       %[vg],         0x20(%[yuvcons_ptr])          \n\t"  // vg: same 8 bytes at offset 0x20, different lane
+    "punpcklbh  %[vg],         %[vg],             %[zero]    \n\t"  // zero-extend low 4 bytes to 16-bit lanes
+    "pshufh     %[vg],         %[vg],             %[five]    \n\t"  // selector 0x55: replicate lane 1 into all lanes
+    "ldc1       %[br],         0xa0(%[yuvcons_ptr])          \n\t"  // br: term added to Y for red (offset 0xa0)
+    "ldc1       %[vr],         0x40(%[yuvcons_ptr])          \n\t"  // vr: V multiplier for red (offset 0x40)
+    "punpcklbh  %[vr],         %[vr],             %[zero]    \n\t"  // zero-extend low 4 bytes to 16-bit lanes
+    "pshufh     %[vr],         %[vr],             %[five]    \n\t"  // selector 0x55: replicate lane 1 into all lanes
+    "or         %[vr],         %[vr],             %[mask]    \n\t"  // set high byte of each lane
+
+    "1:                                                      \n\t"
+    "gsldlc1    %[y],          0x07(%[y_ptr])                \n\t"  // unaligned 64-bit load: 4 Y samples
+    "gsldrc1    %[y],          0x00(%[y_ptr])                \n\t"
+    "gslwlc1    %[u],          0x03(%[u_ptr])                \n\t"  // unaligned 32-bit load: 2 U samples
+    "gslwrc1    %[u],          0x00(%[u_ptr])                \n\t"
+    "gslwlc1    %[v],          0x03(%[v_ptr])                \n\t"  // unaligned 32-bit load: 2 V samples
+    "gslwrc1    %[v],          0x00(%[v_ptr])                \n\t"
+
+    "psllh      %[y],          %[y],              %[six]     \n\t"  // y <<= 6 in each 16-bit lane
+    "pmulhuh    %[y],          %[y],              %[yg]      \n\t"  // y1 = high 16 bits of the unsigned product with yg
+
+    "punpcklhw  %[u],          %[u],              %[u]       \n\t"  // u1|u1|u0|u0: each U sample serves two pixels
+    "psrah      %[u],          %[u],              %[two]     \n\t"  // u >>= 2 (drops two low bits of the sample)
+    "punpcklhw  %[v],          %[v],              %[v]       \n\t"  // v1|v1|v0|v0: each V sample serves two pixels
+    "psrah      %[v],          %[v],              %[two]     \n\t"  // v >>= 2 (drops two low bits of the sample)
+    "pminsh     %[u],          %[u],              %[mask1]   \n\t"  // clamp each lane to at most 0x00ff
+    "pminsh     %[v],          %[v],              %[mask1]   \n\t"  // clamp each lane to at most 0x00ff
+
+    "paddsh     %[b_vec0],     %[y],              %[bb]      \n\t"  // b = y1 + bb (signed saturating)
+    "pmullh     %[b_vec1],     %[u],              %[ub]      \n\t"  // low 16 bits of u * ub
+    "psubsh     %[b_vec0],     %[b_vec0],         %[b_vec1]  \n\t"  // b -= u * ub (signed saturating)
+
+    "paddsh     %[g_vec0],     %[y],              %[bg]      \n\t"  // g = y1 + bg (signed saturating)
+    "pmullh     %[g_vec1],     %[u],              %[ug]      \n\t"  // low 16 bits of u * ug
+    "psubsh     %[g_vec0],     %[g_vec0],         %[g_vec1]  \n\t"  // g -= u * ug
+    "pmullh     %[g_vec1],     %[v],              %[vg]      \n\t"  // low 16 bits of v * vg
+    "psubsh     %[g_vec0],     %[g_vec0],         %[g_vec1]  \n\t"  // g -= v * vg
+
+    "paddsh     %[r_vec0],     %[y],              %[br]      \n\t"  // r = y1 + br (signed saturating)
+    "pmullh     %[r_vec1],     %[v],              %[vr]      \n\t"  // low 16 bits of v * vr
+    "psubsh     %[r_vec0],     %[r_vec0],         %[r_vec1]  \n\t"  // r -= v * vr
+
+    "psrah      %[b_vec0],     %[b_vec0],         %[six]     \n\t"  // b >>= 6 (arithmetic)
+    "psrah      %[g_vec0],     %[g_vec0],         %[six]     \n\t"  // g >>= 6 (arithmetic)
+    "psrah      %[r_vec0],     %[r_vec0],         %[six]     \n\t"  // r >>= 6 (arithmetic)
+
+    "packushb   %[r_vec0],     %[b_vec0],         %[r_vec0]  \n\t"  // saturate lanes to bytes: rrrrbbbb (high..low)
+    "packushb   %[g_vec0],     %[g_vec0],         %[alpha]   \n\t"  // low 4 bytes = gggg; the upper half is overwritten by the next instruction
+    "punpcklwd  %[g_vec0],     %[g_vec0],         %[alpha]   \n\t"  // ffffgggg: upper word taken from the low word of alpha (all ones)
+    "punpcklbh  %[b_vec0],     %[r_vec0],         %[g_vec0]  \n\t"  // gbgbgbgb: interleave blue and green bytes
+    "punpckhbh  %[r_vec0],     %[r_vec0],         %[g_vec0]  \n\t"  // frfrfrfr: interleave red and alpha bytes
+    "punpcklhw  %[g_vec0],     %[b_vec0],         %[r_vec0]  \n\t"  // frgbfrgb: pixels 0-1
+    "punpckhhw  %[g_vec1],     %[b_vec0],         %[r_vec0]  \n\t"  // frgbfrgb: pixels 2-3
+    "gssdlc1    %[g_vec0],     0x07(%[rgbbuf_ptr])           \n\t"  // unaligned 64-bit store of pixels 0-1
+    "gssdrc1    %[g_vec0],     0x00(%[rgbbuf_ptr])           \n\t"
+    "gssdlc1    %[g_vec1],     0x0f(%[rgbbuf_ptr])           \n\t"  // unaligned 64-bit store of pixels 2-3
+    "gssdrc1    %[g_vec1],     0x08(%[rgbbuf_ptr])           \n\t"
+
+    "daddiu     %[y_ptr],      %[y_ptr],          0x08       \n\t"  // 8 Y bytes consumed
+    "daddiu     %[u_ptr],      %[u_ptr],          0x04       \n\t"  // 4 U bytes consumed
+    "daddiu     %[v_ptr],      %[v_ptr],          0x04       \n\t"  // 4 V bytes consumed
+    "daddiu     %[rgbbuf_ptr], %[rgbbuf_ptr],     0x10       \n\t"  // 16 output bytes produced
+    "daddi      %[width],      %[width],          -0x04      \n\t"  // 4 pixels done
+    "bnez       %[width],      1b                            \n\t"  // repeat until width == 0 (body always runs at least once)
+
+    : [y]"=&f"(y),  // outputs: early-clobber FP registers used as scratch
+      [u]"=&f"(u),                         [v]"=&f"(v),
+      [b_vec0]"=&f"(b_vec[0]),             [b_vec1]"=&f"(b_vec[1]),
+      [g_vec0]"=&f"(g_vec[0]),             [g_vec1]"=&f"(g_vec[1]),
+      [r_vec0]"=&f"(r_vec[0]),             [r_vec1]"=&f"(r_vec[1]),
+      [ub]"=&f"(ub),                       [ug]"=&f"(ug),
+      [vg]"=&f"(vg),                       [vr]"=&f"(vr),
+      [bb]"=&f"(bb),                       [bg]"=&f"(bg),
+      [br]"=&f"(br),                       [yg]"=&f"(yg)
+    : [y_ptr]"r"(src_y),                   [u_ptr]"r"(src_u),  // NOTE(review): pointers and width are input operands but the asm modifies them - confirm this is safe
+      [v_ptr]"r"(src_v),                   [rgbbuf_ptr]"r"(rgb_buf),
+      [yuvcons_ptr]"r"(yuvconstants),      [width]"r"(width),
+      [zero]"f"(0x00),                     [alpha]"f"(-1),  // alpha = -1: its all-ones low word supplies the 0xff alpha bytes
+      [six]"f"(0x6),                       [five]"f"(0x55),  // six: shift count; five: pshufh selector 0x55
+      [mask]"f"(mask),                     [two]"f"(0x02),  // two: shift count applied to U and V
+      [mask1]"f"(0x00ff00ff00ff00ff)  // per-lane upper bound used by pminsh
+    : "memory"
+  );
+}
+
+void I422AlphaToARGBRow_MMI(const uint8_t* src_y,  // Y bytes, 4 consumed per loop pass
+                            const uint8_t* src_u,  // U bytes, 2 consumed per loop pass
+                            const uint8_t* src_v,  // V bytes, 2 consumed per loop pass
+                            const uint8_t* src_a,  // alpha bytes, 4 consumed per loop pass
+                            uint8_t* rgb_buf,  // destination, 16 bytes written per loop pass
+                            const struct YuvConstants* yuvconstants,  // read at fixed byte offsets below
+                            int width) {  // reduced by 4 per loop pass
+  uint64_t y,u,v,a;
+  uint64_t b_vec[2],g_vec[2],r_vec[2];
+  uint64_t mask = 0xff00ff00ff00ff00ULL;
+  uint64_t ub,ug,vg,vr,bb,bg,br,yg;
+  // Converts 4 pixels per loop pass from separate Y, U, V and alpha rows into 4-byte B,G,R,A pixels, using values read from yuvconstants.
+  __asm__ volatile(
+      "ldc1       %[yg],      0xc0(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[bb],      0x60(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[ub],      0x00(%[yuvcons_ptr])                 \n\t"
+      "or         %[ub],      %[ub],             %[mask]           \n\t"  // force the high byte of every halfword to 0xff
+      "ldc1       %[bg],      0x80(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[ug],      0x20(%[yuvcons_ptr])                 \n\t"
+      "punpcklbh  %[ug],      %[ug],             %[zero]           \n\t"
+      "pshufh     %[ug],      %[ug],             %[zero]           \n\t"  // selector 0: replicate halfword 0 into all four lanes
+      "ldc1       %[vg],      0x20(%[yuvcons_ptr])                 \n\t"
+      "punpcklbh  %[vg],      %[vg],             %[zero]           \n\t"
+      "pshufh     %[vg],      %[vg],             %[five]           \n\t"  // selector 0x55: replicate halfword 1 into all four lanes
+      "ldc1       %[br],      0xa0(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[vr],      0x40(%[yuvcons_ptr])                 \n\t"
+      "punpcklbh  %[vr],      %[vr],             %[zero]           \n\t"
+      "pshufh     %[vr],      %[vr],             %[five]           \n\t"
+      "or         %[vr],      %[vr],             %[mask]           \n\t"
+  // NOTE(review): the loop body runs before width is tested and repeats until width is exactly 0, so width is presumably expected to be a positive multiple of 4 -- confirm with callers.
+      "1:                                                          \n\t"
+      "gslwlc1    %[y],       0x03(%[y_ptr])                       \n\t"
+      "gslwrc1    %[y],       0x00(%[y_ptr])                       \n\t"
+      "gslwlc1    %[u],       0x03(%[u_ptr])                       \n\t"  // 4 bytes are loaded, only the low 2 are used
+      "gslwrc1    %[u],       0x00(%[u_ptr])                       \n\t"
+      "gslwlc1    %[v],       0x03(%[v_ptr])                       \n\t"  // 4 bytes are loaded, only the low 2 are used
+      "gslwrc1    %[v],       0x00(%[v_ptr])                       \n\t"
+      "gslwlc1    %[a],       0x03(%[a_ptr])                       \n\t"
+      "gslwrc1    %[a],       0x00(%[a_ptr])                       \n\t"
+
+      "punpcklbh  %[y],       %[y],              %[y]              \n\t"//y*0x0101
+      "pmulhuh    %[y],       %[y],              %[yg]             \n\t"//y1
+
+      //u3|u2|u1|u0 --> u1|u1|u0|u0
+      "punpcklbh  %[u],       %[u],              %[u]              \n\t"//u
+      "punpcklbh  %[u],       %[u],              %[zero]           \n\t"
+      "paddsh     %[b_vec0],  %[y],              %[bb]             \n\t"
+      "pmullh     %[b_vec1],  %[u],              %[ub]             \n\t"
+      "psubsh     %[b_vec0],  %[b_vec0],         %[b_vec1]         \n\t"
+      "psrah      %[b_vec0],  %[b_vec0],         %[six]            \n\t"
+
+      //v3|v2|v1|v0 --> v1|v1|v0|v0
+      "punpcklbh  %[v],       %[v],              %[v]              \n\t"
+      "punpcklbh  %[v],       %[v],              %[zero]           \n\t"
+      "paddsh     %[g_vec0],  %[y],              %[bg]             \n\t"
+      "pmullh     %[g_vec1],  %[u],              %[ug]             \n\t"
+      "psubsh     %[g_vec0],  %[g_vec0],         %[g_vec1]         \n\t"
+      "pmullh     %[g_vec1],  %[v],              %[vg]             \n\t"
+      "psubsh     %[g_vec0],  %[g_vec0],         %[g_vec1]         \n\t"
+      "psrah      %[g_vec0],  %[g_vec0],         %[six]            \n\t"
+
+      "paddsh     %[r_vec0],  %[y],              %[br]             \n\t"
+      "pmullh     %[r_vec1],  %[v],              %[vr]             \n\t"
+      "psubsh     %[r_vec0],  %[r_vec0],         %[r_vec1]         \n\t"
+      "psrah      %[r_vec0],  %[r_vec0],         %[six]            \n\t"
+  // Saturate the three channels to bytes, attach the loaded alpha bytes, and interleave to B,G,R,A per pixel.
+      "packushb   %[r_vec0],  %[b_vec0],         %[r_vec0]         \n\t"//rrrrbbbb
+      "packushb   %[g_vec0],  %[g_vec0],         %[a]              \n\t"
+      "punpcklwd  %[g_vec0],  %[g_vec0],         %[a]              \n\t"//aaaagggg
+      "punpcklbh  %[b_vec0],  %[r_vec0],         %[g_vec0]         \n\t"
+      "punpckhbh  %[r_vec0],  %[r_vec0],         %[g_vec0]         \n\t"
+      "punpcklhw  %[g_vec0],  %[b_vec0],         %[r_vec0]         \n\t"
+      "punpckhhw  %[g_vec1],  %[b_vec0],         %[r_vec0]         \n\t"
+      "gssdlc1    %[g_vec0],  0x07(%[rgbbuf_ptr])                  \n\t"
+      "gssdrc1    %[g_vec0],  0x00(%[rgbbuf_ptr])                  \n\t"
+      "gssdlc1    %[g_vec1],  0x0f(%[rgbbuf_ptr])                  \n\t"
+      "gssdrc1    %[g_vec1],  0x08(%[rgbbuf_ptr])                  \n\t"
+  // Advance every stream by the amount consumed for 4 pixels, then loop while width is non-zero.
+      "daddiu     %[y_ptr],     %[y_ptr],        0x04              \n\t"
+      "daddiu     %[a_ptr],     %[a_ptr],        0x04              \n\t"
+      "daddiu     %[u_ptr],     %[u_ptr],        0x02              \n\t"
+      "daddiu     %[v_ptr],     %[v_ptr],        0x02              \n\t"
+      "daddiu     %[rgbbuf_ptr], %[rgbbuf_ptr],  0x10              \n\t"
+      "daddi      %[width],     %[width],       -0x04              \n\t"
+      "bnez       %[width],     1b                                 \n\t"
+  // NOTE(review): the pointer and width operands below are declared as inputs yet are modified above; GCC documents that input-only operands must not be modified ("+r" operands count twice toward its 30-operand limit).
+      : [y]"=&f"(y),                    [u]"=&f"(u),
+        [v]"=&f"(v),                    [a]"=&f"(a),
+        [b_vec0]"=&f"(b_vec[0]),        [b_vec1]"=&f"(b_vec[1]),
+        [g_vec0]"=&f"(g_vec[0]),        [g_vec1]"=&f"(g_vec[1]),
+        [r_vec0]"=&f"(r_vec[0]),        [r_vec1]"=&f"(r_vec[1]),
+        [ub]"=&f"(ub),                  [ug]"=&f"(ug),
+        [vg]"=&f"(vg),                  [vr]"=&f"(vr),
+        [bb]"=&f"(bb),                  [bg]"=&f"(bg),
+        [br]"=&f"(br),                  [yg]"=&f"(yg)
+      : [y_ptr]"r"(src_y),              [u_ptr]"r"(src_u),
+        [v_ptr]"r"(src_v),              [rgbbuf_ptr]"r"(rgb_buf),
+        [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
+        [a_ptr]"r"(src_a),              [zero]"f"(0x00),
+        [six]"f"(0x6),                  [five]"f"(0x55),
+        [mask]"f"(mask)
+      : "memory"
+  );
+}
+
+void I422ToRGB24Row_MMI(const uint8_t* src_y,  // Y bytes, 4 consumed per loop pass
+                            const uint8_t* src_u,  // U bytes, 2 consumed per loop pass
+                            const uint8_t* src_v,  // V bytes, 2 consumed per loop pass
+                            uint8_t* rgb_buf,  // destination, 12 bytes written per loop pass
+                            const struct YuvConstants* yuvconstants,  // read at fixed byte offsets below
+                            int width) {  // reduced by 4 per loop pass
+  uint64_t y,u,v;
+  uint64_t b_vec[2],g_vec[2],r_vec[2];
+  uint64_t mask = 0xff00ff00ff00ff00ULL;
+  uint64_t ub,ug,vg,vr,bb,bg,br,yg;
+  // Converts 4 pixels per loop pass from separate Y, U and V rows into packed 3-byte B,G,R pixels, using values read from yuvconstants.
+  __asm__ volatile(
+      "ldc1       %[yg],      0xc0(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[bb],      0x60(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[ub],      0x00(%[yuvcons_ptr])                 \n\t"
+      "or         %[ub],      %[ub],             %[mask]           \n\t"  // force the high byte of every halfword to 0xff
+      "ldc1       %[bg],      0x80(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[ug],      0x20(%[yuvcons_ptr])                 \n\t"
+      "punpcklbh  %[ug],      %[ug],             %[zero]           \n\t"
+      "pshufh     %[ug],      %[ug],             %[zero]           \n\t"  // selector 0: replicate halfword 0 into all four lanes
+      "ldc1       %[vg],      0x20(%[yuvcons_ptr])                 \n\t"
+      "punpcklbh  %[vg],      %[vg],             %[zero]           \n\t"
+      "pshufh     %[vg],      %[vg],             %[five]           \n\t"  // selector 0x55: replicate halfword 1 into all four lanes
+      "ldc1       %[br],      0xa0(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[vr],      0x40(%[yuvcons_ptr])                 \n\t"
+      "punpcklbh  %[vr],      %[vr],             %[zero]           \n\t"
+      "pshufh     %[vr],      %[vr],             %[five]           \n\t"
+      "or         %[vr],      %[vr],             %[mask]           \n\t"
+  // NOTE(review): the loop body runs before width is tested and repeats until width is exactly 0, so width is presumably expected to be a positive multiple of 4 -- confirm with callers.
+      "1:                                                          \n\t"
+      "gslwlc1    %[y],       0x03(%[y_ptr])                       \n\t"
+      "gslwrc1    %[y],       0x00(%[y_ptr])                       \n\t"
+      "gslwlc1    %[u],       0x03(%[u_ptr])                       \n\t"  // 4 bytes are loaded, only the low 2 are used
+      "gslwrc1    %[u],       0x00(%[u_ptr])                       \n\t"
+      "gslwlc1    %[v],       0x03(%[v_ptr])                       \n\t"  // 4 bytes are loaded, only the low 2 are used
+      "gslwrc1    %[v],       0x00(%[v_ptr])                       \n\t"
+
+      "punpcklbh  %[y],       %[y],              %[y]              \n\t"//y*0x0101
+      "pmulhuh    %[y],       %[y],              %[yg]             \n\t"//y1
+
+      //u3|u2|u1|u0 --> u1|u1|u0|u0
+      "punpcklbh  %[u],       %[u],              %[u]              \n\t"//u
+      "punpcklbh  %[u],       %[u],              %[zero]           \n\t"
+      "paddsh     %[b_vec0],  %[y],              %[bb]             \n\t"
+      "pmullh     %[b_vec1],  %[u],              %[ub]             \n\t"
+      "psubsh     %[b_vec0],  %[b_vec0],         %[b_vec1]         \n\t"
+      "psrah      %[b_vec0],  %[b_vec0],         %[six]            \n\t"
+
+      //v3|v2|v1|v0 --> v1|v1|v0|v0
+      "punpcklbh  %[v],       %[v],              %[v]              \n\t"
+      "punpcklbh  %[v],       %[v],              %[zero]           \n\t"
+      "paddsh     %[g_vec0],  %[y],              %[bg]             \n\t"
+      "pmullh     %[g_vec1],  %[u],              %[ug]             \n\t"
+      "psubsh     %[g_vec0],  %[g_vec0],         %[g_vec1]         \n\t"
+      "pmullh     %[g_vec1],  %[v],              %[vg]             \n\t"
+      "psubsh     %[g_vec0],  %[g_vec0],         %[g_vec1]         \n\t"
+      "psrah      %[g_vec0],  %[g_vec0],         %[six]            \n\t"
+
+      "paddsh     %[r_vec0],  %[y],              %[br]             \n\t"
+      "pmullh     %[r_vec1],  %[v],              %[vr]             \n\t"
+      "psubsh     %[r_vec0],  %[r_vec0],         %[r_vec1]         \n\t"
+      "psrah      %[r_vec0],  %[r_vec0],         %[six]            \n\t"
+  // Saturate to bytes and interleave to B,G,R,0 per pixel (g_vec0 holds pixels 0-1, g_vec1 pixels 2-3).
+      "packushb   %[r_vec0],  %[b_vec0],         %[r_vec0]         \n\t"
+      "packushb   %[g_vec0],  %[g_vec0],         %[zero]           \n\t"
+      "punpcklbh  %[b_vec0],  %[r_vec0],         %[g_vec0]         \n\t"
+      "punpckhbh  %[r_vec0],  %[r_vec0],         %[g_vec0]         \n\t"
+      "punpcklhw  %[g_vec0],  %[b_vec0],         %[r_vec0]         \n\t"
+      "punpckhhw  %[g_vec1],  %[b_vec0],         %[r_vec0]         \n\t"
+  // Squeeze out the zero fourth byte of each pixel so the 16 bytes above become 12 contiguous bytes (8 in g_vec0, 4 in g_vec1).
+      "punpckhwd  %[r_vec0],  %[g_vec0],         %[g_vec0]         \n\t"
+      "psllw      %[r_vec1],  %[r_vec0],         %[lmove1]         \n\t"
+      "or         %[g_vec0],  %[g_vec0],         %[r_vec1]         \n\t"
+      "psrlw      %[r_vec1],  %[r_vec0],         %[rmove1]         \n\t"
+      "pextrh     %[r_vec1],  %[r_vec1],         %[zero]           \n\t"
+      "pinsrh_2   %[g_vec0],  %[g_vec0],         %[r_vec1]         \n\t"
+      "pextrh     %[r_vec1],  %[g_vec1],         %[zero]           \n\t"
+      "pinsrh_3   %[g_vec0],  %[g_vec0],         %[r_vec1]         \n\t"
+      "pextrh     %[r_vec1],  %[g_vec1],         %[one]            \n\t"
+      "punpckhwd  %[g_vec1],  %[g_vec1],         %[g_vec1]         \n\t"
+      "psllw      %[g_vec1],  %[g_vec1],         %[rmove1]         \n\t"
+      "or         %[g_vec1],  %[g_vec1],         %[r_vec1]         \n\t"
+      "gssdlc1    %[g_vec0],  0x07(%[rgbbuf_ptr])                  \n\t"
+      "gssdrc1    %[g_vec0],  0x00(%[rgbbuf_ptr])                  \n\t"
+      "gsswlc1    %[g_vec1],  0x0b(%[rgbbuf_ptr])                  \n\t"
+      "gsswrc1    %[g_vec1],  0x08(%[rgbbuf_ptr])                  \n\t"
+
+  // Advance every stream by the amount consumed for 4 pixels, then loop while width is non-zero.
+      "daddiu     %[y_ptr],     %[y_ptr],        0x04              \n\t"
+      "daddiu     %[u_ptr],     %[u_ptr],        0x02              \n\t"
+      "daddiu     %[v_ptr],     %[v_ptr],        0x02              \n\t"
+      "daddiu     %[rgbbuf_ptr], %[rgbbuf_ptr],  0x0c              \n\t"
+      "daddi      %[width],     %[width],       -0x04              \n\t"
+      "bnez       %[width],     1b                                 \n\t"
+  // NOTE(review): the pointer and width operands below are declared as inputs yet are modified above; GCC documents that input-only operands must not be modified ("+r" operands count twice toward its 30-operand limit).
+      : [y]"=&f"(y),                    [u]"=&f"(u),
+        [v]"=&f"(v),
+        [b_vec0]"=&f"(b_vec[0]),        [b_vec1]"=&f"(b_vec[1]),
+        [g_vec0]"=&f"(g_vec[0]),        [g_vec1]"=&f"(g_vec[1]),
+        [r_vec0]"=&f"(r_vec[0]),        [r_vec1]"=&f"(r_vec[1]),
+        [ub]"=&f"(ub),                  [ug]"=&f"(ug),
+        [vg]"=&f"(vg),                  [vr]"=&f"(vr),
+        [bb]"=&f"(bb),                  [bg]"=&f"(bg),
+        [br]"=&f"(br),                  [yg]"=&f"(yg)
+      : [y_ptr]"r"(src_y),              [u_ptr]"r"(src_u),
+        [v_ptr]"r"(src_v),              [rgbbuf_ptr]"r"(rgb_buf),
+        [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
+        [zero]"f"(0x00),                [five]"f"(0x55),
+        [six]"f"(0x6),                  [mask]"f"(mask),
+        [lmove1]"f"(0x18),              [rmove1]"f"(0x8),
+        [one]"f"(0x1)
+      : "memory"
+  );
+}
+
+void I422ToARGB4444Row_MMI(const uint8_t* src_y,  // Y bytes, 4 consumed per loop pass
+                            const uint8_t* src_u,  // U bytes, 2 consumed per loop pass
+                            const uint8_t* src_v,  // V bytes, 2 consumed per loop pass
+                            uint8_t* dst_argb4444,  // destination, 8 bytes written per loop pass
+                            const struct YuvConstants* yuvconstants,  // read at fixed byte offsets below
+                            int width) {  // reduced by 4 per loop pass
+  uint64_t y, u, v;
+  uint64_t b_vec, g_vec, r_vec, temp;
+  uint64_t ub,ug,vg,vr,bb,bg,br,yg;
+  // Converts 4 pixels per loop pass from separate Y, U and V rows into 2-byte pixels holding 4 bits per channel, with the alpha bits all set.
+  __asm__ volatile(
+      "ldc1       %[yg],      0xc0(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[bb],      0x60(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[ub],      0x00(%[yuvcons_ptr])                 \n\t"
+      "or         %[ub],      %[ub],             %[mask]           \n\t"  // force the high byte of every halfword to 0xff
+      "ldc1       %[bg],      0x80(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[ug],      0x20(%[yuvcons_ptr])                 \n\t"
+      "punpcklbh  %[ug],      %[ug],             %[zero]           \n\t"
+      "pshufh     %[ug],      %[ug],             %[zero]           \n\t"  // selector 0: replicate halfword 0 into all four lanes
+      "ldc1       %[vg],      0x20(%[yuvcons_ptr])                 \n\t"
+      "punpcklbh  %[vg],      %[vg],             %[zero]           \n\t"
+      "pshufh     %[vg],      %[vg],             %[five]           \n\t"  // selector 0x55: replicate halfword 1 into all four lanes
+      "ldc1       %[br],      0xa0(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[vr],      0x40(%[yuvcons_ptr])                 \n\t"
+      "punpcklbh  %[vr],      %[vr],             %[zero]           \n\t"
+      "pshufh     %[vr],      %[vr],             %[five]           \n\t"
+      "or         %[vr],      %[vr],             %[mask]           \n\t"
+  // NOTE(review): the loop body runs before width is tested and repeats until width is exactly 0, so width is presumably expected to be a positive multiple of 4 -- confirm with callers.
+      "1:                                                          \n\t"
+      "gslwlc1    %[y],       0x03(%[y_ptr])                       \n\t"
+      "gslwrc1    %[y],       0x00(%[y_ptr])                       \n\t"
+      "gslwlc1    %[u],       0x03(%[u_ptr])                       \n\t"  // 4 bytes are loaded, only the low 2 are used
+      "gslwrc1    %[u],       0x00(%[u_ptr])                       \n\t"
+      "gslwlc1    %[v],       0x03(%[v_ptr])                       \n\t"  // 4 bytes are loaded, only the low 2 are used
+      "gslwrc1    %[v],       0x00(%[v_ptr])                       \n\t"
+
+      "punpcklbh  %[y],       %[y],              %[y]              \n\t"//y*0x0101
+      "pmulhuh    %[y],       %[y],              %[yg]             \n\t"//y1
+
+      //u3|u2|u1|u0 --> u1|u1|u0|u0
+      "punpcklbh  %[u],       %[u],              %[u]              \n\t"//u
+      "punpcklbh  %[u],       %[u],              %[zero]           \n\t"
+      "paddsh     %[b_vec],   %[y],              %[bb]             \n\t"
+      "pmullh     %[temp],    %[u],              %[ub]             \n\t"
+      "psubsh     %[b_vec],   %[b_vec],          %[temp]           \n\t"
+      "psrah      %[b_vec],   %[b_vec],          %[six]            \n\t"
+
+      //v3|v2|v1|v0 --> v1|v1|v0|v0
+      "punpcklbh  %[v],       %[v],              %[v]              \n\t"
+      "punpcklbh  %[v],       %[v],              %[zero]           \n\t"
+      "paddsh     %[g_vec],   %[y],              %[bg]             \n\t"
+      "pmullh     %[temp],    %[u],              %[ug]             \n\t"
+      "psubsh     %[g_vec],   %[g_vec],          %[temp]           \n\t"
+      "pmullh     %[temp],    %[v],              %[vg]             \n\t"
+      "psubsh     %[g_vec],   %[g_vec],          %[temp]           \n\t"
+      "psrah      %[g_vec],   %[g_vec],          %[six]            \n\t"
+
+      "paddsh     %[r_vec],   %[y],              %[br]             \n\t"
+      "pmullh     %[temp],    %[v],              %[vr]             \n\t"
+      "psubsh     %[r_vec],   %[r_vec],          %[temp]           \n\t"
+      "psrah      %[r_vec],   %[r_vec],          %[six]            \n\t"
+  // Saturate to bytes, use the all-ones alpha operand for the fourth channel, and interleave to B,G,R,A per pixel.
+      "packushb   %[r_vec],   %[b_vec],          %[r_vec]          \n\t"
+      "packushb   %[g_vec],   %[g_vec],          %[zero]           \n\t"
+      "punpcklwd  %[g_vec],   %[g_vec],          %[alpha]          \n\t"
+      "punpcklbh  %[b_vec],   %[r_vec],          %[g_vec]          \n\t"
+      "punpckhbh  %[r_vec],   %[r_vec],          %[g_vec]          \n\t"
+      "punpcklhw  %[g_vec],   %[b_vec],          %[r_vec]          \n\t"
+      "punpckhhw  %[b_vec],   %[b_vec],          %[r_vec]          \n\t"
+  // Keep the high nibble of every channel (mask1), fold neighbouring nibbles into one byte by shifting 4 and 8 bits, then keep the low byte of each halfword.
+      "and        %[g_vec],   %[g_vec],          %[mask1]          \n\t"
+      "psrlw      %[g_vec],   %[g_vec],          %[four]           \n\t"
+      "psrlw      %[r_vec],   %[g_vec],          %[four]           \n\t"
+      "or         %[g_vec],   %[g_vec],          %[r_vec]          \n\t"
+      "punpcklbh  %[r_vec],   %[alpha],          %[zero]           \n\t"  // builds 0x00ff in every halfword
+      "and        %[g_vec],   %[g_vec],          %[r_vec]          \n\t"
+
+      "and        %[b_vec],   %[b_vec],          %[mask1]          \n\t"
+      "psrlw      %[b_vec],   %[b_vec],          %[four]           \n\t"
+      "psrlw      %[r_vec],   %[b_vec],          %[four]           \n\t"
+      "or         %[b_vec],   %[b_vec],          %[r_vec]          \n\t"
+      "punpcklbh  %[r_vec],   %[alpha],          %[zero]           \n\t"  // builds 0x00ff in every halfword
+      "and        %[b_vec],   %[b_vec],          %[r_vec]          \n\t"
+      "packushb   %[g_vec],   %[g_vec],          %[b_vec]          \n\t"  // 8 halfwords -> 8 bytes: pixels 0-1 from g_vec, 2-3 from b_vec
+
+      "gssdlc1    %[g_vec],   0x07(%[dst_argb4444])                \n\t"
+      "gssdrc1    %[g_vec],   0x00(%[dst_argb4444])                \n\t"
+  // Advance every stream by the amount consumed for 4 pixels, then loop while width is non-zero.
+      "daddiu     %[y_ptr],     %[y_ptr],        0x04              \n\t"
+      "daddiu     %[u_ptr],     %[u_ptr],        0x02              \n\t"
+      "daddiu     %[v_ptr],     %[v_ptr],        0x02              \n\t"
+      "daddiu     %[dst_argb4444], %[dst_argb4444],  0x08          \n\t"
+      "daddi      %[width],     %[width],       -0x04              \n\t"
+      "bnez       %[width],     1b                                 \n\t"
+  // NOTE(review): the pointer and width operands below are declared as inputs yet are modified above; GCC documents that input-only operands must not be modified ("+r" operands count twice toward its 30-operand limit).
+      : [y]"=&f"(y),                    [u]"=&f"(u),
+        [v]"=&f"(v),
+        [b_vec]"=&f"(b_vec),            [g_vec]"=&f"(g_vec),
+        [r_vec]"=&f"(r_vec),            [temp]"=&f"(temp),
+        [ub]"=&f"(ub),                  [ug]"=&f"(ug),
+        [vg]"=&f"(vg),                  [vr]"=&f"(vr),
+        [bb]"=&f"(bb),                  [bg]"=&f"(bg),
+        [br]"=&f"(br),                  [yg]"=&f"(yg)
+      : [y_ptr]"r"(src_y),              [u_ptr]"r"(src_u),
+        [v_ptr]"r"(src_v),              [dst_argb4444]"r"(dst_argb4444),
+        [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
+        [zero]"f"(0x00),                [five]"f"(0x55),
+        [six]"f"(0x6),                  [mask]"f"(0xff00ff00ff00ff00),
+        [four]"f"(0x4),                 [mask1]"f"(0xf0f0f0f0f0f0f0f0),
+        [alpha]"f"(-1)
+      : "memory"
+  );
+}
+
+void I422ToARGB1555Row_MMI(const uint8_t* src_y,  // Y bytes, 4 consumed per loop pass
+                            const uint8_t* src_u,  // U bytes, 2 consumed per loop pass
+                            const uint8_t* src_v,  // V bytes, 2 consumed per loop pass
+                            uint8_t* dst_argb1555,  // destination, 8 bytes written per loop pass
+                            const struct YuvConstants* yuvconstants,  // read at fixed byte offsets below
+                            int width) {  // reduced by 4 per loop pass
+  uint64_t y, u, v;
+  uint64_t b_vec, g_vec, r_vec, temp;
+  uint64_t ub,ug,vg,vr,bb,bg,br,yg;
+  // Converts 4 pixels per loop pass from separate Y, U and V rows into 2-byte pixels holding 5 bits per colour channel, with bit 15 set.
+  __asm__ volatile(
+      "ldc1       %[yg],      0xc0(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[bb],      0x60(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[ub],      0x00(%[yuvcons_ptr])                 \n\t"
+      "or         %[ub],      %[ub],             %[mask1]          \n\t"  // force the high byte of every halfword to 0xff
+      "ldc1       %[bg],      0x80(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[ug],      0x20(%[yuvcons_ptr])                 \n\t"
+      "punpcklbh  %[ug],      %[ug],             %[zero]           \n\t"
+      "pshufh     %[ug],      %[ug],             %[zero]           \n\t"  // selector 0: replicate halfword 0 into all four lanes
+      "ldc1       %[vg],      0x20(%[yuvcons_ptr])                 \n\t"
+      "punpcklbh  %[vg],      %[vg],             %[zero]           \n\t"
+      "pshufh     %[vg],      %[vg],             %[five]           \n\t"  // selector 0x55: replicate halfword 1 into all four lanes
+      "ldc1       %[br],      0xa0(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[vr],      0x40(%[yuvcons_ptr])                 \n\t"
+      "punpcklbh  %[vr],      %[vr],             %[zero]           \n\t"
+      "pshufh     %[vr],      %[vr],             %[five]           \n\t"
+      "or         %[vr],      %[vr],             %[mask1]          \n\t"
+  // NOTE(review): the loop body runs before width is tested and repeats until width is exactly 0, so width is presumably expected to be a positive multiple of 4 -- confirm with callers.
+      "1:                                                          \n\t"
+      "gslwlc1    %[y],       0x03(%[y_ptr])                       \n\t"
+      "gslwrc1    %[y],       0x00(%[y_ptr])                       \n\t"
+      "gslwlc1    %[u],       0x03(%[u_ptr])                       \n\t"  // 4 bytes are loaded, only the low 2 are used
+      "gslwrc1    %[u],       0x00(%[u_ptr])                       \n\t"
+      "gslwlc1    %[v],       0x03(%[v_ptr])                       \n\t"  // 4 bytes are loaded, only the low 2 are used
+      "gslwrc1    %[v],       0x00(%[v_ptr])                       \n\t"
+
+      "punpcklbh  %[y],       %[y],              %[y]              \n\t"
+      "pmulhuh    %[y],       %[y],              %[yg]             \n\t"
+
+      //u3|u2|u1|u0 --> u1|u1|u0|u0
+      "punpcklbh  %[u],       %[u],              %[u]              \n\t"
+      "punpcklbh  %[u],       %[u],              %[zero]           \n\t"
+      "paddsh     %[b_vec],   %[y],              %[bb]             \n\t"
+      "pmullh     %[temp],    %[u],              %[ub]             \n\t"
+      "psubsh     %[b_vec],   %[b_vec],          %[temp]           \n\t"
+      "psrah      %[b_vec],   %[b_vec],          %[six]            \n\t"
+
+      //v3|v2|v1|v0 --> v1|v1|v0|v0
+      "punpcklbh  %[v],       %[v],              %[v]              \n\t"
+      "punpcklbh  %[v],       %[v],              %[zero]           \n\t"
+      "paddsh     %[g_vec],   %[y],              %[bg]             \n\t"
+      "pmullh     %[temp],    %[u],              %[ug]             \n\t"
+      "psubsh     %[g_vec],   %[g_vec],          %[temp]           \n\t"
+      "pmullh     %[temp],    %[v],              %[vg]             \n\t"
+      "psubsh     %[g_vec],   %[g_vec],          %[temp]           \n\t"
+      "psrah      %[g_vec],   %[g_vec],          %[six]            \n\t"
+
+      "paddsh     %[r_vec],   %[y],              %[br]             \n\t"
+      "pmullh     %[temp],    %[v],              %[vr]             \n\t"
+      "psubsh     %[r_vec],   %[r_vec],          %[temp]           \n\t"
+      "psrah      %[r_vec],   %[r_vec],          %[six]            \n\t"
+  // Saturate to bytes and interleave to B,G,R,0 per pixel (g_vec holds pixels 0-1, b_vec pixels 2-3).
+      "packushb   %[r_vec],   %[b_vec],          %[r_vec]          \n\t"
+      "packushb   %[g_vec],   %[g_vec],          %[zero]           \n\t"
+      "punpcklbh  %[b_vec],   %[r_vec],          %[g_vec]          \n\t"
+      "punpckhbh  %[r_vec],   %[r_vec],          %[g_vec]          \n\t"
+      "punpcklhw  %[g_vec],   %[b_vec],          %[r_vec]          \n\t"
+      "punpckhhw  %[b_vec],   %[b_vec],          %[r_vec]          \n\t"
+  // Per 32-bit pixel: take the top 5 bits of each channel (shift 3, mask 0x1f), place them at bits 0, 5 and 10, then OR in 0x8000.
+      "psrlw      %[temp],    %[g_vec],          %[three]          \n\t"
+      "and        %[g_vec],   %[temp],           %[mask2]          \n\t"
+      "psrlw      %[temp],    %[temp],           %[eight]          \n\t"
+      "and        %[r_vec],   %[temp],           %[mask2]          \n\t"
+      "psllw      %[r_vec],   %[r_vec],          %[lmove5]         \n\t"
+      "or         %[g_vec],   %[g_vec],          %[r_vec]          \n\t"
+      "psrlw      %[temp],    %[temp],           %[eight]          \n\t"
+      "and        %[r_vec],   %[temp],           %[mask2]          \n\t"
+      "psllw      %[r_vec],   %[r_vec],          %[lmove5]         \n\t"  // two shifts of 5 give the 10-bit offset
+      "psllw      %[r_vec],   %[r_vec],          %[lmove5]         \n\t"
+      "or         %[g_vec],   %[g_vec],          %[r_vec]          \n\t"
+      "or         %[g_vec],   %[g_vec],          %[mask3]          \n\t"
+
+      "psrlw      %[temp],    %[b_vec],          %[three]          \n\t"
+      "and        %[b_vec],   %[temp],           %[mask2]          \n\t"
+      "psrlw      %[temp],    %[temp],           %[eight]          \n\t"
+      "and        %[r_vec],   %[temp],           %[mask2]          \n\t"
+      "psllw      %[r_vec],   %[r_vec],          %[lmove5]         \n\t"
+      "or         %[b_vec],   %[b_vec],          %[r_vec]          \n\t"
+      "psrlw      %[temp],    %[temp],           %[eight]          \n\t"
+      "and        %[r_vec],   %[temp],           %[mask2]          \n\t"
+      "psllw      %[r_vec],   %[r_vec],          %[lmove5]         \n\t"  // two shifts of 5 give the 10-bit offset
+      "psllw      %[r_vec],   %[r_vec],          %[lmove5]         \n\t"
+      "or         %[b_vec],   %[b_vec],          %[r_vec]          \n\t"
+      "or         %[b_vec],   %[b_vec],          %[mask3]          \n\t"
+  // Gather the low halfword of each of the four 32-bit results into one 64-bit value, in pixel order.
+      "punpcklhw  %[r_vec],   %[g_vec],          %[b_vec]          \n\t"
+      "punpckhhw  %[b_vec],   %[g_vec],          %[b_vec]          \n\t"
+      "punpcklhw  %[g_vec],   %[r_vec],          %[b_vec]          \n\t"
+
+      "gssdlc1    %[g_vec],   0x07(%[dst_argb1555])                \n\t"
+      "gssdrc1    %[g_vec],   0x00(%[dst_argb1555])                \n\t"
+  // Advance every stream by the amount consumed for 4 pixels, then loop while width is non-zero.
+      "daddiu     %[y_ptr],     %[y_ptr],        0x04              \n\t"
+      "daddiu     %[u_ptr],     %[u_ptr],        0x02              \n\t"
+      "daddiu     %[v_ptr],     %[v_ptr],        0x02              \n\t"
+      "daddiu     %[dst_argb1555], %[dst_argb1555],  0x08          \n\t"
+      "daddi      %[width],     %[width],       -0x04              \n\t"
+      "bnez       %[width],     1b                                 \n\t"
+  // NOTE(review): the pointer and width operands below are declared as inputs yet are modified above; GCC documents that input-only operands must not be modified ("+r" operands count twice toward its 30-operand limit).
+      : [y]"=&f"(y),                    [u]"=&f"(u),
+        [v]"=&f"(v),
+        [b_vec]"=&f"(b_vec),            [g_vec]"=&f"(g_vec),
+        [r_vec]"=&f"(r_vec),            [temp]"=&f"(temp),
+        [ub]"=&f"(ub),                  [ug]"=&f"(ug),
+        [vg]"=&f"(vg),                  [vr]"=&f"(vr),
+        [bb]"=&f"(bb),                  [bg]"=&f"(bg),
+        [br]"=&f"(br),                  [yg]"=&f"(yg)
+      : [y_ptr]"r"(src_y),              [u_ptr]"r"(src_u),
+        [v_ptr]"r"(src_v),              [dst_argb1555]"r"(dst_argb1555),
+        [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
+        [zero]"f"(0x00),                [five]"f"(0x55),
+        [six]"f"(0x6),                  [mask1]"f"(0xff00ff00ff00ff00),
+        [three]"f"(0x3),                [mask2]"f"(0x1f0000001f),
+        [eight]"f"(0x8),                [mask3]"f"(0x800000008000),
+        [lmove5]"f"(0x5)
+      : "memory"
+  );
+}
+
+void I422ToRGB565Row_MMI(const uint8_t* src_y,  // Y bytes, 4 consumed per loop pass
+                            const uint8_t* src_u,  // U bytes, 2 consumed per loop pass
+                            const uint8_t* src_v,  // V bytes, 2 consumed per loop pass
+                            uint8_t* dst_rgb565,  // destination, 8 bytes written per loop pass
+                            const struct YuvConstants* yuvconstants,  // read at fixed byte offsets below
+                            int width) {  // reduced by 4 per loop pass
+  uint64_t y, u, v;
+  uint64_t b_vec, g_vec, r_vec, temp;
+  uint64_t ub,ug,vg,vr,bb,bg,br,yg;
+  // Converts 4 pixels per loop pass from separate Y, U and V rows into 2-byte pixels holding 5 bits blue, 6 bits green and 5 bits red (low to high).
+  __asm__ volatile(
+      "ldc1       %[yg],      0xc0(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[bb],      0x60(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[ub],      0x00(%[yuvcons_ptr])                 \n\t"
+      "or         %[ub],      %[ub],             %[mask1]          \n\t"  // force the high byte of every halfword to 0xff
+      "ldc1       %[bg],      0x80(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[ug],      0x20(%[yuvcons_ptr])                 \n\t"
+      "punpcklbh  %[ug],      %[ug],             %[zero]           \n\t"
+      "pshufh     %[ug],      %[ug],             %[zero]           \n\t"  // selector 0: replicate halfword 0 into all four lanes
+      "ldc1       %[vg],      0x20(%[yuvcons_ptr])                 \n\t"
+      "punpcklbh  %[vg],      %[vg],             %[zero]           \n\t"
+      "pshufh     %[vg],      %[vg],             %[five]           \n\t"  // selector 0x55: replicate halfword 1 into all four lanes
+      "ldc1       %[br],      0xa0(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[vr],      0x40(%[yuvcons_ptr])                 \n\t"
+      "punpcklbh  %[vr],      %[vr],             %[zero]           \n\t"
+      "pshufh     %[vr],      %[vr],             %[five]           \n\t"
+      "or         %[vr],      %[vr],             %[mask1]          \n\t"
+  // NOTE(review): the loop body runs before width is tested and repeats until width is exactly 0, so width is presumably expected to be a positive multiple of 4 -- confirm with callers.
+      "1:                                                          \n\t"
+      "gslwlc1    %[y],       0x03(%[y_ptr])                       \n\t"
+      "gslwrc1    %[y],       0x00(%[y_ptr])                       \n\t"
+      "gslwlc1    %[u],       0x03(%[u_ptr])                       \n\t"  // 4 bytes are loaded, only the low 2 are used
+      "gslwrc1    %[u],       0x00(%[u_ptr])                       \n\t"
+      "gslwlc1    %[v],       0x03(%[v_ptr])                       \n\t"  // 4 bytes are loaded, only the low 2 are used
+      "gslwrc1    %[v],       0x00(%[v_ptr])                       \n\t"
+
+      "punpcklbh  %[y],       %[y],              %[y]              \n\t"
+      "pmulhuh    %[y],       %[y],              %[yg]             \n\t"
+
+      //u3|u2|u1|u0 --> u1|u1|u0|u0
+      "punpcklbh  %[u],       %[u],              %[u]              \n\t"
+      "punpcklbh  %[u],       %[u],              %[zero]           \n\t"
+      "paddsh     %[b_vec],   %[y],              %[bb]             \n\t"
+      "pmullh     %[temp],    %[u],              %[ub]             \n\t"
+      "psubsh     %[b_vec],   %[b_vec],          %[temp]           \n\t"
+      "psrah      %[b_vec],   %[b_vec],          %[six]            \n\t"
+
+      //v3|v2|v1|v0 --> v1|v1|v0|v0
+      "punpcklbh  %[v],       %[v],              %[v]              \n\t"
+      "punpcklbh  %[v],       %[v],              %[zero]           \n\t"
+      "paddsh     %[g_vec],   %[y],              %[bg]             \n\t"
+      "pmullh     %[temp],    %[u],              %[ug]             \n\t"
+      "psubsh     %[g_vec],   %[g_vec],          %[temp]           \n\t"
+      "pmullh     %[temp],    %[v],              %[vg]             \n\t"
+      "psubsh     %[g_vec],   %[g_vec],          %[temp]           \n\t"
+      "psrah      %[g_vec],   %[g_vec],          %[six]            \n\t"
+
+      "paddsh     %[r_vec],   %[y],              %[br]             \n\t"
+      "pmullh     %[temp],    %[v],              %[vr]             \n\t"
+      "psubsh     %[r_vec],   %[r_vec],          %[temp]           \n\t"
+      "psrah      %[r_vec],   %[r_vec],          %[six]            \n\t"
+  // Saturate to bytes and interleave to B,G,R,0 per pixel (g_vec holds pixels 0-1, b_vec pixels 2-3).
+      "packushb   %[r_vec],   %[b_vec],          %[r_vec]          \n\t"
+      "packushb   %[g_vec],   %[g_vec],          %[zero]           \n\t"
+      "punpcklbh  %[b_vec],   %[r_vec],          %[g_vec]          \n\t"
+      "punpckhbh  %[r_vec],   %[r_vec],          %[g_vec]          \n\t"
+      "punpcklhw  %[g_vec],   %[b_vec],          %[r_vec]          \n\t"
+      "punpckhhw  %[b_vec],   %[b_vec],          %[r_vec]          \n\t"
+  // Per 32-bit pixel: blue >> 3 stays at bit 0, green >> 2 goes to bit 5, red >> 3 goes to bit 11.
+      "psrlh      %[temp],    %[g_vec],          %[three]          \n\t"
+      "and        %[g_vec],   %[temp],           %[mask2]          \n\t"
+      "psrlw      %[temp],    %[temp],           %[seven]          \n\t"
+      "psrlw      %[r_vec],   %[mask1],          %[eight]          \n\t"  // derives 0x00ff00ff from mask1
+      "and        %[r_vec],   %[temp],           %[r_vec]          \n\t"
+      "psllw      %[r_vec],   %[r_vec],          %[lmove5]         \n\t"
+      "or         %[g_vec],   %[g_vec],          %[r_vec]          \n\t"
+      "paddb      %[r_vec],   %[three],          %[six]            \n\t"  // shift count 3 + 6 = 9
+      "psrlw      %[temp],    %[temp],           %[r_vec]          \n\t"
+      "and        %[r_vec],   %[temp],           %[mask2]          \n\t"
+      "paddb      %[temp],    %[three],          %[eight]          \n\t"  // shift count 3 + 8 = 11
+      "psllw      %[r_vec],   %[r_vec],          %[temp]           \n\t"
+      "or         %[g_vec],   %[g_vec],          %[r_vec]          \n\t"
+
+      "psrlh      %[temp],    %[b_vec],          %[three]          \n\t"
+      "and        %[b_vec],   %[temp],           %[mask2]          \n\t"
+      "psrlw      %[temp],    %[temp],           %[seven]          \n\t"
+      "psrlw      %[r_vec],   %[mask1],          %[eight]          \n\t"  // derives 0x00ff00ff from mask1
+      "and        %[r_vec],   %[temp],           %[r_vec]          \n\t"
+      "psllw      %[r_vec],   %[r_vec],          %[lmove5]         \n\t"
+      "or         %[b_vec],   %[b_vec],          %[r_vec]          \n\t"
+      "paddb      %[r_vec],   %[three],          %[six]            \n\t"  // shift count 3 + 6 = 9
+      "psrlw      %[temp],    %[temp],           %[r_vec]          \n\t"
+      "and        %[r_vec],   %[temp],           %[mask2]          \n\t"
+      "paddb      %[temp],    %[three],          %[eight]          \n\t"  // shift count 3 + 8 = 11
+      "psllw      %[r_vec],   %[r_vec],          %[temp]           \n\t"
+      "or         %[b_vec],   %[b_vec],          %[r_vec]          \n\t"
+  // Gather the low halfword of each of the four 32-bit results into one 64-bit value, in pixel order.
+      "punpcklhw  %[r_vec],   %[g_vec],          %[b_vec]          \n\t"
+      "punpckhhw  %[b_vec],   %[g_vec],          %[b_vec]          \n\t"
+      "punpcklhw  %[g_vec],   %[r_vec],          %[b_vec]          \n\t"
+
+      "gssdlc1    %[g_vec],   0x07(%[dst_rgb565])                  \n\t"
+      "gssdrc1    %[g_vec],   0x00(%[dst_rgb565])                  \n\t"
+  // Advance every stream by the amount consumed for 4 pixels, then loop while width is non-zero.
+      "daddiu     %[y_ptr],     %[y_ptr],        0x04              \n\t"
+      "daddiu     %[u_ptr],     %[u_ptr],        0x02              \n\t"
+      "daddiu     %[v_ptr],     %[v_ptr],        0x02              \n\t"
+      "daddiu     %[dst_rgb565], %[dst_rgb565],  0x08              \n\t"
+      "daddi      %[width],     %[width],       -0x04              \n\t"
+      "bnez       %[width],     1b                                 \n\t"
+  // NOTE(review): the pointer and width operands below are declared as inputs yet are modified above; GCC documents that input-only operands must not be modified ("+r" operands count twice toward its 30-operand limit).
+      : [y]"=&f"(y),                    [u]"=&f"(u),
+        [v]"=&f"(v),
+        [b_vec]"=&f"(b_vec),            [g_vec]"=&f"(g_vec),
+        [r_vec]"=&f"(r_vec),            [temp]"=&f"(temp),
+        [ub]"=&f"(ub),                  [ug]"=&f"(ug),
+        [vg]"=&f"(vg),                  [vr]"=&f"(vr),
+        [bb]"=&f"(bb),                  [bg]"=&f"(bg),
+        [br]"=&f"(br),                  [yg]"=&f"(yg)
+      : [y_ptr]"r"(src_y),              [u_ptr]"r"(src_u),
+        [v_ptr]"r"(src_v),              [dst_rgb565]"r"(dst_rgb565),
+        [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
+        [zero]"f"(0x00),                [five]"f"(0x55),
+        [six]"f"(0x6),                  [mask1]"f"(0xff00ff00ff00ff00),
+        [three]"f"(0x3),                [mask2]"f"(0x1f0000001f),
+        [eight]"f"(0x8),                [seven]"f"(0x7),
+        [lmove5]"f"(0x5)
+      : "memory"
+  );
+}
+
+void NV12ToARGBRow_MMI(const uint8_t* src_y,  // Y bytes, 4 consumed per loop pass
+                       const uint8_t* src_uv,  // interleaved chroma bytes, 4 consumed per loop pass (even bytes feed u, odd bytes feed v)
+                       uint8_t* rgb_buf,  // destination, 16 bytes written per loop pass
+                       const struct YuvConstants* yuvconstants,  // read at fixed byte offsets below
+                       int width) {  // reduced by 4 per loop pass
+  uint64_t y, u, v;
+  uint64_t b_vec, g_vec, r_vec, temp;
+  uint64_t ub,ug,vg,vr,bb,bg,br,yg;
+  // Converts 4 pixels per loop pass from a Y row plus an interleaved chroma row into 4-byte B,G,R,A pixels with alpha 0xff.
+  __asm__ volatile(
+      "ldc1       %[yg],      0xc0(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[bb],      0x60(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[ub],      0x00(%[yuvcons_ptr])                 \n\t"
+      "or         %[ub],      %[ub],             %[mask1]          \n\t"  // force the high byte of every halfword to 0xff
+      "ldc1       %[bg],      0x80(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[ug],      0x20(%[yuvcons_ptr])                 \n\t"
+      "punpcklbh  %[ug],      %[ug],             %[zero]           \n\t"
+      "pshufh     %[ug],      %[ug],             %[zero]           \n\t"  // selector 0: replicate halfword 0 into all four lanes
+      "ldc1       %[vg],      0x20(%[yuvcons_ptr])                 \n\t"
+      "punpcklbh  %[vg],      %[vg],             %[zero]           \n\t"
+      "pshufh     %[vg],      %[vg],             %[five]           \n\t"  // selector 0x55: replicate halfword 1 into all four lanes
+      "ldc1       %[br],      0xa0(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[vr],      0x40(%[yuvcons_ptr])                 \n\t"
+      "punpcklbh  %[vr],      %[vr],             %[zero]           \n\t"
+      "pshufh     %[vr],      %[vr],             %[five]           \n\t"
+      "or         %[vr],      %[vr],             %[mask1]          \n\t"
+  // NOTE(review): the loop body runs before width is tested and repeats until width is exactly 0, so width is presumably expected to be a positive multiple of 4 -- confirm with callers.
+      "1:                                                          \n\t"
+      "gslwlc1    %[y],       0x03(%[y_ptr])                       \n\t"
+      "gslwrc1    %[y],       0x00(%[y_ptr])                       \n\t"
+      "gslwlc1    %[u],       0x03(%[uv_ptr])                      \n\t"
+      "gslwrc1    %[u],       0x00(%[uv_ptr])                      \n\t"
+      "punpcklbh  %[u],       %[u],              %[zero]           \n\t"  // widen the 4 chroma bytes to halfwords
+      "pshufh     %[v],       %[u],              %[vshu]           \n\t"  // 0xf5 picks halfwords 1,1,3,3 (the odd bytes)
+      "pshufh     %[u],       %[u],              %[ushu]           \n\t"  // 0xA0 picks halfwords 0,0,2,2 (the even bytes)
+
+      "punpcklbh  %[y],       %[y],              %[y]              \n\t"
+      "pmulhuh    %[y],       %[y],              %[yg]             \n\t"
+
+      "paddsh     %[b_vec],   %[y],              %[bb]             \n\t"
+      "pmullh     %[temp],    %[u],              %[ub]             \n\t"
+      "psubsh     %[b_vec],   %[b_vec],          %[temp]           \n\t"
+      "psrah      %[b_vec],   %[b_vec],          %[six]            \n\t"
+
+      "paddsh     %[g_vec],   %[y],              %[bg]             \n\t"
+      "pmullh     %[temp],    %[u],              %[ug]             \n\t"
+      "psubsh     %[g_vec],   %[g_vec],          %[temp]           \n\t"
+      "pmullh     %[temp],    %[v],              %[vg]             \n\t"
+      "psubsh     %[g_vec],   %[g_vec],          %[temp]           \n\t"
+      "psrah      %[g_vec],   %[g_vec],          %[six]            \n\t"
+
+      "paddsh     %[r_vec],   %[y],              %[br]             \n\t"
+      "pmullh     %[temp],    %[v],              %[vr]             \n\t"
+      "psubsh     %[r_vec],   %[r_vec],          %[temp]           \n\t"
+      "psrah      %[r_vec],   %[r_vec],          %[six]            \n\t"
+  // Saturate to bytes, use the all-ones alpha operand for the fourth channel, and interleave to B,G,R,A per pixel.
+      "packushb   %[r_vec],   %[b_vec],          %[r_vec]          \n\t"
+      "packushb   %[g_vec],   %[g_vec],          %[zero]           \n\t"
+      "punpcklwd  %[g_vec],   %[g_vec],          %[alpha]          \n\t"
+      "punpcklbh  %[b_vec],   %[r_vec],          %[g_vec]          \n\t"
+      "punpckhbh  %[r_vec],   %[r_vec],          %[g_vec]          \n\t"
+      "punpcklhw  %[g_vec],   %[b_vec],          %[r_vec]          \n\t"
+      "punpckhhw  %[b_vec],   %[b_vec],          %[r_vec]          \n\t"
+
+      "gssdlc1    %[g_vec],   0x07(%[rgbbuf_ptr])                  \n\t"
+      "gssdrc1    %[g_vec],   0x00(%[rgbbuf_ptr])                  \n\t"
+      "gssdlc1    %[b_vec],   0x0f(%[rgbbuf_ptr])                  \n\t"
+      "gssdrc1    %[b_vec],   0x08(%[rgbbuf_ptr])                  \n\t"
+  // Advance every stream by the amount consumed for 4 pixels, then loop while width is non-zero.
+      "daddiu     %[y_ptr],     %[y_ptr],        0x04              \n\t"
+      "daddiu     %[uv_ptr],    %[uv_ptr],       0x04              \n\t"
+      "daddiu     %[rgbbuf_ptr], %[rgbbuf_ptr],  0x10              \n\t"
+      "daddi      %[width],     %[width],       -0x04              \n\t"
+      "bnez       %[width],     1b                                 \n\t"
+  // NOTE(review): the pointer and width operands below are declared as inputs yet are modified above; GCC documents that input-only operands must not be modified.
+      : [y]"=&f"(y),                    [u]"=&f"(u),
+        [v]"=&f"(v),
+        [b_vec]"=&f"(b_vec),            [g_vec]"=&f"(g_vec),
+        [r_vec]"=&f"(r_vec),            [temp]"=&f"(temp),
+        [ub]"=&f"(ub),                  [ug]"=&f"(ug),
+        [vg]"=&f"(vg),                  [vr]"=&f"(vr),
+        [bb]"=&f"(bb),                  [bg]"=&f"(bg),
+        [br]"=&f"(br),                  [yg]"=&f"(yg)
+      : [y_ptr]"r"(src_y),              [uv_ptr]"r"(src_uv),
+        [rgbbuf_ptr]"r"(rgb_buf),
+        [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
+        [zero]"f"(0x00),                [five]"f"(0x55),
+        [six]"f"(0x6),                  [mask1]"f"(0xff00ff00ff00ff00),
+        [ushu]"f"(0xA0),                [vshu]"f"(0xf5),
+        [alpha]"f"(-1)
+      : "memory"
+  );
+}
+
+void NV21ToARGBRow_MMI(const uint8_t* src_y,  // Y bytes, 4 consumed per loop pass
+                       const uint8_t* src_vu,  // interleaved chroma bytes, 4 consumed per loop pass (even bytes feed v, odd bytes feed u)
+                       uint8_t* rgb_buf,  // destination, 16 bytes written per loop pass
+                       const struct YuvConstants* yuvconstants,  // read at fixed byte offsets below
+                       int width) {  // reduced by 4 per loop pass
+  uint64_t y, u, v;
+  uint64_t b_vec, g_vec, r_vec, temp;
+  uint64_t ub,ug,vg,vr,bb,bg,br,yg;
+  // Converts 4 pixels per loop pass from a Y row plus an interleaved chroma row into 4-byte B,G,R,A pixels with alpha 0xff.
+  __asm__ volatile(
+      "ldc1       %[yg],      0xc0(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[bb],      0x60(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[ub],      0x00(%[yuvcons_ptr])                 \n\t"
+      "or         %[ub],      %[ub],             %[mask1]          \n\t"  // force the high byte of every halfword to 0xff
+      "ldc1       %[bg],      0x80(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[ug],      0x20(%[yuvcons_ptr])                 \n\t"
+      "punpcklbh  %[ug],      %[ug],             %[zero]           \n\t"
+      "pshufh     %[ug],      %[ug],             %[zero]           \n\t"  // selector 0: replicate halfword 0 into all four lanes
+      "ldc1       %[vg],      0x20(%[yuvcons_ptr])                 \n\t"
+      "punpcklbh  %[vg],      %[vg],             %[zero]           \n\t"
+      "pshufh     %[vg],      %[vg],             %[five]           \n\t"  // selector 0x55: replicate halfword 1 into all four lanes
+      "ldc1       %[br],      0xa0(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[vr],      0x40(%[yuvcons_ptr])                 \n\t"
+      "punpcklbh  %[vr],      %[vr],             %[zero]           \n\t"
+      "pshufh     %[vr],      %[vr],             %[five]           \n\t"
+      "or         %[vr],      %[vr],             %[mask1]          \n\t"
+  // NOTE(review): the loop body runs before width is tested and repeats until width is exactly 0, so width is presumably expected to be a positive multiple of 4 -- confirm with callers.
+      "1:                                                          \n\t"
+      "gslwlc1    %[y],       0x03(%[y_ptr])                       \n\t"
+      "gslwrc1    %[y],       0x00(%[y_ptr])                       \n\t"
+      "gslwlc1    %[u],       0x03(%[vu_ptr])                      \n\t"
+      "gslwrc1    %[u],       0x00(%[vu_ptr])                      \n\t"
+      "punpcklbh  %[u],       %[u],              %[zero]           \n\t"  // widen the 4 chroma bytes to halfwords
+      "pshufh     %[v],       %[u],              %[ushu]           \n\t"  // despite its name, ushu (0xA0) picks halfwords 0,0,2,2, which feed v here
+      "pshufh     %[u],       %[u],              %[vshu]           \n\t"  // vshu (0xf5) picks halfwords 1,1,3,3, which feed u here
+
+      "punpcklbh  %[y],       %[y],              %[y]              \n\t"
+      "pmulhuh    %[y],       %[y],              %[yg]             \n\t"
+
+      "paddsh     %[b_vec],   %[y],              %[bb]             \n\t"
+      "pmullh     %[temp],    %[u],              %[ub]             \n\t"
+      "psubsh     %[b_vec],   %[b_vec],          %[temp]           \n\t"
+      "psrah      %[b_vec],   %[b_vec],          %[six]            \n\t"
+
+      "paddsh     %[g_vec],   %[y],              %[bg]             \n\t"
+      "pmullh     %[temp],    %[u],              %[ug]             \n\t"
+      "psubsh     %[g_vec],   %[g_vec],          %[temp]           \n\t"
+      "pmullh     %[temp],    %[v],              %[vg]             \n\t"
+      "psubsh     %[g_vec],   %[g_vec],          %[temp]           \n\t"
+      "psrah      %[g_vec],   %[g_vec],          %[six]            \n\t"
+
+      "paddsh     %[r_vec],   %[y],              %[br]             \n\t"
+      "pmullh     %[temp],    %[v],              %[vr]             \n\t"
+      "psubsh     %[r_vec],   %[r_vec],          %[temp]           \n\t"
+      "psrah      %[r_vec],   %[r_vec],          %[six]            \n\t"
+  // Saturate to bytes, use the all-ones alpha operand for the fourth channel, and interleave to B,G,R,A per pixel.
+      "packushb   %[r_vec],   %[b_vec],          %[r_vec]          \n\t"
+      "packushb   %[g_vec],   %[g_vec],          %[zero]           \n\t"
+      "punpcklwd  %[g_vec],   %[g_vec],          %[alpha]          \n\t"
+      "punpcklbh  %[b_vec],   %[r_vec],          %[g_vec]          \n\t"
+      "punpckhbh  %[r_vec],   %[r_vec],          %[g_vec]          \n\t"
+      "punpcklhw  %[g_vec],   %[b_vec],          %[r_vec]          \n\t"
+      "punpckhhw  %[b_vec],   %[b_vec],          %[r_vec]          \n\t"
+
+      "gssdlc1    %[g_vec],   0x07(%[rgbbuf_ptr])                  \n\t"
+      "gssdrc1    %[g_vec],   0x00(%[rgbbuf_ptr])                  \n\t"
+      "gssdlc1    %[b_vec],   0x0f(%[rgbbuf_ptr])                  \n\t"
+      "gssdrc1    %[b_vec],   0x08(%[rgbbuf_ptr])                  \n\t"
+  // Advance every stream by the amount consumed for 4 pixels, then loop while width is non-zero.
+      "daddiu     %[y_ptr],     %[y_ptr],        0x04              \n\t"
+      "daddiu     %[vu_ptr],    %[vu_ptr],       0x04              \n\t"
+      "daddiu     %[rgbbuf_ptr], %[rgbbuf_ptr],  0x10              \n\t"
+      "daddi      %[width],     %[width],       -0x04              \n\t"
+      "bnez       %[width],     1b                                 \n\t"
+  // NOTE(review): the pointer and width operands below are declared as inputs yet are modified above; GCC documents that input-only operands must not be modified.
+      : [y]"=&f"(y),                    [u]"=&f"(u),
+        [v]"=&f"(v),
+        [b_vec]"=&f"(b_vec),            [g_vec]"=&f"(g_vec),
+        [r_vec]"=&f"(r_vec),            [temp]"=&f"(temp),
+        [ub]"=&f"(ub),                  [ug]"=&f"(ug),
+        [vg]"=&f"(vg),                  [vr]"=&f"(vr),
+        [bb]"=&f"(bb),                  [bg]"=&f"(bg),
+        [br]"=&f"(br),                  [yg]"=&f"(yg)
+      : [y_ptr]"r"(src_y),              [vu_ptr]"r"(src_vu),
+        [rgbbuf_ptr]"r"(rgb_buf),
+        [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
+        [zero]"f"(0x00),                [five]"f"(0x55),
+        [six]"f"(0x6),                  [mask1]"f"(0xff00ff00ff00ff00),
+        [ushu]"f"(0xA0),                [vshu]"f"(0xf5),
+        [alpha]"f"(-1)
+      : "memory"
+  );
+}
+
+void NV12ToRGB24Row_MMI(const uint8_t* src_y,  // Y bytes, 4 consumed per loop pass
+                        const uint8_t* src_uv,  // interleaved chroma bytes, 4 consumed per loop pass (even bytes feed u, odd bytes feed v)
+                        uint8_t* rgb_buf,  // destination, 12 bytes written per loop pass
+                        const struct YuvConstants* yuvconstants,  // read at fixed byte offsets below
+                        int width) {  // reduced by 4 per loop pass
+  uint64_t y, u, v;
+  uint64_t b_vec, g_vec, r_vec, temp;
+  uint64_t ub,ug,vg,vr,bb,bg,br,yg;
+  // Converts 4 pixels per loop pass from a Y row plus an interleaved chroma row into packed 3-byte B,G,R pixels.
+  __asm__ volatile(
+      "ldc1       %[yg],      0xc0(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[bb],      0x60(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[ub],      0x00(%[yuvcons_ptr])                 \n\t"
+      "or         %[ub],      %[ub],             %[mask1]          \n\t"  // force the high byte of every halfword to 0xff
+      "ldc1       %[bg],      0x80(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[ug],      0x20(%[yuvcons_ptr])                 \n\t"
+      "punpcklbh  %[ug],      %[ug],             %[zero]           \n\t"
+      "pshufh     %[ug],      %[ug],             %[zero]           \n\t"  // selector 0: replicate halfword 0 into all four lanes
+      "ldc1       %[vg],      0x20(%[yuvcons_ptr])                 \n\t"
+      "punpcklbh  %[vg],      %[vg],             %[zero]           \n\t"
+      "pshufh     %[vg],      %[vg],             %[five]           \n\t"  // selector 0x55: replicate halfword 1 into all four lanes
+      "ldc1       %[br],      0xa0(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[vr],      0x40(%[yuvcons_ptr])                 \n\t"
+      "punpcklbh  %[vr],      %[vr],             %[zero]           \n\t"
+      "pshufh     %[vr],      %[vr],             %[five]           \n\t"
+      "or         %[vr],      %[vr],             %[mask1]          \n\t"
+  // NOTE(review): the loop body runs before width is tested and repeats until width is exactly 0, so width is presumably expected to be a positive multiple of 4 -- confirm with callers.
+      "1:                                                          \n\t"
+      "gslwlc1    %[y],       0x03(%[y_ptr])                       \n\t"
+      "gslwrc1    %[y],       0x00(%[y_ptr])                       \n\t"
+      "gslwlc1    %[u],       0x03(%[uv_ptr])                      \n\t"
+      "gslwrc1    %[u],       0x00(%[uv_ptr])                      \n\t"
+      "punpcklbh  %[u],       %[u],              %[zero]           \n\t"  // widen the 4 chroma bytes to halfwords
+      "pshufh     %[v],       %[u],              %[vshu]           \n\t"  // 0xf5 picks halfwords 1,1,3,3 (the odd bytes)
+      "pshufh     %[u],       %[u],              %[ushu]           \n\t"  // 0xA0 picks halfwords 0,0,2,2 (the even bytes)
+
+      "punpcklbh  %[y],       %[y],              %[y]              \n\t"
+      "pmulhuh    %[y],       %[y],              %[yg]             \n\t"
+
+      "paddsh     %[b_vec],   %[y],              %[bb]             \n\t"
+      "pmullh     %[temp],    %[u],              %[ub]             \n\t"
+      "psubsh     %[b_vec],   %[b_vec],          %[temp]           \n\t"
+      "psrah      %[b_vec],   %[b_vec],          %[six]            \n\t"
+
+      "paddsh     %[g_vec],   %[y],              %[bg]             \n\t"
+      "pmullh     %[temp],    %[u],              %[ug]             \n\t"
+      "psubsh     %[g_vec],   %[g_vec],          %[temp]           \n\t"
+      "pmullh     %[temp],    %[v],              %[vg]             \n\t"
+      "psubsh     %[g_vec],   %[g_vec],          %[temp]           \n\t"
+      "psrah      %[g_vec],   %[g_vec],          %[six]            \n\t"
+
+      "paddsh     %[r_vec],   %[y],              %[br]             \n\t"
+      "pmullh     %[temp],    %[v],              %[vr]             \n\t"
+      "psubsh     %[r_vec],   %[r_vec],          %[temp]           \n\t"
+      "psrah      %[r_vec],   %[r_vec],          %[six]            \n\t"
+  // Saturate to bytes and interleave to B,G,R,0 per pixel (g_vec holds pixels 0-1, b_vec pixels 2-3).
+      "packushb   %[r_vec],   %[b_vec],          %[r_vec]          \n\t"
+      "packushb   %[g_vec],   %[g_vec],          %[zero]           \n\t"
+      "punpcklbh  %[b_vec],   %[r_vec],          %[g_vec]          \n\t"
+      "punpckhbh  %[r_vec],   %[r_vec],          %[g_vec]          \n\t"
+      "punpcklhw  %[g_vec],   %[b_vec],          %[r_vec]          \n\t"
+      "punpckhhw  %[b_vec],   %[b_vec],          %[r_vec]          \n\t"
+  // Squeeze out the zero fourth byte of each pixel so the 16 bytes above become 12 contiguous bytes (8 in g_vec, 4 in b_vec).
+      "punpckhwd  %[r_vec],   %[g_vec],          %[g_vec]          \n\t"
+      "psllw      %[temp],    %[r_vec],          %[lmove1]         \n\t"
+      "or         %[g_vec],   %[g_vec],          %[temp]           \n\t"
+      "psrlw      %[temp],    %[r_vec],          %[rmove1]         \n\t"
+      "pextrh     %[temp],    %[temp],           %[zero]           \n\t"
+      "pinsrh_2   %[g_vec],   %[g_vec],          %[temp]           \n\t"
+      "pextrh     %[temp],    %[b_vec],          %[zero]           \n\t"
+      "pinsrh_3   %[g_vec],   %[g_vec],          %[temp]           \n\t"
+      "pextrh     %[temp],    %[b_vec],          %[one]            \n\t"
+      "punpckhwd  %[b_vec],   %[b_vec],          %[b_vec]          \n\t"
+      "psllw      %[b_vec],   %[b_vec],          %[rmove1]         \n\t"
+      "or         %[b_vec],   %[b_vec],          %[temp]           \n\t"
+      "gssdlc1    %[g_vec],   0x07(%[rgbbuf_ptr])                  \n\t"
+      "gssdrc1    %[g_vec],   0x00(%[rgbbuf_ptr])                  \n\t"
+      "gsswlc1    %[b_vec],   0x0b(%[rgbbuf_ptr])                  \n\t"
+      "gsswrc1    %[b_vec],   0x08(%[rgbbuf_ptr])                  \n\t"
+  // Advance every stream by the amount consumed for 4 pixels, then loop while width is non-zero.
+      "daddiu     %[y_ptr],     %[y_ptr],        0x04              \n\t"
+      "daddiu     %[uv_ptr],    %[uv_ptr],       0x04              \n\t"
+      "daddiu     %[rgbbuf_ptr], %[rgbbuf_ptr],  0x0C              \n\t"
+      "daddi      %[width],     %[width],       -0x04              \n\t"
+      "bnez       %[width],     1b                                 \n\t"
+  // NOTE(review): the pointer and width operands below are declared as inputs yet are modified above; GCC documents that input-only operands must not be modified.
+      : [y]"=&f"(y),                    [u]"=&f"(u),
+        [v]"=&f"(v),
+        [b_vec]"=&f"(b_vec),            [g_vec]"=&f"(g_vec),
+        [r_vec]"=&f"(r_vec),            [temp]"=&f"(temp),
+        [ub]"=&f"(ub),                  [ug]"=&f"(ug),
+        [vg]"=&f"(vg),                  [vr]"=&f"(vr),
+        [bb]"=&f"(bb),                  [bg]"=&f"(bg),
+        [br]"=&f"(br),                  [yg]"=&f"(yg)
+      : [y_ptr]"r"(src_y),              [uv_ptr]"r"(src_uv),
+        [rgbbuf_ptr]"r"(rgb_buf),
+        [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
+        [zero]"f"(0x00),                [five]"f"(0x55),
+        [six]"f"(0x6),                  [mask1]"f"(0xff00ff00ff00ff00),
+        [ushu]"f"(0xA0),                [vshu]"f"(0xf5),
+        [lmove1]"f"(0x18),              [rmove1]"f"(0x8),
+        [one]"f"(0x1)  // the template writes no alpha byte, so no alpha operand is passed
+      : "memory"
+  );
+}
+
+void NV21ToRGB24Row_MMI(const uint8_t* src_y,  // Y bytes, 4 consumed per loop pass
+                        const uint8_t* src_vu,  // interleaved chroma bytes, 4 consumed per loop pass (even bytes feed v, odd bytes feed u)
+                        uint8_t* rgb_buf,  // destination, 12 bytes written per loop pass
+                        const struct YuvConstants* yuvconstants,  // read at fixed byte offsets below
+                        int width) {  // reduced by 4 per loop pass
+  uint64_t y, u, v;
+  uint64_t b_vec, g_vec, r_vec, temp;
+  uint64_t ub,ug,vg,vr,bb,bg,br,yg;
+  // Converts 4 pixels per loop pass from a Y row plus an interleaved chroma row into packed 3-byte B,G,R pixels.
+  __asm__ volatile(
+      "ldc1       %[yg],      0xc0(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[bb],      0x60(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[ub],      0x00(%[yuvcons_ptr])                 \n\t"
+      "or         %[ub],      %[ub],             %[mask1]          \n\t"  // force the high byte of every halfword to 0xff
+      "ldc1       %[bg],      0x80(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[ug],      0x20(%[yuvcons_ptr])                 \n\t"
+      "punpcklbh  %[ug],      %[ug],             %[zero]           \n\t"
+      "pshufh     %[ug],      %[ug],             %[zero]           \n\t"  // selector 0: replicate halfword 0 into all four lanes
+      "ldc1       %[vg],      0x20(%[yuvcons_ptr])                 \n\t"
+      "punpcklbh  %[vg],      %[vg],             %[zero]           \n\t"
+      "pshufh     %[vg],      %[vg],             %[five]           \n\t"  // selector 0x55: replicate halfword 1 into all four lanes
+      "ldc1       %[br],      0xa0(%[yuvcons_ptr])                 \n\t"
+      "ldc1       %[vr],      0x40(%[yuvcons_ptr])                 \n\t"
+      "punpcklbh  %[vr],      %[vr],             %[zero]           \n\t"
+      "pshufh     %[vr],      %[vr],             %[five]           \n\t"
+      "or         %[vr],      %[vr],             %[mask1]          \n\t"
+  // NOTE(review): the loop body runs before width is tested and repeats until width is exactly 0, so width is presumably expected to be a positive multiple of 4 -- confirm with callers.
+      "1:                                                          \n\t"
+      "gslwlc1    %[y],       0x03(%[y_ptr])                       \n\t"
+      "gslwrc1    %[y],       0x00(%[y_ptr])                       \n\t"
+      "gslwlc1    %[u],       0x03(%[vu_ptr])                      \n\t"
+      "gslwrc1    %[u],       0x00(%[vu_ptr])                      \n\t"
+      "punpcklbh  %[u],       %[u],              %[zero]           \n\t"  // widen the 4 chroma bytes to halfwords
+      "pshufh     %[v],       %[u],              %[ushu]           \n\t"  // despite its name, ushu (0xA0) picks halfwords 0,0,2,2, which feed v here
+      "pshufh     %[u],       %[u],              %[vshu]           \n\t"  // vshu (0xf5) picks halfwords 1,1,3,3, which feed u here
+
+      "punpcklbh  %[y],       %[y],              %[y]              \n\t"
+      "pmulhuh    %[y],       %[y],              %[yg]             \n\t"
+
+      "paddsh     %[b_vec],   %[y],              %[bb]             \n\t"
+      "pmullh     %[temp],    %[u],              %[ub]             \n\t"
+      "psubsh     %[b_vec],   %[b_vec],          %[temp]           \n\t"
+      "psrah      %[b_vec],   %[b_vec],          %[six]            \n\t"
+
+      "paddsh     %[g_vec],   %[y],              %[bg]             \n\t"
+      "pmullh     %[temp],    %[u],              %[ug]             \n\t"
+      "psubsh     %[g_vec],   %[g_vec],          %[temp]           \n\t"
+      "pmullh     %[temp],    %[v],              %[vg]             \n\t"
+      "psubsh     %[g_vec],   %[g_vec],          %[temp]           \n\t"
+      "psrah      %[g_vec],   %[g_vec],          %[six]            \n\t"
+
+      "paddsh     %[r_vec],   %[y],              %[br]             \n\t"
+      "pmullh     %[temp],    %[v],              %[vr]             \n\t"
+      "psubsh     %[r_vec],   %[r_vec],          %[temp]           \n\t"
+      "psrah      %[r_vec],   %[r_vec],          %[six]            \n\t"
+  // Saturate to bytes and interleave to B,G,R,0 per pixel (g_vec holds pixels 0-1, b_vec pixels 2-3).
+      "packushb   %[r_vec],   %[b_vec],          %[r_vec]          \n\t"
+      "packushb   %[g_vec],   %[g_vec],          %[zero]           \n\t"
+      "punpcklbh  %[b_vec],   %[r_vec],          %[g_vec]          \n\t"
+      "punpckhbh  %[r_vec],   %[r_vec],          %[g_vec]          \n\t"
+      "punpcklhw  %[g_vec],   %[b_vec],          %[r_vec]          \n\t"
+      "punpckhhw  %[b_vec],   %[b_vec],          %[r_vec]          \n\t"
+  // Squeeze out the zero fourth byte of each pixel so the 16 bytes above become 12 contiguous bytes (8 in g_vec, 4 in b_vec).
+      "punpckhwd  %[r_vec],   %[g_vec],          %[g_vec]          \n\t"
+      "psllw      %[temp],    %[r_vec],          %[lmove1]         \n\t"
+      "or         %[g_vec],   %[g_vec],          %[temp]           \n\t"
+      "psrlw      %[temp],    %[r_vec],          %[rmove1]         \n\t"
+      "pextrh     %[temp],    %[temp],           %[zero]           \n\t"
+      "pinsrh_2   %[g_vec],   %[g_vec],          %[temp]           \n\t"
+      "pextrh     %[temp],    %[b_vec],          %[zero]           \n\t"
+      "pinsrh_3   %[g_vec],   %[g_vec],          %[temp]           \n\t"
+      "pextrh     %[temp],    %[b_vec],          %[one]            \n\t"
+      "punpckhwd  %[b_vec],   %[b_vec],          %[b_vec]          \n\t"
+      "psllw      %[b_vec],   %[b_vec],          %[rmove1]         \n\t"
+      "or         %[b_vec],   %[b_vec],          %[temp]           \n\t"
+      "gssdlc1    %[g_vec],   0x07(%[rgbbuf_ptr])                  \n\t"
+      "gssdrc1    %[g_vec],   0x00(%[rgbbuf_ptr])                  \n\t"
+      "gsswlc1    %[b_vec],   0x0b(%[rgbbuf_ptr])                  \n\t"
+      "gsswrc1    %[b_vec],   0x08(%[rgbbuf_ptr])                  \n\t"
+  // Advance every stream by the amount consumed for 4 pixels, then loop while width is non-zero.
+      "daddiu     %[y_ptr],     %[y_ptr],        0x04              \n\t"
+      "daddiu     %[vu_ptr],    %[vu_ptr],       0x04              \n\t"
+      "daddiu     %[rgbbuf_ptr], %[rgbbuf_ptr],  0x0C              \n\t"
+      "daddi      %[width],     %[width],       -0x04              \n\t"
+      "bnez       %[width],     1b                                 \n\t"
+  // NOTE(review): the pointer and width operands below are declared as inputs yet are modified above; GCC documents that input-only operands must not be modified.
+      : [y]"=&f"(y),                    [u]"=&f"(u),
+        [v]"=&f"(v),
+        [b_vec]"=&f"(b_vec),            [g_vec]"=&f"(g_vec),
+        [r_vec]"=&f"(r_vec),            [temp]"=&f"(temp),
+        [ub]"=&f"(ub),                  [ug]"=&f"(ug),
+        [vg]"=&f"(vg),                  [vr]"=&f"(vr),
+        [bb]"=&f"(bb),                  [bg]"=&f"(bg),
+        [br]"=&f"(br),                  [yg]"=&f"(yg)
+      : [y_ptr]"r"(src_y),              [vu_ptr]"r"(src_vu),
+        [rgbbuf_ptr]"r"(rgb_buf),
+        [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
+        [zero]"f"(0x00),                [five]"f"(0x55),
+        [six]"f"(0x6),                  [mask1]"f"(0xff00ff00ff00ff00),
+        [ushu]"f"(0xA0),                [vshu]"f"(0xf5),
+        [lmove1]"f"(0x18),              [rmove1]"f"(0x8),
+        [one]"f"(0x1)
+      : "memory"
+  );
+}
+
+// Converts one row of NV12 (a Y plane plus an interleaved chroma plane) to
+// RGB565 using Loongson MMI instructions.
+//
+//   src_y        - luma bytes, 4 consumed per loop pass.
+//   src_uv       - interleaved chroma bytes, 4 consumed per loop pass.
+//   dst_rgb565   - output, 8 bytes (4 pixels of 16 bits) written per pass.
+//   yuvconstants - coefficient table; read at byte offsets 0x00, 0x20, 0x40,
+//                  0x60, 0x80, 0xa0 and 0xc0.
+//                  NOTE(review): the meaning of each offset depends on the
+//                  layout of struct YuvConstants, which is not visible here.
+//   width        - pixel count. The loop subtracts 4 per pass and exits only
+//                  when the counter is exactly 0, so it must be a positive
+//                  multiple of 4.
+//
+// The pointers and the counter are advanced inside the asm, so they are
+// declared as read-write ("+r") operands; GCC does not allow an asm statement
+// to modify input-only operands.
+void NV12ToRGB565Row_MMI(const uint8_t* src_y,
+                         const uint8_t* src_uv,
+                         uint8_t* dst_rgb565,
+                         const struct YuvConstants* yuvconstants,
+                         int width) {
+  uint64_t y, u, v;
+  uint64_t b_vec, g_vec, r_vec, temp;
+  uint64_t ub,ug,vg,vr,bb,bg,br,yg;
+
+  __asm__ volatile(
+    // Loop-invariant setup: load the coefficient vectors once.
+    "ldc1       %[yg],           0xc0(%[yuvcons_ptr])          \n\t"
+    "ldc1       %[bb],           0x60(%[yuvcons_ptr])          \n\t"
+    "ldc1       %[ub],           0x00(%[yuvcons_ptr])          \n\t"
+    "or         %[ub],           %[ub],             %[mask1]   \n\t"  // set the high byte of every halfword to 0xff
+    "ldc1       %[bg],           0x80(%[yuvcons_ptr])          \n\t"
+    "ldc1       %[ug],           0x20(%[yuvcons_ptr])          \n\t"
+    "punpcklbh  %[ug],           %[ug],             %[zero]    \n\t"  // widen bytes to halfwords
+    "pshufh     %[ug],           %[ug],             %[zero]    \n\t"  // broadcast halfword 0
+    "ldc1       %[vg],           0x20(%[yuvcons_ptr])          \n\t"
+    "punpcklbh  %[vg],           %[vg],             %[zero]    \n\t"
+    "pshufh     %[vg],           %[vg],             %[five]    \n\t"  // 0x55: broadcast halfword 1
+    "ldc1       %[br],           0xa0(%[yuvcons_ptr])          \n\t"
+    "ldc1       %[vr],           0x40(%[yuvcons_ptr])          \n\t"
+    "punpcklbh  %[vr],           %[vr],             %[zero]    \n\t"
+    "pshufh     %[vr],           %[vr],             %[five]    \n\t"
+    "or         %[vr],           %[vr],             %[mask1]   \n\t"
+
+    "1:                                                        \n\t"
+    // Unaligned 32-bit loads: 4 luma bytes and 4 chroma bytes.
+    "gslwlc1    %[y],            0x03(%[y_ptr])                \n\t"
+    "gslwrc1    %[y],            0x00(%[y_ptr])                \n\t"
+    "gslwlc1    %[u],            0x03(%[uv_ptr])               \n\t"
+    "gslwrc1    %[u],            0x00(%[uv_ptr])               \n\t"
+    "punpcklbh  %[u],            %[u],              %[zero]    \n\t"  // chroma bytes -> 4 halfwords
+    "pshufh     %[v],            %[u],              %[vshu]    \n\t"  // 0xf5: halfwords 1,1,3,3
+    "pshufh     %[u],            %[u],              %[ushu]    \n\t"  // 0xA0: halfwords 0,0,2,2
+
+    "punpcklbh  %[y],            %[y],              %[y]       \n\t"  // duplicate each luma byte into a halfword
+    "pmulhuh    %[y],            %[y],              %[yg]      \n\t"  // high 16 bits of unsigned product
+
+    // b = (y + bb - u * ub) >> 6
+    "paddsh     %[b_vec],        %[y],              %[bb]      \n\t"
+    "pmullh     %[temp],         %[u],              %[ub]      \n\t"
+    "psubsh     %[b_vec],        %[b_vec],          %[temp]    \n\t"
+    "psrah      %[b_vec],        %[b_vec],          %[six]     \n\t"
+
+    // g = (y + bg - u * ug - v * vg) >> 6
+    "paddsh     %[g_vec],        %[y],              %[bg]      \n\t"
+    "pmullh     %[temp],         %[u],              %[ug]      \n\t"
+    "psubsh     %[g_vec],        %[g_vec],          %[temp]    \n\t"
+    "pmullh     %[temp],         %[v],              %[vg]      \n\t"
+    "psubsh     %[g_vec],        %[g_vec],          %[temp]    \n\t"
+    "psrah      %[g_vec],        %[g_vec],          %[six]     \n\t"
+
+    // r = (y + br - v * vr) >> 6
+    "paddsh     %[r_vec],        %[y],              %[br]      \n\t"
+    "pmullh     %[temp],         %[v],              %[vr]      \n\t"
+    "psubsh     %[r_vec],        %[r_vec],          %[temp]    \n\t"
+    "psrah      %[r_vec],        %[r_vec],          %[six]     \n\t"
+
+    // Saturate to bytes and interleave so that g_vec holds pixels 0-1 and
+    // b_vec holds pixels 2-3, each as a 32-bit lane of bytes B, G, R, 0.
+    "packushb   %[r_vec],        %[b_vec],          %[r_vec]   \n\t"
+    "packushb   %[g_vec],        %[g_vec],          %[zero]    \n\t"
+    "punpcklbh  %[b_vec],        %[r_vec],          %[g_vec]   \n\t"
+    "punpckhbh  %[r_vec],        %[r_vec],          %[g_vec]   \n\t"
+    "punpcklhw  %[g_vec],        %[b_vec],          %[r_vec]   \n\t"
+    "punpckhhw  %[b_vec],        %[b_vec],          %[r_vec]   \n\t"
+
+    // Pixels 0-1: reduce each 32-bit lane to
+    // (B >> 3) | ((G >> 2) << 5) | ((R >> 3) << 11).
+    "psrlh      %[temp],         %[g_vec],          %[three]   \n\t"
+    "and        %[g_vec],        %[temp],           %[mask2]   \n\t"  // keep 5 blue bits
+    "psrlw      %[temp],         %[temp],           %[seven]   \n\t"
+    "psrlw      %[r_vec],        %[mask1],          %[eight]   \n\t"  // build 0x00ff00ff per word
+    "and        %[r_vec],        %[temp],           %[r_vec]   \n\t"  // 6 green bits
+    "psubb      %[y],            %[eight],          %[three]   \n\t"  // shift count 8 - 3 = 5
+    "psllw      %[r_vec],        %[r_vec],          %[y]       \n\t"
+    "or         %[g_vec],        %[g_vec],          %[r_vec]   \n\t"
+    "paddb      %[r_vec],        %[three],          %[six]     \n\t"  // shift count 3 + 6 = 9
+    "psrlw      %[temp],         %[temp],           %[r_vec]   \n\t"
+    "and        %[r_vec],        %[temp],           %[mask2]   \n\t"  // 5 red bits
+    "paddb      %[temp],         %[three],          %[eight]   \n\t"  // shift count 3 + 8 = 11
+    "psllw      %[r_vec],        %[r_vec],          %[temp]    \n\t"
+    "or         %[g_vec],        %[g_vec],          %[r_vec]   \n\t"
+
+    // Pixels 2-3: same reduction applied to b_vec.
+    "psrlh      %[temp],         %[b_vec],          %[three]   \n\t"
+    "and        %[b_vec],        %[temp],           %[mask2]   \n\t"
+    "psrlw      %[temp],         %[temp],           %[seven]   \n\t"
+    "psrlw      %[r_vec],        %[mask1],          %[eight]   \n\t"
+    "and        %[r_vec],        %[temp],           %[r_vec]   \n\t"
+    "psubb      %[y],            %[eight],          %[three]   \n\t"  // shift count 8 - 3 = 5
+    "psllw      %[r_vec],        %[r_vec],          %[y]       \n\t"
+    "or         %[b_vec],        %[b_vec],          %[r_vec]   \n\t"
+    "paddb      %[r_vec],        %[three],          %[six]     \n\t"
+    "psrlw      %[temp],         %[temp],           %[r_vec]   \n\t"
+    "and        %[r_vec],        %[temp],           %[mask2]   \n\t"
+    "paddb      %[temp],         %[three],          %[eight]   \n\t"
+    "psllw      %[r_vec],        %[r_vec],          %[temp]    \n\t"
+    "or         %[b_vec],        %[b_vec],          %[r_vec]   \n\t"
+
+    // Gather the low halfword of each lane into pixel order 0, 1, 2, 3.
+    "punpcklhw  %[r_vec],        %[g_vec],          %[b_vec]   \n\t"
+    "punpckhhw  %[b_vec],        %[g_vec],          %[b_vec]   \n\t"
+    "punpcklhw  %[g_vec],        %[r_vec],          %[b_vec]   \n\t"
+
+    // Unaligned 64-bit store of the 4 packed pixels.
+    "gssdlc1    %[g_vec],        0x07(%[dst_rgb565])           \n\t"
+    "gssdrc1    %[g_vec],        0x00(%[dst_rgb565])           \n\t"
+
+    "daddiu     %[y_ptr],        %[y_ptr],          0x04       \n\t"
+    "daddiu     %[uv_ptr],       %[uv_ptr],         0x04       \n\t"
+    "daddiu     %[dst_rgb565],   %[dst_rgb565],     0x08       \n\t"
+    "daddi      %[width],        %[width],          -0x04      \n\t"
+    "bnez       %[width],        1b                            \n\t"
+
+    : [y]"=&f"(y),                         [u]"=&f"(u),
+      [v]"=&f"(v),
+      [b_vec]"=&f"(b_vec),                 [g_vec]"=&f"(g_vec),
+      [r_vec]"=&f"(r_vec),                 [temp]"=&f"(temp),
+      [ub]"=&f"(ub),                       [ug]"=&f"(ug),
+      [vg]"=&f"(vg),                       [vr]"=&f"(vr),
+      [bb]"=&f"(bb),                       [bg]"=&f"(bg),
+      [br]"=&f"(br),                       [yg]"=&f"(yg),
+      [y_ptr]"+r"(src_y),                  [uv_ptr]"+r"(src_uv),
+      [dst_rgb565]"+r"(dst_rgb565),        [width]"+r"(width)
+    : [yuvcons_ptr]"r"(yuvconstants),
+      [zero]"f"(0x00),                     [five]"f"(0x55),
+      [six]"f"(0x6),                       [mask1]"f"(0xff00ff00ff00ff00),
+      [ushu]"f"(0xA0),                     [vshu]"f"(0xf5),
+      [three]"f"(0x3),                     [mask2]"f"(0x1f0000001f),
+      [eight]"f"(0x8),                     [seven]"f"(0x7)
+    : "memory"
+  );
+}
+
+// Converts one row of packed YUY2 to 32-bit pixels stored as bytes
+// B, G, R, 0xff using Loongson MMI instructions.
+//
+//   src_yuy2     - packed input, 8 bytes (4 pixels) consumed per loop pass.
+//                  Luma is taken from the low byte of every halfword and
+//                  chroma from the high byte.
+//   rgb_buf      - output, 16 bytes (4 pixels) written per pass.
+//   yuvconstants - coefficient table; read at byte offsets 0x00, 0x20, 0x40,
+//                  0x60, 0x80, 0xa0 and 0xc0.
+//                  NOTE(review): the meaning of each offset depends on the
+//                  layout of struct YuvConstants, which is not visible here.
+//   width        - pixel count. The loop subtracts 4 per pass and exits only
+//                  when the counter is exactly 0, so it must be a positive
+//                  multiple of 4.
+//
+// The pointers and the counter are advanced inside the asm, so they are
+// declared as read-write ("+r") operands; GCC does not allow an asm statement
+// to modify input-only operands.
+void YUY2ToARGBRow_MMI(const uint8_t* src_yuy2,
+                       uint8_t* rgb_buf,
+                       const struct YuvConstants* yuvconstants,
+                       int width) {
+  uint64_t y, u, v;
+  uint64_t b_vec, g_vec, r_vec, temp;
+  uint64_t ub,ug,vg,vr,bb,bg,br,yg;
+
+  __asm__ volatile(
+    // Loop-invariant setup: load the coefficient vectors once.
+    "ldc1       %[yg],           0xc0(%[yuvcons_ptr])          \n\t"
+    "ldc1       %[bb],           0x60(%[yuvcons_ptr])          \n\t"
+    "ldc1       %[ub],           0x00(%[yuvcons_ptr])          \n\t"
+    "or         %[ub],           %[ub],             %[mask1]   \n\t"  // set the high byte of every halfword to 0xff
+    "ldc1       %[bg],           0x80(%[yuvcons_ptr])          \n\t"
+    "ldc1       %[ug],           0x20(%[yuvcons_ptr])          \n\t"
+    "punpcklbh  %[ug],           %[ug],             %[zero]    \n\t"  // widen bytes to halfwords
+    "pshufh     %[ug],           %[ug],             %[zero]    \n\t"  // broadcast halfword 0
+    "ldc1       %[vg],           0x20(%[yuvcons_ptr])          \n\t"
+    "punpcklbh  %[vg],           %[vg],             %[zero]    \n\t"
+    "pshufh     %[vg],           %[vg],             %[five]    \n\t"  // 0x55: broadcast halfword 1
+    "ldc1       %[br],           0xa0(%[yuvcons_ptr])          \n\t"
+    "ldc1       %[vr],           0x40(%[yuvcons_ptr])          \n\t"
+    "punpcklbh  %[vr],           %[vr],             %[zero]    \n\t"
+    "pshufh     %[vr],           %[vr],             %[five]    \n\t"
+    "or         %[vr],           %[vr],             %[mask1]   \n\t"
+
+    "1:                                                        \n\t"
+    // Unaligned 64-bit load of 4 packed pixels.
+    "gsldlc1    %[y],            0x07(%[yuy2_ptr])             \n\t"
+    "gsldrc1    %[y],            0x00(%[yuy2_ptr])             \n\t"
+    "psrlh      %[temp],         %[y],              %[eight]   \n\t"  // high byte of each halfword = chroma
+    "pshufh     %[u],            %[temp],           %[ushu]    \n\t"  // 0xA0: halfwords 0,0,2,2
+    "pshufh     %[v],            %[temp],           %[vshu]    \n\t"  // 0xf5: halfwords 1,1,3,3
+
+    "psrlh      %[temp],         %[mask1],          %[eight]   \n\t"  // build 0x00ff per halfword
+    "and        %[y],            %[y],              %[temp]    \n\t"  // low byte of each halfword = luma
+    "psllh      %[temp],         %[y],              %[eight]   \n\t"
+    "or         %[y],            %[y],              %[temp]    \n\t"  // duplicate luma into both bytes
+    "pmulhuh    %[y],            %[y],              %[yg]      \n\t"  // high 16 bits of unsigned product
+
+    // b = (y + bb - u * ub) >> 6
+    "paddsh     %[b_vec],        %[y],              %[bb]      \n\t"
+    "pmullh     %[temp],         %[u],              %[ub]      \n\t"
+    "psubsh     %[b_vec],        %[b_vec],          %[temp]    \n\t"
+    "psrah      %[b_vec],        %[b_vec],          %[six]     \n\t"
+
+    // g = (y + bg - u * ug - v * vg) >> 6
+    "paddsh     %[g_vec],        %[y],              %[bg]      \n\t"
+    "pmullh     %[temp],         %[u],              %[ug]      \n\t"
+    "psubsh     %[g_vec],        %[g_vec],          %[temp]    \n\t"
+    "pmullh     %[temp],         %[v],              %[vg]      \n\t"
+    "psubsh     %[g_vec],        %[g_vec],          %[temp]    \n\t"
+    "psrah      %[g_vec],        %[g_vec],          %[six]     \n\t"
+
+    // r = (y + br - v * vr) >> 6
+    "paddsh     %[r_vec],        %[y],              %[br]      \n\t"
+    "pmullh     %[temp],         %[v],              %[vr]      \n\t"
+    "psubsh     %[r_vec],        %[r_vec],          %[temp]    \n\t"
+    "psrah      %[r_vec],        %[r_vec],          %[six]     \n\t"
+
+    // Saturate to bytes, append 0xff alpha bytes and interleave to
+    // B, G, R, A per pixel: g_vec = pixels 0-1, b_vec = pixels 2-3.
+    "packushb   %[r_vec],        %[b_vec],          %[r_vec]   \n\t"
+    "packushb   %[g_vec],        %[g_vec],          %[zero]    \n\t"
+    "punpcklwd  %[g_vec],        %[g_vec],          %[alpha]   \n\t"
+    "punpcklbh  %[b_vec],        %[r_vec],          %[g_vec]   \n\t"
+    "punpckhbh  %[r_vec],        %[r_vec],          %[g_vec]   \n\t"
+    "punpcklhw  %[g_vec],        %[b_vec],          %[r_vec]   \n\t"
+    "punpckhhw  %[b_vec],        %[b_vec],          %[r_vec]   \n\t"
+
+    // Two unaligned 64-bit stores: 16 output bytes.
+    "gssdlc1    %[g_vec],        0x07(%[rgbbuf_ptr])           \n\t"
+    "gssdrc1    %[g_vec],        0x00(%[rgbbuf_ptr])           \n\t"
+    "gssdlc1    %[b_vec],        0x0f(%[rgbbuf_ptr])           \n\t"
+    "gssdrc1    %[b_vec],        0x08(%[rgbbuf_ptr])           \n\t"
+
+    "daddiu     %[yuy2_ptr],     %[yuy2_ptr],       0x08       \n\t"
+    "daddiu     %[rgbbuf_ptr],   %[rgbbuf_ptr],     0x10       \n\t"
+    "daddi      %[width],        %[width],          -0x04      \n\t"
+    "bnez       %[width],        1b                            \n\t"
+
+    : [y]"=&f"(y),                         [u]"=&f"(u),
+      [v]"=&f"(v),
+      [b_vec]"=&f"(b_vec),                 [g_vec]"=&f"(g_vec),
+      [r_vec]"=&f"(r_vec),                 [temp]"=&f"(temp),
+      [ub]"=&f"(ub),                       [ug]"=&f"(ug),
+      [vg]"=&f"(vg),                       [vr]"=&f"(vr),
+      [bb]"=&f"(bb),                       [bg]"=&f"(bg),
+      [br]"=&f"(br),                       [yg]"=&f"(yg),
+      [yuy2_ptr]"+r"(src_yuy2),            [rgbbuf_ptr]"+r"(rgb_buf),
+      [width]"+r"(width)
+    : [yuvcons_ptr]"r"(yuvconstants),
+      [zero]"f"(0x00),                     [five]"f"(0x55),
+      [six]"f"(0x6),                       [mask1]"f"(0xff00ff00ff00ff00),
+      [ushu]"f"(0xA0),                     [vshu]"f"(0xf5),
+      [alpha]"f"(-1),                      [eight]"f"(0x8)
+    : "memory"
+  );
+}
+
+// Converts one row of packed UYVY to 32-bit pixels stored as bytes
+// B, G, R, 0xff using Loongson MMI instructions.
+//
+//   src_uyvy     - packed input, 8 bytes (4 pixels) consumed per loop pass.
+//                  Chroma is taken from the low byte of every halfword and
+//                  luma from the high byte.
+//   rgb_buf      - output, 16 bytes (4 pixels) written per pass.
+//   yuvconstants - coefficient table; read at byte offsets 0x00, 0x20, 0x40,
+//                  0x60, 0x80, 0xa0 and 0xc0.
+//                  NOTE(review): the meaning of each offset depends on the
+//                  layout of struct YuvConstants, which is not visible here.
+//   width        - pixel count. The loop subtracts 4 per pass and exits only
+//                  when the counter is exactly 0, so it must be a positive
+//                  multiple of 4.
+//
+// The pointers and the counter are advanced inside the asm, so they are
+// declared as read-write ("+r") operands; GCC does not allow an asm statement
+// to modify input-only operands.
+void UYVYToARGBRow_MMI(const uint8_t* src_uyvy,
+                       uint8_t* rgb_buf,
+                       const struct YuvConstants* yuvconstants,
+                       int width) {
+  uint64_t y, u, v;
+  uint64_t b_vec, g_vec, r_vec, temp;
+  uint64_t ub,ug,vg,vr,bb,bg,br,yg;
+
+  __asm__ volatile(
+    // Loop-invariant setup: load the coefficient vectors once.
+    "ldc1       %[yg],           0xc0(%[yuvcons_ptr])          \n\t"
+    "ldc1       %[bb],           0x60(%[yuvcons_ptr])          \n\t"
+    "ldc1       %[ub],           0x00(%[yuvcons_ptr])          \n\t"
+    "or         %[ub],           %[ub],             %[mask1]   \n\t"  // set the high byte of every halfword to 0xff
+    "ldc1       %[bg],           0x80(%[yuvcons_ptr])          \n\t"
+    "ldc1       %[ug],           0x20(%[yuvcons_ptr])          \n\t"
+    "punpcklbh  %[ug],           %[ug],             %[zero]    \n\t"  // widen bytes to halfwords
+    "pshufh     %[ug],           %[ug],             %[zero]    \n\t"  // broadcast halfword 0
+    "ldc1       %[vg],           0x20(%[yuvcons_ptr])          \n\t"
+    "punpcklbh  %[vg],           %[vg],             %[zero]    \n\t"
+    "pshufh     %[vg],           %[vg],             %[five]    \n\t"  // 0x55: broadcast halfword 1
+    "ldc1       %[br],           0xa0(%[yuvcons_ptr])          \n\t"
+    "ldc1       %[vr],           0x40(%[yuvcons_ptr])          \n\t"
+    "punpcklbh  %[vr],           %[vr],             %[zero]    \n\t"
+    "pshufh     %[vr],           %[vr],             %[five]    \n\t"
+    "or         %[vr],           %[vr],             %[mask1]   \n\t"
+
+    "1:                                                        \n\t"
+    // Unaligned 64-bit load of 4 packed pixels.
+    "gsldlc1    %[y],            0x07(%[uyvy_ptr])             \n\t"
+    "gsldrc1    %[y],            0x00(%[uyvy_ptr])             \n\t"
+    "psrlh      %[temp],         %[mask1],          %[eight]   \n\t"  // build 0x00ff per halfword
+    "and        %[temp],         %[y],              %[temp]    \n\t"  // low byte of each halfword = chroma
+    "pshufh     %[u],            %[temp],           %[ushu]    \n\t"  // 0xA0: halfwords 0,0,2,2
+    "pshufh     %[v],            %[temp],           %[vshu]    \n\t"  // 0xf5: halfwords 1,1,3,3
+
+    "psrlh      %[y],            %[y],              %[eight]   \n\t"  // high byte of each halfword = luma
+    "psllh      %[temp],         %[y],              %[eight]   \n\t"
+    "or         %[y],            %[y],              %[temp]    \n\t"  // duplicate luma into both bytes
+    "pmulhuh    %[y],            %[y],              %[yg]      \n\t"  // high 16 bits of unsigned product
+
+    // b = (y + bb - u * ub) >> 6
+    "paddsh     %[b_vec],        %[y],              %[bb]      \n\t"
+    "pmullh     %[temp],         %[u],              %[ub]      \n\t"
+    "psubsh     %[b_vec],        %[b_vec],          %[temp]    \n\t"
+    "psrah      %[b_vec],        %[b_vec],          %[six]     \n\t"
+
+    // g = (y + bg - u * ug - v * vg) >> 6
+    "paddsh     %[g_vec],        %[y],              %[bg]      \n\t"
+    "pmullh     %[temp],         %[u],              %[ug]      \n\t"
+    "psubsh     %[g_vec],        %[g_vec],          %[temp]    \n\t"
+    "pmullh     %[temp],         %[v],              %[vg]      \n\t"
+    "psubsh     %[g_vec],        %[g_vec],          %[temp]    \n\t"
+    "psrah      %[g_vec],        %[g_vec],          %[six]     \n\t"
+
+    // r = (y + br - v * vr) >> 6
+    "paddsh     %[r_vec],        %[y],              %[br]      \n\t"
+    "pmullh     %[temp],         %[v],              %[vr]      \n\t"
+    "psubsh     %[r_vec],        %[r_vec],          %[temp]    \n\t"
+    "psrah      %[r_vec],        %[r_vec],          %[six]     \n\t"
+
+    // Saturate to bytes, append 0xff alpha bytes and interleave to
+    // B, G, R, A per pixel: g_vec = pixels 0-1, b_vec = pixels 2-3.
+    "packushb   %[r_vec],        %[b_vec],          %[r_vec]   \n\t"
+    "packushb   %[g_vec],        %[g_vec],          %[zero]    \n\t"
+    "punpcklwd  %[g_vec],        %[g_vec],          %[alpha]   \n\t"
+    "punpcklbh  %[b_vec],        %[r_vec],          %[g_vec]   \n\t"
+    "punpckhbh  %[r_vec],        %[r_vec],          %[g_vec]   \n\t"
+    "punpcklhw  %[g_vec],        %[b_vec],          %[r_vec]   \n\t"
+    "punpckhhw  %[b_vec],        %[b_vec],          %[r_vec]   \n\t"
+
+    // Two unaligned 64-bit stores: 16 output bytes.
+    "gssdlc1    %[g_vec],        0x07(%[rgbbuf_ptr])           \n\t"
+    "gssdrc1    %[g_vec],        0x00(%[rgbbuf_ptr])           \n\t"
+    "gssdlc1    %[b_vec],        0x0f(%[rgbbuf_ptr])           \n\t"
+    "gssdrc1    %[b_vec],        0x08(%[rgbbuf_ptr])           \n\t"
+
+    "daddiu     %[uyvy_ptr],     %[uyvy_ptr],       0x08       \n\t"
+    "daddiu     %[rgbbuf_ptr],   %[rgbbuf_ptr],     0x10       \n\t"
+    "daddi      %[width],        %[width],          -0x04      \n\t"
+    "bnez       %[width],        1b                            \n\t"
+
+    : [y]"=&f"(y),                         [u]"=&f"(u),
+      [v]"=&f"(v),
+      [b_vec]"=&f"(b_vec),                 [g_vec]"=&f"(g_vec),
+      [r_vec]"=&f"(r_vec),                 [temp]"=&f"(temp),
+      [ub]"=&f"(ub),                       [ug]"=&f"(ug),
+      [vg]"=&f"(vg),                       [vr]"=&f"(vr),
+      [bb]"=&f"(bb),                       [bg]"=&f"(bg),
+      [br]"=&f"(br),                       [yg]"=&f"(yg),
+      [uyvy_ptr]"+r"(src_uyvy),            [rgbbuf_ptr]"+r"(rgb_buf),
+      [width]"+r"(width)
+    : [yuvcons_ptr]"r"(yuvconstants),
+      [zero]"f"(0x00),                     [five]"f"(0x55),
+      [six]"f"(0x6),                       [mask1]"f"(0xff00ff00ff00ff00),
+      [ushu]"f"(0xA0),                     [vshu]"f"(0xf5),
+      [alpha]"f"(-1),                      [eight]"f"(0x8)
+    : "memory"
+  );
+}
+
+// Converts one row of planar Y, U and V samples to 32-bit pixels stored as
+// bytes 0xff, B, G, R using Loongson MMI instructions.
+//
+//   src_y        - luma bytes, 4 consumed per loop pass.
+//   src_u, src_v - chroma bytes, 2 consumed per loop pass from each plane;
+//                  every chroma sample is applied to two adjacent pixels.
+//                  The 32-bit loads fetch 4 bytes from each plane although
+//                  only the first 2 are used.
+//   rgb_buf      - output, 16 bytes (4 pixels) written per pass.
+//   yuvconstants - coefficient table; read at byte offsets 0x00, 0x20, 0x40,
+//                  0x60, 0x80, 0xa0 and 0xc0.
+//                  NOTE(review): the meaning of each offset depends on the
+//                  layout of struct YuvConstants, which is not visible here.
+//   width        - pixel count. The loop subtracts 4 per pass and exits only
+//                  when the counter is exactly 0, so it must be a positive
+//                  multiple of 4.
+//
+// The pointers and the counter are advanced inside the asm, so they are
+// declared as read-write ("+r") operands; GCC does not allow an asm statement
+// to modify input-only operands.
+void I422ToRGBARow_MMI(const uint8_t* src_y,
+                       const uint8_t* src_u,
+                       const uint8_t* src_v,
+                       uint8_t* rgb_buf,
+                       const struct YuvConstants* yuvconstants,
+                       int width) {
+  uint64_t y, u, v;
+  uint64_t b_vec, g_vec, r_vec, temp;
+  uint64_t ub,ug,vg,vr,bb,bg,br,yg;
+
+  __asm__ volatile(
+    // Loop-invariant setup: load the coefficient vectors once.
+    "ldc1       %[yg],           0xc0(%[yuvcons_ptr])          \n\t"
+    "ldc1       %[bb],           0x60(%[yuvcons_ptr])          \n\t"
+    "ldc1       %[ub],           0x00(%[yuvcons_ptr])          \n\t"
+    "or         %[ub],           %[ub],             %[mask1]   \n\t"  // set the high byte of every halfword to 0xff
+    "ldc1       %[bg],           0x80(%[yuvcons_ptr])          \n\t"
+    "ldc1       %[ug],           0x20(%[yuvcons_ptr])          \n\t"
+    "punpcklbh  %[ug],           %[ug],             %[zero]    \n\t"  // widen bytes to halfwords
+    "pshufh     %[ug],           %[ug],             %[zero]    \n\t"  // broadcast halfword 0
+    "ldc1       %[vg],           0x20(%[yuvcons_ptr])          \n\t"
+    "punpcklbh  %[vg],           %[vg],             %[zero]    \n\t"
+    "pshufh     %[vg],           %[vg],             %[five]    \n\t"  // 0x55: broadcast halfword 1
+    "ldc1       %[br],           0xa0(%[yuvcons_ptr])          \n\t"
+    "ldc1       %[vr],           0x40(%[yuvcons_ptr])          \n\t"
+    "punpcklbh  %[vr],           %[vr],             %[zero]    \n\t"
+    "pshufh     %[vr],           %[vr],             %[five]    \n\t"
+    "or         %[vr],           %[vr],             %[mask1]   \n\t"
+
+    "1:                                                        \n\t"
+    // Unaligned 32-bit loads from each of the three planes.
+    "gslwlc1    %[y],            0x03(%[y_ptr])                \n\t"
+    "gslwrc1    %[y],            0x00(%[y_ptr])                \n\t"
+    "gslwlc1    %[u],            0x03(%[u_ptr])                \n\t"
+    "gslwrc1    %[u],            0x00(%[u_ptr])                \n\t"
+    "gslwlc1    %[v],            0x03(%[v_ptr])                \n\t"
+    "gslwrc1    %[v],            0x00(%[v_ptr])                \n\t"
+
+    "punpcklbh  %[y],            %[y],              %[y]       \n\t"  // duplicate each luma byte into a halfword
+    "pmulhuh    %[y],            %[y],              %[yg]      \n\t"  // high 16 bits of unsigned product
+
+    "punpcklbh  %[u],            %[u],              %[u]       \n\t"  // repeat each U byte twice
+    "punpcklbh  %[u],            %[u],              %[zero]    \n\t"  // widen to halfwords U0,U0,U1,U1
+    // b = (y + bb - u * ub) >> 6
+    "paddsh     %[b_vec],        %[y],              %[bb]      \n\t"
+    "pmullh     %[temp],         %[u],              %[ub]      \n\t"
+    "psubsh     %[b_vec],        %[b_vec],          %[temp]    \n\t"
+    "psrah      %[b_vec],        %[b_vec],          %[six]     \n\t"
+
+    "punpcklbh  %[v],            %[v],              %[v]       \n\t"  // repeat each V byte twice
+    "punpcklbh  %[v],            %[v],              %[zero]    \n\t"  // widen to halfwords V0,V0,V1,V1
+    // g = (y + bg - u * ug - v * vg) >> 6
+    "paddsh     %[g_vec],        %[y],              %[bg]      \n\t"
+    "pmullh     %[temp],         %[u],              %[ug]      \n\t"
+    "psubsh     %[g_vec],        %[g_vec],          %[temp]    \n\t"
+    "pmullh     %[temp],         %[v],              %[vg]      \n\t"
+    "psubsh     %[g_vec],        %[g_vec],          %[temp]    \n\t"
+    "psrah      %[g_vec],        %[g_vec],          %[six]     \n\t"
+
+    // r = (y + br - v * vr) >> 6
+    "paddsh     %[r_vec],        %[y],              %[br]      \n\t"
+    "pmullh     %[temp],         %[v],              %[vr]      \n\t"
+    "psubsh     %[r_vec],        %[r_vec],          %[temp]    \n\t"
+    "psrah      %[r_vec],        %[r_vec],          %[six]     \n\t"
+
+    // Saturate to bytes, prepend 0xff alpha bytes and interleave to
+    // A, B, G, R per pixel: g_vec = pixels 0-1, b_vec = pixels 2-3.
+    "packushb   %[r_vec],        %[b_vec],          %[r_vec]   \n\t"
+    "packushb   %[g_vec],        %[g_vec],          %[zero]    \n\t"
+    "punpcklwd  %[g_vec],        %[alpha],          %[g_vec]   \n\t"
+    "punpcklbh  %[b_vec],        %[g_vec],          %[r_vec]   \n\t"
+    "punpckhbh  %[r_vec],        %[g_vec],          %[r_vec]   \n\t"
+    "punpcklhw  %[g_vec],        %[b_vec],          %[r_vec]   \n\t"
+    "punpckhhw  %[b_vec],        %[b_vec],          %[r_vec]   \n\t"
+
+    // Two unaligned 64-bit stores: 16 output bytes.
+    "gssdlc1    %[g_vec],        0x07(%[rgbbuf_ptr])           \n\t"
+    "gssdrc1    %[g_vec],        0x00(%[rgbbuf_ptr])           \n\t"
+    "gssdlc1    %[b_vec],        0x0f(%[rgbbuf_ptr])           \n\t"
+    "gssdrc1    %[b_vec],        0x08(%[rgbbuf_ptr])           \n\t"
+
+    "daddiu     %[y_ptr],        %[y_ptr],          0x04       \n\t"
+    "daddiu     %[u_ptr],        %[u_ptr],          0x02       \n\t"
+    "daddiu     %[v_ptr],        %[v_ptr],          0x02       \n\t"
+    "daddiu     %[rgbbuf_ptr],   %[rgbbuf_ptr],     0x10       \n\t"
+    "daddi      %[width],        %[width],          -0x04      \n\t"
+    "bnez       %[width],        1b                            \n\t"
+
+    : [y]"=&f"(y),                         [u]"=&f"(u),
+      [v]"=&f"(v),
+      [b_vec]"=&f"(b_vec),                 [g_vec]"=&f"(g_vec),
+      [r_vec]"=&f"(r_vec),                 [temp]"=&f"(temp),
+      [ub]"=&f"(ub),                       [ug]"=&f"(ug),
+      [vg]"=&f"(vg),                       [vr]"=&f"(vr),
+      [bb]"=&f"(bb),                       [bg]"=&f"(bg),
+      [br]"=&f"(br),                       [yg]"=&f"(yg),
+      [y_ptr]"+r"(src_y),                  [u_ptr]"+r"(src_u),
+      [v_ptr]"+r"(src_v),                  [rgbbuf_ptr]"+r"(rgb_buf),
+      [width]"+r"(width)
+    : [yuvcons_ptr]"r"(yuvconstants),
+      [zero]"f"(0x00),                     [five]"f"(0x55),
+      [six]"f"(0x6),                       [mask1]"f"(0xff00ff00ff00ff00),
+      [alpha]"f"(-1)
+    : "memory"
+  );
+}
+
+// Fills a row with the 32-bit value v32 using Loongson MMI instructions.
+//
+//   dst_argb - output, 16 bytes (4 copies of v32) written per loop pass.
+//   v32      - value to replicate.
+//   width    - number of 32-bit values to write. The loop subtracts 4 per
+//              pass and exits only when the counter is exactly 0, so it must
+//              be a positive multiple of 4.
+//
+// The destination pointer and the counter are advanced inside the asm, so
+// they are declared as read-write ("+r") operands; GCC does not allow an asm
+// statement to modify input-only operands.
+void ARGBSetRow_MMI(uint8_t* dst_argb, uint32_t v32, int width) {
+  __asm__ volatile (
+    "punpcklwd  %[v32],          %[v32],            %[v32]     \n\t"  // copy the low word into both halves
+    "1:                                                        \n\t"
+    // Two unaligned 64-bit stores of the duplicated value.
+    "gssdlc1    %[v32],          0x07(%[dst_ptr])              \n\t"
+    "gssdrc1    %[v32],          0x00(%[dst_ptr])              \n\t"
+    "gssdlc1    %[v32],          0x0f(%[dst_ptr])              \n\t"
+    "gssdrc1    %[v32],          0x08(%[dst_ptr])              \n\t"
+
+    "daddi      %[width],        %[width],         -0x04       \n\t"
+    "daddiu     %[dst_ptr],      %[dst_ptr],        0x10       \n\t"
+    "bnez       %[width],        1b                            \n\t"
+    : [v32]"+&f"(v32),                     [dst_ptr]"+r"(dst_argb),
+      [width]"+r"(width)
+    :
+    : "memory"
+  );
+}
+// clang-format on
+
+// 10 bit YUV to ARGB
#endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
#ifdef __cplusplus