aboutsummaryrefslogtreecommitdiff
path: root/libvpx/vp9/common/vp9_maskingmv.c
diff options
context:
space:
mode:
Diffstat (limited to 'libvpx/vp9/common/vp9_maskingmv.c')
-rw-r--r--libvpx/vp9/common/vp9_maskingmv.c803
1 files changed, 803 insertions, 0 deletions
diff --git a/libvpx/vp9/common/vp9_maskingmv.c b/libvpx/vp9/common/vp9_maskingmv.c
new file mode 100644
index 000000000..326201bbe
--- /dev/null
+++ b/libvpx/vp9/common/vp9_maskingmv.c
@@ -0,0 +1,803 @@
+/*
+ ============================================================================
+ Name : vp9_maskingmv.c
+ Author : jimbankoski
+ Version :
+ Copyright : Your copyright notice
+ Description : Colour-segment masks, masked motion search and masked prediction for 16x16 blocks
+ ============================================================================
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Prototypes for routines that are called in this file but not defined in it.
+ * NOTE(review): the _sse3 / _wmt suffixes suggest SIMD implementations that
+ * live elsewhere -- confirm where they come from and what their exact
+ * contracts are.  The notes below only describe how this file calls them.
+ */
+
+/* Called by mainz() on a source block and the co-located block of the other
+ * frame; a result of 0 is treated as "blocks identical". */
+unsigned int vp9_sad16x16_sse3(
+ unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *ref_ptr,
+ int ref_stride,
+ int max_err);
+
+/* Always called right after vp8_makemask_sse3() with that routine's output
+ * mask as `om` and a second 256-byte buffer as `nm`.
+ * presumably the SIMD counterpart of grow_ymask() -- TODO confirm. */
+int vp8_growmaskmb_sse3(
+ unsigned char *om,
+ unsigned char *nm);
+
+/* Callers pass the three plane pointers, a 256-byte output mask, the luma and
+ * chroma strides, then the y/u/v and yt/ut/vt fields of one
+ * COLOR_SEG_ELEMENT, in that order. */
+void vp8_makemask_sse3(
+ unsigned char *y,
+ unsigned char *u,
+ unsigned char *v,
+ unsigned char *ym,
+ int yp,
+ int uvp,
+ int ys,
+ int us,
+ int vs,
+ int yt,
+ int ut,
+ int vt);
+
+/* Used by fast_masked_motion_search() where the C version uses
+ * unmasked_sad(). */
+unsigned int vp9_sad16x16_unmasked_wmt(
+ unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned char *mask);
+
+/* Used by fast_masked_motion_search() where the C version uses
+ * masked_sad(). */
+unsigned int vp9_sad16x16_masked_wmt(
+ unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned char *mask);
+
+/* Called by predict_all() for the luma plane with two candidate source
+ * pointers, a destination pointer and a 256-byte mask. */
+unsigned int vp8_masked_predictor_wmt(
+ unsigned char *masked,
+ unsigned char *unmasked,
+ int src_stride,
+ unsigned char *dst_ptr,
+ int dst_stride,
+ unsigned char *mask);
+/* Called by predict_all() once per chroma plane with a 64-byte mask. */
+unsigned int vp8_masked_predictor_uv_wmt(
+ unsigned char *masked,
+ unsigned char *unmasked,
+ int src_stride,
+ unsigned char *dst_ptr,
+ int dst_stride,
+ unsigned char *mask);
+/* Called by predict_all() to fill a 64-byte chroma mask buffer from a
+ * 256-byte luma mask buffer. */
+unsigned int vp8_uv_from_y_mask(
+ unsigned char *ymask,
+ unsigned char *uvmask);
+/* File-scope value 16.  The functions below take their own `yp` parameter or
+ * local, so this global is not referenced by name in the code visible here. */
+int yp = 16;
+/* Sample data: 16 rows of 32 values.  The first 16 values of each row follow
+ * the same 40/60 pattern as y[] below.  Not referenced by the code visible in
+ * this file. */
+unsigned char sxy[] = {
+ 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
+ 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90
+};
+
+/* 16x16 block in which every sample is 2.  Referenced only from the disabled
+ * (#if 0) self-test inside mainz(). */
+unsigned char sts[] = {
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+};
+/* 16x16 block in which every sample is 1.  Referenced only from the disabled
+ * (#if 0) self-test inside mainz(). */
+unsigned char str[] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+};
+
+/* 16x16 sample block made of the values 40 and 60.  Referenced only from the
+ * disabled (#if 0) self-test inside mainz(), where it is passed as the luma
+ * plane with stride 16. */
+unsigned char y[] = {
+ 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40,
+ 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40,
+ 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40,
+ 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40,
+ 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40,
+ 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40,
+ 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40,
+ 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40,
+ 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40,
+ 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40,
+ 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40,
+ 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40,
+ 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40,
+ 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40,
+ 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40,
+ 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40
+};
+/* File-scope value 8.  The functions below take their own `uvp` parameter, so
+ * this global is not referenced by name in the code visible here. */
+int uvp = 8;
+/* 8x8 sample block.  Referenced only from the disabled (#if 0) self-test
+ * inside mainz(), where it is passed as the u plane with stride 8. */
+unsigned char u[] = {
+ 90, 80, 70, 70, 90, 90, 90, 17,
+ 90, 80, 70, 70, 90, 90, 90, 17,
+ 84, 70, 70, 90, 90, 90, 17, 17,
+ 84, 70, 70, 90, 90, 90, 17, 17,
+ 80, 70, 70, 90, 90, 90, 17, 17,
+ 90, 80, 70, 70, 90, 90, 90, 17,
+ 90, 80, 70, 70, 90, 90, 90, 17,
+ 90, 80, 70, 70, 90, 90, 90, 17
+};
+
+/* 8x8 sample block in which every value is 80.  Referenced only from the
+ * disabled (#if 0) self-test inside mainz(), as the v plane with stride 8. */
+unsigned char v[] = {
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80
+};
+
+/* File-scope 256-byte and 64-byte buffers.  Every function in this file that
+ * uses the names `ym` / `uvm` has a parameter or local of the same name, so
+ * these globals are shadowed wherever the names appear in the visible code. */
+unsigned char ym[256];
+unsigned char uvm[64];
+/*
+ * One colour segment: a centre value and a threshold for each of the three
+ * planes.  pixel_mask() treats a sample as belonging to the segment when
+ * |sample - centre| < threshold holds for all three planes.
+ */
+typedef struct {
+ unsigned char y; /* centre value compared against the y sample */
+ unsigned char yt; /* exclusive upper bound on |y sample - y| */
+ unsigned char u; /* centre value compared against the u sample */
+ unsigned char ut; /* exclusive upper bound on |u sample - u| */
+ unsigned char v; /* centre value compared against the v sample */
+ unsigned char vt; /* exclusive upper bound on |v sample - v| */
+ unsigned char use; /* set to 1 in the initialisers here; never read in this file */
+} COLOR_SEG_ELEMENT;
+
+/*
+COLOR_SEG_ELEMENT segmentation[]=
+{
+ { 60,4,80,17,80,10, 1},
+ { 40,4,15,10,80,10, 1},
+};
+*/
+
+/* Single-entry segment table.  In the code visible here it is only mentioned
+ * in a commented-out line inside mainz(); the #if 0 block there declares its
+ * own local table of the same name. */
+COLOR_SEG_ELEMENT segmentation[] = {
+ { 79, 44, 92, 44, 237, 60, 1},
+};
+
+/*
+ * Classifies one YUV sample against a list of colour segments.
+ *
+ * y, u, v  sample values to classify.
+ * sgm      array of segment descriptors.
+ * c        number of entries of sgm to examine.
+ *
+ * Returns 255 when at least one segment matches, i.e. every plane differs
+ * from that segment's centre value by strictly less than the plane's
+ * threshold; returns 0 otherwise (including c <= 0).  The `use` field of the
+ * descriptors is not consulted.
+ */
+unsigned char pixel_mask(unsigned char y, unsigned char u, unsigned char v,
+                         COLOR_SEG_ELEMENT sgm[],
+                         int c) {
+  unsigned char result = 0;
+  int idx;
+
+  for (idx = 0; idx < c; idx++) {
+    const COLOR_SEG_ELEMENT *seg = &sgm[idx];
+    const int y_close = abs(y - seg->y) < seg->yt;
+    const int u_close = abs(u - seg->u) < seg->ut;
+    const int v_close = abs(v - seg->v) < seg->vt;
+
+    if (y_close && u_close && v_close) {
+      result = 255;
+    }
+  }
+
+  return result;
+}
+/*
+ * neighbors[i] lists the indices of the pixels that surround pixel i in a
+ * 16x16 block stored row-major (index = row * 16 + column).  Pixels on the
+ * block border have fewer than 8 neighbours; their unused slots hold i
+ * itself so that consumers can always read all 8 entries.
+ */
+int neighbors[256][8];
+
+/*
+ * Fills the neighbors[] table.  Always returns 0.
+ *
+ * The centre pixel is deliberately NOT stored as a neighbour: a 3x3 window
+ * contains 9 pixels including the centre, which does not fit into the 8
+ * slots of a row.  Storing it used to push the lower-right neighbour into
+ * slot 8 (out of bounds for the row), where it was subsequently lost.
+ * grow_ymask() starts from the pixel's own value, so the centre does not need
+ * to be listed.
+ */
+int makeneighbors(void) {
+  int i;
+
+  for (i = 0; i < 256; i++) {
+    const int r = i >> 4;
+    const int c = i & 15;
+    int ni = 0;
+    int dr, dc;
+
+    for (dr = -1; dr <= 1; dr++) {
+      for (dc = -1; dc <= 1; dc++) {
+        const int nr = r + dr;
+        const int nc = c + dc;
+
+        if (dr == 0 && dc == 0)
+          continue;  /* skip the centre pixel */
+        if (nr < 0 || nr > 15 || nc < 0 || nc > 15)
+          continue;  /* outside the 16x16 block */
+        neighbors[i][ni++] = (nr << 4) | nc;
+      }
+    }
+
+    /* Pad unused slots with the pixel itself (harmless when OR-ing masks). */
+    while (ni < 8)
+      neighbors[i][ni++] = i;
+  }
+  return 0;
+}
+/*
+ * Grows a 256-byte mask in place: each output byte is the bitwise OR of the
+ * input byte at the same position and the input bytes at the 8 positions
+ * listed for it in neighbors[].  All outputs are computed from the original
+ * mask contents (via a scratch copy) before anything is written back.
+ * The neighbors[] table must have been filled beforehand.
+ */
+void grow_ymask(unsigned char *ym) {
+  unsigned char grown[256];
+  int px;
+
+  for (px = 0; px < 256; px++) {
+    unsigned char acc = ym[px];
+    const int *nb = neighbors[px];
+    int k = 0;
+
+    while (k < 8) {
+      acc |= ym[nb[k]];
+      k++;
+    }
+    grown[px] = acc;
+  }
+
+  /* Write the result back only after every output has been computed. */
+  px = 0;
+  while (px < 256) {
+    ym[px] = grown[px];
+    px++;
+  }
+}
+
+/*
+ * Builds the segment mask for one 16x16 luma block and its 8x8 chroma blocks.
+ *
+ * y, u, v   top-left samples of the luma block and of the two chroma blocks.
+ * ym        output, 256 bytes, 16 bytes per row.
+ * uvm       output, 64 bytes, 8 bytes per row.
+ * yp, uvp   row strides of the luma and chroma inputs.
+ * sgm,count segment table handed to pixel_mask().
+ *
+ * Each chroma position covers a 2x2 group of luma samples.  All four luma
+ * samples are classified with that position's u/v values; the chroma mask
+ * entry is the classification of the group's top-left luma sample.  The
+ * finished luma mask is then grown with grow_ymask().
+ */
+void make_mb_mask(unsigned char *y, unsigned char *u, unsigned char *v,
+                  unsigned char *ym, unsigned char *uvm,
+                  int yp, int uvp,
+                  COLOR_SEG_ELEMENT sgm[],
+                  int count) {
+  int row, col;
+
+  memset(ym, 20, 256);
+
+  for (row = 0; row < 8; row++) {
+    unsigned char *y_top = y + 2 * row * yp;   /* upper luma row of the pair */
+    unsigned char *y_bot = y_top + yp;         /* lower luma row of the pair */
+    unsigned char *u_row = u + row * uvp;
+    unsigned char *v_row = v + row * uvp;
+    unsigned char *mask_row = ym + 32 * row;   /* two 16-byte mask rows */
+    unsigned char *uvm_row = uvm + 8 * row;
+
+    for (col = 0; col < 8; col++) {
+      const int x = col << 1;
+
+      uvm_row[col] = pixel_mask(y_top[x], u_row[col], v_row[col], sgm, count);
+      mask_row[x] = uvm_row[col];
+      mask_row[x + 1] =
+          pixel_mask(y_top[x + 1], u_row[col], v_row[col], sgm, count);
+      mask_row[x + 16] =
+          pixel_mask(y_bot[x], u_row[col], v_row[col], sgm, count);
+      mask_row[x + 17] =
+          pixel_mask(y_bot[x + 1], u_row[col], v_row[col], sgm, count);
+    }
+  }
+
+  grow_ymask(ym);
+}
+
+/*
+ * Sum of absolute differences over a 16x16 block, counting only positions
+ * whose mask byte is non-zero.
+ *
+ * src, p   first block and its row stride.
+ * dst, dp  second block and its row stride.
+ * ym       256-byte mask, 16 bytes per row.
+ *
+ * Returns the accumulated sum.
+ */
+int masked_sad(unsigned char *src, int p, unsigned char *dst, int dp,
+               unsigned char *ym) {
+  unsigned total = 0;
+  int row;
+
+  for (row = 0; row < 16; row++) {
+    const unsigned char *s = src + row * p;
+    const unsigned char *d = dst + row * dp;
+    const unsigned char *m = ym + row * 16;
+    int col = 0;
+
+    while (col < 16) {
+      if (m[col] != 0) {
+        total += abs(s[col] - d[col]);
+      }
+      col++;
+    }
+  }
+
+  return total;
+}
+
+/*
+ * Counts the positions at which two 256-byte masks hold different byte
+ * values.  Returns a number in the range 0..256.
+ */
+int compare_masks(unsigned char *sym, unsigned char *ym) {
+  unsigned mismatches = 0;
+  int pos;
+
+  /* Both masks are 16 rows of 16 contiguous bytes, so a flat walk suffices. */
+  for (pos = 0; pos < 16 * 16; pos++) {
+    if (sym[pos] != ym[pos]) {
+      mismatches++;
+    }
+  }
+
+  return mismatches;
+}
+
+/*
+ * Sum of absolute differences over a 16x16 block, counting only positions
+ * whose mask byte is zero (the complement of masked_sad()).
+ *
+ * src, p   first block and its row stride.
+ * dst, dp  second block and its row stride.
+ * ym       256-byte mask, 16 bytes per row.
+ *
+ * Returns the accumulated sum.
+ */
+int unmasked_sad(unsigned char *src, int p, unsigned char *dst, int dp,
+                 unsigned char *ym) {
+  unsigned total = 0;
+  int row;
+
+  for (row = 0; row < 16; row++) {
+    const unsigned char *s = src + row * p;
+    const unsigned char *d = dst + row * dp;
+    const unsigned char *m = ym + row * 16;
+    int col = 0;
+
+    while (col < 16) {
+      if (m[col] == 0) {
+        total += abs(s[col] - d[col]);
+      }
+      col++;
+    }
+  }
+
+  return total;
+}
+
+/*
+ * Exhaustive masked motion search for one 16x16 block (plain C version).
+ *
+ * y, u, v, yp, uvp      source block planes and their row strides.
+ * dy, du, dv, dyp, duvp planes to search in and their row strides.  The
+ *                       search visits every row/column displacement in
+ *                       [-32, 31], so the caller must provide readable
+ *                       samples around dy/du/dv for that whole range.
+ * sgm, count            segment table handed to make_mb_mask().
+ * mi, mj                out: row/column displacement chosen with masked_sad().
+ * ui, uj                out: row/column displacement chosen with unmasked_sad().
+ * wm                    out: 1 if the second strategy below won, 0 otherwise.
+ *
+ * Two strategies are evaluated and the one with the lower combined error
+ * (masked SAD + unmasked SAD) is reported:
+ *   1. pick the displacement with the lowest unmasked SAD, rebuilding the
+ *      mask at every candidate; then, with the mask of that winner fixed,
+ *      pick the displacement with the lowest masked SAD;
+ *   2. pick the displacement whose mask differs least from the source
+ *      block's mask; then, with that mask fixed, pick the displacement with
+ *      the lowest unmasked SAD.
+ *
+ * Always returns 0.
+ */
+int masked_motion_search(unsigned char *y, unsigned char *u, unsigned char *v,
+ int yp, int uvp,
+ unsigned char *dy, unsigned char *du, unsigned char *dv,
+ int dyp, int duvp,
+ COLOR_SEG_ELEMENT sgm[],
+ int count,
+ int *mi,
+ int *mj,
+ int *ui,
+ int *uj,
+ int *wm) {
+ int i, j;
+
+ unsigned char ym[256];
+ unsigned char uvm[64];
+ unsigned char dym[256];
+ unsigned char duvm[64];
+ unsigned int e = 0;
+ int beste = 256;
+ int bmi = -32, bmj = -32;
+ int bui = -32, buj = -32;
+ int beste1 = 256;
+ int bmi1 = -32, bmj1 = -32;
+ int bui1 = -32, buj1 = -32;
+ int obeste;
+
+ // first try finding best mask and then unmasked
+ // NOTE(review): beste is an int that is assigned 0xffffffff and compared
+ // with the unsigned e; the "e < beste" tests below rely on that comparison
+ // being carried out in unsigned arithmetic.
+ beste = 0xffffffff;
+
+ // find best unmasked mv
+ for (i = -32; i < 32; i++) {
+ unsigned char *dyz = i * dyp + dy;
+ unsigned char *duz = i / 2 * duvp + du;
+ unsigned char *dvz = i / 2 * duvp + dv;
+ for (j = -32; j < 32; j++) {
+ // 0,0 masked destination
+ make_mb_mask(dyz + j, duz + j / 2, dvz + j / 2, dym, duvm, dyp, duvp, sgm, count);
+
+ e = unmasked_sad(y, yp, dyz + j, dyp, dym);
+
+ if (e < beste) {
+ bui = i;
+ buj = j;
+ beste = e;
+ }
+ }
+ }
+ // bui=0;buj=0;
+ // best mv masked destination
+ make_mb_mask(dy + bui * dyp + buj, du + bui / 2 * duvp + buj / 2, dv + bui / 2 * duvp + buj / 2,
+ dym, duvm, dyp, duvp, sgm, count);
+
+ // keep the unmasked minimum; it is added to the masked minimum below
+ obeste = beste;
+ beste = 0xffffffff;
+
+ // find best masked
+ for (i = -32; i < 32; i++) {
+ unsigned char *dyz = i * dyp + dy;
+ for (j = -32; j < 32; j++) {
+ e = masked_sad(y, yp, dyz + j, dyp, dym);
+
+ if (e < beste) {
+ bmi = i;
+ bmj = j;
+ beste = e;
+ }
+ }
+ }
+ // remember the outcome of the first strategy
+ beste1 = beste + obeste;
+ bmi1 = bmi;
+ bmj1 = bmj;
+ bui1 = bui;
+ buj1 = buj;
+
+ // second strategy: match the mask shape first
+ beste = 0xffffffff;
+ // source mask
+ make_mb_mask(y, u, v, ym, uvm, yp, uvp, sgm, count);
+
+ // find best mask
+ for (i = -32; i < 32; i++) {
+ unsigned char *dyz = i * dyp + dy;
+ unsigned char *duz = i / 2 * duvp + du;
+ unsigned char *dvz = i / 2 * duvp + dv;
+ for (j = -32; j < 32; j++) {
+ // 0,0 masked destination
+ make_mb_mask(dyz + j, duz + j / 2, dvz + j / 2, dym, duvm, dyp, duvp, sgm, count);
+
+ e = compare_masks(ym, dym);
+
+ if (e < beste) {
+ bmi = i;
+ bmj = j;
+ beste = e;
+ }
+ }
+ }
+
+
+ // best mv masked destination
+ make_mb_mask(dy + bmi * dyp + bmj, du + bmi / 2 * duvp + bmj / 2, dv + bmi / 2 * duvp + bmj / 2,
+ dym, duvm, dyp, duvp, sgm, count);
+
+ // masked error at the displacement whose mask matched best
+ obeste = masked_sad(y, yp, dy + bmi * dyp + bmj, dyp, dym);
+
+ beste = 0xffffffff;
+
+ // find best unmasked mv
+ for (i = -32; i < 32; i++) {
+ unsigned char *dyz = i * dyp + dy;
+ for (j = -32; j < 32; j++) {
+ e = unmasked_sad(y, yp, dyz + j, dyp, dym);
+
+ if (e < beste) {
+ bui = i;
+ buj = j;
+ beste = e;
+ }
+ }
+ }
+ beste += obeste;
+
+
+ // report whichever strategy produced the lower combined error
+ if (beste < beste1) {
+ *mi = bmi;
+ *mj = bmj;
+ *ui = bui;
+ *uj = buj;
+ *wm = 1;
+ } else {
+ *mi = bmi1;
+ *mj = bmj1;
+ *ui = bui1;
+ *uj = buj1;
+ *wm = 0;
+
+ }
+ return 0;
+}
+
+/*
+ * Builds a 16x16 block by choosing, per position, between two inputs.
+ *
+ * src, p   block used where the mask byte is non-zero, and its row stride.
+ * dst, dp  block used where the mask byte is zero, and its row stride.
+ * ym       256-byte mask, 16 bytes per row.
+ * prd      output, 256 bytes, 16 bytes per row.
+ *
+ * Always returns 0.
+ */
+int predict(unsigned char *src, int p, unsigned char *dst, int dp,
+            unsigned char *ym, unsigned char *prd) {
+  int row;
+
+  for (row = 0; row < 16; row++) {
+    const unsigned char *from_src = src + row * p;
+    const unsigned char *from_dst = dst + row * dp;
+    const unsigned char *mask = ym + row * 16;
+    unsigned char *out = prd + row * 16;
+    int col;
+
+    for (col = 0; col < 16; col++) {
+      if (mask[col]) {
+        out[col] = from_src[col];
+      } else {
+        out[col] = from_dst[col];
+      }
+    }
+  }
+
+  return 0;
+}
+
+/*
+ * Exhaustive masked motion search for one 16x16 block, using the external
+ * vp8_makemask_sse3 / vp8_growmaskmb_sse3 / vp9_sad16x16_*_wmt routines in
+ * place of the C helpers used by masked_motion_search().
+ *
+ * y, u, v, yp, uvp      source block planes and their row strides.
+ * dy, du, dv, dyp, duvp planes to search in and their row strides.  The
+ *                       search visits every row/column displacement in
+ *                       [-32, 31], so the caller must provide readable
+ *                       samples around dy/du/dv for that whole range.
+ * sgm                   segment table; only sgm[0] is used.
+ * count                 accepted for signature parity with
+ *                       masked_motion_search(); not used.
+ * mi, mj                out: displacement chosen with the masked SAD.
+ * ui, uj                out: displacement chosen with the unmasked SAD.
+ * wm                    out: 1 if the second strategy below won, 0 otherwise.
+ *
+ * Two strategies are evaluated and the one with the lower combined error
+ * (masked SAD + unmasked SAD) is reported:
+ *   1. pick the displacement with the lowest unmasked SAD, rebuilding the
+ *      mask at every candidate; then, with the mask of that winner fixed,
+ *      pick the displacement with the lowest masked SAD;
+ *   2. pick the displacement whose mask differs least from the source
+ *      block's mask; then, with that mask fixed, pick the displacement with
+ *      the lowest unmasked SAD.
+ *
+ * Returns the combined error of the strategy that was selected.
+ */
+int fast_masked_motion_search(unsigned char *y, unsigned char *u, unsigned char *v,
+                              int yp, int uvp,
+                              unsigned char *dy, unsigned char *du, unsigned char *dv,
+                              int dyp, int duvp,
+                              COLOR_SEG_ELEMENT sgm[],
+                              int count,
+                              int *mi,
+                              int *mj,
+                              int *ui,
+                              int *uj,
+                              int *wm) {
+  int i, j;
+
+  unsigned char ym[256];    /* raw source mask */
+  unsigned char ym2[256];   /* grown source mask */
+  unsigned char dym2[256];  /* grown candidate mask */
+  unsigned char dym[256];   /* raw candidate mask */
+  unsigned int e = 0;
+  int beste = 256;
+  int bmi = -32, bmj = -32;
+  int bui = -32, buj = -32;
+  int beste1 = 256;
+  int bmi1 = -32, bmj1 = -32;
+  int bui1 = -32, buj1 = -32;
+  int obeste;
+
+  (void)count;
+
+  // first try finding best mask and then unmasked
+  // NOTE(review): beste is an int that is assigned 0xffffffff and compared
+  // with the unsigned e; the "e < beste" tests rely on that comparison being
+  // carried out in unsigned arithmetic.
+  beste = 0xffffffff;
+
+  // find best unmasked mv
+  for (i = -32; i < 32; i++) {
+    unsigned char *dyz = i * dyp + dy;
+    unsigned char *duz = i / 2 * duvp + du;
+    unsigned char *dvz = i / 2 * duvp + dv;
+    for (j = -32; j < 32; j++) {
+      // 0,0 masked destination
+      vp8_makemask_sse3(dyz + j, duz + j / 2, dvz + j / 2, dym, dyp, duvp,
+                        sgm[0].y, sgm[0].u, sgm[0].v,
+                        sgm[0].yt, sgm[0].ut, sgm[0].vt);
+
+      vp8_growmaskmb_sse3(dym, dym2);
+
+      e = vp9_sad16x16_unmasked_wmt(y, yp, dyz + j, dyp, dym2);
+
+      if (e < beste) {
+        bui = i;
+        buj = j;
+        beste = e;
+      }
+    }
+  }
+
+  // best mv masked destination
+  vp8_makemask_sse3(dy + bui * dyp + buj, du + bui / 2 * duvp + buj / 2,
+                    dv + bui / 2 * duvp + buj / 2,
+                    dym, dyp, duvp,
+                    sgm[0].y, sgm[0].u, sgm[0].v,
+                    sgm[0].yt, sgm[0].ut, sgm[0].vt);
+
+  vp8_growmaskmb_sse3(dym, dym2);
+
+  // keep the unmasked minimum; it is added to the masked minimum below
+  obeste = beste;
+  beste = 0xffffffff;
+
+  // find best masked
+  for (i = -32; i < 32; i++) {
+    unsigned char *dyz = i * dyp + dy;
+    for (j = -32; j < 32; j++) {
+      e = vp9_sad16x16_masked_wmt(y, yp, dyz + j, dyp, dym2);
+      if (e < beste) {
+        bmi = i;
+        bmj = j;
+        beste = e;
+      }
+    }
+  }
+
+  // remember the outcome of the first strategy
+  beste1 = beste + obeste;
+  bmi1 = bmi;
+  bmj1 = bmj;
+  bui1 = bui;
+  buj1 = buj;
+
+  // Second strategy: match the mask shape first.  The running minimum must
+  // start over here (as masked_motion_search() does); otherwise the mask
+  // difference counts would be compared against the masked SAD left over
+  // from the search above and the best mask match might never be recorded.
+  beste = 0xffffffff;
+
+  // source mask
+  vp8_makemask_sse3(y, u, v,
+                    ym, yp, uvp,
+                    sgm[0].y, sgm[0].u, sgm[0].v,
+                    sgm[0].yt, sgm[0].ut, sgm[0].vt);
+
+  vp8_growmaskmb_sse3(ym, ym2);
+
+  // find best mask
+  for (i = -32; i < 32; i++) {
+    unsigned char *dyz = i * dyp + dy;
+    unsigned char *duz = i / 2 * duvp + du;
+    unsigned char *dvz = i / 2 * duvp + dv;
+    for (j = -32; j < 32; j++) {
+      // 0,0 masked destination
+      vp8_makemask_sse3(dyz + j, duz + j / 2, dvz + j / 2, dym, dyp, duvp,
+                        sgm[0].y, sgm[0].u, sgm[0].v,
+                        sgm[0].yt, sgm[0].ut, sgm[0].vt);
+
+      vp8_growmaskmb_sse3(dym, dym2);
+
+      e = compare_masks(ym2, dym2);
+
+      if (e < beste) {
+        bmi = i;
+        bmj = j;
+        beste = e;
+      }
+    }
+  }
+
+  // mask at the displacement whose mask matched best
+  vp8_makemask_sse3(dy + bmi * dyp + bmj, du + bmi / 2 * duvp + bmj / 2,
+                    dv + bmi / 2 * duvp + bmj / 2,
+                    dym, dyp, duvp,
+                    sgm[0].y, sgm[0].u, sgm[0].v,
+                    sgm[0].yt, sgm[0].ut, sgm[0].vt);
+
+  vp8_growmaskmb_sse3(dym, dym2);
+
+  obeste = vp9_sad16x16_masked_wmt(y, yp, dy + bmi * dyp + bmj, dyp, dym2);
+
+  beste = 0xffffffff;
+
+  // find best unmasked mv
+  for (i = -32; i < 32; i++) {
+    unsigned char *dyz = i * dyp + dy;
+    for (j = -32; j < 32; j++) {
+      e = vp9_sad16x16_unmasked_wmt(y, yp, dyz + j, dyp, dym2);
+
+      if (e < beste) {
+        bui = i;
+        buj = j;
+        beste = e;
+      }
+    }
+  }
+  beste += obeste;
+
+  // report whichever strategy produced the lower combined error
+  if (beste < beste1) {
+    *mi = bmi;
+    *mj = bmj;
+    *ui = bui;
+    *uj = buj;
+    *wm = 1;
+  } else {
+    *mi = bmi1;
+    *mj = bmj1;
+    *ui = bui1;
+    *uj = buj1;
+    *wm = 0;
+    beste = beste1;
+  }
+  return beste;
+}
+
+/*
+ * Builds the prediction of one block (luma plus both chroma planes) from two
+ * displaced positions in the same planes, blended through a segment mask.
+ *
+ * ym, um, vm   plane pointers at the block's zero-displacement position.
+ * ymp, uvmp    row strides of those planes.
+ * yp, up, vp   output pointers for the three planes.
+ * ypp, uvpp    row strides of the outputs.
+ * sgm          segment table; only sgm[0] is used.
+ * count        not used.
+ * mi, mj       row/column displacement of the "masked" source position.
+ * ui, uj       row/column displacement of the "unmasked" source position.
+ * wm           non-zero: derive the mask from the (mi, mj) position;
+ *              zero: derive it from the (ui, uj) position.
+ *
+ * Chroma displacements are the luma displacements divided by 2 with C
+ * integer division.  Always returns 0.
+ */
+int predict_all(unsigned char *ym, unsigned char *um, unsigned char *vm,
+                int ymp, int uvmp,
+                unsigned char *yp, unsigned char *up, unsigned char *vp,
+                int ypp, int uvpp,
+                COLOR_SEG_ELEMENT sgm[],
+                int count,
+                int mi,
+                int mj,
+                int ui,
+                int uj,
+                int wm) {
+  unsigned char dym[256];   /* raw mask */
+  unsigned char dym2[256];  /* grown mask */
+  unsigned char duvm[64];   /* chroma mask */
+
+  /* Sources at the "unmasked" displacement. */
+  unsigned char *yu = ym + (ui * ymp + uj);
+  unsigned char *uu = um + (ui / 2 * uvmp + uj / 2);
+  unsigned char *vu = vm + (ui / 2 * uvmp + uj / 2);
+
+  /* Sources at the "masked" displacement. */
+  unsigned char *y_masked = ym + (mi * ymp + mj);
+  unsigned char *u_masked = um + (mi / 2 * uvmp + mj / 2);
+  unsigned char *v_masked = vm + (mi / 2 * uvmp + mj / 2);
+
+  // best mv masked destination
+  vp8_makemask_sse3(wm ? y_masked : yu,
+                    wm ? u_masked : uu,
+                    wm ? v_masked : vu,
+                    dym, ymp, uvmp,
+                    sgm[0].y, sgm[0].u, sgm[0].v,
+                    sgm[0].yt, sgm[0].ut, sgm[0].vt);
+
+  vp8_growmaskmb_sse3(dym, dym2);
+  vp8_masked_predictor_wmt(y_masked, yu, ymp, yp, ypp, dym2);
+  vp8_uv_from_y_mask(dym2, duvm);
+  vp8_masked_predictor_uv_wmt(u_masked, uu, uvmp, up, uvpp, duvm);
+  vp8_masked_predictor_uv_wmt(v_masked, vu, uvmp, vp, uvpp, duvm);
+
+  return 0;
+}
+
+/* Static frame buffers of 1280 * 720 * 3 / 2 bytes each.  mainz() handles a
+ * frame as w * h * 3 / 2 bytes, so this is the largest frame that fits. */
+unsigned char f0p[1280 * 720 * 3 / 2]; /* one of the two input frames in mainz() */
+unsigned char f1p[1280 * 720 * 3 / 2]; /* the other input frame in mainz() */
+unsigned char prd[1280 * 720 * 3 / 2]; /* prediction frame written out by mainz() */
+unsigned char msk[1280 * 720 * 3 / 2]; /* not referenced by the code visible in this file */
+
+
+/*
+ * Test driver: predicts every frame of a raw input file from the frame that
+ * precedes it and writes the predictions to an output file.
+ *
+ * argv[1]  input file, read in binary mode.
+ * argv[2]  output file, written in binary mode.
+ * argv[3]  frame width  w.
+ * argv[4]  frame height h.
+ *
+ * A frame is w * h * 3 / 2 bytes: a w x h plane followed by two planes of
+ * row stride w / 2 that start at byte offsets w * h and w * h * 5 / 4.
+ * Frames must fit into the static buffers f0p / f1p / prd.
+ *
+ * For each 16x16 block whose top-left corner (r, c) satisfies
+ * 32 <= r < h - 32 and 32 <= c < w - 32 (steps of 16): if the block is
+ * identical to the co-located block of the previous frame, zero displacement
+ * is used; otherwise fast_masked_motion_search() is run with five candidate
+ * colour segments (the segment chosen for the previous searched block plus
+ * four sampled from the previous frame) and the candidate with the lowest
+ * error wins.  predict_all() then writes the block into prd.  Samples
+ * outside the visited blocks keep the value 128.
+ *
+ * NOTE(review): the search displaces blocks by up to 32 rows/columns while
+ * the loops only keep a 32-sample margin at the bottom/right, so the search
+ * appears able to read past the end of a plane -- confirm the intended
+ * margin.  Loop bounds are left as they were.
+ *
+ * Returns 0 on success (including an input that holds no complete frame),
+ * 1 on bad arguments or I/O failure.
+ */
+int mainz(int argc, char *argv[]) {
+  FILE *f = NULL;
+  FILE *g = NULL;
+  int w, h;
+  int y_stride, uv_stride;
+  int r, c;
+  int rc = 1;
+  size_t frame_bytes;
+  unsigned char *f0 = f0p, *f1 = f1p, *t;
+  COLOR_SEG_ELEMENT last = { 20, 20, 20, 20, 230, 20, 1};
+  /* Start from a defined segment so predict_all() never sees an
+   * uninitialised one when the first blocks are identical. */
+  COLOR_SEG_ELEMENT best = last;
+
+  if (argc < 5) {
+    fprintf(stderr, "usage: %s <input.yuv> <output.yuv> <width> <height>\n",
+            argc > 0 ? argv[0] : "mainz");
+    return 1;
+  }
+
+  w = atoi(argv[3]);
+  h = atoi(argv[4]);
+  /* Reject sizes whose w * h * 3 / 2 bytes would not fit the static buffers
+   * (written as a division so the check itself cannot overflow). */
+  if (w <= 0 || h <= 0 ||
+      (size_t)w > (sizeof(f0p) * 2 / 3) / (size_t)h) {
+    fprintf(stderr, "invalid or unsupported frame size %sx%s\n",
+            argv[3], argv[4]);
+    return 1;
+  }
+  y_stride = w;
+  uv_stride = w / 2;
+  frame_bytes = (size_t)(w * h * 3 / 2);
+
+  f = fopen(argv[1], "rb");
+  if (f == NULL) {
+    perror(argv[1]);
+    return 1;
+  }
+  g = fopen(argv[2], "wb");
+  if (g == NULL) {
+    perror(argv[2]);
+    fclose(f);
+    return 1;
+  }
+
+  makeneighbors();
+
+  memset(prd, 128, frame_bytes);
+
+  if (fread(f0, frame_bytes, 1, f) != 1) {
+    /* No complete first frame: nothing to predict. */
+    rc = 0;
+    goto done;
+  }
+
+  /* Loop on a successful read rather than on feof(): feof() only becomes
+   * true after a read has already failed, which would otherwise produce one
+   * extra output frame computed from stale data. */
+  while (fread(f1, frame_bytes, 1, f) == 1) {
+    for (r = 32; r < h - 32; r += 16) {
+      const int uv_off = (r / 2) * uv_stride;
+      /* s = current frame, d = previous frame, p = prediction output. */
+      unsigned char *ys = f1 + r * y_stride;
+      unsigned char *yd = f0 + r * y_stride;
+      unsigned char *y_prd = prd + r * y_stride;
+      unsigned char *us = f1 + w * h + uv_off;
+      unsigned char *ud = f0 + w * h + uv_off;
+      unsigned char *u_prd = prd + w * h + uv_off;
+      unsigned char *vs = f1 + w * h * 5 / 4 + uv_off;
+      unsigned char *vd = f0 + w * h * 5 / 4 + uv_off;
+      unsigned char *v_prd = prd + w * h * 5 / 4 + uv_off;
+
+      for (c = 32; c < w - 32; c += 16) {
+        int mi, mj, ui, uj, wm;
+        int bmi = 0, bmj = 0, bui = 0, buj = 0, bwm = 0;
+
+        if (vp9_sad16x16_sse3(ys + c, y_stride, yd + c, y_stride, 0xffff) != 0) {
+          /* Luma (row, col) and chroma (row, col) offsets of the four
+           * samples used as candidate segment centres. */
+          static const int seed_y_row[4] = { 1, 1, 14, 14 };
+          static const int seed_y_col[4] = { 1, 14, 1, 14 };
+          static const int seed_uv_row[4] = { 1, 1, 7, 7 };
+          static const int seed_uv_col[4] = { 0, 7, 0, 7 };
+          COLOR_SEG_ELEMENT cs[5];
+          int j;
+          unsigned int beste = 0xfffffff;
+          unsigned int bestj = 0;
+
+          // try color from last mb segmentation
+          cs[0] = last;
+
+          // try color segs from 4 pixels in mb recon as segmentation
+          for (j = 0; j < 4; j++) {
+            const int uv_idx = c / 2 + uv_stride * seed_uv_row[j] + seed_uv_col[j];
+
+            cs[j + 1].y = yd[c + w * seed_y_row[j] + seed_y_col[j]];
+            cs[j + 1].u = ud[uv_idx];
+            cs[j + 1].v = vd[uv_idx];
+            cs[j + 1].yt = cs[j + 1].ut = cs[j + 1].vt = 20;
+            cs[j + 1].use = 1;
+          }
+
+          for (j = 0; j < 5; j++) {
+            const unsigned int e = (unsigned int)fast_masked_motion_search(
+                ys + c, us + c / 2, vs + c / 2, y_stride, uv_stride,
+                yd + c, ud + c / 2, vd + c / 2, y_stride, uv_stride,
+                &cs[j], 1, &mi, &mj, &ui, &uj, &wm);
+
+            if (e < beste) {
+              bmi = mi;
+              bmj = mj;
+              bui = ui;
+              buj = uj;
+              bwm = wm;
+              bestj = j;
+              beste = e;
+            }
+          }
+          best = cs[bestj];
+          last = best;
+        }
+
+        predict_all(yd + c, ud + c / 2, vd + c / 2, w, uv_stride,
+                    y_prd + c, u_prd + c / 2, v_prd + c / 2, w, uv_stride,
+                    &best, 1, bmi, bmj, bui, buj, bwm);
+      }
+    }
+
+    if (fwrite(prd, frame_bytes, 1, g) != 1) {
+      perror(argv[2]);
+      goto done;
+    }
+
+    /* The frame just read becomes the reference for the next one. */
+    t = f0;
+    f0 = f1;
+    f1 = t;
+  }
+  rc = 0;
+
+done:
+  fclose(f);
+  if (fclose(g) != 0 && rc == 0) {
+    perror(argv[2]);
+    rc = 1;
+  }
+  return rc;
+}