about | summary | refs | log | tree | commit | diff
path: root/third_party/libaom/source/libaom/aom_dsp
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/libaom/source/libaom/aom_dsp')
-rw-r--r-- third_party/libaom/source/libaom/aom_dsp/arm/intrapred_neon.c | 2
-rw-r--r-- third_party/libaom/source/libaom/aom_dsp/butteraugli.c | 78
-rw-r--r-- third_party/libaom/source/libaom/aom_dsp/butteraugli.h | 4
-rw-r--r-- third_party/libaom/source/libaom/aom_dsp/fastssim.c | 1
-rw-r--r-- third_party/libaom/source/libaom/aom_dsp/grain_table.c | 10
-rw-r--r-- third_party/libaom/source/libaom/aom_dsp/noise_model.c | 25
-rw-r--r-- third_party/libaom/source/libaom/aom_dsp/psnrhvs.c | 7
-rw-r--r-- third_party/libaom/source/libaom/aom_dsp/ssim.c | 140
-rw-r--r-- third_party/libaom/source/libaom/aom_dsp/ssim.h | 26
-rw-r--r-- third_party/libaom/source/libaom/aom_dsp/vmaf.c | 247
-rw-r--r-- third_party/libaom/source/libaom/aom_dsp/vmaf.h | 45
-rw-r--r-- third_party/libaom/source/libaom/aom_dsp/x86/highbd_sad_sse2.asm | 45
-rw-r--r-- third_party/libaom/source/libaom/aom_dsp/x86/variance_impl_avx2.c | 16
13 files changed, 325 insertions, 321 deletions
diff --git a/third_party/libaom/source/libaom/aom_dsp/arm/intrapred_neon.c b/third_party/libaom/source/libaom/aom_dsp/arm/intrapred_neon.c
index 6d41708ee0..945e7e48ee 100644
--- a/third_party/libaom/source/libaom/aom_dsp/arm/intrapred_neon.c
+++ b/third_party/libaom/source/libaom/aom_dsp/arm/intrapred_neon.c
@@ -11,8 +11,6 @@
#include <arm_neon.h>
-#include "common/tools_common.h"
-
#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
diff --git a/third_party/libaom/source/libaom/aom_dsp/butteraugli.c b/third_party/libaom/source/libaom/aom_dsp/butteraugli.c
index 7ce2324c06..038efcd313 100644
--- a/third_party/libaom/source/libaom/aom_dsp/butteraugli.c
+++ b/third_party/libaom/source/libaom/aom_dsp/butteraugli.c
@@ -18,37 +18,71 @@
int aom_calc_butteraugli(const YV12_BUFFER_CONFIG *source,
const YV12_BUFFER_CONFIG *distorted, int bit_depth,
- float *dist_map) {
+ aom_matrix_coefficients_t matrix_coefficients,
+ aom_color_range_t color_range, float *dist_map) {
(void)bit_depth;
assert(bit_depth == 8);
- assert(source->y_width == source->uv_width * 2);
const int width = source->y_crop_width;
const int height = source->y_crop_height;
+ const int ss_x = source->subsampling_x;
+ const int ss_y = source->subsampling_y;
- size_t buffer_size = width * height * 3;
- uint8_t *src_rgb = (uint8_t *)aom_malloc(buffer_size);
- uint8_t *distorted_rgb = (uint8_t *)aom_malloc(buffer_size);
- if (!src_rgb || !distorted_rgb) {
- aom_free(src_rgb);
- aom_free(distorted_rgb);
+ const struct YuvConstants *yuv_constants;
+ if (matrix_coefficients == AOM_CICP_MC_BT_709) {
+ if (color_range == AOM_CR_FULL_RANGE) return 0;
+ yuv_constants = &kYuvH709Constants;
+ } else {
+ yuv_constants = color_range == AOM_CR_FULL_RANGE ? &kYuvJPEGConstants
+ : &kYuvI601Constants;
+ }
+
+ const size_t stride_argb = width * 4;
+ const size_t buffer_size = height * stride_argb;
+ uint8_t *src_argb = (uint8_t *)aom_malloc(buffer_size);
+ uint8_t *distorted_argb = (uint8_t *)aom_malloc(buffer_size);
+ if (!src_argb || !distorted_argb) {
+ aom_free(src_argb);
+ aom_free(distorted_argb);
return 0;
}
- I420ToRGB24Matrix(source->y_buffer, source->y_stride, source->u_buffer,
- source->uv_stride, source->v_buffer, source->uv_stride,
- src_rgb, width * 3, &kYuvH709Constants, width, height);
- I420ToRGB24Matrix(distorted->y_buffer, distorted->y_stride,
- distorted->u_buffer, distorted->uv_stride,
- distorted->v_buffer, distorted->uv_stride, distorted_rgb,
- width * 3, &kYuvH709Constants, width, height);
+ if (ss_x == 1 && ss_y == 1) {
+ I420ToARGBMatrix(source->y_buffer, source->y_stride, source->u_buffer,
+ source->uv_stride, source->v_buffer, source->uv_stride,
+ src_argb, stride_argb, yuv_constants, width, height);
+ I420ToARGBMatrix(distorted->y_buffer, distorted->y_stride,
+ distorted->u_buffer, distorted->uv_stride,
+ distorted->v_buffer, distorted->uv_stride, distorted_argb,
+ stride_argb, yuv_constants, width, height);
+ } else if (ss_x == 1 && ss_y == 0) {
+ I422ToARGBMatrix(source->y_buffer, source->y_stride, source->u_buffer,
+ source->uv_stride, source->v_buffer, source->uv_stride,
+ src_argb, stride_argb, yuv_constants, width, height);
+ I422ToARGBMatrix(distorted->y_buffer, distorted->y_stride,
+ distorted->u_buffer, distorted->uv_stride,
+ distorted->v_buffer, distorted->uv_stride, distorted_argb,
+ stride_argb, yuv_constants, width, height);
+ } else if (ss_x == 0 && ss_y == 0) {
+ I444ToARGBMatrix(source->y_buffer, source->y_stride, source->u_buffer,
+ source->uv_stride, source->v_buffer, source->uv_stride,
+ src_argb, stride_argb, yuv_constants, width, height);
+ I444ToARGBMatrix(distorted->y_buffer, distorted->y_stride,
+ distorted->u_buffer, distorted->uv_stride,
+ distorted->v_buffer, distorted->uv_stride, distorted_argb,
+ stride_argb, yuv_constants, width, height);
+ } else {
+ aom_free(src_argb);
+ aom_free(distorted_argb);
+ return 0;
+ }
- JxlPixelFormat pixel_format = { 3, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0 };
+ JxlPixelFormat pixel_format = { 4, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0 };
JxlButteraugliApi *api = JxlButteraugliApiCreate(NULL);
JxlButteraugliApiSetHFAsymmetry(api, 0.8f);
JxlButteraugliResult *result = JxlButteraugliCompute(
- api, width, height, &pixel_format, src_rgb, buffer_size, &pixel_format,
- distorted_rgb, buffer_size);
+ api, width, height, &pixel_format, src_argb, buffer_size, &pixel_format,
+ distorted_argb, buffer_size);
const float *distmap = NULL;
uint32_t row_stride;
@@ -56,8 +90,8 @@ int aom_calc_butteraugli(const YV12_BUFFER_CONFIG *source,
if (distmap == NULL) {
JxlButteraugliApiDestroy(api);
JxlButteraugliResultDestroy(result);
- aom_free(src_rgb);
- aom_free(distorted_rgb);
+ aom_free(src_argb);
+ aom_free(distorted_argb);
return 0;
}
@@ -69,7 +103,7 @@ int aom_calc_butteraugli(const YV12_BUFFER_CONFIG *source,
JxlButteraugliApiDestroy(api);
JxlButteraugliResultDestroy(result);
- aom_free(src_rgb);
- aom_free(distorted_rgb);
+ aom_free(src_argb);
+ aom_free(distorted_argb);
return 1;
}
diff --git a/third_party/libaom/source/libaom/aom_dsp/butteraugli.h b/third_party/libaom/source/libaom/aom_dsp/butteraugli.h
index 06402aa3e4..5304092ccb 100644
--- a/third_party/libaom/source/libaom/aom_dsp/butteraugli.h
+++ b/third_party/libaom/source/libaom/aom_dsp/butteraugli.h
@@ -14,8 +14,10 @@
#include "aom_scale/yv12config.h"
+// Returns a boolean that indicates success/failure.
int aom_calc_butteraugli(const YV12_BUFFER_CONFIG *source,
const YV12_BUFFER_CONFIG *distorted, int bit_depth,
- float *dist_map);
+ aom_matrix_coefficients_t matrix_coefficients,
+ aom_color_range_t color_range, float *dist_map);
#endif // AOM_AOM_DSP_BUTTERAUGLI_H_
diff --git a/third_party/libaom/source/libaom/aom_dsp/fastssim.c b/third_party/libaom/source/libaom/aom_dsp/fastssim.c
index 3804519b31..89712c5f40 100644
--- a/third_party/libaom/source/libaom/aom_dsp/fastssim.c
+++ b/third_party/libaom/source/libaom/aom_dsp/fastssim.c
@@ -31,6 +31,7 @@ typedef struct fs_ctx fs_ctx;
#define SSIM_C1_12 (4095 * 4095 * 0.01 * 0.01)
#define SSIM_C2_10 (1023 * 1023 * 0.03 * 0.03)
#define SSIM_C2_12 (4095 * 4095 * 0.03 * 0.03)
+#define MAX_SSIM_DB 100.0
#define FS_MINI(_a, _b) ((_a) < (_b) ? (_a) : (_b))
#define FS_MAXI(_a, _b) ((_a) > (_b) ? (_a) : (_b))
diff --git a/third_party/libaom/source/libaom/aom_dsp/grain_table.c b/third_party/libaom/source/libaom/aom_dsp/grain_table.c
index e03f04d5da..b22752abd9 100644
--- a/third_party/libaom/source/libaom/aom_dsp/grain_table.c
+++ b/third_party/libaom/source/libaom/aom_dsp/grain_table.c
@@ -202,7 +202,7 @@ int aom_film_grain_table_lookup(aom_film_grain_table_t *t, int64_t time_stamp,
int64_t end_time, int erase,
aom_film_grain_t *grain) {
aom_film_grain_table_entry_t *entry = t->head;
- aom_film_grain_table_entry_t *prev_entry = 0;
+ aom_film_grain_table_entry_t *prev_entry = NULL;
uint16_t random_seed = grain ? grain->random_seed : 0;
if (grain) memset(grain, 0, sizeof(*grain));
@@ -241,10 +241,10 @@ int aom_film_grain_table_lookup(aom_film_grain_table_t *t, int64_t time_stamp,
entry->end_time = time_stamp;
if (t->tail == entry) t->tail = new_entry;
}
- // If segments aren't aligned, delete from the beggining of subsequent
+ // If segments aren't aligned, delete from the beginning of subsequent
// segments
if (end_time > entry_end_time) {
- aom_film_grain_table_lookup(t, entry->end_time, end_time, 1, 0);
+ aom_film_grain_table_lookup(t, entry_end_time, end_time, 1, 0);
}
return 1;
}
@@ -275,12 +275,12 @@ aom_codec_err_t aom_film_grain_table_read(
return error_info->error_code;
}
- aom_film_grain_table_entry_t *prev_entry = 0;
+ aom_film_grain_table_entry_t *prev_entry = NULL;
while (!feof(file)) {
aom_film_grain_table_entry_t *entry = aom_malloc(sizeof(*entry));
memset(entry, 0, sizeof(*entry));
grain_table_entry_read(file, error_info, entry);
- entry->next = 0;
+ entry->next = NULL;
if (prev_entry) prev_entry->next = entry;
if (!t->head) t->head = entry;
diff --git a/third_party/libaom/source/libaom/aom_dsp/noise_model.c b/third_party/libaom/source/libaom/aom_dsp/noise_model.c
index f56fdd5860..19c660e911 100644
--- a/third_party/libaom/source/libaom/aom_dsp/noise_model.c
+++ b/third_party/libaom/source/libaom/aom_dsp/noise_model.c
@@ -214,6 +214,7 @@ static void set_chroma_coefficient_fallback_soln(aom_equation_system_t *eqns) {
int aom_noise_strength_lut_init(aom_noise_strength_lut_t *lut, int num_points) {
if (!lut) return 0;
+ if (num_points <= 0) return 0;
lut->num_points = 0;
lut->points = (double(*)[2])aom_malloc(num_points * sizeof(*lut->points));
if (!lut->points) return 0;
@@ -1152,12 +1153,24 @@ int aom_noise_model_get_grain_parameters(aom_noise_model_t *const noise_model,
// Convert the scaling functions to 8 bit values
aom_noise_strength_lut_t scaling_points[3];
- aom_noise_strength_solver_fit_piecewise(
- &noise_model->combined_state[0].strength_solver, 14, scaling_points + 0);
- aom_noise_strength_solver_fit_piecewise(
- &noise_model->combined_state[1].strength_solver, 10, scaling_points + 1);
- aom_noise_strength_solver_fit_piecewise(
- &noise_model->combined_state[2].strength_solver, 10, scaling_points + 2);
+ if (!aom_noise_strength_solver_fit_piecewise(
+ &noise_model->combined_state[0].strength_solver, 14,
+ scaling_points + 0)) {
+ return 0;
+ }
+ if (!aom_noise_strength_solver_fit_piecewise(
+ &noise_model->combined_state[1].strength_solver, 10,
+ scaling_points + 1)) {
+ aom_noise_strength_lut_free(scaling_points + 0);
+ return 0;
+ }
+ if (!aom_noise_strength_solver_fit_piecewise(
+ &noise_model->combined_state[2].strength_solver, 10,
+ scaling_points + 2)) {
+ aom_noise_strength_lut_free(scaling_points + 0);
+ aom_noise_strength_lut_free(scaling_points + 1);
+ return 0;
+ }
// Both the domain and the range of the scaling functions in the film_grain
// are normalized to 8-bit (e.g., they are implicitly scaled during grain
diff --git a/third_party/libaom/source/libaom/aom_dsp/psnrhvs.c b/third_party/libaom/source/libaom/aom_dsp/psnrhvs.c
index 69a1d99bf2..25f075aa2f 100644
--- a/third_party/libaom/source/libaom/aom_dsp/psnrhvs.c
+++ b/third_party/libaom/source/libaom/aom_dsp/psnrhvs.c
@@ -34,6 +34,7 @@ static void od_bin_fdct8x8(tran_low_t *y, int ystride, const int16_t *x,
*(y + ystride * i + j) = (*(y + ystride * i + j) + 4) >> 3;
}
+#if CONFIG_AV1_HIGHBITDEPTH
static void hbd_od_bin_fdct8x8(tran_low_t *y, int ystride, const int16_t *x,
int xstride) {
int i, j;
@@ -43,6 +44,7 @@ static void hbd_od_bin_fdct8x8(tran_low_t *y, int ystride, const int16_t *x,
for (j = 0; j < 8; j++)
*(y + ystride * i + j) = (*(y + ystride * i + j) + 4) >> 3;
}
+#endif // CONFIG_AV1_HIGHBITDEPTH
/* Normalized inverse quantization matrix for 8x8 DCT at the point of
* transparency. This is not the JPEG based matrix from the paper,
@@ -210,6 +212,7 @@ static double calc_psnrhvs(const unsigned char *src, int _systride,
}
}
s_gvar = 1.f / (36 - n + 1) * s_gmean / 36.f;
+#if CONFIG_AV1_HIGHBITDEPTH
if (!buf_is_hbd) {
od_bin_fdct8x8(dct_s_coef, 8, dct_s, 8);
od_bin_fdct8x8(dct_d_coef, 8, dct_d, 8);
@@ -217,6 +220,10 @@ static double calc_psnrhvs(const unsigned char *src, int _systride,
hbd_od_bin_fdct8x8(dct_s_coef, 8, dct_s, 8);
hbd_od_bin_fdct8x8(dct_d_coef, 8, dct_d, 8);
}
+#else
+ od_bin_fdct8x8(dct_s_coef, 8, dct_s, 8);
+ od_bin_fdct8x8(dct_d_coef, 8, dct_d, 8);
+#endif // CONFIG_AV1_HIGHBITDEPTH
for (i = 0; i < 8; i++)
for (j = (i == 0); j < 8; j++)
s_mask += dct_s_coef[i * 8 + j] * dct_s_coef[i * 8 + j] * mask[i][j];
diff --git a/third_party/libaom/source/libaom/aom_dsp/ssim.c b/third_party/libaom/source/libaom/aom_dsp/ssim.c
index 357da99ae4..c5334fd2c5 100644
--- a/third_party/libaom/source/libaom/aom_dsp/ssim.c
+++ b/third_party/libaom/source/libaom/aom_dsp/ssim.c
@@ -18,6 +18,7 @@
#include "aom_ports/mem.h"
#include "aom_ports/system_state.h"
+#if CONFIG_INTERNAL_STATS
void aom_ssim_parms_16x16_c(const uint8_t *s, int sp, const uint8_t *r, int rp,
uint32_t *sum_s, uint32_t *sum_r,
uint32_t *sum_sq_s, uint32_t *sum_sq_r,
@@ -33,6 +34,7 @@ void aom_ssim_parms_16x16_c(const uint8_t *s, int sp, const uint8_t *r, int rp,
}
}
}
+#endif // CONFIG_INTERNAL_STATS
void aom_ssim_parms_8x8_c(const uint8_t *s, int sp, const uint8_t *r, int rp,
uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s,
@@ -49,24 +51,6 @@ void aom_ssim_parms_8x8_c(const uint8_t *s, int sp, const uint8_t *r, int rp,
}
}
-#if CONFIG_AV1_HIGHBITDEPTH
-void aom_highbd_ssim_parms_8x8_c(const uint16_t *s, int sp, const uint16_t *r,
- int rp, uint32_t *sum_s, uint32_t *sum_r,
- uint32_t *sum_sq_s, uint32_t *sum_sq_r,
- uint32_t *sum_sxr) {
- int i, j;
- for (i = 0; i < 8; i++, s += sp, r += rp) {
- for (j = 0; j < 8; j++) {
- *sum_s += s[j];
- *sum_r += r[j];
- *sum_sq_s += s[j] * s[j];
- *sum_sq_r += r[j] * r[j];
- *sum_sxr += s[j] * r[j];
- }
- }
-}
-#endif
-
static const int64_t cc1 = 26634; // (64^2*(.01*255)^2
static const int64_t cc2 = 239708; // (64^2*(.03*255)^2
static const int64_t cc1_10 = 428658; // (64^2*(.01*1023)^2
@@ -78,7 +62,7 @@ static double similarity(uint32_t sum_s, uint32_t sum_r, uint32_t sum_sq_s,
uint32_t sum_sq_r, uint32_t sum_sxr, int count,
uint32_t bd) {
double ssim_n, ssim_d;
- int64_t c1, c2;
+ int64_t c1 = 0, c2 = 0;
if (bd == 8) {
// scale the constants by number of pixels
c1 = (cc1 * count * count) >> 12;
@@ -90,8 +74,9 @@ static double similarity(uint32_t sum_s, uint32_t sum_r, uint32_t sum_sq_s,
c1 = (cc1_12 * count * count) >> 12;
c2 = (cc2_12 * count * count) >> 12;
} else {
- c1 = c2 = 0;
assert(0);
+ // Return similarity as zero for unsupported bit-depth values.
+ return 0;
}
ssim_n = (2.0 * sum_s * sum_r + c1) *
@@ -111,21 +96,11 @@ static double ssim_8x8(const uint8_t *s, int sp, const uint8_t *r, int rp) {
return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64, 8);
}
-static double highbd_ssim_8x8(const uint16_t *s, int sp, const uint16_t *r,
- int rp, uint32_t bd, uint32_t shift) {
- uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
- aom_highbd_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,
- &sum_sxr);
- return similarity(sum_s >> shift, sum_r >> shift, sum_sq_s >> (2 * shift),
- sum_sq_r >> (2 * shift), sum_sxr >> (2 * shift), 64, bd);
-}
-
// We are using a 8x8 moving window with starting location of each 8x8 window
// on the 4x4 pixel grid. Such arrangement allows the windows to overlap
// block boundaries to penalize blocking artifacts.
-static double aom_ssim2(const uint8_t *img1, const uint8_t *img2,
- int stride_img1, int stride_img2, int width,
- int height) {
+double aom_ssim2(const uint8_t *img1, const uint8_t *img2, int stride_img1,
+ int stride_img2, int width, int height) {
int i, j;
int samples = 0;
double ssim_total = 0;
@@ -143,31 +118,10 @@ static double aom_ssim2(const uint8_t *img1, const uint8_t *img2,
return ssim_total;
}
-static double aom_highbd_ssim2(const uint8_t *img1, const uint8_t *img2,
- int stride_img1, int stride_img2, int width,
- int height, uint32_t bd, uint32_t shift) {
- int i, j;
- int samples = 0;
- double ssim_total = 0;
-
- // sample point start with each 4x4 location
- for (i = 0; i <= height - 8;
- i += 4, img1 += stride_img1 * 4, img2 += stride_img2 * 4) {
- for (j = 0; j <= width - 8; j += 4) {
- double v = highbd_ssim_8x8(CONVERT_TO_SHORTPTR(img1 + j), stride_img1,
- CONVERT_TO_SHORTPTR(img2 + j), stride_img2, bd,
- shift);
- ssim_total += v;
- samples++;
- }
- }
- ssim_total /= samples;
- return ssim_total;
-}
-
-void aom_calc_ssim(const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *dest, double *weight,
- double *fast_ssim) {
+#if CONFIG_INTERNAL_STATS
+void aom_lowbd_calc_ssim(const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *dest, double *weight,
+ double *fast_ssim) {
double abc[3];
for (int i = 0; i < 3; ++i) {
const int is_uv = i > 0;
@@ -421,7 +375,57 @@ double aom_get_ssim_metrics(uint8_t *img1, int img1_pitch, uint8_t *img2,
m->dssim = dssim_total;
return inconsistency_total;
}
+#endif // CONFIG_INTERNAL_STATS
+#if CONFIG_AV1_HIGHBITDEPTH
+void aom_highbd_ssim_parms_8x8_c(const uint16_t *s, int sp, const uint16_t *r,
+ int rp, uint32_t *sum_s, uint32_t *sum_r,
+ uint32_t *sum_sq_s, uint32_t *sum_sq_r,
+ uint32_t *sum_sxr) {
+ int i, j;
+ for (i = 0; i < 8; i++, s += sp, r += rp) {
+ for (j = 0; j < 8; j++) {
+ *sum_s += s[j];
+ *sum_r += r[j];
+ *sum_sq_s += s[j] * s[j];
+ *sum_sq_r += r[j] * r[j];
+ *sum_sxr += s[j] * r[j];
+ }
+ }
+}
+
+static double highbd_ssim_8x8(const uint16_t *s, int sp, const uint16_t *r,
+ int rp, uint32_t bd, uint32_t shift) {
+ uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
+ aom_highbd_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,
+ &sum_sxr);
+ return similarity(sum_s >> shift, sum_r >> shift, sum_sq_s >> (2 * shift),
+ sum_sq_r >> (2 * shift), sum_sxr >> (2 * shift), 64, bd);
+}
+
+double aom_highbd_ssim2(const uint8_t *img1, const uint8_t *img2,
+ int stride_img1, int stride_img2, int width, int height,
+ uint32_t bd, uint32_t shift) {
+ int i, j;
+ int samples = 0;
+ double ssim_total = 0;
+
+ // sample point start with each 4x4 location
+ for (i = 0; i <= height - 8;
+ i += 4, img1 += stride_img1 * 4, img2 += stride_img2 * 4) {
+ for (j = 0; j <= width - 8; j += 4) {
+ double v = highbd_ssim_8x8(CONVERT_TO_SHORTPTR(img1 + j), stride_img1,
+ CONVERT_TO_SHORTPTR(img2 + j), stride_img2, bd,
+ shift);
+ ssim_total += v;
+ samples++;
+ }
+ }
+ ssim_total /= samples;
+ return ssim_total;
+}
+
+#if CONFIG_INTERNAL_STATS
void aom_highbd_calc_ssim(const YV12_BUFFER_CONFIG *source,
const YV12_BUFFER_CONFIG *dest, double *weight,
uint32_t bd, uint32_t in_bd, double *fast_ssim) {
@@ -455,3 +459,25 @@ void aom_highbd_calc_ssim(const YV12_BUFFER_CONFIG *source,
fast_ssim[1] = abc[0] * .8 + .1 * (abc[1] + abc[2]);
}
}
+#endif // CONFIG_INTERNAL_STATS
+#endif // CONFIG_AV1_HIGHBITDEPTH
+
+#if CONFIG_INTERNAL_STATS
+void aom_calc_ssim(const YV12_BUFFER_CONFIG *orig,
+ const YV12_BUFFER_CONFIG *recon, const uint32_t bit_depth,
+ const uint32_t in_bit_depth, int is_hbd, double *weight,
+ double *frame_ssim2) {
+#if CONFIG_AV1_HIGHBITDEPTH
+ if (is_hbd) {
+ aom_highbd_calc_ssim(orig, recon, weight, bit_depth, in_bit_depth,
+ frame_ssim2);
+ return;
+ }
+#else
+ (void)bit_depth;
+ (void)in_bit_depth;
+ (void)is_hbd;
+#endif // CONFIG_AV1_HIGHBITDEPTH
+ aom_lowbd_calc_ssim(orig, recon, weight, frame_ssim2);
+}
+#endif // CONFIG_INTERNAL_STATS
diff --git a/third_party/libaom/source/libaom/aom_dsp/ssim.h b/third_party/libaom/source/libaom/aom_dsp/ssim.h
index d635ef5bbe..fb92556a8c 100644
--- a/third_party/libaom/source/libaom/aom_dsp/ssim.h
+++ b/third_party/libaom/source/libaom/aom_dsp/ssim.h
@@ -12,14 +12,13 @@
#ifndef AOM_AOM_DSP_SSIM_H_
#define AOM_AOM_DSP_SSIM_H_
-#define MAX_SSIM_DB 100.0;
-
#ifdef __cplusplus
extern "C" {
#endif
#include "config/aom_config.h"
+#if CONFIG_INTERNAL_STATS
#include "aom_scale/yv12config.h"
// metrics used for calculating ssim, ssim2, dssim, and ssimc
@@ -68,18 +67,35 @@ double aom_get_ssim_metrics(uint8_t *img1, int img1_pitch, uint8_t *img2,
int img2_pitch, int width, int height, Ssimv *sv2,
Metrics *m, int do_inconsistency);
-void aom_calc_ssim(const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *dest, double *weight,
- double *fast_ssim);
+void aom_lowbd_calc_ssim(const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *dest, double *weight,
+ double *fast_ssim);
double aom_calc_fastssim(const YV12_BUFFER_CONFIG *source,
const YV12_BUFFER_CONFIG *dest, double *ssim_y,
double *ssim_u, double *ssim_v, uint32_t bd,
uint32_t in_bd);
+#if CONFIG_AV1_HIGHBITDEPTH
void aom_highbd_calc_ssim(const YV12_BUFFER_CONFIG *source,
const YV12_BUFFER_CONFIG *dest, double *weight,
uint32_t bd, uint32_t in_bd, double *fast_ssim);
+#endif // CONFIG_AV1_HIGHBITDEPTH
+
+void aom_calc_ssim(const YV12_BUFFER_CONFIG *orig,
+ const YV12_BUFFER_CONFIG *recon, const uint32_t bit_depth,
+ const uint32_t in_bit_depth, int is_hbd, double *weight,
+ double *frame_ssim2);
+#endif // CONFIG_INTERNAL_STATS
+
+double aom_ssim2(const uint8_t *img1, const uint8_t *img2, int stride_img1,
+ int stride_img2, int width, int height);
+
+#if CONFIG_AV1_HIGHBITDEPTH
+double aom_highbd_ssim2(const uint8_t *img1, const uint8_t *img2,
+ int stride_img1, int stride_img2, int width, int height,
+ uint32_t bd, uint32_t shift);
+#endif // CONFIG_AV1_HIGHBITDEPTH
#ifdef __cplusplus
} // extern "C"
diff --git a/third_party/libaom/source/libaom/aom_dsp/vmaf.c b/third_party/libaom/source/libaom/aom_dsp/vmaf.c
index 41653430c1..219e278303 100644
--- a/third_party/libaom/source/libaom/aom_dsp/vmaf.c
+++ b/third_party/libaom/source/libaom/aom_dsp/vmaf.c
@@ -12,9 +12,6 @@
#include "aom_dsp/vmaf.h"
#include <assert.h>
-#if !CONFIG_USE_VMAF_RC
-#include <libvmaf.h>
-#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -24,10 +21,7 @@
#include <unistd.h>
#endif
-#if CONFIG_USE_VMAF_RC
-#include <libvmaf/libvmaf.rc.h>
-#endif
-
+#include <libvmaf/libvmaf.h>
#include "aom_dsp/blend.h"
#include "aom_ports/system_state.h"
@@ -36,162 +30,18 @@ static void vmaf_fatal_error(const char *message) {
exit(EXIT_FAILURE);
}
-#if !CONFIG_USE_VMAF_RC
-typedef struct FrameData {
- const YV12_BUFFER_CONFIG *source;
- const YV12_BUFFER_CONFIG *distorted;
- int frame_set;
- int bit_depth;
-} FrameData;
-
-// A callback function used to pass data to VMAF.
-// Returns 0 after reading a frame.
-// Returns 2 when there is no more frame to read.
-static int read_frame(float *ref_data, float *main_data, float *temp_data,
- int stride, void *user_data) {
- FrameData *frames = (FrameData *)user_data;
-
- if (!frames->frame_set) {
- const int width = frames->source->y_width;
- const int height = frames->source->y_height;
- assert(width == frames->distorted->y_width);
- assert(height == frames->distorted->y_height);
-
- if (frames->source->flags & YV12_FLAG_HIGHBITDEPTH) {
- const float scale_factor = 1.0f / (float)(1 << (frames->bit_depth - 8));
- uint16_t *ref_ptr = CONVERT_TO_SHORTPTR(frames->source->y_buffer);
- uint16_t *main_ptr = CONVERT_TO_SHORTPTR(frames->distorted->y_buffer);
-
- for (int row = 0; row < height; ++row) {
- for (int col = 0; col < width; ++col) {
- ref_data[col] = scale_factor * (float)ref_ptr[col];
- }
- ref_ptr += frames->source->y_stride;
- ref_data += stride / sizeof(*ref_data);
- }
-
- for (int row = 0; row < height; ++row) {
- for (int col = 0; col < width; ++col) {
- main_data[col] = scale_factor * (float)main_ptr[col];
- }
- main_ptr += frames->distorted->y_stride;
- main_data += stride / sizeof(*main_data);
- }
- } else {
- uint8_t *ref_ptr = frames->source->y_buffer;
- uint8_t *main_ptr = frames->distorted->y_buffer;
-
- for (int row = 0; row < height; ++row) {
- for (int col = 0; col < width; ++col) {
- ref_data[col] = (float)ref_ptr[col];
- }
- ref_ptr += frames->source->y_stride;
- ref_data += stride / sizeof(*ref_data);
- }
-
- for (int row = 0; row < height; ++row) {
- for (int col = 0; col < width; ++col) {
- main_data[col] = (float)main_ptr[col];
- }
- main_ptr += frames->distorted->y_stride;
- main_data += stride / sizeof(*main_data);
- }
- }
- frames->frame_set = 1;
- return 0;
- }
-
- (void)temp_data;
- return 2;
-}
-
-void aom_calc_vmaf(const char *model_path, const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *distorted, const int bit_depth,
- double *const vmaf) {
- aom_clear_system_state();
- const int width = source->y_width;
- const int height = source->y_height;
- FrameData frames = { source, distorted, 0, bit_depth };
- char *fmt = bit_depth == 10 ? "yuv420p10le" : "yuv420p";
- double vmaf_score;
- const int ret =
- compute_vmaf(&vmaf_score, fmt, width, height, read_frame,
- /*user_data=*/&frames, (char *)model_path,
- /*log_path=*/NULL, /*log_fmt=*/NULL, /*disable_clip=*/1,
- /*disable_avx=*/0, /*enable_transform=*/0,
- /*phone_model=*/0, /*do_psnr=*/0, /*do_ssim=*/0,
- /*do_ms_ssim=*/0, /*pool_method=*/NULL, /*n_thread=*/0,
- /*n_subsample=*/1, /*enable_conf_interval=*/0);
- if (ret) vmaf_fatal_error("Failed to compute VMAF scores.");
-
- aom_clear_system_state();
- *vmaf = vmaf_score;
-}
-
-void aom_calc_vmaf_multi_frame(void *user_data, const char *model_path,
- int (*rd_frm)(float *ref_data, float *main_data,
- float *temp_data, int stride_byte,
- void *user_data),
- int frame_width, int frame_height, int bit_depth,
- double *vmaf) {
- aom_clear_system_state();
-
- char *fmt = bit_depth == 10 ? "yuv420p10le" : "yuv420p";
- int log_path_length = snprintf(NULL, 0, "vmaf_scores_%d.xml", getpid()) + 1;
- char *log_path = malloc(log_path_length);
- snprintf(log_path, log_path_length, "vmaf_scores_%d.xml", getpid());
- double vmaf_score;
- const int ret =
- compute_vmaf(&vmaf_score, fmt, frame_width, frame_height, rd_frm,
- /*user_data=*/user_data, (char *)model_path,
- /*log_path=*/log_path, /*log_fmt=*/NULL, /*disable_clip=*/0,
- /*disable_avx=*/0, /*enable_transform=*/0,
- /*phone_model=*/0, /*do_psnr=*/0, /*do_ssim=*/0,
- /*do_ms_ssim=*/0, /*pool_method=*/NULL, /*n_thread=*/0,
- /*n_subsample=*/1, /*enable_conf_interval=*/0);
- FILE *vmaf_log = fopen(log_path, "r");
- free(log_path);
- log_path = NULL;
- if (vmaf_log == NULL || ret) {
- vmaf_fatal_error("Failed to compute VMAF scores.");
- }
-
- int frame_index = 0;
- char buf[512];
- while (fgets(buf, 511, vmaf_log) != NULL) {
- if (memcmp(buf, "\t\t<frame ", 9) == 0) {
- char *p = strstr(buf, "vmaf=");
- if (p != NULL && p[5] == '"') {
- char *p2 = strstr(&p[6], "\"");
- *p2 = '\0';
- const double score = atof(&p[6]);
- if (score < 0.0 || score > 100.0) {
- vmaf_fatal_error("Failed to compute VMAF scores.");
- }
- vmaf[frame_index++] = score;
- }
- }
- }
- fclose(vmaf_log);
-
- aom_clear_system_state();
-}
-#endif
-
-#if CONFIG_USE_VMAF_RC
-void aom_init_vmaf_model_rc(VmafModel **vmaf_model, const char *model_path) {
+void aom_init_vmaf_model(VmafModel **vmaf_model, const char *model_path) {
if (*vmaf_model != NULL) return;
VmafModelConfig model_cfg;
model_cfg.flags = VMAF_MODEL_FLAG_DISABLE_CLIP;
model_cfg.name = "vmaf";
- model_cfg.path = (char *)model_path;
- if (vmaf_model_load_from_path(vmaf_model, &model_cfg)) {
+ if (vmaf_model_load_from_path(vmaf_model, &model_cfg, model_path)) {
vmaf_fatal_error("Failed to load VMAF model.");
}
}
-void aom_close_vmaf_model_rc(VmafModel *vmaf_model) {
+void aom_close_vmaf_model(VmafModel *vmaf_model) {
vmaf_model_destroy(vmaf_model);
}
@@ -221,8 +71,9 @@ static void copy_picture(const int bit_depth, const YV12_BUFFER_CONFIG *src,
}
}
-void aom_init_vmaf_context_rc(VmafContext **vmaf_context, VmafModel *vmaf_model,
- bool cal_vmaf_neg) {
+void aom_init_vmaf_context(VmafContext **vmaf_context, VmafModel *vmaf_model,
+ bool cal_vmaf_neg) {
+ // TODO(sdeng): make them CLI arguments.
VmafConfiguration cfg;
cfg.log_level = VMAF_LOG_LEVEL_NONE;
cfg.n_threads = 0;
@@ -233,41 +84,53 @@ void aom_init_vmaf_context_rc(VmafContext **vmaf_context, VmafModel *vmaf_model,
vmaf_fatal_error("Failed to init VMAF context.");
}
- if (vmaf_use_features_from_model(*vmaf_context, vmaf_model)) {
- vmaf_fatal_error("Failed to load feature extractors from VMAF model.");
- }
-
if (cal_vmaf_neg) {
VmafFeatureDictionary *vif_feature = NULL;
- vmaf_feature_dictionary_set(&vif_feature, "vif_enhn_gain_limit", "1.0");
- if (vmaf_use_feature(*vmaf_context, "float_vif", vif_feature)) {
+ if (vmaf_feature_dictionary_set(&vif_feature, "vif_enhn_gain_limit",
+ "1.0")) {
+ vmaf_fatal_error("Failed to set vif_enhn_gain_limit.");
+ }
+ if (vmaf_model_feature_overload(vmaf_model, "float_vif", vif_feature)) {
vmaf_fatal_error("Failed to use feature float_vif.");
}
VmafFeatureDictionary *adm_feature = NULL;
- vmaf_feature_dictionary_set(&adm_feature, "adm_enhn_gain_limit", "1.0");
- if (vmaf_use_feature(*vmaf_context, "float_adm", adm_feature)) {
+ if (vmaf_feature_dictionary_set(&adm_feature, "adm_enhn_gain_limit",
+ "1.0")) {
+ vmaf_fatal_error("Failed to set adm_enhn_gain_limit.");
+ }
+ if (vmaf_model_feature_overload(vmaf_model, "adm", adm_feature)) {
vmaf_fatal_error("Failed to use feature float_adm.");
}
}
VmafFeatureDictionary *motion_force_zero = NULL;
- vmaf_feature_dictionary_set(&motion_force_zero, "motion_force_zero", "true");
- if (vmaf_use_feature(*vmaf_context, "float_motion", motion_force_zero)) {
+ if (vmaf_feature_dictionary_set(&motion_force_zero, "motion_force_zero",
+ "1")) {
+ vmaf_fatal_error("Failed to set motion_force_zero.");
+ }
+ if (vmaf_model_feature_overload(vmaf_model, "float_motion",
+ motion_force_zero)) {
vmaf_fatal_error("Failed to use feature float_motion.");
}
+
+ if (vmaf_use_features_from_model(*vmaf_context, vmaf_model)) {
+ vmaf_fatal_error("Failed to load feature extractors from VMAF model.");
+ }
}
-void aom_close_vmaf_context_rc(VmafContext *vmaf_context) {
+void aom_close_vmaf_context(VmafContext *vmaf_context) {
if (vmaf_close(vmaf_context)) {
vmaf_fatal_error("Failed to close VMAF context.");
}
}
-void aom_calc_vmaf_at_index_rc(VmafContext *vmaf_context, VmafModel *vmaf_model,
- const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *distorted,
- int bit_depth, int frame_index, double *vmaf) {
+void aom_calc_vmaf(VmafModel *vmaf_model, const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *distorted, int bit_depth,
+ bool cal_vmaf_neg, double *vmaf) {
+ VmafContext *vmaf_context;
+ aom_init_vmaf_context(&vmaf_context, vmaf_model, cal_vmaf_neg);
+ const int frame_index = 0;
VmafPicture ref, dist;
if (vmaf_picture_alloc(&ref, VMAF_PIX_FMT_YUV420P, bit_depth, source->y_width,
source->y_height) ||
@@ -282,10 +145,50 @@ void aom_calc_vmaf_at_index_rc(VmafContext *vmaf_context, VmafModel *vmaf_model,
vmaf_fatal_error("Failed to read VMAF pictures.");
}
+ if (vmaf_read_pictures(vmaf_context, NULL, NULL, 0)) {
+ vmaf_fatal_error("Failed to flush context.");
+ }
+
vmaf_picture_unref(&ref);
vmaf_picture_unref(&dist);
vmaf_score_at_index(vmaf_context, vmaf_model, vmaf, frame_index);
+ aom_close_vmaf_context(vmaf_context);
}
-#endif // CONFIG_USE_VMAF_RC
+void aom_read_vmaf_image(VmafContext *vmaf_context,
+ const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *distorted, int bit_depth,
+ int frame_index) {
+ VmafPicture ref, dist;
+ if (vmaf_picture_alloc(&ref, VMAF_PIX_FMT_YUV420P, bit_depth, source->y_width,
+ source->y_height) ||
+ vmaf_picture_alloc(&dist, VMAF_PIX_FMT_YUV420P, bit_depth,
+ source->y_width, source->y_height)) {
+ vmaf_fatal_error("Failed to alloc VMAF pictures.");
+ }
+ copy_picture(bit_depth, source, &ref);
+ copy_picture(bit_depth, distorted, &dist);
+ if (vmaf_read_pictures(vmaf_context, &ref, &dist,
+ /*picture index=*/frame_index)) {
+ vmaf_fatal_error("Failed to read VMAF pictures.");
+ }
+
+ vmaf_picture_unref(&ref);
+ vmaf_picture_unref(&dist);
+}
+
+double aom_calc_vmaf_at_index(VmafContext *vmaf_context, VmafModel *vmaf_model,
+ int frame_index) {
+ double vmaf;
+ if (vmaf_score_at_index(vmaf_context, vmaf_model, &vmaf, frame_index)) {
+ vmaf_fatal_error("Failed to calc VMAF scores.");
+ }
+ return vmaf;
+}
+
+void aom_flush_vmaf_context(VmafContext *vmaf_context) {
+ if (vmaf_read_pictures(vmaf_context, NULL, NULL, 0)) {
+ vmaf_fatal_error("Failed to flush context.");
+ }
+}
diff --git a/third_party/libaom/source/libaom/aom_dsp/vmaf.h b/third_party/libaom/source/libaom/aom_dsp/vmaf.h
index d9da223e29..3ba8c8d565 100644
--- a/third_party/libaom/source/libaom/aom_dsp/vmaf.h
+++ b/third_party/libaom/source/libaom/aom_dsp/vmaf.h
@@ -15,33 +15,28 @@
#include <stdbool.h>
#include "aom_scale/yv12config.h"
-#if CONFIG_USE_VMAF_RC
typedef struct VmafContext VmafContext;
typedef struct VmafModel VmafModel;
-#endif
-
-#if CONFIG_USE_VMAF_RC
-void aom_init_vmaf_context_rc(VmafContext **vmaf_context, VmafModel *vmaf_model,
- bool cal_vmaf_neg);
-void aom_close_vmaf_context_rc(VmafContext *vmaf_context);
-
-void aom_init_vmaf_model_rc(VmafModel **vmaf_model, const char *model_path);
-void aom_close_vmaf_model_rc(VmafModel *vmaf_model);
-
-void aom_calc_vmaf_at_index_rc(VmafContext *vmaf_context, VmafModel *vmaf_model,
- const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *distorted,
- int bit_depth, int frame_index, double *vmaf);
-#else
-void aom_calc_vmaf(const char *model_path, const YV12_BUFFER_CONFIG *source,
+
+void aom_init_vmaf_context(VmafContext **vmaf_context, VmafModel *vmaf_model,
+ bool cal_vmaf_neg);
+void aom_close_vmaf_context(VmafContext *vmaf_context);
+
+void aom_init_vmaf_model(VmafModel **vmaf_model, const char *model_path);
+void aom_close_vmaf_model(VmafModel *vmaf_model);
+
+void aom_calc_vmaf(VmafModel *vmaf_model, const YV12_BUFFER_CONFIG *source,
const YV12_BUFFER_CONFIG *distorted, int bit_depth,
- double *vmaf);
-
-void aom_calc_vmaf_multi_frame(
- void *user_data, const char *model_path,
- int (*read_frame)(float *ref_data, float *main_data, float *temp_data,
- int stride_byte, void *user_data),
- int frame_width, int frame_height, int bit_depth, double *vmaf);
-#endif // CONFIG_USE_VMAF_RC
+ bool cal_vmaf_neg, double *vmaf);
+
+void aom_read_vmaf_image(VmafContext *vmaf_context,
+ const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *distorted, int bit_depth,
+ int frame_index);
+
+double aom_calc_vmaf_at_index(VmafContext *vmaf_context, VmafModel *vmaf_model,
+ int frame_index);
+
+void aom_flush_vmaf_context(VmafContext *vmaf_context);
#endif // AOM_AOM_DSP_VMAF_H_
diff --git a/third_party/libaom/source/libaom/aom_dsp/x86/highbd_sad_sse2.asm b/third_party/libaom/source/libaom/aom_dsp/x86/highbd_sad_sse2.asm
index 58f1ac964e..a2510d5e7f 100644
--- a/third_party/libaom/source/libaom/aom_dsp/x86/highbd_sad_sse2.asm
+++ b/third_party/libaom/source/libaom/aom_dsp/x86/highbd_sad_sse2.asm
@@ -20,20 +20,21 @@ SECTION .text
; Arg 2: Height
; Arg 3: Number of general purpose registers: 5 for 32-bit build, 6 for 64-bit
; Arg 4: Type of function: if 0, normal sad; if 1, avg; if 2, skip rows
-%macro HIGH_SAD_FN 4
+; Arg 5: Number of xmm registers. 8xh needs 8, others only need 7
+%macro HIGH_SAD_FN 4-5 7
%if %4 == 0
%if %3 == 5
-cglobal highbd_sad%1x%2, 4, %3, 7, src, src_stride, ref, ref_stride, n_rows
+cglobal highbd_sad%1x%2, 4, %3, %5, src, src_stride, ref, ref_stride, n_rows
%else ; %3 == 7
-cglobal highbd_sad%1x%2, 4, %3, 7, src, src_stride, ref, ref_stride, \
+cglobal highbd_sad%1x%2, 4, %3, %5, src, src_stride, ref, ref_stride, \
src_stride3, ref_stride3, n_rows
%endif ; %3 == 5/7
%elif %4 == 1 ; avg
%if %3 == 5
-cglobal highbd_sad%1x%2_avg, 5, 1 + %3, 7, src, src_stride, ref, ref_stride, \
+cglobal highbd_sad%1x%2_avg, 5, 1 + %3, %5, src, src_stride, ref, ref_stride, \
second_pred, n_rows
%else ; %3 == 7
-cglobal highbd_sad%1x%2_avg, 5, ARCH_X86_64 + %3, 7, src, src_stride, \
+cglobal highbd_sad%1x%2_avg, 5, ARCH_X86_64 + %3, %5, src, src_stride, \
ref, ref_stride, \
second_pred, \
src_stride3, ref_stride3
@@ -356,7 +357,7 @@ HIGH_SAD16XN 8, 2 ; highbd_sad_skip_16x8_sse2
; unsigned int aom_highbd_sad8x{4,8,16}_sse2(uint8_t *src, int src_stride,
; uint8_t *ref, int ref_stride);
%macro HIGH_SAD8XN 1-2 0
- HIGH_SAD_FN 8, %1, 7, %2
+ HIGH_SAD_FN 8, %1, 7, %2, 8
%if %2 == 2 ; skip rows, so divide number of rows by 2
mov n_rowsd, %1/8
%else
@@ -377,22 +378,30 @@ HIGH_SAD16XN 8, 2 ; highbd_sad_skip_16x8_sse2
pavgw m4, [second_predq+mmsize*3]
lea second_predq, [second_predq+mmsize*4]
%endif
- mova m5, [srcq]
- psubusw m5, m1
- psubusw m1, [srcq]
+ mova m7, m1
+ movu m5, [srcq]
+ psubusw m1, m5
+ psubusw m5, m7
por m1, m5
- mova m5, [srcq+src_strideq*2]
- psubusw m5, m2
- psubusw m2, [srcq+src_strideq*2]
+
+ mova m7, m2
+ movu m5, [srcq+src_strideq*2]
+ psubusw m2, m5
+ psubusw m5, m7
por m2, m5
- mova m5, [srcq+src_strideq*4]
- psubusw m5, m3
- psubusw m3, [srcq+src_strideq*4]
+
+ mova m7, m3
+ movu m5, [srcq+src_strideq*4]
+ psubusw m3, m5
+ psubusw m5, m7
por m3, m5
- mova m5, [srcq+src_stride3q*2]
- psubusw m5, m4
- psubusw m4, [srcq+src_stride3q*2]
+
+ mova m7, m4
+ movu m5, [srcq+src_stride3q*2]
+ psubusw m4, m5
+ psubusw m5, m7
por m4, m5
+
paddw m1, m2
paddw m3, m4
movhlps m2, m1
diff --git a/third_party/libaom/source/libaom/aom_dsp/x86/variance_impl_avx2.c b/third_party/libaom/source/libaom/aom_dsp/x86/variance_impl_avx2.c
index f779270ae3..163e4cc566 100644
--- a/third_party/libaom/source/libaom/aom_dsp/x86/variance_impl_avx2.c
+++ b/third_party/libaom/source/libaom/aom_dsp/x86/variance_impl_avx2.c
@@ -616,7 +616,7 @@ unsigned int aom_sub_pixel_avg_variance32xh_avx2(
src += src_stride;
dst += dst_stride;
}
- } else if (y_offset == 8) {
+ } else if (y_offset == 4) {
__m256i src_next_reg;
for (i = 0; i < height; i++) {
LOAD_SRC_DST
@@ -652,8 +652,8 @@ unsigned int aom_sub_pixel_avg_variance32xh_avx2(
dst += dst_stride;
}
}
- // x_offset = 8 and y_offset = 0
- } else if (x_offset == 8) {
+ // x_offset = 4 and y_offset = 0
+ } else if (x_offset == 4) {
if (y_offset == 0) {
__m256i src_next_reg;
for (i = 0; i < height; i++) {
@@ -668,8 +668,8 @@ unsigned int aom_sub_pixel_avg_variance32xh_avx2(
src += src_stride;
dst += dst_stride;
}
- // x_offset = 8 and y_offset = 8
- } else if (y_offset == 8) {
+ // x_offset = 4 and y_offset = 4
+ } else if (y_offset == 4) {
__m256i src_next_reg, src_avg;
// load source and another source starting from the next
// following byte
@@ -691,7 +691,7 @@ unsigned int aom_sub_pixel_avg_variance32xh_avx2(
CALC_SUM_SSE_INSIDE_LOOP
dst += dst_stride;
}
- // x_offset = 8 and y_offset = bilin interpolation
+ // x_offset = 4 and y_offset = bilin interpolation
} else {
__m256i filter, pw8, src_next_reg, src_avg;
y_offset <<= 5;
@@ -741,8 +741,8 @@ unsigned int aom_sub_pixel_avg_variance32xh_avx2(
src += src_stride;
dst += dst_stride;
}
- // x_offset = bilin interpolation and y_offset = 8
- } else if (y_offset == 8) {
+ // x_offset = bilin interpolation and y_offset = 4
+ } else if (y_offset == 4) {
__m256i filter, pw8, src_next_reg, src_pack;
x_offset <<= 5;
filter = _mm256_load_si256(