diff options
Diffstat (limited to 'third_party/libaom/source/libaom/aom_dsp')
13 files changed, 325 insertions, 321 deletions
diff --git a/third_party/libaom/source/libaom/aom_dsp/arm/intrapred_neon.c b/third_party/libaom/source/libaom/aom_dsp/arm/intrapred_neon.c index 6d41708ee0..945e7e48ee 100644 --- a/third_party/libaom/source/libaom/aom_dsp/arm/intrapred_neon.c +++ b/third_party/libaom/source/libaom/aom_dsp/arm/intrapred_neon.c @@ -11,8 +11,6 @@ #include <arm_neon.h> -#include "common/tools_common.h" - #include "config/aom_config.h" #include "config/aom_dsp_rtcd.h" diff --git a/third_party/libaom/source/libaom/aom_dsp/butteraugli.c b/third_party/libaom/source/libaom/aom_dsp/butteraugli.c index 7ce2324c06..038efcd313 100644 --- a/third_party/libaom/source/libaom/aom_dsp/butteraugli.c +++ b/third_party/libaom/source/libaom/aom_dsp/butteraugli.c @@ -18,37 +18,71 @@ int aom_calc_butteraugli(const YV12_BUFFER_CONFIG *source, const YV12_BUFFER_CONFIG *distorted, int bit_depth, - float *dist_map) { + aom_matrix_coefficients_t matrix_coefficients, + aom_color_range_t color_range, float *dist_map) { (void)bit_depth; assert(bit_depth == 8); - assert(source->y_width == source->uv_width * 2); const int width = source->y_crop_width; const int height = source->y_crop_height; + const int ss_x = source->subsampling_x; + const int ss_y = source->subsampling_y; - size_t buffer_size = width * height * 3; - uint8_t *src_rgb = (uint8_t *)aom_malloc(buffer_size); - uint8_t *distorted_rgb = (uint8_t *)aom_malloc(buffer_size); - if (!src_rgb || !distorted_rgb) { - aom_free(src_rgb); - aom_free(distorted_rgb); + const struct YuvConstants *yuv_constants; + if (matrix_coefficients == AOM_CICP_MC_BT_709) { + if (color_range == AOM_CR_FULL_RANGE) return 0; + yuv_constants = &kYuvH709Constants; + } else { + yuv_constants = color_range == AOM_CR_FULL_RANGE ? &kYuvJPEGConstants + : &kYuvI601Constants; + } + + const size_t stride_argb = width * 4; + const size_t buffer_size = height * stride_argb; + uint8_t *src_argb = (uint8_t *)aom_malloc(buffer_size); + uint8_t *distorted_argb = (uint8_t *)aom_malloc(buffer_size); + if (!src_argb || !distorted_argb) { + aom_free(src_argb); + aom_free(distorted_argb); return 0; } - I420ToRGB24Matrix(source->y_buffer, source->y_stride, source->u_buffer, - source->uv_stride, source->v_buffer, source->uv_stride, - src_rgb, width * 3, &kYuvH709Constants, width, height); - I420ToRGB24Matrix(distorted->y_buffer, distorted->y_stride, - distorted->u_buffer, distorted->uv_stride, - distorted->v_buffer, distorted->uv_stride, distorted_rgb, - width * 3, &kYuvH709Constants, width, height); + if (ss_x == 1 && ss_y == 1) { + I420ToARGBMatrix(source->y_buffer, source->y_stride, source->u_buffer, + source->uv_stride, source->v_buffer, source->uv_stride, + src_argb, stride_argb, yuv_constants, width, height); + I420ToARGBMatrix(distorted->y_buffer, distorted->y_stride, + distorted->u_buffer, distorted->uv_stride, + distorted->v_buffer, distorted->uv_stride, distorted_argb, + stride_argb, yuv_constants, width, height); + } else if (ss_x == 1 && ss_y == 0) { + I422ToARGBMatrix(source->y_buffer, source->y_stride, source->u_buffer, + source->uv_stride, source->v_buffer, source->uv_stride, + src_argb, stride_argb, yuv_constants, width, height); + I422ToARGBMatrix(distorted->y_buffer, distorted->y_stride, + distorted->u_buffer, distorted->uv_stride, + distorted->v_buffer, distorted->uv_stride, distorted_argb, + stride_argb, yuv_constants, width, height); + } else if (ss_x == 0 && ss_y == 0) { + I444ToARGBMatrix(source->y_buffer, source->y_stride, source->u_buffer, + source->uv_stride, source->v_buffer, source->uv_stride, + src_argb, stride_argb, yuv_constants, width, height); + I444ToARGBMatrix(distorted->y_buffer, distorted->y_stride, + distorted->u_buffer, distorted->uv_stride, + distorted->v_buffer, distorted->uv_stride, distorted_argb, + stride_argb, yuv_constants, width, height); + } else { + aom_free(src_argb); + aom_free(distorted_argb); + return 0; + } - JxlPixelFormat pixel_format = { 3, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0 }; + JxlPixelFormat pixel_format = { 4, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0 }; JxlButteraugliApi *api = JxlButteraugliApiCreate(NULL); JxlButteraugliApiSetHFAsymmetry(api, 0.8f); JxlButteraugliResult *result = JxlButteraugliCompute( - api, width, height, &pixel_format, src_rgb, buffer_size, &pixel_format, - distorted_rgb, buffer_size); + api, width, height, &pixel_format, src_argb, buffer_size, &pixel_format, + distorted_argb, buffer_size); const float *distmap = NULL; uint32_t row_stride; @@ -56,8 +90,8 @@ int aom_calc_butteraugli(const YV12_BUFFER_CONFIG *source, if (distmap == NULL) { JxlButteraugliApiDestroy(api); JxlButteraugliResultDestroy(result); - aom_free(src_rgb); - aom_free(distorted_rgb); + aom_free(src_argb); + aom_free(distorted_argb); return 0; } @@ -69,7 +103,7 @@ int aom_calc_butteraugli(const YV12_BUFFER_CONFIG *source, JxlButteraugliApiDestroy(api); JxlButteraugliResultDestroy(result); - aom_free(src_rgb); - aom_free(distorted_rgb); + aom_free(src_argb); + aom_free(distorted_argb); return 1; } diff --git a/third_party/libaom/source/libaom/aom_dsp/butteraugli.h b/third_party/libaom/source/libaom/aom_dsp/butteraugli.h index 06402aa3e4..5304092ccb 100644 --- a/third_party/libaom/source/libaom/aom_dsp/butteraugli.h +++ b/third_party/libaom/source/libaom/aom_dsp/butteraugli.h @@ -14,8 +14,10 @@ #include "aom_scale/yv12config.h" +// Returns a boolean that indicates success/failure. int aom_calc_butteraugli(const YV12_BUFFER_CONFIG *source, const YV12_BUFFER_CONFIG *distorted, int bit_depth, - float *dist_map); + aom_matrix_coefficients_t matrix_coefficients, + aom_color_range_t color_range, float *dist_map); #endif // AOM_AOM_DSP_BUTTERAUGLI_H_ diff --git a/third_party/libaom/source/libaom/aom_dsp/fastssim.c b/third_party/libaom/source/libaom/aom_dsp/fastssim.c index 3804519b31..89712c5f40 100644 --- a/third_party/libaom/source/libaom/aom_dsp/fastssim.c +++ b/third_party/libaom/source/libaom/aom_dsp/fastssim.c @@ -31,6 +31,7 @@ typedef struct fs_ctx fs_ctx; #define SSIM_C1_12 (4095 * 4095 * 0.01 * 0.01) #define SSIM_C2_10 (1023 * 1023 * 0.03 * 0.03) #define SSIM_C2_12 (4095 * 4095 * 0.03 * 0.03) +#define MAX_SSIM_DB 100.0 #define FS_MINI(_a, _b) ((_a) < (_b) ? (_a) : (_b)) #define FS_MAXI(_a, _b) ((_a) > (_b) ? (_a) : (_b)) diff --git a/third_party/libaom/source/libaom/aom_dsp/grain_table.c b/third_party/libaom/source/libaom/aom_dsp/grain_table.c index e03f04d5da..b22752abd9 100644 --- a/third_party/libaom/source/libaom/aom_dsp/grain_table.c +++ b/third_party/libaom/source/libaom/aom_dsp/grain_table.c @@ -202,7 +202,7 @@ int aom_film_grain_table_lookup(aom_film_grain_table_t *t, int64_t time_stamp, int64_t end_time, int erase, aom_film_grain_t *grain) { aom_film_grain_table_entry_t *entry = t->head; - aom_film_grain_table_entry_t *prev_entry = 0; + aom_film_grain_table_entry_t *prev_entry = NULL; uint16_t random_seed = grain ? grain->random_seed : 0; if (grain) memset(grain, 0, sizeof(*grain)); @@ -241,10 +241,10 @@ int aom_film_grain_table_lookup(aom_film_grain_table_t *t, int64_t time_stamp, entry->end_time = time_stamp; if (t->tail == entry) t->tail = new_entry; } - // If segments aren't aligned, delete from the beggining of subsequent + // If segments aren't aligned, delete from the beginning of subsequent // segments if (end_time > entry_end_time) { - aom_film_grain_table_lookup(t, entry->end_time, end_time, 1, 0); + aom_film_grain_table_lookup(t, entry_end_time, end_time, 1, 0); } return 1; } @@ -275,12 +275,12 @@ aom_codec_err_t aom_film_grain_table_read( return error_info->error_code; } - aom_film_grain_table_entry_t *prev_entry = 0; + aom_film_grain_table_entry_t *prev_entry = NULL; while (!feof(file)) { aom_film_grain_table_entry_t *entry = aom_malloc(sizeof(*entry)); memset(entry, 0, sizeof(*entry)); grain_table_entry_read(file, error_info, entry); - entry->next = 0; + entry->next = NULL; if (prev_entry) prev_entry->next = entry; if (!t->head) t->head = entry; diff --git a/third_party/libaom/source/libaom/aom_dsp/noise_model.c b/third_party/libaom/source/libaom/aom_dsp/noise_model.c index f56fdd5860..19c660e911 100644 --- a/third_party/libaom/source/libaom/aom_dsp/noise_model.c +++ b/third_party/libaom/source/libaom/aom_dsp/noise_model.c @@ -214,6 +214,7 @@ static void set_chroma_coefficient_fallback_soln(aom_equation_system_t *eqns) { int aom_noise_strength_lut_init(aom_noise_strength_lut_t *lut, int num_points) { if (!lut) return 0; + if (num_points <= 0) return 0; lut->num_points = 0; lut->points = (double(*)[2])aom_malloc(num_points * sizeof(*lut->points)); if (!lut->points) return 0; @@ -1152,12 +1153,24 @@ int aom_noise_model_get_grain_parameters(aom_noise_model_t *const noise_model, // Convert the scaling functions to 8 bit values aom_noise_strength_lut_t scaling_points[3]; - aom_noise_strength_solver_fit_piecewise( - &noise_model->combined_state[0].strength_solver, 14, scaling_points + 0); - aom_noise_strength_solver_fit_piecewise( - &noise_model->combined_state[1].strength_solver, 10, scaling_points + 1); - aom_noise_strength_solver_fit_piecewise( - &noise_model->combined_state[2].strength_solver, 10, scaling_points + 2); + if (!aom_noise_strength_solver_fit_piecewise( + &noise_model->combined_state[0].strength_solver, 14, + scaling_points + 0)) { + return 0; + } + if (!aom_noise_strength_solver_fit_piecewise( + &noise_model->combined_state[1].strength_solver, 10, + scaling_points + 1)) { + aom_noise_strength_lut_free(scaling_points + 0); + return 0; + } + if (!aom_noise_strength_solver_fit_piecewise( + &noise_model->combined_state[2].strength_solver, 10, + scaling_points + 2)) { + aom_noise_strength_lut_free(scaling_points + 0); + aom_noise_strength_lut_free(scaling_points + 1); + return 0; + } // Both the domain and the range of the scaling functions in the film_grain // are normalized to 8-bit (e.g., they are implicitly scaled during grain diff --git a/third_party/libaom/source/libaom/aom_dsp/psnrhvs.c b/third_party/libaom/source/libaom/aom_dsp/psnrhvs.c index 69a1d99bf2..25f075aa2f 100644 --- a/third_party/libaom/source/libaom/aom_dsp/psnrhvs.c +++ b/third_party/libaom/source/libaom/aom_dsp/psnrhvs.c @@ -34,6 +34,7 @@ static void od_bin_fdct8x8(tran_low_t *y, int ystride, const int16_t *x, *(y + ystride * i + j) = (*(y + ystride * i + j) + 4) >> 3; } +#if CONFIG_AV1_HIGHBITDEPTH static void hbd_od_bin_fdct8x8(tran_low_t *y, int ystride, const int16_t *x, int xstride) { int i, j; @@ -43,6 +44,7 @@ static void hbd_od_bin_fdct8x8(tran_low_t *y, int ystride, const int16_t *x, for (j = 0; j < 8; j++) *(y + ystride * i + j) = (*(y + ystride * i + j) + 4) >> 3; } +#endif // CONFIG_AV1_HIGHBITDEPTH /* Normalized inverse quantization matrix for 8x8 DCT at the point of * transparency. This is not the JPEG based matrix from the paper, @@ -210,6 +212,7 @@ static double calc_psnrhvs(const unsigned char *src, int _systride, } } s_gvar = 1.f / (36 - n + 1) * s_gmean / 36.f; +#if CONFIG_AV1_HIGHBITDEPTH if (!buf_is_hbd) { od_bin_fdct8x8(dct_s_coef, 8, dct_s, 8); od_bin_fdct8x8(dct_d_coef, 8, dct_d, 8); @@ -217,6 +220,10 @@ static double calc_psnrhvs(const unsigned char *src, int _systride, hbd_od_bin_fdct8x8(dct_s_coef, 8, dct_s, 8); hbd_od_bin_fdct8x8(dct_d_coef, 8, dct_d, 8); } +#else + od_bin_fdct8x8(dct_s_coef, 8, dct_s, 8); + od_bin_fdct8x8(dct_d_coef, 8, dct_d, 8); +#endif // CONFIG_AV1_HIGHBITDEPTH for (i = 0; i < 8; i++) for (j = (i == 0); j < 8; j++) s_mask += dct_s_coef[i * 8 + j] * dct_s_coef[i * 8 + j] * mask[i][j]; diff --git a/third_party/libaom/source/libaom/aom_dsp/ssim.c b/third_party/libaom/source/libaom/aom_dsp/ssim.c index 357da99ae4..c5334fd2c5 100644 --- a/third_party/libaom/source/libaom/aom_dsp/ssim.c +++ b/third_party/libaom/source/libaom/aom_dsp/ssim.c @@ -18,6 +18,7 @@ #include "aom_ports/mem.h" #include "aom_ports/system_state.h" +#if CONFIG_INTERNAL_STATS void aom_ssim_parms_16x16_c(const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, @@ -33,6 +34,7 @@ void aom_ssim_parms_16x16_c(const uint8_t *s, int sp, const uint8_t *r, int rp, } } } +#endif // CONFIG_INTERNAL_STATS void aom_ssim_parms_8x8_c(const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, @@ -49,24 +51,6 @@ void aom_ssim_parms_8x8_c(const uint8_t *s, int sp, const uint8_t *r, int rp, } } -#if CONFIG_AV1_HIGHBITDEPTH -void aom_highbd_ssim_parms_8x8_c(const uint16_t *s, int sp, const uint16_t *r, - int rp, uint32_t *sum_s, uint32_t *sum_r, - uint32_t *sum_sq_s, uint32_t *sum_sq_r, - uint32_t *sum_sxr) { - int i, j; - for (i = 0; i < 8; i++, s += sp, r += rp) { - for (j = 0; j < 8; j++) { - *sum_s += s[j]; - *sum_r += r[j]; - *sum_sq_s += s[j] * s[j]; - *sum_sq_r += r[j] * r[j]; - *sum_sxr += s[j] * r[j]; - } - } -} -#endif - static const int64_t cc1 = 26634; // (64^2*(.01*255)^2 static const int64_t cc2 = 239708; // (64^2*(.03*255)^2 static const int64_t cc1_10 = 428658; // (64^2*(.01*1023)^2 @@ -78,7 +62,7 @@ static double similarity(uint32_t sum_s, uint32_t sum_r, uint32_t sum_sq_s, uint32_t sum_sq_r, uint32_t sum_sxr, int count, uint32_t bd) { double ssim_n, ssim_d; - int64_t c1, c2; + int64_t c1 = 0, c2 = 0; if (bd == 8) { // scale the constants by number of pixels c1 = (cc1 * count * count) >> 12; @@ -90,8 +74,9 @@ static double similarity(uint32_t sum_s, uint32_t sum_r, uint32_t sum_sq_s, c1 = (cc1_12 * count * count) >> 12; c2 = (cc2_12 * count * count) >> 12; } else { - c1 = c2 = 0; assert(0); + // Return similarity as zero for unsupported bit-depth values. + return 0; } ssim_n = (2.0 * sum_s * sum_r + c1) * @@ -111,21 +96,11 @@ static double ssim_8x8(const uint8_t *s, int sp, const uint8_t *r, int rp) { return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64, 8); } -static double highbd_ssim_8x8(const uint16_t *s, int sp, const uint16_t *r, - int rp, uint32_t bd, uint32_t shift) { - uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0; - aom_highbd_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, - &sum_sxr); - return similarity(sum_s >> shift, sum_r >> shift, sum_sq_s >> (2 * shift), - sum_sq_r >> (2 * shift), sum_sxr >> (2 * shift), 64, bd); -} - // We are using a 8x8 moving window with starting location of each 8x8 window // on the 4x4 pixel grid. Such arrangement allows the windows to overlap // block boundaries to penalize blocking artifacts. -static double aom_ssim2(const uint8_t *img1, const uint8_t *img2, - int stride_img1, int stride_img2, int width, - int height) { +double aom_ssim2(const uint8_t *img1, const uint8_t *img2, int stride_img1, + int stride_img2, int width, int height) { int i, j; int samples = 0; double ssim_total = 0; @@ -143,31 +118,10 @@ static double aom_ssim2(const uint8_t *img1, const uint8_t *img2, return ssim_total; } -static double aom_highbd_ssim2(const uint8_t *img1, const uint8_t *img2, - int stride_img1, int stride_img2, int width, - int height, uint32_t bd, uint32_t shift) { - int i, j; - int samples = 0; - double ssim_total = 0; - - // sample point start with each 4x4 location - for (i = 0; i <= height - 8; - i += 4, img1 += stride_img1 * 4, img2 += stride_img2 * 4) { - for (j = 0; j <= width - 8; j += 4) { - double v = highbd_ssim_8x8(CONVERT_TO_SHORTPTR(img1 + j), stride_img1, - CONVERT_TO_SHORTPTR(img2 + j), stride_img2, bd, - shift); - ssim_total += v; - samples++; - } - } - ssim_total /= samples; - return ssim_total; -} - -void aom_calc_ssim(const YV12_BUFFER_CONFIG *source, - const YV12_BUFFER_CONFIG *dest, double *weight, - double *fast_ssim) { +#if CONFIG_INTERNAL_STATS +void aom_lowbd_calc_ssim(const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *dest, double *weight, + double *fast_ssim) { double abc[3]; for (int i = 0; i < 3; ++i) { const int is_uv = i > 0; @@ -421,7 +375,57 @@ double aom_get_ssim_metrics(uint8_t *img1, int img1_pitch, uint8_t *img2, m->dssim = dssim_total; return inconsistency_total; } +#endif // CONFIG_INTERNAL_STATS +#if CONFIG_AV1_HIGHBITDEPTH +void aom_highbd_ssim_parms_8x8_c(const uint16_t *s, int sp, const uint16_t *r, + int rp, uint32_t *sum_s, uint32_t *sum_r, + uint32_t *sum_sq_s, uint32_t *sum_sq_r, + uint32_t *sum_sxr) { + int i, j; + for (i = 0; i < 8; i++, s += sp, r += rp) { + for (j = 0; j < 8; j++) { + *sum_s += s[j]; + *sum_r += r[j]; + *sum_sq_s += s[j] * s[j]; + *sum_sq_r += r[j] * r[j]; + *sum_sxr += s[j] * r[j]; + } + } +} + +static double highbd_ssim_8x8(const uint16_t *s, int sp, const uint16_t *r, + int rp, uint32_t bd, uint32_t shift) { + uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0; + aom_highbd_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, + &sum_sxr); + return similarity(sum_s >> shift, sum_r >> shift, sum_sq_s >> (2 * shift), + sum_sq_r >> (2 * shift), sum_sxr >> (2 * shift), 64, bd); +} + +double aom_highbd_ssim2(const uint8_t *img1, const uint8_t *img2, + int stride_img1, int stride_img2, int width, int height, + uint32_t bd, uint32_t shift) { + int i, j; + int samples = 0; + double ssim_total = 0; + + // sample point start with each 4x4 location + for (i = 0; i <= height - 8; + i += 4, img1 += stride_img1 * 4, img2 += stride_img2 * 4) { + for (j = 0; j <= width - 8; j += 4) { + double v = highbd_ssim_8x8(CONVERT_TO_SHORTPTR(img1 + j), stride_img1, + CONVERT_TO_SHORTPTR(img2 + j), stride_img2, bd, + shift); + ssim_total += v; + samples++; + } + } + ssim_total /= samples; + return ssim_total; +} + +#if CONFIG_INTERNAL_STATS void aom_highbd_calc_ssim(const YV12_BUFFER_CONFIG *source, const YV12_BUFFER_CONFIG *dest, double *weight, uint32_t bd, uint32_t in_bd, double *fast_ssim) { @@ -455,3 +459,25 @@ void aom_highbd_calc_ssim(const YV12_BUFFER_CONFIG *source, fast_ssim[1] = abc[0] * .8 + .1 * (abc[1] + abc[2]); } } +#endif // CONFIG_INTERNAL_STATS +#endif // CONFIG_AV1_HIGHBITDEPTH + +#if CONFIG_INTERNAL_STATS +void aom_calc_ssim(const YV12_BUFFER_CONFIG *orig, + const YV12_BUFFER_CONFIG *recon, const uint32_t bit_depth, + const uint32_t in_bit_depth, int is_hbd, double *weight, + double *frame_ssim2) { +#if CONFIG_AV1_HIGHBITDEPTH + if (is_hbd) { + aom_highbd_calc_ssim(orig, recon, weight, bit_depth, in_bit_depth, + frame_ssim2); + return; + } +#else + (void)bit_depth; + (void)in_bit_depth; + (void)is_hbd; +#endif // CONFIG_AV1_HIGHBITDEPTH + aom_lowbd_calc_ssim(orig, recon, weight, frame_ssim2); +} +#endif // CONFIG_INTERNAL_STATS diff --git a/third_party/libaom/source/libaom/aom_dsp/ssim.h b/third_party/libaom/source/libaom/aom_dsp/ssim.h index d635ef5bbe..fb92556a8c 100644 --- a/third_party/libaom/source/libaom/aom_dsp/ssim.h +++ b/third_party/libaom/source/libaom/aom_dsp/ssim.h @@ -12,14 +12,13 @@ #ifndef AOM_AOM_DSP_SSIM_H_ #define AOM_AOM_DSP_SSIM_H_ -#define MAX_SSIM_DB 100.0; - #ifdef __cplusplus extern "C" { #endif #include "config/aom_config.h" +#if CONFIG_INTERNAL_STATS #include "aom_scale/yv12config.h" // metrics used for calculating ssim, ssim2, dssim, and ssimc @@ -68,18 +67,35 @@ double aom_get_ssim_metrics(uint8_t *img1, int img1_pitch, uint8_t *img2, int img2_pitch, int width, int height, Ssimv *sv2, Metrics *m, int do_inconsistency); -void aom_calc_ssim(const YV12_BUFFER_CONFIG *source, - const YV12_BUFFER_CONFIG *dest, double *weight, - double *fast_ssim); +void aom_lowbd_calc_ssim(const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *dest, double *weight, + double *fast_ssim); double aom_calc_fastssim(const YV12_BUFFER_CONFIG *source, const YV12_BUFFER_CONFIG *dest, double *ssim_y, double *ssim_u, double *ssim_v, uint32_t bd, uint32_t in_bd); +#if CONFIG_AV1_HIGHBITDEPTH void aom_highbd_calc_ssim(const YV12_BUFFER_CONFIG *source, const YV12_BUFFER_CONFIG *dest, double *weight, uint32_t bd, uint32_t in_bd, double *fast_ssim); +#endif // CONFIG_AV1_HIGHBITDEPTH + +void aom_calc_ssim(const YV12_BUFFER_CONFIG *orig, + const YV12_BUFFER_CONFIG *recon, const uint32_t bit_depth, + const uint32_t in_bit_depth, int is_hbd, double *weight, + double *frame_ssim2); +#endif // CONFIG_INTERNAL_STATS + +double aom_ssim2(const uint8_t *img1, const uint8_t *img2, int stride_img1, + int stride_img2, int width, int height); + +#if CONFIG_AV1_HIGHBITDEPTH +double aom_highbd_ssim2(const uint8_t *img1, const uint8_t *img2, + int stride_img1, int stride_img2, int width, int height, + uint32_t bd, uint32_t shift); +#endif // CONFIG_AV1_HIGHBITDEPTH #ifdef __cplusplus } // extern "C" diff --git a/third_party/libaom/source/libaom/aom_dsp/vmaf.c b/third_party/libaom/source/libaom/aom_dsp/vmaf.c index 41653430c1..219e278303 100644 --- a/third_party/libaom/source/libaom/aom_dsp/vmaf.c +++ b/third_party/libaom/source/libaom/aom_dsp/vmaf.c @@ -12,9 +12,6 @@ #include "aom_dsp/vmaf.h" #include <assert.h> -#if !CONFIG_USE_VMAF_RC -#include <libvmaf.h> -#endif #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -24,10 +21,7 @@ #include <unistd.h> #endif -#if CONFIG_USE_VMAF_RC -#include <libvmaf/libvmaf.rc.h> -#endif - +#include <libvmaf/libvmaf.h> #include "aom_dsp/blend.h" #include "aom_ports/system_state.h" @@ -36,162 +30,18 @@ static void vmaf_fatal_error(const char *message) { exit(EXIT_FAILURE); } -#if !CONFIG_USE_VMAF_RC -typedef struct FrameData { - const YV12_BUFFER_CONFIG *source; - const YV12_BUFFER_CONFIG *distorted; - int frame_set; - int bit_depth; -} FrameData; - -// A callback function used to pass data to VMAF. -// Returns 0 after reading a frame. -// Returns 2 when there is no more frame to read. -static int read_frame(float *ref_data, float *main_data, float *temp_data, - int stride, void *user_data) { - FrameData *frames = (FrameData *)user_data; - - if (!frames->frame_set) { - const int width = frames->source->y_width; - const int height = frames->source->y_height; - assert(width == frames->distorted->y_width); - assert(height == frames->distorted->y_height); - - if (frames->source->flags & YV12_FLAG_HIGHBITDEPTH) { - const float scale_factor = 1.0f / (float)(1 << (frames->bit_depth - 8)); - uint16_t *ref_ptr = CONVERT_TO_SHORTPTR(frames->source->y_buffer); - uint16_t *main_ptr = CONVERT_TO_SHORTPTR(frames->distorted->y_buffer); - - for (int row = 0; row < height; ++row) { - for (int col = 0; col < width; ++col) { - ref_data[col] = scale_factor * (float)ref_ptr[col]; - } - ref_ptr += frames->source->y_stride; - ref_data += stride / sizeof(*ref_data); - } - - for (int row = 0; row < height; ++row) { - for (int col = 0; col < width; ++col) { - main_data[col] = scale_factor * (float)main_ptr[col]; - } - main_ptr += frames->distorted->y_stride; - main_data += stride / sizeof(*main_data); - } - } else { - uint8_t *ref_ptr = frames->source->y_buffer; - uint8_t *main_ptr = frames->distorted->y_buffer; - - for (int row = 0; row < height; ++row) { - for (int col = 0; col < width; ++col) { - ref_data[col] = (float)ref_ptr[col]; - } - ref_ptr += frames->source->y_stride; - ref_data += stride / sizeof(*ref_data); - } - - for (int row = 0; row < height; ++row) { - for (int col = 0; col < width; ++col) { - main_data[col] = (float)main_ptr[col]; - } - main_ptr += frames->distorted->y_stride; - main_data += stride / sizeof(*main_data); - } - } - frames->frame_set = 1; - return 0; - } - - (void)temp_data; - return 2; -} - -void aom_calc_vmaf(const char *model_path, const YV12_BUFFER_CONFIG *source, - const YV12_BUFFER_CONFIG *distorted, const int bit_depth, - double *const vmaf) { - aom_clear_system_state(); - const int width = source->y_width; - const int height = source->y_height; - FrameData frames = { source, distorted, 0, bit_depth }; - char *fmt = bit_depth == 10 ? "yuv420p10le" : "yuv420p"; - double vmaf_score; - const int ret = - compute_vmaf(&vmaf_score, fmt, width, height, read_frame, - /*user_data=*/&frames, (char *)model_path, - /*log_path=*/NULL, /*log_fmt=*/NULL, /*disable_clip=*/1, - /*disable_avx=*/0, /*enable_transform=*/0, - /*phone_model=*/0, /*do_psnr=*/0, /*do_ssim=*/0, - /*do_ms_ssim=*/0, /*pool_method=*/NULL, /*n_thread=*/0, - /*n_subsample=*/1, /*enable_conf_interval=*/0); - if (ret) vmaf_fatal_error("Failed to compute VMAF scores."); - - aom_clear_system_state(); - *vmaf = vmaf_score; -} - -void aom_calc_vmaf_multi_frame(void *user_data, const char *model_path, - int (*rd_frm)(float *ref_data, float *main_data, - float *temp_data, int stride_byte, - void *user_data), - int frame_width, int frame_height, int bit_depth, - double *vmaf) { - aom_clear_system_state(); - - char *fmt = bit_depth == 10 ? "yuv420p10le" : "yuv420p"; - int log_path_length = snprintf(NULL, 0, "vmaf_scores_%d.xml", getpid()) + 1; - char *log_path = malloc(log_path_length); - snprintf(log_path, log_path_length, "vmaf_scores_%d.xml", getpid()); - double vmaf_score; - const int ret = - compute_vmaf(&vmaf_score, fmt, frame_width, frame_height, rd_frm, - /*user_data=*/user_data, (char *)model_path, - /*log_path=*/log_path, /*log_fmt=*/NULL, /*disable_clip=*/0, - /*disable_avx=*/0, /*enable_transform=*/0, - /*phone_model=*/0, /*do_psnr=*/0, /*do_ssim=*/0, - /*do_ms_ssim=*/0, /*pool_method=*/NULL, /*n_thread=*/0, - /*n_subsample=*/1, /*enable_conf_interval=*/0); - FILE *vmaf_log = fopen(log_path, "r"); - free(log_path); - log_path = NULL; - if (vmaf_log == NULL || ret) { - vmaf_fatal_error("Failed to compute VMAF scores."); - } - - int frame_index = 0; - char buf[512]; - while (fgets(buf, 511, vmaf_log) != NULL) { - if (memcmp(buf, "\t\t<frame ", 9) == 0) { - char *p = strstr(buf, "vmaf="); - if (p != NULL && p[5] == '"') { - char *p2 = strstr(&p[6], "\""); - *p2 = '\0'; - const double score = atof(&p[6]); - if (score < 0.0 || score > 100.0) { - vmaf_fatal_error("Failed to compute VMAF scores."); - } - vmaf[frame_index++] = score; - } - } - } - fclose(vmaf_log); - - aom_clear_system_state(); -} -#endif - -#if CONFIG_USE_VMAF_RC -void aom_init_vmaf_model_rc(VmafModel **vmaf_model, const char *model_path) { +void aom_init_vmaf_model(VmafModel **vmaf_model, const char *model_path) { if (*vmaf_model != NULL) return; VmafModelConfig model_cfg; model_cfg.flags = VMAF_MODEL_FLAG_DISABLE_CLIP; model_cfg.name = "vmaf"; - model_cfg.path = (char *)model_path; - if (vmaf_model_load_from_path(vmaf_model, &model_cfg)) { + if (vmaf_model_load_from_path(vmaf_model, &model_cfg, model_path)) { vmaf_fatal_error("Failed to load VMAF model."); } } -void aom_close_vmaf_model_rc(VmafModel *vmaf_model) { +void aom_close_vmaf_model(VmafModel *vmaf_model) { vmaf_model_destroy(vmaf_model); } @@ -221,8 +71,9 @@ static void copy_picture(const int bit_depth, const YV12_BUFFER_CONFIG *src, } } -void aom_init_vmaf_context_rc(VmafContext **vmaf_context, VmafModel *vmaf_model, - bool cal_vmaf_neg) { +void aom_init_vmaf_context(VmafContext **vmaf_context, VmafModel *vmaf_model, + bool cal_vmaf_neg) { + // TODO(sdeng): make them CLI arguments. VmafConfiguration cfg; cfg.log_level = VMAF_LOG_LEVEL_NONE; cfg.n_threads = 0; @@ -233,41 +84,53 @@ void aom_init_vmaf_context_rc(VmafContext **vmaf_context, VmafModel *vmaf_model, vmaf_fatal_error("Failed to init VMAF context."); } - if (vmaf_use_features_from_model(*vmaf_context, vmaf_model)) { - vmaf_fatal_error("Failed to load feature extractors from VMAF model."); - } - if (cal_vmaf_neg) { VmafFeatureDictionary *vif_feature = NULL; - vmaf_feature_dictionary_set(&vif_feature, "vif_enhn_gain_limit", "1.0"); - if (vmaf_use_feature(*vmaf_context, "float_vif", vif_feature)) { + if (vmaf_feature_dictionary_set(&vif_feature, "vif_enhn_gain_limit", + "1.0")) { + vmaf_fatal_error("Failed to set vif_enhn_gain_limit."); + } + if (vmaf_model_feature_overload(vmaf_model, "float_vif", vif_feature)) { vmaf_fatal_error("Failed to use feature float_vif."); } VmafFeatureDictionary *adm_feature = NULL; - vmaf_feature_dictionary_set(&adm_feature, "adm_enhn_gain_limit", "1.0"); - if (vmaf_use_feature(*vmaf_context, "float_adm", adm_feature)) { + if (vmaf_feature_dictionary_set(&adm_feature, "adm_enhn_gain_limit", + "1.0")) { + vmaf_fatal_error("Failed to set adm_enhn_gain_limit."); + } + if (vmaf_model_feature_overload(vmaf_model, "adm", adm_feature)) { vmaf_fatal_error("Failed to use feature float_adm."); } } VmafFeatureDictionary *motion_force_zero = NULL; - vmaf_feature_dictionary_set(&motion_force_zero, "motion_force_zero", "true"); - if (vmaf_use_feature(*vmaf_context, "float_motion", motion_force_zero)) { + if (vmaf_feature_dictionary_set(&motion_force_zero, "motion_force_zero", + "1")) { + vmaf_fatal_error("Failed to set motion_force_zero."); + } + if (vmaf_model_feature_overload(vmaf_model, "float_motion", + motion_force_zero)) { vmaf_fatal_error("Failed to use feature float_motion."); } + + if (vmaf_use_features_from_model(*vmaf_context, vmaf_model)) { + vmaf_fatal_error("Failed to load feature extractors from VMAF model."); + } } -void aom_close_vmaf_context_rc(VmafContext *vmaf_context) { +void aom_close_vmaf_context(VmafContext *vmaf_context) { if (vmaf_close(vmaf_context)) { vmaf_fatal_error("Failed to close VMAF context."); } } -void aom_calc_vmaf_at_index_rc(VmafContext *vmaf_context, VmafModel *vmaf_model, - const YV12_BUFFER_CONFIG *source, - const YV12_BUFFER_CONFIG *distorted, - int bit_depth, int frame_index, double *vmaf) { +void aom_calc_vmaf(VmafModel *vmaf_model, const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *distorted, int bit_depth, + bool cal_vmaf_neg, double *vmaf) { + VmafContext *vmaf_context; + aom_init_vmaf_context(&vmaf_context, vmaf_model, cal_vmaf_neg); + const int frame_index = 0; VmafPicture ref, dist; if (vmaf_picture_alloc(&ref, VMAF_PIX_FMT_YUV420P, bit_depth, source->y_width, source->y_height) || @@ -282,10 +145,50 @@ void aom_calc_vmaf_at_index_rc(VmafContext *vmaf_context, VmafModel *vmaf_model, vmaf_fatal_error("Failed to read VMAF pictures."); } + if (vmaf_read_pictures(vmaf_context, NULL, NULL, 0)) { + vmaf_fatal_error("Failed to flush context."); + } + vmaf_picture_unref(&ref); vmaf_picture_unref(&dist); vmaf_score_at_index(vmaf_context, vmaf_model, vmaf, frame_index); + aom_close_vmaf_context(vmaf_context); } -#endif // CONFIG_USE_VMAF_RC +void aom_read_vmaf_image(VmafContext *vmaf_context, + const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *distorted, int bit_depth, + int frame_index) { + VmafPicture ref, dist; + if (vmaf_picture_alloc(&ref, VMAF_PIX_FMT_YUV420P, bit_depth, source->y_width, + source->y_height) || + vmaf_picture_alloc(&dist, VMAF_PIX_FMT_YUV420P, bit_depth, + source->y_width, source->y_height)) { + vmaf_fatal_error("Failed to alloc VMAF pictures."); + } + copy_picture(bit_depth, source, &ref); + copy_picture(bit_depth, distorted, &dist); + if (vmaf_read_pictures(vmaf_context, &ref, &dist, + /*picture index=*/frame_index)) { + vmaf_fatal_error("Failed to read VMAF pictures."); + } + + vmaf_picture_unref(&ref); + vmaf_picture_unref(&dist); +} + +double aom_calc_vmaf_at_index(VmafContext *vmaf_context, VmafModel *vmaf_model, + int frame_index) { + double vmaf; + if (vmaf_score_at_index(vmaf_context, vmaf_model, &vmaf, frame_index)) { + vmaf_fatal_error("Failed to calc VMAF scores."); + } + return vmaf; +} + +void aom_flush_vmaf_context(VmafContext *vmaf_context) { + if (vmaf_read_pictures(vmaf_context, NULL, NULL, 0)) { + vmaf_fatal_error("Failed to flush context."); + } +} diff --git a/third_party/libaom/source/libaom/aom_dsp/vmaf.h b/third_party/libaom/source/libaom/aom_dsp/vmaf.h index d9da223e29..3ba8c8d565 100644 --- a/third_party/libaom/source/libaom/aom_dsp/vmaf.h +++ b/third_party/libaom/source/libaom/aom_dsp/vmaf.h @@ -15,33 +15,28 @@ #include <stdbool.h> #include "aom_scale/yv12config.h" -#if CONFIG_USE_VMAF_RC typedef struct VmafContext VmafContext; typedef struct VmafModel VmafModel; -#endif - -#if CONFIG_USE_VMAF_RC -void aom_init_vmaf_context_rc(VmafContext **vmaf_context, VmafModel *vmaf_model, - bool cal_vmaf_neg); -void aom_close_vmaf_context_rc(VmafContext *vmaf_context); - -void aom_init_vmaf_model_rc(VmafModel **vmaf_model, const char *model_path); -void aom_close_vmaf_model_rc(VmafModel *vmaf_model); - -void aom_calc_vmaf_at_index_rc(VmafContext *vmaf_context, VmafModel *vmaf_model, - const YV12_BUFFER_CONFIG *source, - const YV12_BUFFER_CONFIG *distorted, - int bit_depth, int frame_index, double *vmaf); -#else -void aom_calc_vmaf(const char *model_path, const YV12_BUFFER_CONFIG *source, + +void aom_init_vmaf_context(VmafContext **vmaf_context, VmafModel *vmaf_model, + bool cal_vmaf_neg); +void aom_close_vmaf_context(VmafContext *vmaf_context); + +void aom_init_vmaf_model(VmafModel **vmaf_model, const char *model_path); +void aom_close_vmaf_model(VmafModel *vmaf_model); + +void aom_calc_vmaf(VmafModel *vmaf_model, const YV12_BUFFER_CONFIG *source, const YV12_BUFFER_CONFIG *distorted, int bit_depth, - double *vmaf); - -void aom_calc_vmaf_multi_frame( - void *user_data, const char *model_path, - int (*read_frame)(float *ref_data, float *main_data, float *temp_data, - int stride_byte, void *user_data), - int frame_width, int frame_height, int bit_depth, double *vmaf); -#endif // CONFIG_USE_VMAF_RC + bool cal_vmaf_neg, double *vmaf); + +void aom_read_vmaf_image(VmafContext *vmaf_context, + const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *distorted, int bit_depth, + int frame_index); + +double aom_calc_vmaf_at_index(VmafContext *vmaf_context, VmafModel *vmaf_model, + int frame_index); + +void aom_flush_vmaf_context(VmafContext *vmaf_context); #endif // AOM_AOM_DSP_VMAF_H_ diff --git a/third_party/libaom/source/libaom/aom_dsp/x86/highbd_sad_sse2.asm b/third_party/libaom/source/libaom/aom_dsp/x86/highbd_sad_sse2.asm index 58f1ac964e..a2510d5e7f 100644 --- a/third_party/libaom/source/libaom/aom_dsp/x86/highbd_sad_sse2.asm +++ b/third_party/libaom/source/libaom/aom_dsp/x86/highbd_sad_sse2.asm @@ -20,20 +20,21 @@ SECTION .text ; Arg 2: Height ; Arg 3: Number of general purpose registers: 5 for 32-bit build, 6 for 64-bit ; Arg 4: Type of function: if 0, normal sad; if 1, avg; if 2, skip rows -%macro HIGH_SAD_FN 4 +; Arg 5: Number of xmm registers. 8xh needs 8, others only need 7 +%macro HIGH_SAD_FN 4-5 7 %if %4 == 0 %if %3 == 5 -cglobal highbd_sad%1x%2, 4, %3, 7, src, src_stride, ref, ref_stride, n_rows +cglobal highbd_sad%1x%2, 4, %3, %5, src, src_stride, ref, ref_stride, n_rows %else ; %3 == 7 -cglobal highbd_sad%1x%2, 4, %3, 7, src, src_stride, ref, ref_stride, \ +cglobal highbd_sad%1x%2, 4, %3, %5, src, src_stride, ref, ref_stride, \ src_stride3, ref_stride3, n_rows %endif ; %3 == 5/7 %elif %4 == 1 ; avg %if %3 == 5 -cglobal highbd_sad%1x%2_avg, 5, 1 + %3, 7, src, src_stride, ref, ref_stride, \ +cglobal highbd_sad%1x%2_avg, 5, 1 + %3, %5, src, src_stride, ref, ref_stride, \ second_pred, n_rows %else ; %3 == 7 -cglobal highbd_sad%1x%2_avg, 5, ARCH_X86_64 + %3, 7, src, src_stride, \ +cglobal highbd_sad%1x%2_avg, 5, ARCH_X86_64 + %3, %5, src, src_stride, \ ref, ref_stride, \ second_pred, \ src_stride3, ref_stride3 @@ -356,7 +357,7 @@ HIGH_SAD16XN 8, 2 ; highbd_sad_skip_16x8_sse2 ; unsigned int aom_highbd_sad8x{4,8,16}_sse2(uint8_t *src, int src_stride, ; uint8_t *ref, int ref_stride); %macro HIGH_SAD8XN 1-2 0 - HIGH_SAD_FN 8, %1, 7, %2 + HIGH_SAD_FN 8, %1, 7, %2, 8 %if %2 == 2 ; skip rows, so divide number of rows by 2 mov n_rowsd, %1/8 %else @@ -377,22 +378,30 @@ HIGH_SAD16XN 8, 2 ; highbd_sad_skip_16x8_sse2 pavgw m4, [second_predq+mmsize*3] lea second_predq, [second_predq+mmsize*4] %endif - mova m5, [srcq] - psubusw m5, m1 - psubusw m1, [srcq] + mova m7, m1 + movu m5, [srcq] + psubusw m1, m5 + psubusw m5, m7 por m1, m5 - mova m5, [srcq+src_strideq*2] - psubusw m5, m2 - psubusw m2, [srcq+src_strideq*2] + + mova m7, m2 + movu m5, [srcq+src_strideq*2] + psubusw m2, m5 + psubusw m5, m7 por m2, m5 - mova m5, [srcq+src_strideq*4] - psubusw m5, m3 - psubusw m3, [srcq+src_strideq*4] + + mova m7, m3 + movu m5, [srcq+src_strideq*4] + psubusw m3, m5 + psubusw m5, m7 por m3, m5 - mova m5, [srcq+src_stride3q*2] - psubusw m5, m4 - psubusw m4, [srcq+src_stride3q*2] + + mova m7, m4 + movu m5, [srcq+src_stride3q*2] + psubusw m4, m5 + psubusw m5, m7 por m4, m5 + paddw m1, m2 paddw m3, m4 movhlps m2, m1 diff --git a/third_party/libaom/source/libaom/aom_dsp/x86/variance_impl_avx2.c b/third_party/libaom/source/libaom/aom_dsp/x86/variance_impl_avx2.c index f779270ae3..163e4cc566 100644 --- a/third_party/libaom/source/libaom/aom_dsp/x86/variance_impl_avx2.c +++ b/third_party/libaom/source/libaom/aom_dsp/x86/variance_impl_avx2.c @@ -616,7 +616,7 @@ unsigned int aom_sub_pixel_avg_variance32xh_avx2( src += src_stride; dst += dst_stride; } - } else if (y_offset == 8) { + } else if (y_offset == 4) { __m256i src_next_reg; for (i = 0; i < height; i++) { LOAD_SRC_DST @@ -652,8 +652,8 @@ unsigned int aom_sub_pixel_avg_variance32xh_avx2( dst += dst_stride; } } - // x_offset = 8 and y_offset = 0 - } else if (x_offset == 8) { + // x_offset = 4 and y_offset = 0 + } else if (x_offset == 4) { if (y_offset == 0) { __m256i src_next_reg; for (i = 0; i < height; i++) { @@ -668,8 +668,8 @@ unsigned int aom_sub_pixel_avg_variance32xh_avx2( src += src_stride; dst += dst_stride; } - // x_offset = 8 and y_offset = 8 - } else if (y_offset == 8) { + // x_offset = 4 and y_offset = 4 + } else if (y_offset == 4) { __m256i src_next_reg, src_avg; // load source and another source starting from the next // following byte @@ -691,7 +691,7 @@ unsigned int aom_sub_pixel_avg_variance32xh_avx2( CALC_SUM_SSE_INSIDE_LOOP dst += dst_stride; } - // x_offset = 8 and y_offset = bilin interpolation + // x_offset = 4 and y_offset = bilin interpolation } else { __m256i filter, pw8, src_next_reg, src_avg; y_offset <<= 5; @@ -741,8 +741,8 @@ unsigned int aom_sub_pixel_avg_variance32xh_avx2( src += src_stride; dst += dst_stride; } - // x_offset = bilin interpolation and y_offset = 8 - } else if (y_offset == 8) { + // x_offset = bilin interpolation and y_offset = 4 + } else if (y_offset == 4) { __m256i filter, pw8, src_next_reg, src_pack; x_offset <<= 5; filter = _mm256_load_si256( |