diff options
Diffstat (limited to 'av1/common/x86/resize_ssse3.c')
-rw-r--r-- | av1/common/x86/resize_ssse3.c | 125 |
1 files changed, 76 insertions, 49 deletions
diff --git a/av1/common/x86/resize_ssse3.c b/av1/common/x86/resize_ssse3.c index 0d871de71..a7fdb5a9a 100644 --- a/av1/common/x86/resize_ssse3.c +++ b/av1/common/x86/resize_ssse3.c @@ -809,13 +809,44 @@ static void scale_plane_1_to_2_phase_0(const uint8_t *src, } while (--y); } +// There are SIMD optimizations for 1/4, 1/2 and 3/4 downscaling and 2x upscaling +// in SSSE3. +static INLINE bool has_normative_scaler_ssse3(const int src_width, + const int src_height, + const int dst_width, + const int dst_height) { + const bool has_normative_scaler = + (2 * dst_width == src_width && 2 * dst_height == src_height) || + (4 * dst_width == src_width && 4 * dst_height == src_height) || + (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) || + (dst_width == src_width * 2 && dst_height == src_height * 2); + + return has_normative_scaler; +} + void av1_resize_and_extend_frame_ssse3(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, const InterpFilter filter, const int phase, const int num_planes) { + bool has_normative_scaler = + has_normative_scaler_ssse3(src->y_crop_width, src->y_crop_height, + dst->y_crop_width, dst->y_crop_height); + + if (num_planes > 1) { + has_normative_scaler = + has_normative_scaler && + has_normative_scaler_ssse3(src->uv_crop_width, src->uv_crop_height, + dst->uv_crop_width, dst->uv_crop_height); + } + + if (!has_normative_scaler) { + av1_resize_and_extend_frame_c(src, dst, filter, phase, num_planes); + return; + } + // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet // the static analysis warnings. 
- int scaled = 0; + int malloc_failed = 0; for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); ++i) { const int is_uv = i > 0; const int src_w = src->crop_widths[is_uv]; @@ -828,7 +859,6 @@ void av1_resize_and_extend_frame_ssse3(const YV12_BUFFER_CONFIG *src, if (2 * dst_w == src_w && 2 * dst_h == src_h) { // 2 to 1 - scaled = 1; if (phase == 0) { scale_plane_2_to_1_phase_0(src->buffers[i], src->strides[is_uv], dst->buffers[i], dst->strides[is_uv], dst_w, @@ -845,22 +875,20 @@ void av1_resize_and_extend_frame_ssse3(const YV12_BUFFER_CONFIG *src, const int buffer_height = (2 * dst_y_h + SUBPEL_TAPS - 2 + 7) & ~7; uint8_t *const temp_buffer = (uint8_t *)malloc(buffer_stride * buffer_height); - if (temp_buffer) { - const InterpKernel *interp_kernel = - (const InterpKernel *)av1_interp_filter_params_list[filter] - .filter_ptr; - scale_plane_2_to_1_general(src->buffers[i], src->strides[is_uv], - dst->buffers[i], dst->strides[is_uv], - dst_w, dst_h, interp_kernel[phase], - temp_buffer); - free(temp_buffer); - } else { - scaled = 0; + if (!temp_buffer) { + malloc_failed = 1; + break; } + const InterpKernel *interp_kernel = + (const InterpKernel *)av1_interp_filter_params_list[filter] + .filter_ptr; + scale_plane_2_to_1_general(src->buffers[i], src->strides[is_uv], + dst->buffers[i], dst->strides[is_uv], dst_w, + dst_h, interp_kernel[phase], temp_buffer); + free(temp_buffer); } } else if (4 * dst_w == src_w && 4 * dst_h == src_h) { // 4 to 1 - scaled = 1; if (phase == 0) { scale_plane_4_to_1_phase_0(src->buffers[i], src->strides[is_uv], dst->buffers[i], dst->strides[is_uv], dst_w, @@ -880,18 +908,17 @@ void av1_resize_and_extend_frame_ssse3(const YV12_BUFFER_CONFIG *src, const int extra_padding = 16; uint8_t *const temp_buffer = (uint8_t *)malloc(buffer_stride * buffer_height + extra_padding); - if (temp_buffer) { - const InterpKernel *interp_kernel = - (const InterpKernel *)av1_interp_filter_params_list[filter] - .filter_ptr; - 
scale_plane_4_to_1_general(src->buffers[i], src->strides[is_uv], - dst->buffers[i], dst->strides[is_uv], - dst_w, dst_h, interp_kernel[phase], - temp_buffer); - free(temp_buffer); - } else { - scaled = 0; + if (!temp_buffer) { + malloc_failed = 1; + break; } + const InterpKernel *interp_kernel = + (const InterpKernel *)av1_interp_filter_params_list[filter] + .filter_ptr; + scale_plane_4_to_1_general(src->buffers[i], src->strides[is_uv], + dst->buffers[i], dst->strides[is_uv], dst_w, + dst_h, interp_kernel[phase], temp_buffer); + free(temp_buffer); } } else if (4 * dst_w == 3 * src_w && 4 * dst_h == 3 * src_h) { // 4 to 3 @@ -910,36 +937,36 @@ void av1_resize_and_extend_frame_ssse3(const YV12_BUFFER_CONFIG *src, : 0; const int buffer_size = buffer_stride_hor * buffer_height + extra_padding; uint8_t *const temp_buffer = (uint8_t *)malloc(buffer_size); - if (temp_buffer) { - scaled = 1; - const InterpKernel *interp_kernel = - (const InterpKernel *)av1_interp_filter_params_list[filter] - .filter_ptr; - scale_plane_4_to_3_general(src->buffers[i], src->strides[is_uv], - dst->buffers[i], dst->strides[is_uv], dst_w, - dst_h, interp_kernel, phase, temp_buffer); - free(temp_buffer); - } else { - scaled = 0; + if (!temp_buffer) { + malloc_failed = 1; + break; } - } else if (dst_w == src_w * 2 && dst_h == src_h * 2) { + const InterpKernel *interp_kernel = + (const InterpKernel *)av1_interp_filter_params_list[filter] + .filter_ptr; + scale_plane_4_to_3_general(src->buffers[i], src->strides[is_uv], + dst->buffers[i], dst->strides[is_uv], dst_w, + dst_h, interp_kernel, phase, temp_buffer); + free(temp_buffer); + } else { + assert(dst_w == src_w * 2 && dst_h == src_h * 2); // 1 to 2 uint8_t *const temp_buffer = (uint8_t *)malloc(8 * ((src_y_w + 7) & ~7)); - if (temp_buffer) { - scaled = 1; - const InterpKernel *interp_kernel = - (const InterpKernel *)av1_interp_filter_params_list[filter] - .filter_ptr; - scale_plane_1_to_2_phase_0(src->buffers[i], src->strides[is_uv], - 
dst->buffers[i], dst->strides[is_uv], src_w, - src_h, interp_kernel[8], temp_buffer); - free(temp_buffer); - } else { - scaled = 0; + if (!temp_buffer) { + malloc_failed = 1; + break; } + const InterpKernel *interp_kernel = + (const InterpKernel *)av1_interp_filter_params_list[filter] + .filter_ptr; + scale_plane_1_to_2_phase_0(src->buffers[i], src->strides[is_uv], + dst->buffers[i], dst->strides[is_uv], src_w, + src_h, interp_kernel[8], temp_buffer); + free(temp_buffer); } } - if (!scaled) { + + if (malloc_failed) { av1_resize_and_extend_frame_c(src, dst, filter, phase, num_planes); } else { aom_extend_frame_borders(dst, num_planes); |