aboutsummaryrefslogtreecommitdiff
path: root/av1/common/x86/resize_ssse3.c
diff options
context:
space:
mode:
Diffstat (limited to 'av1/common/x86/resize_ssse3.c')
-rw-r--r--av1/common/x86/resize_ssse3.c125
1 files changed, 76 insertions, 49 deletions
diff --git a/av1/common/x86/resize_ssse3.c b/av1/common/x86/resize_ssse3.c
index 0d871de71..a7fdb5a9a 100644
--- a/av1/common/x86/resize_ssse3.c
+++ b/av1/common/x86/resize_ssse3.c
@@ -809,13 +809,44 @@ static void scale_plane_1_to_2_phase_0(const uint8_t *src,
} while (--y);
}
+// There are SIMD optimizations for 1/4, 1/2 and 3/4 downscaling and 2x
+// upscaling in SSSE3.
+static INLINE bool has_normative_scaler_ssse3(const int src_width,
+ const int src_height,
+ const int dst_width,
+ const int dst_height) {
+ const bool has_normative_scaler =
+ (2 * dst_width == src_width && 2 * dst_height == src_height) ||
+ (4 * dst_width == src_width && 4 * dst_height == src_height) ||
+ (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) ||
+ (dst_width == src_width * 2 && dst_height == src_height * 2);
+
+ return has_normative_scaler;
+}
+
void av1_resize_and_extend_frame_ssse3(const YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst,
const InterpFilter filter,
const int phase, const int num_planes) {
+ bool has_normative_scaler =
+ has_normative_scaler_ssse3(src->y_crop_width, src->y_crop_height,
+ dst->y_crop_width, dst->y_crop_height);
+
+ if (num_planes > 1) {
+ has_normative_scaler =
+ has_normative_scaler &&
+ has_normative_scaler_ssse3(src->uv_crop_width, src->uv_crop_height,
+ dst->uv_crop_width, dst->uv_crop_height);
+ }
+
+ if (!has_normative_scaler) {
+ av1_resize_and_extend_frame_c(src, dst, filter, phase, num_planes);
+ return;
+ }
+
// We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
// the static analysis warnings.
- int scaled = 0;
+ int malloc_failed = 0;
for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); ++i) {
const int is_uv = i > 0;
const int src_w = src->crop_widths[is_uv];
@@ -828,7 +859,6 @@ void av1_resize_and_extend_frame_ssse3(const YV12_BUFFER_CONFIG *src,
if (2 * dst_w == src_w && 2 * dst_h == src_h) {
// 2 to 1
- scaled = 1;
if (phase == 0) {
scale_plane_2_to_1_phase_0(src->buffers[i], src->strides[is_uv],
dst->buffers[i], dst->strides[is_uv], dst_w,
@@ -845,22 +875,20 @@ void av1_resize_and_extend_frame_ssse3(const YV12_BUFFER_CONFIG *src,
const int buffer_height = (2 * dst_y_h + SUBPEL_TAPS - 2 + 7) & ~7;
uint8_t *const temp_buffer =
(uint8_t *)malloc(buffer_stride * buffer_height);
- if (temp_buffer) {
- const InterpKernel *interp_kernel =
- (const InterpKernel *)av1_interp_filter_params_list[filter]
- .filter_ptr;
- scale_plane_2_to_1_general(src->buffers[i], src->strides[is_uv],
- dst->buffers[i], dst->strides[is_uv],
- dst_w, dst_h, interp_kernel[phase],
- temp_buffer);
- free(temp_buffer);
- } else {
- scaled = 0;
+ if (!temp_buffer) {
+ malloc_failed = 1;
+ break;
}
+ const InterpKernel *interp_kernel =
+ (const InterpKernel *)av1_interp_filter_params_list[filter]
+ .filter_ptr;
+ scale_plane_2_to_1_general(src->buffers[i], src->strides[is_uv],
+ dst->buffers[i], dst->strides[is_uv], dst_w,
+ dst_h, interp_kernel[phase], temp_buffer);
+ free(temp_buffer);
}
} else if (4 * dst_w == src_w && 4 * dst_h == src_h) {
// 4 to 1
- scaled = 1;
if (phase == 0) {
scale_plane_4_to_1_phase_0(src->buffers[i], src->strides[is_uv],
dst->buffers[i], dst->strides[is_uv], dst_w,
@@ -880,18 +908,17 @@ void av1_resize_and_extend_frame_ssse3(const YV12_BUFFER_CONFIG *src,
const int extra_padding = 16;
uint8_t *const temp_buffer =
(uint8_t *)malloc(buffer_stride * buffer_height + extra_padding);
- if (temp_buffer) {
- const InterpKernel *interp_kernel =
- (const InterpKernel *)av1_interp_filter_params_list[filter]
- .filter_ptr;
- scale_plane_4_to_1_general(src->buffers[i], src->strides[is_uv],
- dst->buffers[i], dst->strides[is_uv],
- dst_w, dst_h, interp_kernel[phase],
- temp_buffer);
- free(temp_buffer);
- } else {
- scaled = 0;
+ if (!temp_buffer) {
+ malloc_failed = 1;
+ break;
}
+ const InterpKernel *interp_kernel =
+ (const InterpKernel *)av1_interp_filter_params_list[filter]
+ .filter_ptr;
+ scale_plane_4_to_1_general(src->buffers[i], src->strides[is_uv],
+ dst->buffers[i], dst->strides[is_uv], dst_w,
+ dst_h, interp_kernel[phase], temp_buffer);
+ free(temp_buffer);
}
} else if (4 * dst_w == 3 * src_w && 4 * dst_h == 3 * src_h) {
// 4 to 3
@@ -910,36 +937,36 @@ void av1_resize_and_extend_frame_ssse3(const YV12_BUFFER_CONFIG *src,
: 0;
const int buffer_size = buffer_stride_hor * buffer_height + extra_padding;
uint8_t *const temp_buffer = (uint8_t *)malloc(buffer_size);
- if (temp_buffer) {
- scaled = 1;
- const InterpKernel *interp_kernel =
- (const InterpKernel *)av1_interp_filter_params_list[filter]
- .filter_ptr;
- scale_plane_4_to_3_general(src->buffers[i], src->strides[is_uv],
- dst->buffers[i], dst->strides[is_uv], dst_w,
- dst_h, interp_kernel, phase, temp_buffer);
- free(temp_buffer);
- } else {
- scaled = 0;
+ if (!temp_buffer) {
+ malloc_failed = 1;
+ break;
}
- } else if (dst_w == src_w * 2 && dst_h == src_h * 2) {
+ const InterpKernel *interp_kernel =
+ (const InterpKernel *)av1_interp_filter_params_list[filter]
+ .filter_ptr;
+ scale_plane_4_to_3_general(src->buffers[i], src->strides[is_uv],
+ dst->buffers[i], dst->strides[is_uv], dst_w,
+ dst_h, interp_kernel, phase, temp_buffer);
+ free(temp_buffer);
+ } else {
+ assert(dst_w == src_w * 2 && dst_h == src_h * 2);
// 1 to 2
uint8_t *const temp_buffer = (uint8_t *)malloc(8 * ((src_y_w + 7) & ~7));
- if (temp_buffer) {
- scaled = 1;
- const InterpKernel *interp_kernel =
- (const InterpKernel *)av1_interp_filter_params_list[filter]
- .filter_ptr;
- scale_plane_1_to_2_phase_0(src->buffers[i], src->strides[is_uv],
- dst->buffers[i], dst->strides[is_uv], src_w,
- src_h, interp_kernel[8], temp_buffer);
- free(temp_buffer);
- } else {
- scaled = 0;
+ if (!temp_buffer) {
+ malloc_failed = 1;
+ break;
}
+ const InterpKernel *interp_kernel =
+ (const InterpKernel *)av1_interp_filter_params_list[filter]
+ .filter_ptr;
+ scale_plane_1_to_2_phase_0(src->buffers[i], src->strides[is_uv],
+ dst->buffers[i], dst->strides[is_uv], src_w,
+ src_h, interp_kernel[8], temp_buffer);
+ free(temp_buffer);
}
}
- if (!scaled) {
+
+ if (malloc_failed) {
av1_resize_and_extend_frame_c(src, dst, filter, phase, num_planes);
} else {
aom_extend_frame_borders(dst, num_planes);