aboutsummaryrefslogtreecommitdiff
path: root/libvpx/vp9/encoder/x86/highbd_temporal_filter_sse4.c
diff options
context:
space:
mode:
Diffstat (limited to 'libvpx/vp9/encoder/x86/highbd_temporal_filter_sse4.c')
-rw-r--r--libvpx/vp9/encoder/x86/highbd_temporal_filter_sse4.c135
1 files changed, 42 insertions, 93 deletions
diff --git a/libvpx/vp9/encoder/x86/highbd_temporal_filter_sse4.c b/libvpx/vp9/encoder/x86/highbd_temporal_filter_sse4.c
index 4fa24512c..a7f5117cf 100644
--- a/libvpx/vp9/encoder/x86/highbd_temporal_filter_sse4.c
+++ b/libvpx/vp9/encoder/x86/highbd_temporal_filter_sse4.c
@@ -191,13 +191,11 @@ static INLINE void highbd_read_chroma_dist_row_8(
}
static void vp9_highbd_apply_temporal_filter_luma_8(
- const uint16_t *y_src, int y_src_stride, const uint16_t *y_pre,
- int y_pre_stride, const uint16_t *u_src, const uint16_t *v_src,
- int uv_src_stride, const uint16_t *u_pre, const uint16_t *v_pre,
- int uv_pre_stride, unsigned int block_width, unsigned int block_height,
- int ss_x, int ss_y, int strength, int use_whole_blk, uint32_t *y_accum,
- uint16_t *y_count, const uint32_t *y_dist, const uint32_t *u_dist,
- const uint32_t *v_dist, const uint32_t *const *neighbors_first,
+ const uint16_t *y_pre, int y_pre_stride, unsigned int block_width,
+ unsigned int block_height, int ss_x, int ss_y, int strength,
+ int use_whole_blk, uint32_t *y_accum, uint16_t *y_count,
+ const uint32_t *y_dist, const uint32_t *u_dist, const uint32_t *v_dist,
+ const uint32_t *const *neighbors_first,
const uint32_t *const *neighbors_second, int top_weight,
int bottom_weight) {
const int rounding = (1 << strength) >> 1;
@@ -256,17 +254,12 @@ static void vp9_highbd_apply_temporal_filter_luma_8(
highbd_accumulate_and_store_8(sum_row_first, sum_row_second, y_pre, y_count,
y_accum);
- y_src += y_src_stride;
y_pre += y_pre_stride;
y_count += y_pre_stride;
y_accum += y_pre_stride;
y_dist += DIST_STRIDE;
- u_src += uv_src_stride;
- u_pre += uv_pre_stride;
u_dist += DIST_STRIDE;
- v_src += uv_src_stride;
- v_pre += uv_pre_stride;
v_dist += DIST_STRIDE;
// Then all the rows except the last one
@@ -300,11 +293,7 @@ static void vp9_highbd_apply_temporal_filter_luma_8(
highbd_read_chroma_dist_row_8(ss_x, u_dist, v_dist, &u_first, &u_second,
&v_first, &v_second);
- u_src += uv_src_stride;
- u_pre += uv_pre_stride;
u_dist += DIST_STRIDE;
- v_src += uv_src_stride;
- v_pre += uv_pre_stride;
v_dist += DIST_STRIDE;
}
@@ -320,7 +309,6 @@ static void vp9_highbd_apply_temporal_filter_luma_8(
highbd_accumulate_and_store_8(sum_row_first, sum_row_second, y_pre, y_count,
y_accum);
- y_src += y_src_stride;
y_pre += y_pre_stride;
y_count += y_pre_stride;
y_accum += y_pre_stride;
@@ -364,13 +352,10 @@ static void vp9_highbd_apply_temporal_filter_luma_8(
// Perform temporal filter for the luma component.
static void vp9_highbd_apply_temporal_filter_luma(
- const uint16_t *y_src, int y_src_stride, const uint16_t *y_pre,
- int y_pre_stride, const uint16_t *u_src, const uint16_t *v_src,
- int uv_src_stride, const uint16_t *u_pre, const uint16_t *v_pre,
- int uv_pre_stride, unsigned int block_width, unsigned int block_height,
- int ss_x, int ss_y, int strength, const int *blk_fw, int use_whole_blk,
- uint32_t *y_accum, uint16_t *y_count, const uint32_t *y_dist,
- const uint32_t *u_dist, const uint32_t *v_dist) {
+ const uint16_t *y_pre, int y_pre_stride, unsigned int block_width,
+ unsigned int block_height, int ss_x, int ss_y, int strength,
+ const int *blk_fw, int use_whole_blk, uint32_t *y_accum, uint16_t *y_count,
+ const uint32_t *y_dist, const uint32_t *u_dist, const uint32_t *v_dist) {
unsigned int blk_col = 0, uv_blk_col = 0;
const unsigned int blk_col_step = 8, uv_blk_col_step = 8 >> ss_x;
const unsigned int mid_width = block_width >> 1,
@@ -384,9 +369,7 @@ static void vp9_highbd_apply_temporal_filter_luma(
neighbors_first = HIGHBD_LUMA_LEFT_COLUMN_NEIGHBORS;
neighbors_second = HIGHBD_LUMA_MIDDLE_COLUMN_NEIGHBORS;
vp9_highbd_apply_temporal_filter_luma_8(
- y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride,
- u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col,
- v_pre + uv_blk_col, uv_pre_stride, blk_col_step, block_height, ss_x, ss_y,
+ y_pre + blk_col, y_pre_stride, blk_col_step, block_height, ss_x, ss_y,
strength, use_whole_blk, y_accum + blk_col, y_count + blk_col,
y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col,
neighbors_first, neighbors_second, top_weight, bottom_weight);
@@ -399,13 +382,10 @@ static void vp9_highbd_apply_temporal_filter_luma(
for (; blk_col < mid_width;
blk_col += blk_col_step, uv_blk_col += uv_blk_col_step) {
vp9_highbd_apply_temporal_filter_luma_8(
- y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride,
- u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride,
- u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, blk_col_step,
- block_height, ss_x, ss_y, strength, use_whole_blk, y_accum + blk_col,
- y_count + blk_col, y_dist + blk_col, u_dist + uv_blk_col,
- v_dist + uv_blk_col, neighbors_first, neighbors_second, top_weight,
- bottom_weight);
+ y_pre + blk_col, y_pre_stride, blk_col_step, block_height, ss_x, ss_y,
+ strength, use_whole_blk, y_accum + blk_col, y_count + blk_col,
+ y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col,
+ neighbors_first, neighbors_second, top_weight, bottom_weight);
}
if (!use_whole_blk) {
@@ -417,21 +397,16 @@ static void vp9_highbd_apply_temporal_filter_luma(
for (; blk_col < last_width;
blk_col += blk_col_step, uv_blk_col += uv_blk_col_step) {
vp9_highbd_apply_temporal_filter_luma_8(
- y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride,
- u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride,
- u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, blk_col_step,
- block_height, ss_x, ss_y, strength, use_whole_blk, y_accum + blk_col,
- y_count + blk_col, y_dist + blk_col, u_dist + uv_blk_col,
- v_dist + uv_blk_col, neighbors_first, neighbors_second, top_weight,
- bottom_weight);
+ y_pre + blk_col, y_pre_stride, blk_col_step, block_height, ss_x, ss_y,
+ strength, use_whole_blk, y_accum + blk_col, y_count + blk_col,
+ y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col,
+ neighbors_first, neighbors_second, top_weight, bottom_weight);
}
// Right
neighbors_second = HIGHBD_LUMA_RIGHT_COLUMN_NEIGHBORS;
vp9_highbd_apply_temporal_filter_luma_8(
- y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride,
- u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col,
- v_pre + uv_blk_col, uv_pre_stride, blk_col_step, block_height, ss_x, ss_y,
+ y_pre + blk_col, y_pre_stride, blk_col_step, block_height, ss_x, ss_y,
strength, use_whole_blk, y_accum + blk_col, y_count + blk_col,
y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col,
neighbors_first, neighbors_second, top_weight, bottom_weight);
@@ -491,13 +466,11 @@ static INLINE void highbd_add_luma_dist_to_8_chroma_mod(
// blk_fw as an array of size 4 for the weights for each of the 4 subblocks,
// else use top_weight for top half, and bottom weight for bottom half.
static void vp9_highbd_apply_temporal_filter_chroma_8(
- const uint16_t *y_src, int y_src_stride, const uint16_t *y_pre,
- int y_pre_stride, const uint16_t *u_src, const uint16_t *v_src,
- int uv_src_stride, const uint16_t *u_pre, const uint16_t *v_pre,
- int uv_pre_stride, unsigned int uv_block_width,
- unsigned int uv_block_height, int ss_x, int ss_y, int strength,
- uint32_t *u_accum, uint16_t *u_count, uint32_t *v_accum, uint16_t *v_count,
- const uint32_t *y_dist, const uint32_t *u_dist, const uint32_t *v_dist,
+ const uint16_t *u_pre, const uint16_t *v_pre, int uv_pre_stride,
+ unsigned int uv_block_width, unsigned int uv_block_height, int ss_x,
+ int ss_y, int strength, uint32_t *u_accum, uint16_t *u_count,
+ uint32_t *v_accum, uint16_t *v_count, const uint32_t *y_dist,
+ const uint32_t *u_dist, const uint32_t *v_dist,
const uint32_t *const *neighbors_fst, const uint32_t *const *neighbors_snd,
int top_weight, int bottom_weight, const int *blk_fw) {
const int rounding = (1 << strength) >> 1;
@@ -565,10 +538,8 @@ static void vp9_highbd_apply_temporal_filter_chroma_8(
highbd_accumulate_and_store_8(v_sum_row_fst, v_sum_row_snd, v_pre, v_count,
v_accum);
- u_src += uv_src_stride;
u_pre += uv_pre_stride;
u_dist += DIST_STRIDE;
- v_src += uv_src_stride;
v_pre += uv_pre_stride;
v_dist += DIST_STRIDE;
u_count += uv_pre_stride;
@@ -576,8 +547,6 @@ static void vp9_highbd_apply_temporal_filter_chroma_8(
v_count += uv_pre_stride;
v_accum += uv_pre_stride;
- y_src += y_src_stride * (1 + ss_y);
- y_pre += y_pre_stride * (1 + ss_y);
y_dist += DIST_STRIDE * (1 + ss_y);
// Then all the rows except the last one
@@ -649,10 +618,8 @@ static void vp9_highbd_apply_temporal_filter_chroma_8(
highbd_accumulate_and_store_8(v_sum_row_fst, v_sum_row_snd, v_pre, v_count,
v_accum);
- u_src += uv_src_stride;
u_pre += uv_pre_stride;
u_dist += DIST_STRIDE;
- v_src += uv_src_stride;
v_pre += uv_pre_stride;
v_dist += DIST_STRIDE;
u_count += uv_pre_stride;
@@ -660,8 +627,6 @@ static void vp9_highbd_apply_temporal_filter_chroma_8(
v_count += uv_pre_stride;
v_accum += uv_pre_stride;
- y_src += y_src_stride * (1 + ss_y);
- y_pre += y_pre_stride * (1 + ss_y);
y_dist += DIST_STRIDE * (1 + ss_y);
}
@@ -720,12 +685,10 @@ static void vp9_highbd_apply_temporal_filter_chroma_8(
// Perform temporal filter for the chroma components.
static void vp9_highbd_apply_temporal_filter_chroma(
- const uint16_t *y_src, int y_src_stride, const uint16_t *y_pre,
- int y_pre_stride, const uint16_t *u_src, const uint16_t *v_src,
- int uv_src_stride, const uint16_t *u_pre, const uint16_t *v_pre,
- int uv_pre_stride, unsigned int block_width, unsigned int block_height,
- int ss_x, int ss_y, int strength, const int *blk_fw, int use_whole_blk,
- uint32_t *u_accum, uint16_t *u_count, uint32_t *v_accum, uint16_t *v_count,
+ const uint16_t *u_pre, const uint16_t *v_pre, int uv_pre_stride,
+ unsigned int block_width, unsigned int block_height, int ss_x, int ss_y,
+ int strength, const int *blk_fw, int use_whole_blk, uint32_t *u_accum,
+ uint16_t *u_count, uint32_t *v_accum, uint16_t *v_count,
const uint32_t *y_dist, const uint32_t *u_dist, const uint32_t *v_dist) {
const unsigned int uv_width = block_width >> ss_x,
uv_height = block_height >> ss_y;
@@ -755,8 +718,6 @@ static void vp9_highbd_apply_temporal_filter_chroma(
if (use_whole_blk) {
vp9_highbd_apply_temporal_filter_chroma_8(
- y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride,
- u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride,
u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width,
uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col,
u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col,
@@ -764,8 +725,6 @@ static void vp9_highbd_apply_temporal_filter_chroma(
neighbors_fst, neighbors_snd, top_weight, bottom_weight, NULL);
} else {
vp9_highbd_apply_temporal_filter_chroma_8(
- y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride,
- u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride,
u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width,
uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col,
u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col,
@@ -789,13 +748,11 @@ static void vp9_highbd_apply_temporal_filter_chroma(
}
vp9_highbd_apply_temporal_filter_chroma_8(
- y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride,
- u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col,
- v_pre + uv_blk_col, uv_pre_stride, uv_width, uv_height, ss_x, ss_y,
- strength, u_accum + uv_blk_col, u_count + uv_blk_col,
- v_accum + uv_blk_col, v_count + uv_blk_col, y_dist + blk_col,
- u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors_fst, neighbors_snd,
- top_weight, bottom_weight, NULL);
+ u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width,
+ uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col,
+ u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col,
+ y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors_fst,
+ neighbors_snd, top_weight, bottom_weight, NULL);
blk_col += blk_col_step;
uv_blk_col += uv_blk_col_step;
@@ -812,8 +769,6 @@ static void vp9_highbd_apply_temporal_filter_chroma(
for (; uv_blk_col < uv_mid_width;
blk_col += blk_col_step, uv_blk_col += uv_blk_col_step) {
vp9_highbd_apply_temporal_filter_chroma_8(
- y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride,
- u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride,
u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width,
uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col,
u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col,
@@ -830,8 +785,6 @@ static void vp9_highbd_apply_temporal_filter_chroma(
for (; uv_blk_col < uv_last_width;
blk_col += blk_col_step, uv_blk_col += uv_blk_col_step) {
vp9_highbd_apply_temporal_filter_chroma_8(
- y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride,
- u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride,
u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width,
uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col,
u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col,
@@ -849,13 +802,11 @@ static void vp9_highbd_apply_temporal_filter_chroma(
}
vp9_highbd_apply_temporal_filter_chroma_8(
- y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride,
- u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col,
- v_pre + uv_blk_col, uv_pre_stride, uv_width, uv_height, ss_x, ss_y,
- strength, u_accum + uv_blk_col, u_count + uv_blk_col,
- v_accum + uv_blk_col, v_count + uv_blk_col, y_dist + blk_col,
- u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors_fst, neighbors_snd,
- top_weight, bottom_weight, NULL);
+ u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width,
+ uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col,
+ u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col,
+ y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors_fst,
+ neighbors_snd, top_weight, bottom_weight, NULL);
}
void vp9_highbd_apply_temporal_filter_sse4_1(
@@ -929,14 +880,12 @@ void vp9_highbd_apply_temporal_filter_sse4_1(
u_dist_ptr = u_dist + 1;
v_dist_ptr = v_dist + 1;
- vp9_highbd_apply_temporal_filter_luma(
- y_src, y_src_stride, y_pre, y_pre_stride, u_src, v_src, uv_src_stride,
- u_pre, v_pre, uv_pre_stride, block_width, block_height, ss_x, ss_y,
- strength, blk_fw, use_whole_blk, y_accum, y_count, y_dist_ptr, u_dist_ptr,
- v_dist_ptr);
+ vp9_highbd_apply_temporal_filter_luma(y_pre, y_pre_stride, block_width,
+ block_height, ss_x, ss_y, strength,
+ blk_fw, use_whole_blk, y_accum, y_count,
+ y_dist_ptr, u_dist_ptr, v_dist_ptr);
vp9_highbd_apply_temporal_filter_chroma(
- y_src, y_src_stride, y_pre, y_pre_stride, u_src, v_src, uv_src_stride,
u_pre, v_pre, uv_pre_stride, block_width, block_height, ss_x, ss_y,
strength, blk_fw, use_whole_blk, u_accum, u_count, v_accum, v_count,
y_dist_ptr, u_dist_ptr, v_dist_ptr);