diff options
author | Vignesh Venkatasubramanian <vigneshv@google.com> | 2015-08-25 11:05:01 -0700 |
---|---|---|
committer | Vignesh Venkatasubramanian <vigneshv@google.com> | 2015-10-19 18:20:40 +0000 |
commit | 7ce0a1d1337c01056ba24006efab21f00e179e04 (patch) | |
tree | 149789b855abffb1fc6aa96cb7afefe0420a9b12 /libvpx/vp9/common/vp9_postproc.c | |
parent | 4ec72fb10684276b048d5b3a6ba7a88d43b06411 (diff) | |
download | libvpx-7ce0a1d1337c01056ba24006efab21f00e179e04.tar.gz |
libvpx: Pull from upstream
Current HEAD: 7105df53d7dc13d5e575bc8df714ec8d1da36b06
Includes security fixes and performance improvements. Also removed the
VP10-related code from the upstream repository.
BUG=23452792
Change-Id: I97452dff5b1f0756e19d621111797363cc533d46
(cherry picked from commit da49e34c1fb5e99681f4ad99c21d9cfd83eddb96)
Diffstat (limited to 'libvpx/vp9/common/vp9_postproc.c')
-rw-r--r-- | libvpx/vp9/common/vp9_postproc.c | 364 |
1 files changed, 334 insertions, 30 deletions
diff --git a/libvpx/vp9/common/vp9_postproc.c b/libvpx/vp9/common/vp9_postproc.c index abda4e682..71ab86150 100644 --- a/libvpx/vp9/common/vp9_postproc.c +++ b/libvpx/vp9/common/vp9_postproc.c @@ -16,20 +16,21 @@ #include "./vpx_scale_rtcd.h" #include "./vp9_rtcd.h" +#include "vpx_ports/mem.h" +#include "vpx_ports/system_state.h" #include "vpx_scale/vpx_scale.h" #include "vpx_scale/yv12config.h" #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_postproc.h" -#include "vp9/common/vp9_systemdependent.h" #include "vp9/common/vp9_textblit.h" #if CONFIG_VP9_POSTPROC -static const short kernel5[] = { +static const int16_t kernel5[] = { 1, 1, 4, 1, 1 }; -const short vp9_rv[] = { +const int16_t vp9_rv[] = { 8, 5, 2, 2, 8, 12, 4, 9, 8, 3, 0, 3, 9, 0, 0, 0, 8, 3, 14, 4, 10, 1, 11, 14, 1, 14, 9, 6, 12, 11, @@ -76,6 +77,9 @@ const short vp9_rv[] = { 0, 9, 5, 5, 11, 10, 13, 9, 10, 13, }; +static const uint8_t q_diff_thresh = 20; +static const uint8_t last_q_thresh = 170; + void vp9_post_proc_down_and_across_c(const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, @@ -85,10 +89,7 @@ void vp9_post_proc_down_and_across_c(const uint8_t *src_ptr, int flimit) { uint8_t const *p_src; uint8_t *p_dst; - int row; - int col; - int i; - int v; + int row, col, i, v, kernel; int pitch = src_pixels_per_line; uint8_t d[8]; (void)dst_pixels_per_line; @@ -99,8 +100,8 @@ void vp9_post_proc_down_and_across_c(const uint8_t *src_ptr, p_dst = dst_ptr; for (col = 0; col < cols; col++) { - int kernel = 4; - int v = p_src[col]; + kernel = 4; + v = p_src[col]; for (i = -2; i <= 2; i++) { if (abs(v - p_src[col + i * pitch]) > flimit) @@ -122,7 +123,7 @@ void vp9_post_proc_down_and_across_c(const uint8_t *src_ptr, d[i] = p_src[i]; for (col = 0; col < cols; col++) { - int kernel = 4; + kernel = 4; v = p_src[col]; d[col & 7] = v; @@ -152,6 +153,81 @@ void vp9_post_proc_down_and_across_c(const uint8_t *src_ptr, } } +#if CONFIG_VP9_HIGHBITDEPTH +void 
vp9_highbd_post_proc_down_and_across_c(const uint16_t *src_ptr, + uint16_t *dst_ptr, + int src_pixels_per_line, + int dst_pixels_per_line, + int rows, + int cols, + int flimit) { + uint16_t const *p_src; + uint16_t *p_dst; + int row, col, i, v, kernel; + int pitch = src_pixels_per_line; + uint16_t d[8]; + + for (row = 0; row < rows; row++) { + // post_proc_down for one row. + p_src = src_ptr; + p_dst = dst_ptr; + + for (col = 0; col < cols; col++) { + kernel = 4; + v = p_src[col]; + + for (i = -2; i <= 2; i++) { + if (abs(v - p_src[col + i * pitch]) > flimit) + goto down_skip_convolve; + + kernel += kernel5[2 + i] * p_src[col + i * pitch]; + } + + v = (kernel >> 3); + + down_skip_convolve: + p_dst[col] = v; + } + + /* now post_proc_across */ + p_src = dst_ptr; + p_dst = dst_ptr; + + for (i = 0; i < 8; i++) + d[i] = p_src[i]; + + for (col = 0; col < cols; col++) { + kernel = 4; + v = p_src[col]; + + d[col & 7] = v; + + for (i = -2; i <= 2; i++) { + if (abs(v - p_src[col + i]) > flimit) + goto across_skip_convolve; + + kernel += kernel5[2 + i] * p_src[col + i]; + } + + d[col & 7] = (kernel >> 3); + + across_skip_convolve: + if (col >= 2) + p_dst[col - 2] = d[(col - 2) & 7]; + } + + /* handle the last two pixels */ + p_dst[col - 2] = d[(col - 2) & 7]; + p_dst[col - 1] = d[(col - 1) & 7]; + + + /* next row */ + src_ptr += pitch; + dst_ptr += dst_pixels_per_line; + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + static int q2mbl(int x) { if (x < 20) x = 20; @@ -162,10 +238,46 @@ static int q2mbl(int x) { void vp9_mbpost_proc_across_ip_c(uint8_t *src, int pitch, int rows, int cols, int flimit) { int r, c, i; - uint8_t *s = src; uint8_t d[16]; + for (r = 0; r < rows; r++) { + int sumsq = 0; + int sum = 0; + + for (i = -8; i <= 6; i++) { + sumsq += s[i] * s[i]; + sum += s[i]; + d[i + 8] = 0; + } + + for (c = 0; c < cols + 8; c++) { + int x = s[c + 7] - s[c - 8]; + int y = s[c + 7] + s[c - 8]; + + sum += x; + sumsq += x * y; + + d[c & 15] = s[c]; + + if (sumsq * 15 - sum * sum < 
flimit) { + d[c & 15] = (8 + sum + s[c]) >> 4; + } + + s[c - 8] = d[(c - 8) & 15]; + } + s += pitch; + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +void vp9_highbd_mbpost_proc_across_ip_c(uint16_t *src, int pitch, + int rows, int cols, int flimit) { + int r, c, i; + + uint16_t *s = src; + uint16_t d[16]; + for (r = 0; r < rows; r++) { int sumsq = 0; @@ -196,6 +308,7 @@ void vp9_mbpost_proc_across_ip_c(uint8_t *src, int pitch, s += pitch; } } +#endif // CONFIG_VP9_HIGHBITDEPTH void vp9_mbpost_proc_down_c(uint8_t *dst, int pitch, int rows, int cols, int flimit) { @@ -207,7 +320,7 @@ void vp9_mbpost_proc_down_c(uint8_t *dst, int pitch, int sumsq = 0; int sum = 0; uint8_t d[16]; - const short *rv2 = rv3 + ((c * 17) & 127); + const int16_t *rv2 = rv3 + ((c * 17) & 127); for (i = -8; i <= 6; i++) { sumsq += s[i * pitch] * s[i * pitch]; @@ -229,6 +342,40 @@ void vp9_mbpost_proc_down_c(uint8_t *dst, int pitch, } } +#if CONFIG_VP9_HIGHBITDEPTH +void vp9_highbd_mbpost_proc_down_c(uint16_t *dst, int pitch, + int rows, int cols, int flimit) { + int r, c, i; + const int16_t *rv3 = &vp9_rv[63 & rand()]; // NOLINT + + for (c = 0; c < cols; c++) { + uint16_t *s = &dst[c]; + int sumsq = 0; + int sum = 0; + uint16_t d[16]; + const int16_t *rv2 = rv3 + ((c * 17) & 127); + + for (i = -8; i <= 6; i++) { + sumsq += s[i * pitch] * s[i * pitch]; + sum += s[i * pitch]; + } + + for (r = 0; r < rows + 8; r++) { + sumsq += s[7 * pitch] * s[ 7 * pitch] - s[-8 * pitch] * s[-8 * pitch]; + sum += s[7 * pitch] - s[-8 * pitch]; + d[r & 15] = s[0]; + + if (sumsq * 15 - sum * sum < flimit) { + d[r & 15] = (rv2[r & 127] + sum + s[0]) >> 4; + } + + s[-8 * pitch] = d[(r - 8) & 15]; + s += pitch; + } + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + static void deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *post, int q, @@ -239,6 +386,51 @@ static void deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source, (void) low_var_thresh; (void) flag; +#if CONFIG_VP9_HIGHBITDEPTH + if (source->flags 
& YV12_FLAG_HIGHBITDEPTH) { + vp9_highbd_post_proc_down_and_across(CONVERT_TO_SHORTPTR(source->y_buffer), + CONVERT_TO_SHORTPTR(post->y_buffer), + source->y_stride, post->y_stride, + source->y_height, source->y_width, + ppl); + + vp9_highbd_mbpost_proc_across_ip(CONVERT_TO_SHORTPTR(post->y_buffer), + post->y_stride, post->y_height, + post->y_width, q2mbl(q)); + + vp9_highbd_mbpost_proc_down(CONVERT_TO_SHORTPTR(post->y_buffer), + post->y_stride, post->y_height, + post->y_width, q2mbl(q)); + + vp9_highbd_post_proc_down_and_across(CONVERT_TO_SHORTPTR(source->u_buffer), + CONVERT_TO_SHORTPTR(post->u_buffer), + source->uv_stride, post->uv_stride, + source->uv_height, source->uv_width, + ppl); + vp9_highbd_post_proc_down_and_across(CONVERT_TO_SHORTPTR(source->v_buffer), + CONVERT_TO_SHORTPTR(post->v_buffer), + source->uv_stride, post->uv_stride, + source->uv_height, source->uv_width, + ppl); + } else { + vp9_post_proc_down_and_across(source->y_buffer, post->y_buffer, + source->y_stride, post->y_stride, + source->y_height, source->y_width, ppl); + + vp9_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height, + post->y_width, q2mbl(q)); + + vp9_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height, + post->y_width, q2mbl(q)); + + vp9_post_proc_down_and_across(source->u_buffer, post->u_buffer, + source->uv_stride, post->uv_stride, + source->uv_height, source->uv_width, ppl); + vp9_post_proc_down_and_across(source->v_buffer, post->v_buffer, + source->uv_stride, post->uv_stride, + source->uv_height, source->uv_width, ppl); + } +#else vp9_post_proc_down_and_across(source->y_buffer, post->y_buffer, source->y_stride, post->y_stride, source->y_height, source->y_width, ppl); @@ -255,6 +447,7 @@ static void deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source, vp9_post_proc_down_and_across(source->v_buffer, post->v_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl); +#endif // CONFIG_VP9_HIGHBITDEPTH } void 
vp9_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, @@ -271,10 +464,26 @@ void vp9_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, uint8_t *const dsts[3] = {dst->y_buffer, dst->u_buffer, dst->v_buffer}; const int dst_strides[3] = {dst->y_stride, dst->uv_stride, dst->uv_stride}; - for (i = 0; i < MAX_MB_PLANE; ++i) + for (i = 0; i < MAX_MB_PLANE; ++i) { +#if CONFIG_VP9_HIGHBITDEPTH + assert((src->flags & YV12_FLAG_HIGHBITDEPTH) == + (dst->flags & YV12_FLAG_HIGHBITDEPTH)); + if (src->flags & YV12_FLAG_HIGHBITDEPTH) { + vp9_highbd_post_proc_down_and_across(CONVERT_TO_SHORTPTR(srcs[i]), + CONVERT_TO_SHORTPTR(dsts[i]), + src_strides[i], dst_strides[i], + src_heights[i], src_widths[i], ppl); + } else { + vp9_post_proc_down_and_across(srcs[i], dsts[i], + src_strides[i], dst_strides[i], + src_heights[i], src_widths[i], ppl); + } +#else vp9_post_proc_down_and_across(srcs[i], dsts[i], src_strides[i], dst_strides[i], src_heights[i], src_widths[i], ppl); +#endif // CONFIG_VP9_HIGHBITDEPTH + } } void vp9_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, @@ -293,15 +502,34 @@ void vp9_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, for (i = 0; i < MAX_MB_PLANE; ++i) { const int src_stride = src_strides[i]; - const uint8_t *const src = srcs[i] + 2 * src_stride + 2; const int src_width = src_widths[i] - 4; const int src_height = src_heights[i] - 4; - const int dst_stride = dst_strides[i]; - uint8_t *const dst = dsts[i] + 2 * dst_stride + 2; - vp9_post_proc_down_and_across(src, dst, src_stride, dst_stride, +#if CONFIG_VP9_HIGHBITDEPTH + assert((src->flags & YV12_FLAG_HIGHBITDEPTH) == + (dst->flags & YV12_FLAG_HIGHBITDEPTH)); + if (src->flags & YV12_FLAG_HIGHBITDEPTH) { + const uint16_t *const src_plane = CONVERT_TO_SHORTPTR( + srcs[i] + 2 * src_stride + 2); + uint16_t *const dst_plane = CONVERT_TO_SHORTPTR( + dsts[i] + 2 * dst_stride + 2); + vp9_highbd_post_proc_down_and_across(src_plane, dst_plane, src_stride, + 
dst_stride, src_height, src_width, + ppl); + } else { + const uint8_t *const src_plane = srcs[i] + 2 * src_stride + 2; + uint8_t *const dst_plane = dsts[i] + 2 * dst_stride + 2; + + vp9_post_proc_down_and_across(src_plane, dst_plane, src_stride, + dst_stride, src_height, src_width, ppl); + } +#else + const uint8_t *const src_plane = srcs[i] + 2 * src_stride + 2; + uint8_t *const dst_plane = dsts[i] + 2 * dst_stride + 2; + vp9_post_proc_down_and_across(src_plane, dst_plane, src_stride, dst_stride, src_height, src_width, ppl); +#endif } } @@ -316,7 +544,7 @@ static void fillrd(struct postproc_state *state, int q, int a) { double sigma; int ai = a, qi = q, i; - vp9_clear_system_state(); + vpx_clear_system_state(); sigma = ai + .5 + .6 * (63 - qi) / 63.0; @@ -324,16 +552,15 @@ static void fillrd(struct postproc_state *state, int q, int a) { * a gaussian distribution with sigma determined by q. */ { - double i; int next, j; next = 0; for (i = -32; i < 32; i++) { - int a = (int)(0.5 + 256 * gaussian(sigma, 0, i)); + int a_i = (int)(0.5 + 256 * gaussian(sigma, 0, i)); - if (a) { - for (j = 0; j < a; j++) { + if (a_i) { + for (j = 0; j < a_i; j++) { char_dist[next + j] = (char) i; } @@ -366,6 +593,9 @@ void vp9_plane_add_noise_c(uint8_t *start, char *noise, unsigned int width, unsigned int height, int pitch) { unsigned int i, j; + // TODO(jbb): why does simd code use both but c doesn't, normalize and + // fix.. + (void) bothclamp; for (i = 0; i < height; i++) { uint8_t *pos = start + i * pitch; char *ref = (char *)(noise + (rand() & 0xff)); // NOLINT @@ -382,9 +612,20 @@ void vp9_plane_add_noise_c(uint8_t *start, char *noise, } } +static void swap_mi_and_prev_mi(VP9_COMMON *cm) { + // Current mip will be the prev_mip for the next frame. + MODE_INFO *temp = cm->postproc_state.prev_mip; + cm->postproc_state.prev_mip = cm->mip; + cm->mip = temp; + + // Update the upper left visible macroblock ptrs. 
+ cm->mi = cm->mip + cm->mi_stride + 1; + cm->postproc_state.prev_mi = cm->postproc_state.prev_mip + cm->mi_stride + 1; +} + int vp9_post_proc_frame(struct VP9Common *cm, YV12_BUFFER_CONFIG *dest, vp9_ppflags_t *ppflags) { - const int q = MIN(63, cm->lf.filter_level * 10 / 6); + const int q = MIN(105, cm->lf.filter_level * 2); const int flags = ppflags->post_proc_flag; YV12_BUFFER_CONFIG *const ppbuf = &cm->post_proc_buffer; struct postproc_state *const ppstate = &cm->postproc_state; @@ -397,17 +638,76 @@ int vp9_post_proc_frame(struct VP9Common *cm, return 0; } - vp9_clear_system_state(); + vpx_clear_system_state(); -#if CONFIG_VP9_POSTPROC || CONFIG_INTERNAL_STATS - if (vp9_realloc_frame_buffer(&cm->post_proc_buffer, cm->width, cm->height, + // Alloc memory for prev_mip in the first frame. + if (cm->current_video_frame == 1) { + cm->postproc_state.last_base_qindex = cm->base_qindex; + cm->postproc_state.last_frame_valid = 1; + ppstate->prev_mip = vpx_calloc(cm->mi_alloc_size, sizeof(*cm->mip)); + if (!ppstate->prev_mip) { + return 1; + } + ppstate->prev_mi = ppstate->prev_mip + cm->mi_stride + 1; + memset(ppstate->prev_mip, 0, + cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip)); + } + + // Allocate post_proc_buffer_int if needed. + if ((flags & VP9D_MFQE) && !cm->post_proc_buffer_int.buffer_alloc) { + if ((flags & VP9D_DEMACROBLOCK) || (flags & VP9D_DEBLOCK)) { + const int width = ALIGN_POWER_OF_TWO(cm->width, 4); + const int height = ALIGN_POWER_OF_TWO(cm->height, 4); + + if (vpx_alloc_frame_buffer(&cm->post_proc_buffer_int, width, height, + cm->subsampling_x, cm->subsampling_y, +#if CONFIG_VP9_HIGHBITDEPTH + cm->use_highbitdepth, +#endif // CONFIG_VP9_HIGHBITDEPTH + VP9_ENC_BORDER_IN_PIXELS, + cm->byte_alignment) < 0) { + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate MFQE framebuffer"); + } + + // Ensure that postproc is set to all 0s so that post proc + // doesn't pull random data in from edge. 
+ memset(cm->post_proc_buffer_int.buffer_alloc, 128, + cm->post_proc_buffer.frame_size); + } + } + + if (vpx_realloc_frame_buffer(&cm->post_proc_buffer, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, - VP9_DEC_BORDER_IN_PIXELS, NULL, NULL, NULL) < 0) +#if CONFIG_VP9_HIGHBITDEPTH + cm->use_highbitdepth, +#endif + VP9_DEC_BORDER_IN_PIXELS, cm->byte_alignment, + NULL, NULL, NULL) < 0) vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate post-processing buffer"); -#endif - if (flags & VP9D_DEMACROBLOCK) { + if ((flags & VP9D_MFQE) && cm->current_video_frame >= 2 && + cm->postproc_state.last_frame_valid && cm->bit_depth == 8 && + cm->postproc_state.last_base_qindex <= last_q_thresh && + cm->base_qindex - cm->postproc_state.last_base_qindex >= q_diff_thresh) { + vp9_mfqe(cm); + // TODO(jackychen): Consider whether enable deblocking by default + // if mfqe is enabled. Need to take both the quality and the speed + // into consideration. + if ((flags & VP9D_DEMACROBLOCK) || (flags & VP9D_DEBLOCK)) { + vp8_yv12_copy_frame(ppbuf, &cm->post_proc_buffer_int); + } + if ((flags & VP9D_DEMACROBLOCK) && cm->post_proc_buffer_int.buffer_alloc) { + deblock_and_de_macro_block(&cm->post_proc_buffer_int, ppbuf, + q + (ppflags->deblocking_level - 5) * 10, + 1, 0); + } else if (flags & VP9D_DEBLOCK) { + vp9_deblock(&cm->post_proc_buffer_int, ppbuf, q); + } else { + vp8_yv12_copy_frame(&cm->post_proc_buffer_int, ppbuf); + } + } else if (flags & VP9D_DEMACROBLOCK) { deblock_and_de_macro_block(cm->frame_to_show, ppbuf, q + (ppflags->deblocking_level - 5) * 10, 1, 0); } else if (flags & VP9D_DEBLOCK) { @@ -416,6 +716,9 @@ int vp9_post_proc_frame(struct VP9Common *cm, vp8_yv12_copy_frame(cm->frame_to_show, ppbuf); } + cm->postproc_state.last_base_qindex = cm->base_qindex; + cm->postproc_state.last_frame_valid = 1; + if (flags & VP9D_ADDNOISE) { const int noise_level = ppflags->noise_level; if (ppstate->last_q != q || @@ -436,6 +739,7 @@ int 
vp9_post_proc_frame(struct VP9Common *cm, dest->uv_width = dest->y_width >> cm->subsampling_x; dest->uv_height = dest->y_height >> cm->subsampling_y; + swap_mi_and_prev_mi(cm); return 0; } -#endif +#endif // CONFIG_VP9_POSTPROC |