diff options
Diffstat (limited to 'source/libvpx/vp8')
-rw-r--r-- | source/libvpx/vp8/common/arm/neon/loopfilter_neon.c | 81 | ||||
-rw-r--r-- | source/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c | 123 | ||||
-rw-r--r-- | source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c | 8 | ||||
-rw-r--r-- | source/libvpx/vp8/encoder/denoising.c | 20 | ||||
-rw-r--r-- | source/libvpx/vp8/encoder/denoising.h | 2 | ||||
-rw-r--r-- | source/libvpx/vp8/encoder/mcomp.c | 4 | ||||
-rw-r--r-- | source/libvpx/vp8/encoder/pickinter.c | 16 | ||||
-rw-r--r-- | source/libvpx/vp8/vp8_dx_iface.c | 38 |
8 files changed, 157 insertions, 135 deletions
diff --git a/source/libvpx/vp8/common/arm/neon/loopfilter_neon.c b/source/libvpx/vp8/common/arm/neon/loopfilter_neon.c index e103476..9d6807a 100644 --- a/source/libvpx/vp8/common/arm/neon/loopfilter_neon.c +++ b/source/libvpx/vp8/common/arm/neon/loopfilter_neon.c @@ -10,6 +10,7 @@ #include <arm_neon.h> #include "./vpx_config.h" +#include "vpx_ports/arm.h" static INLINE void vp8_loop_filter_neon( uint8x16_t qblimit, // flimit @@ -251,38 +252,56 @@ void vp8_loop_filter_horizontal_edge_uv_neon( return; } -#if (__GNUC__ == 4 && (__GNUC_MINOR__ == 6)) -#warning Using GCC 4.6 is not recommended -// Some versions of gcc4.6 do not correctly process vst4_lane_u8. When built -// with any gcc4.6, use the C code. -extern void vp8_loop_filter_vertical_edge_c(unsigned char *s, int p, - const unsigned char *blimit, - const unsigned char *limit, - const unsigned char *thresh, - int count); - -void vp8_loop_filter_vertical_edge_y_neon( - unsigned char *src, - int pitch, - unsigned char blimit, - unsigned char limit, - unsigned char thresh) { - vp8_loop_filter_vertical_edge_c(src, pitch, &blimit, &limit, &thresh, 2); -} - -void vp8_loop_filter_vertical_edge_uv_neon( - unsigned char *u, - int pitch, - unsigned char blimit, - unsigned char limit, - unsigned char thresh, - unsigned char *v) { - vp8_loop_filter_vertical_edge_c(u, pitch, &blimit, &limit, &thresh, 1); - vp8_loop_filter_vertical_edge_c(v, pitch, &blimit, &limit, &thresh, 1); -} -#else static INLINE void write_4x8(unsigned char *dst, int pitch, const uint8x8x4_t result) { +#ifdef VPX_INCOMPATIBLE_GCC + /* + * uint8x8x4_t result + 00 01 02 03 | 04 05 06 07 + 10 11 12 13 | 14 15 16 17 + 20 21 22 23 | 24 25 26 27 + 30 31 32 33 | 34 35 36 37 + --- + * after vtrn_u16 + 00 01 20 21 | 04 05 24 25 + 02 03 22 23 | 06 07 26 27 + 10 11 30 31 | 14 15 34 35 + 12 13 32 33 | 16 17 36 37 + --- + * after vtrn_u8 + 00 10 20 30 | 04 14 24 34 + 01 11 21 31 | 05 15 25 35 + 02 12 22 32 | 06 16 26 36 + 03 13 23 33 | 07 17 27 37 + */ + const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[0]), + vreinterpret_u16_u8(result.val[2])); + const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[1]), + vreinterpret_u16_u8(result.val[3])); + const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]), + vreinterpret_u8_u16(r13_u16.val[0])); + const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]), + vreinterpret_u8_u16(r13_u16.val[1])); + const uint32x2_t x_0_4 = vreinterpret_u32_u8(r01_u8.val[0]); + const uint32x2_t x_1_5 = vreinterpret_u32_u8(r01_u8.val[1]); + const uint32x2_t x_2_6 = vreinterpret_u32_u8(r23_u8.val[0]); + const uint32x2_t x_3_7 = vreinterpret_u32_u8(r23_u8.val[1]); + vst1_lane_u32((uint32_t *)dst, x_0_4, 0); + dst += pitch; + vst1_lane_u32((uint32_t *)dst, x_1_5, 0); + dst += pitch; + vst1_lane_u32((uint32_t *)dst, x_2_6, 0); + dst += pitch; + vst1_lane_u32((uint32_t *)dst, x_3_7, 0); + dst += pitch; + vst1_lane_u32((uint32_t *)dst, x_0_4, 1); + dst += pitch; + vst1_lane_u32((uint32_t *)dst, x_1_5, 1); + dst += pitch; + vst1_lane_u32((uint32_t *)dst, x_2_6, 1); + dst += pitch; + vst1_lane_u32((uint32_t *)dst, x_3_7, 1); +#else vst4_lane_u8(dst, result, 0); dst += pitch; vst4_lane_u8(dst, result, 1); @@ -298,6 +317,7 @@ static INLINE void write_4x8(unsigned char *dst, int pitch, vst4_lane_u8(dst, result, 6); dst += pitch; vst4_lane_u8(dst, result, 7); +#endif // VPX_INCOMPATIBLE_GCC } void vp8_loop_filter_vertical_edge_y_neon( @@ -528,4 +548,3 @@ void vp8_loop_filter_vertical_edge_uv_neon( vd = v - 2; write_4x8(vd, pitch, q4ResultH); } -#endif // (__GNUC__ == 4 && (__GNUC_MINOR__ == 6)) diff --git a/source/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c b/source/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c index d5178bb..e1c8609 100644 --- a/source/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c +++ b/source/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c @@ -10,45 +10,9 @@ #include <arm_neon.h> #include "./vpx_config.h" +#include "vpx_ports/arm.h" -#if (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7)) -static INLINE void write_2x8(unsigned char *dst, int pitch, - const uint8x8x2_t result, - const uint8x8x2_t result2) { - vst2_lane_u8(dst, result, 0); - dst += pitch; - vst2_lane_u8(dst, result, 1); - dst += pitch; - vst2_lane_u8(dst, result, 2); - dst += pitch; - vst2_lane_u8(dst, result, 3); - dst += pitch; - vst2_lane_u8(dst, result, 4); - dst += pitch; - vst2_lane_u8(dst, result, 5); - dst += pitch; - vst2_lane_u8(dst, result, 6); - dst += pitch; - vst2_lane_u8(dst, result, 7); - dst += pitch; - - vst2_lane_u8(dst, result2, 0); - dst += pitch; - vst2_lane_u8(dst, result2, 1); - dst += pitch; - vst2_lane_u8(dst, result2, 2); - dst += pitch; - vst2_lane_u8(dst, result2, 3); - dst += pitch; - vst2_lane_u8(dst, result2, 4); - dst += pitch; - vst2_lane_u8(dst, result2, 5); - dst += pitch; - vst2_lane_u8(dst, result2, 6); - dst += pitch; - vst2_lane_u8(dst, result2, 7); -} -#else +#ifdef VPX_INCOMPATIBLE_GCC static INLINE void write_2x4(unsigned char *dst, int pitch, const uint8x8x2_t result) { /* @@ -88,30 +52,47 @@ static INLINE void write_2x8(unsigned char *dst, int pitch, dst += pitch * 8; write_2x4(dst, pitch, result2); } -#endif - +#else +static INLINE void write_2x8(unsigned char *dst, int pitch, + const uint8x8x2_t result, + const uint8x8x2_t result2) { + vst2_lane_u8(dst, result, 0); + dst += pitch; + vst2_lane_u8(dst, result, 1); + dst += pitch; + vst2_lane_u8(dst, result, 2); + dst += pitch; + vst2_lane_u8(dst, result, 3); + dst += pitch; + vst2_lane_u8(dst, result, 4); + dst += pitch; + vst2_lane_u8(dst, result, 5); + dst += pitch; + vst2_lane_u8(dst, result, 6); + dst += pitch; + vst2_lane_u8(dst, result, 7); + dst += pitch; -#if (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7)) -static INLINE -uint8x8x4_t read_4x8(unsigned char *src, int pitch, uint8x8x4_t x) { - x = vld4_lane_u8(src, x, 0); - src += pitch; - x = vld4_lane_u8(src, x, 1); - src += pitch; - x = vld4_lane_u8(src, x, 2); - src += pitch; - x = vld4_lane_u8(src, x, 3); - src += pitch; - x = vld4_lane_u8(src, x, 4); - src += pitch; - x = vld4_lane_u8(src, x, 5); - src += pitch; - x = vld4_lane_u8(src, x, 6); - src += pitch; - x = vld4_lane_u8(src, x, 7); - return x; + vst2_lane_u8(dst, result2, 0); + dst += pitch; + vst2_lane_u8(dst, result2, 1); + dst += pitch; + vst2_lane_u8(dst, result2, 2); + dst += pitch; + vst2_lane_u8(dst, result2, 3); + dst += pitch; + vst2_lane_u8(dst, result2, 4); + dst += pitch; + vst2_lane_u8(dst, result2, 5); + dst += pitch; + vst2_lane_u8(dst, result2, 6); + dst += pitch; + vst2_lane_u8(dst, result2, 7); } -#else +#endif // VPX_INCOMPATIBLE_GCC + + +#ifdef VPX_INCOMPATIBLE_GCC static INLINE uint8x8x4_t read_4x8(unsigned char *src, int pitch, uint8x8x4_t x) { const uint8x8_t a = vld1_u8(src); @@ -169,7 +150,27 @@ uint8x8x4_t read_4x8(unsigned char *src, int pitch, uint8x8x4_t x) { return x; } -#endif +#else +static INLINE +uint8x8x4_t read_4x8(unsigned char *src, int pitch, uint8x8x4_t x) { + x = vld4_lane_u8(src, x, 0); + src += pitch; + x = vld4_lane_u8(src, x, 1); + src += pitch; + x = vld4_lane_u8(src, x, 2); + src += pitch; + x = vld4_lane_u8(src, x, 3); + src += pitch; + x = vld4_lane_u8(src, x, 4); + src += pitch; + x = vld4_lane_u8(src, x, 5); + src += pitch; + x = vld4_lane_u8(src, x, 6); + src += pitch; + x = vld4_lane_u8(src, x, 7); + return x; +} +#endif // VPX_INCOMPATIBLE_GCC static INLINE void vp8_loop_filter_simple_vertical_edge_neon( unsigned char *s, diff --git a/source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c b/source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c index ffa3d91..5ad9465 100644 --- a/source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c +++ b/source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c @@ -9,11 +9,9 @@ */ #include <arm_neon.h> +#include "vpx_ports/arm.h" -#if (__GNUC__ == 4 && (__GNUC_MINOR__ == 6)) -#warning Using GCC 4.6 is not recommended -// Some versions of gcc4.6 do not correctly process this function. When built -// with any gcc4.6, use the C code. +#ifdef VPX_INCOMPATIBLE_GCC #include "./vp8_rtcd.h" void vp8_short_walsh4x4_neon( int16_t *input, @@ -128,4 +126,4 @@ void vp8_short_walsh4x4_neon( vst1q_s16(output + 8, q1s16); return; } -#endif // (__GNUC__ == 4 && (__GNUC_MINOR__ == 6)) +#endif // VPX_INCOMPATIBLE_GCC diff --git a/source/libvpx/vp8/encoder/denoising.c b/source/libvpx/vp8/encoder/denoising.c index 12f9734..75b2a3b 100644 --- a/source/libvpx/vp8/encoder/denoising.c +++ b/source/libvpx/vp8/encoder/denoising.c @@ -390,9 +390,9 @@ void vp8_denoiser_set_parameters(VP8_DENOISER *denoiser, int mode) { denoiser->denoise_pars.scale_motion_thresh = 16; denoiser->denoise_pars.scale_increase_filter = 1; denoiser->denoise_pars.denoise_mv_bias = 60; - denoiser->denoise_pars.pickmode_mv_bias = 60; - denoiser->denoise_pars.qp_thresh = 100; - denoiser->denoise_pars.consec_zerolast = 10; + denoiser->denoise_pars.pickmode_mv_bias = 75; + denoiser->denoise_pars.qp_thresh = 85; + denoiser->denoise_pars.consec_zerolast = 15; denoiser->denoise_pars.spatial_blur = 20; } } @@ -453,17 +453,17 @@ int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height, // Bitrate thresholds and noise metric (nmse) thresholds for switching to // aggressive mode. // TODO(marpan): Adjust thresholds, including effect on resolution. - denoiser->bitrate_threshold = 200000; // (bits/sec). + denoiser->bitrate_threshold = 300000; // (bits/sec). denoiser->threshold_aggressive_mode = 35; - if (width * height > 640 * 480) { - denoiser->bitrate_threshold = 500000; - denoiser->threshold_aggressive_mode = 100; + if (width * height > 1280 * 720) { + denoiser->bitrate_threshold = 2000000; + denoiser->threshold_aggressive_mode = 1400; } else if (width * height > 960 * 540) { denoiser->bitrate_threshold = 800000; denoiser->threshold_aggressive_mode = 150; - } else if (width * height > 1280 * 720) { - denoiser->bitrate_threshold = 2000000; - denoiser->threshold_aggressive_mode = 1400; + } else if (width * height > 640 * 480) { + denoiser->bitrate_threshold = 500000; + denoiser->threshold_aggressive_mode = 100; } return 0; } diff --git a/source/libvpx/vp8/encoder/denoising.h b/source/libvpx/vp8/encoder/denoising.h index fb7930b..6c1f9e2 100644 --- a/source/libvpx/vp8/encoder/denoising.h +++ b/source/libvpx/vp8/encoder/denoising.h @@ -27,6 +27,8 @@ extern "C" { #define SUM_DIFF_FROM_AVG_THRESH_UV (8 * 8 * 8) #define MOTION_MAGNITUDE_THRESHOLD_UV (8*3) +#define MAX_GF_ARF_DENOISE_RANGE (16) + enum vp8_denoiser_decision { COPY_BLOCK, diff --git a/source/libvpx/vp8/encoder/mcomp.c b/source/libvpx/vp8/encoder/mcomp.c index 54abe76..545f2c8 100644 --- a/source/libvpx/vp8/encoder/mcomp.c +++ b/source/libvpx/vp8/encoder/mcomp.c @@ -393,8 +393,8 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, #endif /* central mv */ - bestmv->as_mv.row <<= 3; - bestmv->as_mv.col <<= 3; + bestmv->as_mv.row *= 8; + bestmv->as_mv.col *= 8; startmv = *bestmv; /* calculate central point error */ diff --git a/source/libvpx/vp8/encoder/pickinter.c b/source/libvpx/vp8/encoder/pickinter.c index 43f8957..9d5556d 100644 --- a/source/libvpx/vp8/encoder/pickinter.c +++ b/source/libvpx/vp8/encoder/pickinter.c @@ -516,9 +516,8 @@ static int evaluate_inter_mode(unsigned int* sse, int rate2, int* distortion2, // Adjust rd for ZEROMV and LAST, if LAST is the closest reference frame. if (this_mode == ZEROMV && x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME && - (denoise_aggressive || cpi->closest_reference_frame == LAST_FRAME)) - { - this_rd = ((int64_t)this_rd) * rd_adj / 100; + (denoise_aggressive || cpi->closest_reference_frame == LAST_FRAME)) { + this_rd = ((int64_t)this_rd) * rd_adj / 100; } check_for_encode_breakout(*sse, x); @@ -1083,7 +1082,14 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, { /* Store for later use by denoiser. */ - if (this_mode == ZEROMV && sse < zero_mv_sse ) + // Dont' denoise with GOLDEN OR ALTREF is they are old reference + // frames (greater than MAX_GF_ARF_DENOISE_RANGE frames in past). + int skip_old_reference = ((this_ref_frame != LAST_FRAME) && + (cpi->common.current_video_frame - + cpi->current_ref_frames[this_ref_frame] > + MAX_GF_ARF_DENOISE_RANGE)) ? 1 : 0; + if (this_mode == ZEROMV && sse < zero_mv_sse && + !skip_old_reference) { zero_mv_sse = sse; x->best_zeromv_reference_frame = @@ -1092,7 +1098,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, /* Store the best NEWMV in x for later use in the denoiser. */ if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && - sse < best_sse) + sse < best_sse && !skip_old_reference) { best_sse = sse; x->best_sse_inter_mode = NEWMV; diff --git a/source/libvpx/vp8/vp8_dx_iface.c b/source/libvpx/vp8/vp8_dx_iface.c index 3ab8ed0..5aa274d 100644 --- a/source/libvpx/vp8/vp8_dx_iface.c +++ b/source/libvpx/vp8/vp8_dx_iface.c @@ -112,22 +112,19 @@ static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx, * structure. More memory may be required at the time the stream * information becomes known. */ - if (!ctx->priv) - { - vp8_init_ctx(ctx); - priv = (vpx_codec_alg_priv_t *)ctx->priv; - - /* initialize number of fragments to zero */ - priv->fragments.count = 0; - /* is input fragments enabled? */ - priv->fragments.enabled = - (priv->base.init_flags & VPX_CODEC_USE_INPUT_FRAGMENTS); - - /*post processing level initialized to do nothing */ - } - else - { - priv = (vpx_codec_alg_priv_t *)ctx->priv; + if (!ctx->priv) { + vp8_init_ctx(ctx); + priv = (vpx_codec_alg_priv_t *)ctx->priv; + + /* initialize number of fragments to zero */ + priv->fragments.count = 0; + /* is input fragments enabled? */ + priv->fragments.enabled = + (priv->base.init_flags & VPX_CODEC_USE_INPUT_FRAGMENTS); + + /*post processing level initialized to do nothing */ + } else { + priv = (vpx_codec_alg_priv_t *)ctx->priv; } priv->yv12_frame_buffers.use_frame_threads = @@ -138,11 +135,10 @@ static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx, if (priv->yv12_frame_buffers.use_frame_threads && ((ctx->priv->init_flags & VPX_CODEC_USE_ERROR_CONCEALMENT) || - (ctx->priv->init_flags & VPX_CODEC_USE_INPUT_FRAGMENTS))) - { - /* row-based threading, error concealment, and input fragments will - * not be supported when using frame-based threading */ - res = VPX_CODEC_INVALID_PARAM; + (ctx->priv->init_flags & VPX_CODEC_USE_INPUT_FRAGMENTS))) { + /* row-based threading, error concealment, and input fragments will + * not be supported when using frame-based threading */ + res = VPX_CODEC_INVALID_PARAM; } return res; |