summary | refs | log | tree | commit | diff
path: root/source/libvpx/vp8
diff options
context:
space:
mode:
Diffstat (limited to 'source/libvpx/vp8')
-rw-r--r-- source/libvpx/vp8/common/arm/neon/loopfilter_neon.c | 81
-rw-r--r-- source/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c | 123
-rw-r--r-- source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c | 8
-rw-r--r-- source/libvpx/vp8/encoder/denoising.c | 20
-rw-r--r-- source/libvpx/vp8/encoder/denoising.h | 2
-rw-r--r-- source/libvpx/vp8/encoder/mcomp.c | 4
-rw-r--r-- source/libvpx/vp8/encoder/pickinter.c | 16
-rw-r--r-- source/libvpx/vp8/vp8_dx_iface.c | 38
8 files changed, 157 insertions, 135 deletions
diff --git a/source/libvpx/vp8/common/arm/neon/loopfilter_neon.c b/source/libvpx/vp8/common/arm/neon/loopfilter_neon.c
index e103476..9d6807a 100644
--- a/source/libvpx/vp8/common/arm/neon/loopfilter_neon.c
+++ b/source/libvpx/vp8/common/arm/neon/loopfilter_neon.c
@@ -10,6 +10,7 @@
#include <arm_neon.h>
#include "./vpx_config.h"
+#include "vpx_ports/arm.h"
static INLINE void vp8_loop_filter_neon(
uint8x16_t qblimit, // flimit
@@ -251,38 +252,56 @@ void vp8_loop_filter_horizontal_edge_uv_neon(
return;
}
-#if (__GNUC__ == 4 && (__GNUC_MINOR__ == 6))
-#warning Using GCC 4.6 is not recommended
-// Some versions of gcc4.6 do not correctly process vst4_lane_u8. When built
-// with any gcc4.6, use the C code.
-extern void vp8_loop_filter_vertical_edge_c(unsigned char *s, int p,
- const unsigned char *blimit,
- const unsigned char *limit,
- const unsigned char *thresh,
- int count);
-
-void vp8_loop_filter_vertical_edge_y_neon(
- unsigned char *src,
- int pitch,
- unsigned char blimit,
- unsigned char limit,
- unsigned char thresh) {
- vp8_loop_filter_vertical_edge_c(src, pitch, &blimit, &limit, &thresh, 2);
-}
-
-void vp8_loop_filter_vertical_edge_uv_neon(
- unsigned char *u,
- int pitch,
- unsigned char blimit,
- unsigned char limit,
- unsigned char thresh,
- unsigned char *v) {
- vp8_loop_filter_vertical_edge_c(u, pitch, &blimit, &limit, &thresh, 1);
- vp8_loop_filter_vertical_edge_c(v, pitch, &blimit, &limit, &thresh, 1);
-}
-#else
static INLINE void write_4x8(unsigned char *dst, int pitch,
const uint8x8x4_t result) {
+#ifdef VPX_INCOMPATIBLE_GCC
+ /*
+ * uint8x8x4_t result
+ 00 01 02 03 | 04 05 06 07
+ 10 11 12 13 | 14 15 16 17
+ 20 21 22 23 | 24 25 26 27
+ 30 31 32 33 | 34 35 36 37
+ ---
+ * after vtrn_u16
+ 00 01 20 21 | 04 05 24 25
+ 02 03 22 23 | 06 07 26 27
+ 10 11 30 31 | 14 15 34 35
+ 12 13 32 33 | 16 17 36 37
+ ---
+ * after vtrn_u8
+ 00 10 20 30 | 04 14 24 34
+ 01 11 21 31 | 05 15 25 35
+ 02 12 22 32 | 06 16 26 36
+ 03 13 23 33 | 07 17 27 37
+ */
+ const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[0]),
+ vreinterpret_u16_u8(result.val[2]));
+ const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[1]),
+ vreinterpret_u16_u8(result.val[3]));
+ const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]),
+ vreinterpret_u8_u16(r13_u16.val[0]));
+ const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]),
+ vreinterpret_u8_u16(r13_u16.val[1]));
+ const uint32x2_t x_0_4 = vreinterpret_u32_u8(r01_u8.val[0]);
+ const uint32x2_t x_1_5 = vreinterpret_u32_u8(r01_u8.val[1]);
+ const uint32x2_t x_2_6 = vreinterpret_u32_u8(r23_u8.val[0]);
+ const uint32x2_t x_3_7 = vreinterpret_u32_u8(r23_u8.val[1]);
+ vst1_lane_u32((uint32_t *)dst, x_0_4, 0);
+ dst += pitch;
+ vst1_lane_u32((uint32_t *)dst, x_1_5, 0);
+ dst += pitch;
+ vst1_lane_u32((uint32_t *)dst, x_2_6, 0);
+ dst += pitch;
+ vst1_lane_u32((uint32_t *)dst, x_3_7, 0);
+ dst += pitch;
+ vst1_lane_u32((uint32_t *)dst, x_0_4, 1);
+ dst += pitch;
+ vst1_lane_u32((uint32_t *)dst, x_1_5, 1);
+ dst += pitch;
+ vst1_lane_u32((uint32_t *)dst, x_2_6, 1);
+ dst += pitch;
+ vst1_lane_u32((uint32_t *)dst, x_3_7, 1);
+#else
vst4_lane_u8(dst, result, 0);
dst += pitch;
vst4_lane_u8(dst, result, 1);
@@ -298,6 +317,7 @@ static INLINE void write_4x8(unsigned char *dst, int pitch,
vst4_lane_u8(dst, result, 6);
dst += pitch;
vst4_lane_u8(dst, result, 7);
+#endif // VPX_INCOMPATIBLE_GCC
}
void vp8_loop_filter_vertical_edge_y_neon(
@@ -528,4 +548,3 @@ void vp8_loop_filter_vertical_edge_uv_neon(
vd = v - 2;
write_4x8(vd, pitch, q4ResultH);
}
-#endif // (__GNUC__ == 4 && (__GNUC_MINOR__ == 6))
diff --git a/source/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c b/source/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c
index d5178bb..e1c8609 100644
--- a/source/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c
+++ b/source/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c
@@ -10,45 +10,9 @@
#include <arm_neon.h>
#include "./vpx_config.h"
+#include "vpx_ports/arm.h"
-#if (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
-static INLINE void write_2x8(unsigned char *dst, int pitch,
- const uint8x8x2_t result,
- const uint8x8x2_t result2) {
- vst2_lane_u8(dst, result, 0);
- dst += pitch;
- vst2_lane_u8(dst, result, 1);
- dst += pitch;
- vst2_lane_u8(dst, result, 2);
- dst += pitch;
- vst2_lane_u8(dst, result, 3);
- dst += pitch;
- vst2_lane_u8(dst, result, 4);
- dst += pitch;
- vst2_lane_u8(dst, result, 5);
- dst += pitch;
- vst2_lane_u8(dst, result, 6);
- dst += pitch;
- vst2_lane_u8(dst, result, 7);
- dst += pitch;
-
- vst2_lane_u8(dst, result2, 0);
- dst += pitch;
- vst2_lane_u8(dst, result2, 1);
- dst += pitch;
- vst2_lane_u8(dst, result2, 2);
- dst += pitch;
- vst2_lane_u8(dst, result2, 3);
- dst += pitch;
- vst2_lane_u8(dst, result2, 4);
- dst += pitch;
- vst2_lane_u8(dst, result2, 5);
- dst += pitch;
- vst2_lane_u8(dst, result2, 6);
- dst += pitch;
- vst2_lane_u8(dst, result2, 7);
-}
-#else
+#ifdef VPX_INCOMPATIBLE_GCC
static INLINE void write_2x4(unsigned char *dst, int pitch,
const uint8x8x2_t result) {
/*
@@ -88,30 +52,47 @@ static INLINE void write_2x8(unsigned char *dst, int pitch,
dst += pitch * 8;
write_2x4(dst, pitch, result2);
}
-#endif
-
+#else
+static INLINE void write_2x8(unsigned char *dst, int pitch,
+ const uint8x8x2_t result,
+ const uint8x8x2_t result2) {
+ vst2_lane_u8(dst, result, 0);
+ dst += pitch;
+ vst2_lane_u8(dst, result, 1);
+ dst += pitch;
+ vst2_lane_u8(dst, result, 2);
+ dst += pitch;
+ vst2_lane_u8(dst, result, 3);
+ dst += pitch;
+ vst2_lane_u8(dst, result, 4);
+ dst += pitch;
+ vst2_lane_u8(dst, result, 5);
+ dst += pitch;
+ vst2_lane_u8(dst, result, 6);
+ dst += pitch;
+ vst2_lane_u8(dst, result, 7);
+ dst += pitch;
-#if (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
-static INLINE
-uint8x8x4_t read_4x8(unsigned char *src, int pitch, uint8x8x4_t x) {
- x = vld4_lane_u8(src, x, 0);
- src += pitch;
- x = vld4_lane_u8(src, x, 1);
- src += pitch;
- x = vld4_lane_u8(src, x, 2);
- src += pitch;
- x = vld4_lane_u8(src, x, 3);
- src += pitch;
- x = vld4_lane_u8(src, x, 4);
- src += pitch;
- x = vld4_lane_u8(src, x, 5);
- src += pitch;
- x = vld4_lane_u8(src, x, 6);
- src += pitch;
- x = vld4_lane_u8(src, x, 7);
- return x;
+ vst2_lane_u8(dst, result2, 0);
+ dst += pitch;
+ vst2_lane_u8(dst, result2, 1);
+ dst += pitch;
+ vst2_lane_u8(dst, result2, 2);
+ dst += pitch;
+ vst2_lane_u8(dst, result2, 3);
+ dst += pitch;
+ vst2_lane_u8(dst, result2, 4);
+ dst += pitch;
+ vst2_lane_u8(dst, result2, 5);
+ dst += pitch;
+ vst2_lane_u8(dst, result2, 6);
+ dst += pitch;
+ vst2_lane_u8(dst, result2, 7);
}
-#else
+#endif // VPX_INCOMPATIBLE_GCC
+
+
+#ifdef VPX_INCOMPATIBLE_GCC
static INLINE
uint8x8x4_t read_4x8(unsigned char *src, int pitch, uint8x8x4_t x) {
const uint8x8_t a = vld1_u8(src);
@@ -169,7 +150,27 @@ uint8x8x4_t read_4x8(unsigned char *src, int pitch, uint8x8x4_t x) {
return x;
}
-#endif
+#else
+static INLINE
+uint8x8x4_t read_4x8(unsigned char *src, int pitch, uint8x8x4_t x) {
+ x = vld4_lane_u8(src, x, 0);
+ src += pitch;
+ x = vld4_lane_u8(src, x, 1);
+ src += pitch;
+ x = vld4_lane_u8(src, x, 2);
+ src += pitch;
+ x = vld4_lane_u8(src, x, 3);
+ src += pitch;
+ x = vld4_lane_u8(src, x, 4);
+ src += pitch;
+ x = vld4_lane_u8(src, x, 5);
+ src += pitch;
+ x = vld4_lane_u8(src, x, 6);
+ src += pitch;
+ x = vld4_lane_u8(src, x, 7);
+ return x;
+}
+#endif // VPX_INCOMPATIBLE_GCC
static INLINE void vp8_loop_filter_simple_vertical_edge_neon(
unsigned char *s,
diff --git a/source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c b/source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c
index ffa3d91..5ad9465 100644
--- a/source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c
+++ b/source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c
@@ -9,11 +9,9 @@
*/
#include <arm_neon.h>
+#include "vpx_ports/arm.h"
-#if (__GNUC__ == 4 && (__GNUC_MINOR__ == 6))
-#warning Using GCC 4.6 is not recommended
-// Some versions of gcc4.6 do not correctly process this function. When built
-// with any gcc4.6, use the C code.
+#ifdef VPX_INCOMPATIBLE_GCC
#include "./vp8_rtcd.h"
void vp8_short_walsh4x4_neon(
int16_t *input,
@@ -128,4 +126,4 @@ void vp8_short_walsh4x4_neon(
vst1q_s16(output + 8, q1s16);
return;
}
-#endif // (__GNUC__ == 4 && (__GNUC_MINOR__ == 6))
+#endif // VPX_INCOMPATIBLE_GCC
diff --git a/source/libvpx/vp8/encoder/denoising.c b/source/libvpx/vp8/encoder/denoising.c
index 12f9734..75b2a3b 100644
--- a/source/libvpx/vp8/encoder/denoising.c
+++ b/source/libvpx/vp8/encoder/denoising.c
@@ -390,9 +390,9 @@ void vp8_denoiser_set_parameters(VP8_DENOISER *denoiser, int mode) {
denoiser->denoise_pars.scale_motion_thresh = 16;
denoiser->denoise_pars.scale_increase_filter = 1;
denoiser->denoise_pars.denoise_mv_bias = 60;
- denoiser->denoise_pars.pickmode_mv_bias = 60;
- denoiser->denoise_pars.qp_thresh = 100;
- denoiser->denoise_pars.consec_zerolast = 10;
+ denoiser->denoise_pars.pickmode_mv_bias = 75;
+ denoiser->denoise_pars.qp_thresh = 85;
+ denoiser->denoise_pars.consec_zerolast = 15;
denoiser->denoise_pars.spatial_blur = 20;
}
}
@@ -453,17 +453,17 @@ int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height,
// Bitrate thresholds and noise metric (nmse) thresholds for switching to
// aggressive mode.
// TODO(marpan): Adjust thresholds, including effect on resolution.
- denoiser->bitrate_threshold = 200000; // (bits/sec).
+ denoiser->bitrate_threshold = 300000; // (bits/sec).
denoiser->threshold_aggressive_mode = 35;
- if (width * height > 640 * 480) {
- denoiser->bitrate_threshold = 500000;
- denoiser->threshold_aggressive_mode = 100;
+ if (width * height > 1280 * 720) {
+ denoiser->bitrate_threshold = 2000000;
+ denoiser->threshold_aggressive_mode = 1400;
} else if (width * height > 960 * 540) {
denoiser->bitrate_threshold = 800000;
denoiser->threshold_aggressive_mode = 150;
- } else if (width * height > 1280 * 720) {
- denoiser->bitrate_threshold = 2000000;
- denoiser->threshold_aggressive_mode = 1400;
+ } else if (width * height > 640 * 480) {
+ denoiser->bitrate_threshold = 500000;
+ denoiser->threshold_aggressive_mode = 100;
}
return 0;
}
diff --git a/source/libvpx/vp8/encoder/denoising.h b/source/libvpx/vp8/encoder/denoising.h
index fb7930b..6c1f9e2 100644
--- a/source/libvpx/vp8/encoder/denoising.h
+++ b/source/libvpx/vp8/encoder/denoising.h
@@ -27,6 +27,8 @@ extern "C" {
#define SUM_DIFF_FROM_AVG_THRESH_UV (8 * 8 * 8)
#define MOTION_MAGNITUDE_THRESHOLD_UV (8*3)
+#define MAX_GF_ARF_DENOISE_RANGE (16)
+
enum vp8_denoiser_decision
{
COPY_BLOCK,
diff --git a/source/libvpx/vp8/encoder/mcomp.c b/source/libvpx/vp8/encoder/mcomp.c
index 54abe76..545f2c8 100644
--- a/source/libvpx/vp8/encoder/mcomp.c
+++ b/source/libvpx/vp8/encoder/mcomp.c
@@ -393,8 +393,8 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
#endif
/* central mv */
- bestmv->as_mv.row <<= 3;
- bestmv->as_mv.col <<= 3;
+ bestmv->as_mv.row *= 8;
+ bestmv->as_mv.col *= 8;
startmv = *bestmv;
/* calculate central point error */
diff --git a/source/libvpx/vp8/encoder/pickinter.c b/source/libvpx/vp8/encoder/pickinter.c
index 43f8957..9d5556d 100644
--- a/source/libvpx/vp8/encoder/pickinter.c
+++ b/source/libvpx/vp8/encoder/pickinter.c
@@ -516,9 +516,8 @@ static int evaluate_inter_mode(unsigned int* sse, int rate2, int* distortion2,
// Adjust rd for ZEROMV and LAST, if LAST is the closest reference frame.
if (this_mode == ZEROMV &&
x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME &&
- (denoise_aggressive || cpi->closest_reference_frame == LAST_FRAME))
- {
- this_rd = ((int64_t)this_rd) * rd_adj / 100;
+ (denoise_aggressive || cpi->closest_reference_frame == LAST_FRAME)) {
+ this_rd = ((int64_t)this_rd) * rd_adj / 100;
}
check_for_encode_breakout(*sse, x);
@@ -1083,7 +1082,14 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
{
/* Store for later use by denoiser. */
- if (this_mode == ZEROMV && sse < zero_mv_sse )
+ // Don't denoise with GOLDEN or ALTREF if they are old reference
+ // frames (greater than MAX_GF_ARF_DENOISE_RANGE frames in the past).
+ int skip_old_reference = ((this_ref_frame != LAST_FRAME) &&
+ (cpi->common.current_video_frame -
+ cpi->current_ref_frames[this_ref_frame] >
+ MAX_GF_ARF_DENOISE_RANGE)) ? 1 : 0;
+ if (this_mode == ZEROMV && sse < zero_mv_sse &&
+ !skip_old_reference)
{
zero_mv_sse = sse;
x->best_zeromv_reference_frame =
@@ -1092,7 +1098,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
/* Store the best NEWMV in x for later use in the denoiser. */
if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV &&
- sse < best_sse)
+ sse < best_sse && !skip_old_reference)
{
best_sse = sse;
x->best_sse_inter_mode = NEWMV;
diff --git a/source/libvpx/vp8/vp8_dx_iface.c b/source/libvpx/vp8/vp8_dx_iface.c
index 3ab8ed0..5aa274d 100644
--- a/source/libvpx/vp8/vp8_dx_iface.c
+++ b/source/libvpx/vp8/vp8_dx_iface.c
@@ -112,22 +112,19 @@ static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx,
* structure. More memory may be required at the time the stream
* information becomes known.
*/
- if (!ctx->priv)
- {
- vp8_init_ctx(ctx);
- priv = (vpx_codec_alg_priv_t *)ctx->priv;
-
- /* initialize number of fragments to zero */
- priv->fragments.count = 0;
- /* is input fragments enabled? */
- priv->fragments.enabled =
- (priv->base.init_flags & VPX_CODEC_USE_INPUT_FRAGMENTS);
-
- /*post processing level initialized to do nothing */
- }
- else
- {
- priv = (vpx_codec_alg_priv_t *)ctx->priv;
+ if (!ctx->priv) {
+ vp8_init_ctx(ctx);
+ priv = (vpx_codec_alg_priv_t *)ctx->priv;
+
+ /* initialize number of fragments to zero */
+ priv->fragments.count = 0;
+ /* is input fragments enabled? */
+ priv->fragments.enabled =
+ (priv->base.init_flags & VPX_CODEC_USE_INPUT_FRAGMENTS);
+
+ /*post processing level initialized to do nothing */
+ } else {
+ priv = (vpx_codec_alg_priv_t *)ctx->priv;
}
priv->yv12_frame_buffers.use_frame_threads =
@@ -138,11 +135,10 @@ static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx,
if (priv->yv12_frame_buffers.use_frame_threads &&
((ctx->priv->init_flags & VPX_CODEC_USE_ERROR_CONCEALMENT) ||
- (ctx->priv->init_flags & VPX_CODEC_USE_INPUT_FRAGMENTS)))
- {
- /* row-based threading, error concealment, and input fragments will
- * not be supported when using frame-based threading */
- res = VPX_CODEC_INVALID_PARAM;
+ (ctx->priv->init_flags & VPX_CODEC_USE_INPUT_FRAGMENTS))) {
+ /* row-based threading, error concealment, and input fragments will
+ * not be supported when using frame-based threading */
+ res = VPX_CODEC_INVALID_PARAM;
}
return res;