-rw-r--r--  armv7a-neon/libvpx_srcs.txt | 3
-rw-r--r--  armv7a-neon/vp9_rtcd.h | 21
-rw-r--r--  armv7a-neon/vpx_config.h | 1
-rw-r--r--  armv7a/libvpx_srcs.txt | 2
-rw-r--r--  armv7a/vp9_rtcd.h | 15
-rw-r--r--  armv7a/vpx_config.h | 1
-rw-r--r--  generic/libvpx_srcs.txt | 2
-rw-r--r--  generic/vp9_rtcd.h | 15
-rw-r--r--  generic/vpx_config.h | 1
-rw-r--r--  libvpx/README | 34
-rwxr-xr-x  libvpx/build/make/configure.sh | 6
-rwxr-xr-x  libvpx/build/make/gen_msvs_sln.sh | 32
-rwxr-xr-x  libvpx/configure | 1
-rw-r--r--  libvpx/libs.mk | 7
-rw-r--r--  libvpx/test/convolve_test.cc | 6
-rw-r--r--  libvpx/test/test.mk | 1
-rw-r--r--  libvpx/test/vp9_subtract_test.cc | 5
-rw-r--r--  libvpx/test/vp9_thread_test.cc | 109
-rw-r--r--  libvpx/vp9/common/arm/neon/vp9_convolve8_avg_neon.asm | 63
-rw-r--r--  libvpx/vp9/common/arm/neon/vp9_convolve8_neon.asm | 53
-rw-r--r--  libvpx/vp9/common/arm/neon/vp9_mb_lpf_neon.asm | 618
-rw-r--r--  libvpx/vp9/common/arm/neon/vp9_short_idct8x8_add_neon.asm | 48
-rw-r--r--  libvpx/vp9/common/vp9_blockd.h | 87
-rw-r--r--  libvpx/vp9/common/vp9_common_data.c | 65
-rw-r--r--  libvpx/vp9/common/vp9_common_data.h | 7
-rw-r--r--  libvpx/vp9/common/vp9_entropy.c | 56
-rw-r--r--  libvpx/vp9/common/vp9_entropy.h | 7
-rw-r--r--  libvpx/vp9/common/vp9_entropymode.c | 89
-rw-r--r--  libvpx/vp9/common/vp9_entropymode.h | 18
-rw-r--r--  libvpx/vp9/common/vp9_entropymv.c | 69
-rw-r--r--  libvpx/vp9/common/vp9_enums.h | 4
-rw-r--r--  libvpx/vp9/common/vp9_extend.c | 16
-rw-r--r--  libvpx/vp9/common/vp9_findnearmv.c | 13
-rw-r--r--  libvpx/vp9/common/vp9_findnearmv.h | 30
-rw-r--r--  libvpx/vp9/common/vp9_idct.c | 57
-rw-r--r--  libvpx/vp9/common/vp9_loopfilter.c | 35
-rw-r--r--  libvpx/vp9/common/vp9_loopfilter.h | 21
-rw-r--r--  libvpx/vp9/common/vp9_mv.h | 8
-rw-r--r--  libvpx/vp9/common/vp9_mvref_common.c | 438
-rw-r--r--  libvpx/vp9/common/vp9_mvref_common.h | 15
-rw-r--r--  libvpx/vp9/common/vp9_onyxc_int.h | 19
-rw-r--r--  libvpx/vp9/common/vp9_pred_common.c | 38
-rw-r--r--  libvpx/vp9/common/vp9_pred_common.h | 8
-rw-r--r--  libvpx/vp9/common/vp9_reconinter.c | 122
-rw-r--r--  libvpx/vp9/common/vp9_reconinter.h | 2
-rw-r--r--  libvpx/vp9/common/vp9_rtcd_defs.sh | 49
-rw-r--r--  libvpx/vp9/common/vp9_treecoder.h | 18
-rw-r--r--  libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c | 279
-rw-r--r--  libvpx/vp9/common/x86/vp9_intrapred_ssse3.asm | 204
-rw-r--r--  libvpx/vp9/decoder/vp9_decodemv.c | 446
-rw-r--r--  libvpx/vp9/decoder/vp9_decodemv.h | 1
-rw-r--r--  libvpx/vp9/decoder/vp9_decodframe.c | 76
-rw-r--r--  libvpx/vp9/decoder/vp9_detokenize.c | 52
-rw-r--r--  libvpx/vp9/decoder/vp9_detokenize.h | 1
-rw-r--r--  libvpx/vp9/decoder/vp9_idct_blk.c | 23
-rw-r--r--  libvpx/vp9/decoder/vp9_onyxd_if.c | 18
-rw-r--r--  libvpx/vp9/decoder/vp9_onyxd_int.h | 5
-rw-r--r--  libvpx/vp9/decoder/vp9_thread.c | 248
-rw-r--r--  libvpx/vp9/decoder/vp9_thread.h | 93
-rw-r--r--  libvpx/vp9/decoder/vp9_treereader.h | 1
-rw-r--r--  libvpx/vp9/encoder/vp9_bitstream.c | 145
-rw-r--r--  libvpx/vp9/encoder/vp9_block.h | 10
-rw-r--r--  libvpx/vp9/encoder/vp9_encodeframe.c | 413
-rw-r--r--  libvpx/vp9/encoder/vp9_encodeintra.c | 2
-rw-r--r--  libvpx/vp9/encoder/vp9_encodemb.c | 160
-rw-r--r--  libvpx/vp9/encoder/vp9_encodemb.h | 4
-rw-r--r--  libvpx/vp9/encoder/vp9_encodemv.c | 2
-rw-r--r--  libvpx/vp9/encoder/vp9_firstpass.c | 32
-rw-r--r--  libvpx/vp9/encoder/vp9_mbgraph.c | 19
-rw-r--r--  libvpx/vp9/encoder/vp9_mcomp.c | 157
-rw-r--r--  libvpx/vp9/encoder/vp9_mcomp.h | 2
-rw-r--r--  libvpx/vp9/encoder/vp9_onyx_if.c | 189
-rw-r--r--  libvpx/vp9/encoder/vp9_onyx_int.h | 35
-rw-r--r--  libvpx/vp9/encoder/vp9_rdopt.c | 837
-rw-r--r--  libvpx/vp9/encoder/vp9_segmentation.c | 43
-rw-r--r--  libvpx/vp9/encoder/vp9_temporal_filter.c | 5
-rw-r--r--  libvpx/vp9/encoder/vp9_tokenize.c | 88
-rw-r--r--  libvpx/vp9/encoder/vp9_tokenize.h | 2
-rw-r--r--  libvpx/vp9/vp9_common.mk | 6
-rw-r--r--  libvpx/vp9/vp9cx.mk | 8
-rw-r--r--  libvpx/vp9/vp9dx.mk | 2
-rw-r--r--  libvpx/vpx_scale/generic/yv12config.c | 4
-rw-r--r--  mips-dspr2/libvpx_srcs.txt | 2
-rw-r--r--  mips-dspr2/vp9_rtcd.h | 15
-rw-r--r--  mips-dspr2/vpx_config.h | 1
-rw-r--r--  mips/libvpx_srcs.txt | 2
-rw-r--r--  mips/vp9_rtcd.h | 15
-rw-r--r--  mips/vpx_config.h | 1
88 files changed, 3734 insertions(+), 2249 deletions(-)
diff --git a/armv7a-neon/libvpx_srcs.txt b/armv7a-neon/libvpx_srcs.txt
index 7f331c0cf..25ca5e0f8 100644
--- a/armv7a-neon/libvpx_srcs.txt
+++ b/armv7a-neon/libvpx_srcs.txt
@@ -208,6 +208,7 @@ vp9/common/arm/neon/vp9_convolve8_neon.asm.s
vp9/common/arm/neon/vp9_convolve_neon.c
vp9/common/arm/neon/vp9_dc_only_idct_add_neon.asm.s
vp9/common/arm/neon/vp9_loopfilter_neon.asm.s
+vp9/common/arm/neon/vp9_mb_lpf_neon.asm.s
vp9/common/arm/neon/vp9_short_idct8x8_add_neon.asm.s
vp9/common/generic/vp9_systemdependent.c
vp9/common/vp9_alloccommon.c
@@ -282,6 +283,8 @@ vp9/decoder/vp9_onyxd.h
vp9/decoder/vp9_onyxd_if.c
vp9/decoder/vp9_onyxd_int.h
vp9/decoder/vp9_read_bit_buffer.h
+vp9/decoder/vp9_thread.c
+vp9/decoder/vp9_thread.h
vp9/decoder/vp9_treereader.h
vp9/vp9_common.mk
vp9/vp9_dx_iface.c
diff --git a/armv7a-neon/vp9_rtcd.h b/armv7a-neon/vp9_rtcd.h
index 6e6ff717b..4ebb49773 100644
--- a/armv7a-neon/vp9_rtcd.h
+++ b/armv7a-neon/vp9_rtcd.h
@@ -14,9 +14,7 @@
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_enums.h"
-struct loop_filter_info;
struct macroblockd;
-struct loop_filter_info;
/* Encoder forward decls */
struct macroblock;
@@ -207,7 +205,8 @@ void vp9_add_constant_residual_32x32_neon(const int16_t diff, uint8_t *dest, int
#define vp9_add_constant_residual_32x32 vp9_add_constant_residual_32x32_neon
void vp9_mb_lpf_vertical_edge_w_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
-#define vp9_mb_lpf_vertical_edge_w vp9_mb_lpf_vertical_edge_w_c
+void vp9_mb_lpf_vertical_edge_w_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+#define vp9_mb_lpf_vertical_edge_w vp9_mb_lpf_vertical_edge_w_neon
void vp9_mbloop_filter_vertical_edge_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
void vp9_mbloop_filter_vertical_edge_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
@@ -218,7 +217,8 @@ void vp9_loop_filter_vertical_edge_neon(uint8_t *s, int pitch, const uint8_t *bl
#define vp9_loop_filter_vertical_edge vp9_loop_filter_vertical_edge_neon
void vp9_mb_lpf_horizontal_edge_w_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
-#define vp9_mb_lpf_horizontal_edge_w vp9_mb_lpf_horizontal_edge_w_c
+void vp9_mb_lpf_horizontal_edge_w_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
+#define vp9_mb_lpf_horizontal_edge_w vp9_mb_lpf_horizontal_edge_w_neon
void vp9_mbloop_filter_horizontal_edge_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
void vp9_mbloop_filter_horizontal_edge_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
@@ -273,6 +273,9 @@ void vp9_short_idct4x4_1_add_c(int16_t *input, uint8_t *dest, int dest_stride);
void vp9_short_idct4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct4x4_add vp9_short_idct4x4_add_c
+void vp9_short_idct8x8_1_add_c(int16_t *input, uint8_t *dest, int dest_stride);
+#define vp9_short_idct8x8_1_add vp9_short_idct8x8_1_add_c
+
void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride);
void vp9_short_idct8x8_add_neon(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct8x8_add vp9_short_idct8x8_add_neon
@@ -280,8 +283,8 @@ void vp9_short_idct8x8_add_neon(int16_t *input, uint8_t *dest, int dest_stride);
void vp9_short_idct10_8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct10_8x8_add vp9_short_idct10_8x8_add_c
-void vp9_short_idct1_8x8_c(int16_t *input, int16_t *output);
-#define vp9_short_idct1_8x8 vp9_short_idct1_8x8_c
+void vp9_short_idct16x16_1_add_c(int16_t *input, uint8_t *dest, int dest_stride);
+#define vp9_short_idct16x16_1_add vp9_short_idct16x16_1_add_c
void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct16x16_add vp9_short_idct16x16_add_c
@@ -289,18 +292,12 @@ void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride);
void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct10_16x16_add vp9_short_idct10_16x16_add_c
-void vp9_short_idct1_16x16_c(int16_t *input, int16_t *output);
-#define vp9_short_idct1_16x16 vp9_short_idct1_16x16_c
-
void vp9_short_idct32x32_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct32x32_add vp9_short_idct32x32_add_c
void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output);
#define vp9_short_idct1_32x32 vp9_short_idct1_32x32_c
-void vp9_short_idct10_32x32_add_c(int16_t *input, uint8_t *dest, int dest_stride);
-#define vp9_short_idct10_32x32_add vp9_short_idct10_32x32_add_c
-
void vp9_short_iht4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride, int tx_type);
#define vp9_short_iht4x4_add vp9_short_iht4x4_add_c
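
The vp9_rtcd.h headers in this change are generated per target and statically map each generic function name to the best implementation available for that target, which is why hooking up the new NEON loop-filter file only requires repointing two #defines. A minimal C sketch of the pattern, with a hypothetical HAVE_NEON guard standing in for the per-target generation step:

    #include <stdint.h>

    /* The C reference implementation always exists. */
    void vp9_mb_lpf_vertical_edge_w_c(uint8_t *s, int pitch,
                                      const uint8_t *blimit,
                                      const uint8_t *limit,
                                      const uint8_t *thresh);

    #if HAVE_NEON
    /* Optimized version, same signature as the C reference. */
    void vp9_mb_lpf_vertical_edge_w_neon(uint8_t *s, int pitch,
                                         const uint8_t *blimit,
                                         const uint8_t *limit,
                                         const uint8_t *thresh);
    /* Callers use the generic name; the macro selects the NEON version. */
    #define vp9_mb_lpf_vertical_edge_w vp9_mb_lpf_vertical_edge_w_neon
    #else
    #define vp9_mb_lpf_vertical_edge_w vp9_mb_lpf_vertical_edge_w_c
    #endif
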
diff --git a/armv7a-neon/vpx_config.h b/armv7a-neon/vpx_config.h
index 6f45f7ec7..d132e4d60 100644
--- a/armv7a-neon/vpx_config.h
+++ b/armv7a-neon/vpx_config.h
@@ -39,6 +39,7 @@
#define CONFIG_INSTALL_BINS 1
#define CONFIG_INSTALL_LIBS 1
#define CONFIG_INSTALL_SRCS 0
+#define CONFIG_USE_X86INC 1
#define CONFIG_DEBUG 0
#define CONFIG_GPROF 0
#define CONFIG_GCOV 0
diff --git a/armv7a/libvpx_srcs.txt b/armv7a/libvpx_srcs.txt
index a929dc3ca..2ddb1bdd0 100644
--- a/armv7a/libvpx_srcs.txt
+++ b/armv7a/libvpx_srcs.txt
@@ -237,6 +237,8 @@ vp9/decoder/vp9_onyxd.h
vp9/decoder/vp9_onyxd_if.c
vp9/decoder/vp9_onyxd_int.h
vp9/decoder/vp9_read_bit_buffer.h
+vp9/decoder/vp9_thread.c
+vp9/decoder/vp9_thread.h
vp9/decoder/vp9_treereader.h
vp9/vp9_common.mk
vp9/vp9_dx_iface.c
diff --git a/armv7a/vp9_rtcd.h b/armv7a/vp9_rtcd.h
index d6b244db4..1ce24c553 100644
--- a/armv7a/vp9_rtcd.h
+++ b/armv7a/vp9_rtcd.h
@@ -14,9 +14,7 @@
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_enums.h"
-struct loop_filter_info;
struct macroblockd;
-struct loop_filter_info;
/* Encoder forward decls */
struct macroblock;
@@ -260,14 +258,17 @@ void vp9_short_idct4x4_1_add_c(int16_t *input, uint8_t *dest, int dest_stride);
void vp9_short_idct4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct4x4_add vp9_short_idct4x4_add_c
+void vp9_short_idct8x8_1_add_c(int16_t *input, uint8_t *dest, int dest_stride);
+#define vp9_short_idct8x8_1_add vp9_short_idct8x8_1_add_c
+
void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct8x8_add vp9_short_idct8x8_add_c
void vp9_short_idct10_8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct10_8x8_add vp9_short_idct10_8x8_add_c
-void vp9_short_idct1_8x8_c(int16_t *input, int16_t *output);
-#define vp9_short_idct1_8x8 vp9_short_idct1_8x8_c
+void vp9_short_idct16x16_1_add_c(int16_t *input, uint8_t *dest, int dest_stride);
+#define vp9_short_idct16x16_1_add vp9_short_idct16x16_1_add_c
void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct16x16_add vp9_short_idct16x16_add_c
@@ -275,18 +276,12 @@ void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride);
void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct10_16x16_add vp9_short_idct10_16x16_add_c
-void vp9_short_idct1_16x16_c(int16_t *input, int16_t *output);
-#define vp9_short_idct1_16x16 vp9_short_idct1_16x16_c
-
void vp9_short_idct32x32_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct32x32_add vp9_short_idct32x32_add_c
void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output);
#define vp9_short_idct1_32x32 vp9_short_idct1_32x32_c
-void vp9_short_idct10_32x32_add_c(int16_t *input, uint8_t *dest, int dest_stride);
-#define vp9_short_idct10_32x32_add vp9_short_idct10_32x32_add_c
-
void vp9_short_iht4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride, int tx_type);
#define vp9_short_iht4x4_add vp9_short_iht4x4_add_c
diff --git a/armv7a/vpx_config.h b/armv7a/vpx_config.h
index be08d2a25..a330023f9 100644
--- a/armv7a/vpx_config.h
+++ b/armv7a/vpx_config.h
@@ -39,6 +39,7 @@
#define CONFIG_INSTALL_BINS 1
#define CONFIG_INSTALL_LIBS 1
#define CONFIG_INSTALL_SRCS 0
+#define CONFIG_USE_X86INC 1
#define CONFIG_DEBUG 0
#define CONFIG_GPROF 0
#define CONFIG_GCOV 0
diff --git a/generic/libvpx_srcs.txt b/generic/libvpx_srcs.txt
index 402ac2420..055f5fb5d 100644
--- a/generic/libvpx_srcs.txt
+++ b/generic/libvpx_srcs.txt
@@ -197,6 +197,8 @@ vp9/decoder/vp9_onyxd.h
vp9/decoder/vp9_onyxd_if.c
vp9/decoder/vp9_onyxd_int.h
vp9/decoder/vp9_read_bit_buffer.h
+vp9/decoder/vp9_thread.c
+vp9/decoder/vp9_thread.h
vp9/decoder/vp9_treereader.h
vp9/vp9_common.mk
vp9/vp9_dx_iface.c
diff --git a/generic/vp9_rtcd.h b/generic/vp9_rtcd.h
index c0824cb16..2562e82c5 100644
--- a/generic/vp9_rtcd.h
+++ b/generic/vp9_rtcd.h
@@ -14,9 +14,7 @@
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_enums.h"
-struct loop_filter_info;
struct macroblockd;
-struct loop_filter_info;
/* Encoder forward decls */
struct macroblock;
@@ -260,14 +258,17 @@ void vp9_short_idct4x4_1_add_c(int16_t *input, uint8_t *dest, int dest_stride);
void vp9_short_idct4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct4x4_add vp9_short_idct4x4_add_c
+void vp9_short_idct8x8_1_add_c(int16_t *input, uint8_t *dest, int dest_stride);
+#define vp9_short_idct8x8_1_add vp9_short_idct8x8_1_add_c
+
void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct8x8_add vp9_short_idct8x8_add_c
void vp9_short_idct10_8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct10_8x8_add vp9_short_idct10_8x8_add_c
-void vp9_short_idct1_8x8_c(int16_t *input, int16_t *output);
-#define vp9_short_idct1_8x8 vp9_short_idct1_8x8_c
+void vp9_short_idct16x16_1_add_c(int16_t *input, uint8_t *dest, int dest_stride);
+#define vp9_short_idct16x16_1_add vp9_short_idct16x16_1_add_c
void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct16x16_add vp9_short_idct16x16_add_c
@@ -275,18 +276,12 @@ void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride);
void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct10_16x16_add vp9_short_idct10_16x16_add_c
-void vp9_short_idct1_16x16_c(int16_t *input, int16_t *output);
-#define vp9_short_idct1_16x16 vp9_short_idct1_16x16_c
-
void vp9_short_idct32x32_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct32x32_add vp9_short_idct32x32_add_c
void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output);
#define vp9_short_idct1_32x32 vp9_short_idct1_32x32_c
-void vp9_short_idct10_32x32_add_c(int16_t *input, uint8_t *dest, int dest_stride);
-#define vp9_short_idct10_32x32_add vp9_short_idct10_32x32_add_c
-
void vp9_short_iht4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride, int tx_type);
#define vp9_short_iht4x4_add vp9_short_iht4x4_add_c
diff --git a/generic/vpx_config.h b/generic/vpx_config.h
index 37dcff976..4d6172b8d 100644
--- a/generic/vpx_config.h
+++ b/generic/vpx_config.h
@@ -39,6 +39,7 @@
#define CONFIG_INSTALL_BINS 1
#define CONFIG_INSTALL_LIBS 1
#define CONFIG_INSTALL_SRCS 0
+#define CONFIG_USE_X86INC 1
#define CONFIG_DEBUG 0
#define CONFIG_GPROF 0
#define CONFIG_GCOV 0
diff --git a/libvpx/README b/libvpx/README
index 92cc0742c..d7cb11afb 100644
--- a/libvpx/README
+++ b/libvpx/README
@@ -1,7 +1,7 @@
vpx Multi-Format Codec SDK
-README - 21 June 2012
+README - 1 August 2013
-Welcome to the WebM VP8 Codec SDK!
+Welcome to the WebM VP8/VP9 Codec SDK!
COMPILING THE APPLICATIONS/LIBRARIES:
The build system used is similar to autotools. Building generally consists of
@@ -53,33 +53,63 @@ COMPILING THE APPLICATIONS/LIBRARIES:
armv5te-android-gcc
armv5te-linux-rvct
armv5te-linux-gcc
+ armv5te-none-rvct
armv6-darwin-gcc
armv6-linux-rvct
armv6-linux-gcc
+ armv6-none-rvct
armv7-android-gcc
+ armv7-darwin-gcc
armv7-linux-rvct
armv7-linux-gcc
+ armv7-none-rvct
+ armv7-win32-vs11
mips32-linux-gcc
ppc32-darwin8-gcc
ppc32-darwin9-gcc
+ ppc32-linux-gcc
ppc64-darwin8-gcc
ppc64-darwin9-gcc
ppc64-linux-gcc
+ sparc-solaris-gcc
+ x86-android-gcc
x86-darwin8-gcc
x86-darwin8-icc
x86-darwin9-gcc
x86-darwin9-icc
+ x86-darwin10-gcc
+ x86-darwin11-gcc
+ x86-darwin12-gcc
+ x86-darwin13-gcc
x86-linux-gcc
x86-linux-icc
+ x86-os2-gcc
x86-solaris-gcc
+ x86-win32-gcc
x86-win32-vs7
x86-win32-vs8
+ x86-win32-vs9
+ x86-win32-vs10
+ x86-win32-vs11
x86_64-darwin9-gcc
+ x86_64-darwin10-gcc
+ x86_64-darwin11-gcc
+ x86_64-darwin12-gcc
+ x86_64-darwin13-gcc
x86_64-linux-gcc
+ x86_64-linux-icc
x86_64-solaris-gcc
+ x86_64-win64-gcc
x86_64-win64-vs8
+ x86_64-win64-vs9
+ x86_64-win64-vs10
+ x86_64-win64-vs11
universal-darwin8-gcc
universal-darwin9-gcc
+ universal-darwin10-gcc
+ universal-darwin11-gcc
+ universal-darwin12-gcc
+ universal-darwin13-gcc
generic-gnu
The generic-gnu target, in conjunction with the CROSS environment variable,
diff --git a/libvpx/build/make/configure.sh b/libvpx/build/make/configure.sh
index 30a61067f..e2566b0a7 100755
--- a/libvpx/build/make/configure.sh
+++ b/libvpx/build/make/configure.sh
@@ -1189,6 +1189,12 @@ EOF
fi
fi
+ # default use_x86inc to yes if pic is no or 64bit or we are not on darwin
+ echo " checking here for x86inc \"${tgt_isa}\" \"$pic\" "
+ if [ ${tgt_isa} = x86_64 -o ! "$pic" == "yes" -o ! ${tgt_os:0:6} = darwin ]; then
+ soft_enable use_x86inc
+ fi
+
# Position Independent Code (PIC) support, for building relocatable
# shared objects
enabled gcc && enabled pic && check_add_cflags -fPIC
diff --git a/libvpx/build/make/gen_msvs_sln.sh b/libvpx/build/make/gen_msvs_sln.sh
index f9fc69428..0c269b16b 100755
--- a/libvpx/build/make/gen_msvs_sln.sh
+++ b/libvpx/build/make/gen_msvs_sln.sh
@@ -72,15 +72,21 @@ parse_project() {
eval "${var}_name=$name"
eval "${var}_guid=$guid"
- # assume that all projects have the same list of possible configurations,
- # so overwriting old config_lists is not a problem
if [ "$sfx" = "vcproj" ]; then
- config_list=`grep -A1 '<Configuration' $file |
+ cur_config_list=`grep -A1 '<Configuration' $file |
grep Name | cut -d\" -f2`
else
- config_list=`grep -B1 'Label="Configuration"' $file |
+ cur_config_list=`grep -B1 'Label="Configuration"' $file |
grep Condition | cut -d\' -f4`
fi
+ new_config_list=$(for i in $config_list $cur_config_list; do
+ echo $i
+ done | sort | uniq)
+ if [ "$config_list" != "" ] && [ "$config_list" != "$new_config_list" ]; then
+ mixed_platforms=1
+ fi
+ config_list="$new_config_list"
+ eval "${var}_config_list=\"$cur_config_list\""
proj_list="${proj_list} ${var}"
}
@@ -130,6 +136,11 @@ process_global() {
indent_push
IFS_bak=${IFS}
IFS=$'\r'$'\n'
+ if [ "$mixed_platforms" != "" ]; then
+ config_list="
+Release|Mixed Platforms
+Debug|Mixed Platforms"
+ fi
for config in ${config_list}; do
echo "${indent}$config = $config"
done
@@ -144,10 +155,17 @@ process_global() {
indent_push
for proj in ${proj_list}; do
eval "local proj_guid=\${${proj}_guid}"
+ eval "local proj_config_list=\${${proj}_config_list}"
IFS=$'\r'$'\n'
- for config in ${config_list}; do
- echo "${indent}${proj_guid}.${config}.ActiveCfg = ${config}"
- echo "${indent}${proj_guid}.${config}.Build.0 = ${config}"
+ for config in ${proj_config_list}; do
+ if [ "$mixed_platforms" != "" ]; then
+ local c=${config%%|*}
+ echo "${indent}${proj_guid}.${c}|Mixed Platforms.ActiveCfg = ${config}"
+ echo "${indent}${proj_guid}.${c}|Mixed Platforms.Build.0 = ${config}"
+ else
+ echo "${indent}${proj_guid}.${config}.ActiveCfg = ${config}"
+ echo "${indent}${proj_guid}.${config}.Build.0 = ${config}"
+ fi
done
IFS=${IFS_bak}
diff --git a/libvpx/configure b/libvpx/configure
index 3651334e2..24be893f7 100755
--- a/libvpx/configure
+++ b/libvpx/configure
@@ -257,6 +257,7 @@ CONFIG_LIST="
install_bins
install_libs
install_srcs
+ use_x86inc
debug
gprof
gcov
diff --git a/libvpx/libs.mk b/libvpx/libs.mk
index 4aa7dc48a..233863108 100644
--- a/libvpx/libs.mk
+++ b/libvpx/libs.mk
@@ -57,6 +57,13 @@ CLEAN-OBJS += $$(BUILD_PFX)$(1).h
RTCD += $$(BUILD_PFX)$(1).h
endef
+# x86inc.asm is not compatible with pic 32bit builds. Restrict
+# files which use it to 64bit builds or 32bit without pic
+USE_X86INC = no
+ifeq ($(CONFIG_USE_X86INC),yes)
+ USE_X86INC = yes
+endif
+
CODEC_SRCS-yes += CHANGELOG
CODEC_SRCS-yes += libs.mk
diff --git a/libvpx/test/convolve_test.cc b/libvpx/test/convolve_test.cc
index 3b72129cc..b1510c648 100644
--- a/libvpx/test/convolve_test.cc
+++ b/libvpx/test/convolve_test.cc
@@ -527,9 +527,9 @@ INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
#if HAVE_SSSE3
const ConvolveFunctions convolve8_ssse3(
- vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_c,
- vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_c,
- vp9_convolve8_ssse3, vp9_convolve8_avg_c);
+ vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_ssse3,
+ vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_ssse3,
+ vp9_convolve8_ssse3, vp9_convolve8_avg_ssse3);
INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
make_tuple(4, 4, &convolve8_ssse3),
diff --git a/libvpx/test/test.mk b/libvpx/test/test.mk
index 619533a38..25e05b9fc 100644
--- a/libvpx/test/test.mk
+++ b/libvpx/test/test.mk
@@ -89,6 +89,7 @@ LIBVPX_TEST_SRCS-yes += tile_independence_test.cc
endif
LIBVPX_TEST_SRCS-$(CONFIG_VP9) += convolve_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_thread_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
diff --git a/libvpx/test/vp9_subtract_test.cc b/libvpx/test/vp9_subtract_test.cc
index 3e5fe8d6a..24767957f 100644
--- a/libvpx/test/vp9_subtract_test.cc
+++ b/libvpx/test/vp9_subtract_test.cc
@@ -39,7 +39,7 @@ TEST_P(VP9SubtractBlockTest, SimpleSubtract) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
// FIXME(rbultje) split in its own file
- for (BLOCK_SIZE_TYPE bsize = BLOCK_SIZE_AB4X4; bsize < BLOCK_SIZE_TYPES;
+ for (BLOCK_SIZE_TYPE bsize = BLOCK_4X4; bsize < BLOCK_SIZE_TYPES;
bsize = static_cast<BLOCK_SIZE_TYPE>(static_cast<int>(bsize) + 1)) {
const int block_width = 4 << b_width_log2(bsize);
const int block_height = 4 << b_height_log2(bsize);
@@ -93,9 +93,8 @@ TEST_P(VP9SubtractBlockTest, SimpleSubtract) {
INSTANTIATE_TEST_CASE_P(C, VP9SubtractBlockTest,
::testing::Values(vp9_subtract_block_c));
-#if HAVE_SSE2
+#if HAVE_SSE2 && CONFIG_USE_X86INC
INSTANTIATE_TEST_CASE_P(SSE2, VP9SubtractBlockTest,
::testing::Values(vp9_subtract_block_sse2));
#endif
-
} // namespace vp9
diff --git a/libvpx/test/vp9_thread_test.cc b/libvpx/test/vp9_thread_test.cc
new file mode 100644
index 000000000..41d22dd3a
--- /dev/null
+++ b/libvpx/test/vp9_thread_test.cc
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp9/decoder/vp9_thread.h"
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/decode_test_driver.h"
+#include "test/md5_helper.h"
+#include "test/webm_video_source.h"
+
+namespace {
+
+class VP9WorkerThreadTest : public ::testing::Test {
+ protected:
+ virtual ~VP9WorkerThreadTest() {}
+ virtual void SetUp() {
+ vp9_worker_init(&worker_);
+ }
+
+ virtual void TearDown() {
+ vp9_worker_end(&worker_);
+ }
+
+ VP9Worker worker_;
+};
+
+int ThreadHook(void* data, void* return_value) {
+ int* const hook_data = reinterpret_cast<int*>(data);
+ *hook_data = 5;
+ return *reinterpret_cast<int*>(return_value);
+}
+
+TEST_F(VP9WorkerThreadTest, HookSuccess) {
+ EXPECT_TRUE(vp9_worker_sync(&worker_)); // should be a no-op.
+
+ for (int i = 0; i < 2; ++i) {
+ EXPECT_TRUE(vp9_worker_reset(&worker_));
+
+ int hook_data = 0;
+ int return_value = 1; // return successfully from the hook
+ worker_.hook = ThreadHook;
+ worker_.data1 = &hook_data;
+ worker_.data2 = &return_value;
+
+ vp9_worker_launch(&worker_);
+ EXPECT_TRUE(vp9_worker_sync(&worker_));
+ EXPECT_FALSE(worker_.had_error);
+ EXPECT_EQ(5, hook_data);
+
+ EXPECT_TRUE(vp9_worker_sync(&worker_)); // should be a no-op.
+ }
+}
+
+TEST_F(VP9WorkerThreadTest, HookFailure) {
+ EXPECT_TRUE(vp9_worker_reset(&worker_));
+
+ int hook_data = 0;
+ int return_value = 0; // return failure from the hook
+ worker_.hook = ThreadHook;
+ worker_.data1 = &hook_data;
+ worker_.data2 = &return_value;
+
+ vp9_worker_launch(&worker_);
+ EXPECT_FALSE(vp9_worker_sync(&worker_));
+ EXPECT_TRUE(worker_.had_error);
+
+ // Ensure _reset() clears the error and _launch() can be called again.
+ return_value = 1;
+ EXPECT_TRUE(vp9_worker_reset(&worker_));
+ EXPECT_FALSE(worker_.had_error);
+ vp9_worker_launch(&worker_);
+ EXPECT_TRUE(vp9_worker_sync(&worker_));
+ EXPECT_FALSE(worker_.had_error);
+}
+
+TEST(VP9DecodeMTTest, MTDecode) {
+ libvpx_test::WebMVideoSource video("vp90-2-03-size-226x226.webm");
+ video.Init();
+
+ vpx_codec_dec_cfg_t cfg = {0};
+ cfg.threads = 2;
+ libvpx_test::VP9Decoder decoder(cfg, 0);
+
+ libvpx_test::MD5 md5;
+ for (video.Begin(); video.cxdata(); video.Next()) {
+ const vpx_codec_err_t res =
+ decoder.DecodeFrame(video.cxdata(), video.frame_size());
+ ASSERT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();
+
+ libvpx_test::DxDataIterator dec_iter = decoder.GetDxData();
+ const vpx_image_t *img = NULL;
+
+ // Get decompressed data
+ while ((img = dec_iter.Next())) {
+ md5.Add(img);
+ }
+ }
+ EXPECT_STREQ("b35a1b707b28e82be025d960aba039bc", md5.Get());
+}
+
+} // namespace
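
The tests above exercise the full worker lifecycle. A condensed C sketch of that flow, assuming only the interface the tests demonstrate (init/reset/launch/sync/end, with hook(data1, data2) executed on the worker thread and sync returning zero on failure):

    #include "vp9/decoder/vp9_thread.h"

    /* Runs on the worker thread; returning 0 marks the worker as failed. */
    static int do_work(void *data1, void *data2) {
      int *value = (int *)data1;
      const int *ok = (const int *)data2;
      *value += 1;  /* the actual work */
      return *ok;
    }

    static int run_once(void) {
      VP9Worker worker;
      int value = 41;
      int ok = 1;
      int success;
      vp9_worker_init(&worker);
      if (!vp9_worker_reset(&worker)) return 0;  /* thread creation failed */
      worker.hook = do_work;
      worker.data1 = &value;
      worker.data2 = &ok;
      vp9_worker_launch(&worker);          /* hook starts asynchronously */
      success = vp9_worker_sync(&worker);  /* join; 0 if the hook failed */
      vp9_worker_end(&worker);
      return success && value == 42;
    }
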
diff --git a/libvpx/vp9/common/arm/neon/vp9_convolve8_avg_neon.asm b/libvpx/vp9/common/arm/neon/vp9_convolve8_avg_neon.asm
index 15039e267..110a56cdd 100644
--- a/libvpx/vp9/common/arm/neon/vp9_convolve8_avg_neon.asm
+++ b/libvpx/vp9/common/arm/neon/vp9_convolve8_avg_neon.asm
@@ -52,15 +52,15 @@
; sp[]int h
|vp9_convolve8_avg_horiz_neon| PROC
+ ldr r12, [sp, #4] ; x_step_q4
+ cmp r12, #16
+ bne vp9_convolve8_avg_horiz_c
+
push {r4-r10, lr}
sub r0, r0, #3 ; adjust for taps
- ldr r4, [sp, #36] ; x_step_q4
ldr r5, [sp, #32] ; filter_x
- cmp r4, #16
- bne call_horiz_c_convolve ; x_step_q4 != 16
-
ldr r6, [sp, #48] ; w
ldr r7, [sp, #52] ; h
@@ -82,22 +82,22 @@
mov r10, r6 ; w loop counter
loop_horiz
- vld4.u8 {d24[0], d25[0], d26[0], d27[0]}, [r0]!
- vld4.u8 {d24[4], d25[4], d26[4], d27[4]}, [r0]!
+ vld1.8 {d24}, [r0]!
vld3.u8 {d28[0], d29[0], d30[0]}, [r0], r9
- vld4.u8 {d24[1], d25[1], d26[1], d27[1]}, [r0]!
- vld4.u8 {d24[5], d25[5], d26[5], d27[5]}, [r0]!
+ vld1.8 {d25}, [r0]!
vld3.u8 {d28[1], d29[1], d30[1]}, [r0], r9
- vld4.u8 {d24[2], d25[2], d26[2], d27[2]}, [r0]!
- vld4.u8 {d24[6], d25[6], d26[6], d27[6]}, [r0]!
+ vld1.8 {d26}, [r0]!
vld3.u8 {d28[2], d29[2], d30[2]}, [r0], r9
- vld4.u8 {d24[3], d25[3], d26[3], d27[3]}, [r0]!
- vld4.u8 {d24[7], d25[7], d26[7], d27[7]}, [r0]!
+ vld1.8 {d27}, [r0]!
vld3.u8 {d28[3], d29[3], d30[3]}, [r0], r8
+ vtrn.16 q12, q13
+ vtrn.8 d24, d25
+ vtrn.8 d26, d27
+
; extract to s16
vmovl.u8 q8, d24
vmovl.u8 q9, d25
@@ -128,8 +128,8 @@ loop_horiz
vqrshrun.s32 d5, q15, #7
; saturate
- vqshrn.u16 d2, q1, #0
- vqshrn.u16 d3, q2, #0
+ vqmovn.u16 d2, q1
+ vqmovn.u16 d3, q2
; transpose
vtrn.16 d2, d3
@@ -137,10 +137,7 @@ loop_horiz
vtrn.8 d2, d3
; average the new value and the dst value
- vaddl.u8 q8, d2, d6
- vaddl.u8 q9, d3, d7
- vqrshrn.u16 d2, q8, #1
- vqrshrn.u16 d3, q9, #1
+ vrhadd.u8 q1, q1, q3
vst1.u32 {d2[0]}, [r2], r3
vst1.u32 {d3[0]}, [r2], r3
@@ -159,26 +156,20 @@ loop_horiz
pop {r4-r10, pc}
-call_horiz_c_convolve
- pop {r4-r10, lr}
- add r0, r0, #3 ; un-adjust for taps
- b vp9_convolve8_avg_horiz_c
-
-
ENDP
|vp9_convolve8_avg_vert_neon| PROC
+ ldr r12, [sp, #12]
+ cmp r12, #16
+ bne vp9_convolve8_avg_vert_c
+
push {r4-r10, lr}
; adjust for taps
sub r0, r0, r1
sub r0, r0, r1, lsl #1
- ldr r6, [sp, #44] ; y_step_q4
ldr r7, [sp, #40] ; filter_y
- cmp r6, #16
- bne call_vert_c_convolve ; y_step_q4 != 16
-
ldr r8, [sp, #48] ; w
ldr r9, [sp, #52] ; h
@@ -240,14 +231,11 @@ loop_vert
vqrshrun.s32 d5, q15, #7
; saturate
- vqshrn.u16 d2, q1, #0
- vqshrn.u16 d3, q2, #0
+ vqmovn.u16 d2, q1
+ vqmovn.u16 d3, q2
; average the new value and the dst value
- vaddl.u8 q8, d2, d6
- vaddl.u8 q9, d3, d7
- vqrshrn.u16 d2, q8, #1
- vqrshrn.u16 d3, q9, #1
+ vrhadd.u8 q1, q1, q3
vst1.u32 {d2[0]}, [r2], r3
vst1.u32 {d2[1]}, [r2], r3
@@ -266,12 +254,5 @@ loop_vert
pop {r4-r10, pc}
-call_vert_c_convolve
- pop {r4-r10, lr}
- ; un-adjust for taps
- add r0, r0, r1
- add r0, r0, r1, lsl #1
- b vp9_convolve8_avg_vert_c
-
ENDP
END
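
The averaging change above is behavior-preserving: the removed sequence widened the two bytes (vaddl.u8) and then did a rounding narrow by one bit (vqrshrn.u16 #1), which is exactly the rounding halving add a single vrhadd.u8 performs. Per byte, both sequences compute:

    #include <stdint.h>

    /* (a + b + 1) >> 1, the rounding average both instruction sequences
     * produce; the widened sum cannot overflow an unsigned int. */
    static uint8_t rounding_average_u8(uint8_t a, uint8_t b) {
      return (uint8_t)(((unsigned)a + b + 1) >> 1);
    }
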
diff --git a/libvpx/vp9/common/arm/neon/vp9_convolve8_neon.asm b/libvpx/vp9/common/arm/neon/vp9_convolve8_neon.asm
index 842c73c90..845e4a866 100644
--- a/libvpx/vp9/common/arm/neon/vp9_convolve8_neon.asm
+++ b/libvpx/vp9/common/arm/neon/vp9_convolve8_neon.asm
@@ -52,15 +52,15 @@
; sp[]int h
|vp9_convolve8_horiz_neon| PROC
+ ldr r12, [sp, #4] ; x_step_q4
+ cmp r12, #16
+ bne vp9_convolve8_horiz_c
+
push {r4-r10, lr}
sub r0, r0, #3 ; adjust for taps
- ldr r4, [sp, #36] ; x_step_q4
ldr r5, [sp, #32] ; filter_x
- cmp r4, #16
- bne call_horiz_c_convolve ; x_step_q4 != 16
-
ldr r6, [sp, #48] ; w
ldr r7, [sp, #52] ; h
@@ -82,22 +82,22 @@
mov r10, r6 ; w loop counter
loop_horiz
- vld4.u8 {d24[0], d25[0], d26[0], d27[0]}, [r0]!
- vld4.u8 {d24[4], d25[4], d26[4], d27[4]}, [r0]!
+ vld1.8 {d24}, [r0]!
vld3.u8 {d28[0], d29[0], d30[0]}, [r0], r9
- vld4.u8 {d24[1], d25[1], d26[1], d27[1]}, [r0]!
- vld4.u8 {d24[5], d25[5], d26[5], d27[5]}, [r0]!
+ vld1.8 {d25}, [r0]!
vld3.u8 {d28[1], d29[1], d30[1]}, [r0], r9
- vld4.u8 {d24[2], d25[2], d26[2], d27[2]}, [r0]!
- vld4.u8 {d24[6], d25[6], d26[6], d27[6]}, [r0]!
+ vld1.8 {d26}, [r0]!
vld3.u8 {d28[2], d29[2], d30[2]}, [r0], r9
- vld4.u8 {d24[3], d25[3], d26[3], d27[3]}, [r0]!
- vld4.u8 {d24[7], d25[7], d26[7], d27[7]}, [r0]!
+ vld1.8 {d27}, [r0]!
vld3.u8 {d28[3], d29[3], d30[3]}, [r0], r8
+ vtrn.16 q12, q13
+ vtrn.8 d24, d25
+ vtrn.8 d26, d27
+
; extract to s16
vmovl.u8 q8, d24
vmovl.u8 q9, d25
@@ -120,8 +120,8 @@ loop_horiz
vqrshrun.s32 d5, q15, #7
; saturate
- vqshrn.u16 d2, q1, #0
- vqshrn.u16 d3, q2, #0
+ vqmovn.u16 d2, q1
+ vqmovn.u16 d3, q2
; transpose
vtrn.16 d2, d3
@@ -145,26 +145,20 @@ loop_horiz
pop {r4-r10, pc}
-call_horiz_c_convolve
- pop {r4-r10, lr}
- add r0, r0, #3 ; un-adjust for taps
- b vp9_convolve8_horiz_c
-
-
ENDP
|vp9_convolve8_vert_neon| PROC
+ ldr r12, [sp, #12]
+ cmp r12, #16
+ bne vp9_convolve8_vert_c
+
push {r4-r10, lr}
; adjust for taps
sub r0, r0, r1
sub r0, r0, r1, lsl #1
- ldr r6, [sp, #44] ; y_step_q4
ldr r7, [sp, #40] ; filter_y
- cmp r6, #16
- bne call_vert_c_convolve ; y_step_q4 != 16
-
ldr r8, [sp, #48] ; w
ldr r9, [sp, #52] ; h
@@ -219,8 +213,8 @@ loop_vert
vqrshrun.s32 d5, q15, #7
; saturate
- vqshrn.u16 d2, q1, #0
- vqshrn.u16 d3, q2, #0
+ vqmovn.u16 d2, q1
+ vqmovn.u16 d3, q2
vst1.u32 {d2[0]}, [r2], r3
vst1.u32 {d2[1]}, [r2], r3
@@ -239,12 +233,5 @@ loop_vert
pop {r4-r10, pc}
-call_vert_c_convolve
- pop {r4-r10, lr}
- ; un-adjust for taps
- add r0, r0, r1
- add r0, r0, r1, lsl #1
- b vp9_convolve8_vert_c
-
ENDP
END
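
Both convolve files also move the step-size check ahead of the register saves: when x_step_q4 or y_step_q4 is not 16, the code now branches straight to the C fallback before touching any state, instead of pushing registers, checking, and unwinding through the old call_*_c_convolve stubs. The equivalent control flow in C (a sketch; the dispatch function name is illustrative):

    #include <stddef.h>
    #include <stdint.h>

    void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const int16_t *filter_x, int x_step_q4,
                               const int16_t *filter_y, int y_step_q4,
                               int w, int h);

    static void convolve8_horiz_dispatch(const uint8_t *src, ptrdiff_t src_stride,
                                         uint8_t *dst, ptrdiff_t dst_stride,
                                         const int16_t *filter_x, int x_step_q4,
                                         const int16_t *filter_y, int y_step_q4,
                                         int w, int h) {
      if (x_step_q4 != 16) {  /* fractional step: only the C path handles it */
        vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
                              filter_x, x_step_q4, filter_y, y_step_q4, w, h);
        return;
      }
      /* ...NEON fast path for the x_step_q4 == 16 case... */
    }
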
diff --git a/libvpx/vp9/common/arm/neon/vp9_mb_lpf_neon.asm b/libvpx/vp9/common/arm/neon/vp9_mb_lpf_neon.asm
new file mode 100644
index 000000000..edf5786e3
--- /dev/null
+++ b/libvpx/vp9/common/arm/neon/vp9_mb_lpf_neon.asm
@@ -0,0 +1,618 @@
+;
+; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+ EXPORT |vp9_mb_lpf_horizontal_edge_w_neon|
+ EXPORT |vp9_mb_lpf_vertical_edge_w_neon|
+ ARM
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+
+; void vp9_mb_lpf_horizontal_edge_w_neon(uint8_t *s, int p,
+; const uint8_t *blimit,
+; const uint8_t *limit,
+; const uint8_t *thresh,
+; int count)
+; r0 uint8_t *s,
+; r1 int p, /* pitch */
+; r2 const uint8_t *blimit,
+; r3 const uint8_t *limit,
+; sp const uint8_t *thresh,
+; sp+4 int count
+|vp9_mb_lpf_horizontal_edge_w_neon| PROC
+ push {r4-r8, lr}
+ vpush {d8-d15}
+ ldr r4, [sp, #88] ; load thresh
+ ldr r12, [sp, #92] ; load count
+
+h_count
+ vld1.8 {d16[]}, [r2] ; load *blimit
+ vld1.8 {d17[]}, [r3] ; load *limit
+ vld1.8 {d18[]}, [r4] ; load *thresh
+
+ sub r8, r0, r1, lsl #3 ; move src pointer down by 8 lines
+
+ vld1.u8 {d0}, [r8@64], r1 ; p7
+ vld1.u8 {d1}, [r8@64], r1 ; p6
+ vld1.u8 {d2}, [r8@64], r1 ; p5
+ vld1.u8 {d3}, [r8@64], r1 ; p4
+ vld1.u8 {d4}, [r8@64], r1 ; p3
+ vld1.u8 {d5}, [r8@64], r1 ; p2
+ vld1.u8 {d6}, [r8@64], r1 ; p1
+ vld1.u8 {d7}, [r8@64], r1 ; p0
+ vld1.u8 {d8}, [r8@64], r1 ; q0
+ vld1.u8 {d9}, [r8@64], r1 ; q1
+ vld1.u8 {d10}, [r8@64], r1 ; q2
+ vld1.u8 {d11}, [r8@64], r1 ; q3
+ vld1.u8 {d12}, [r8@64], r1 ; q4
+ vld1.u8 {d13}, [r8@64], r1 ; q5
+ vld1.u8 {d14}, [r8@64], r1 ; q6
+ vld1.u8 {d15}, [r8@64], r1 ; q7
+
+ bl vp9_wide_mbfilter_neon
+
+ tst r7, #1
+ beq h_mbfilter
+
+ ; flat && mask were not set for any of the channels. Just store the values
+ ; from filter.
+ sub r8, r0, r1, lsl #1
+
+ vst1.u8 {d25}, [r8@64], r1 ; store op1
+ vst1.u8 {d24}, [r8@64], r1 ; store op0
+ vst1.u8 {d23}, [r8@64], r1 ; store oq0
+ vst1.u8 {d26}, [r8@64], r1 ; store oq1
+
+ b h_next
+
+h_mbfilter
+ tst r7, #2
+ beq h_wide_mbfilter
+
+ ; flat2 was not set for any of the channels. Just store the values from
+ ; mbfilter.
+ sub r8, r0, r1, lsl #1
+ sub r8, r8, r1
+
+ vst1.u8 {d18}, [r8@64], r1 ; store op2
+ vst1.u8 {d19}, [r8@64], r1 ; store op1
+ vst1.u8 {d20}, [r8@64], r1 ; store op0
+ vst1.u8 {d21}, [r8@64], r1 ; store oq0
+ vst1.u8 {d22}, [r8@64], r1 ; store oq1
+ vst1.u8 {d23}, [r8@64], r1 ; store oq2
+
+ b h_next
+
+h_wide_mbfilter
+ sub r8, r0, r1, lsl #3
+ add r8, r8, r1
+
+ vst1.u8 {d16}, [r8@64], r1 ; store op6
+ vst1.u8 {d24}, [r8@64], r1 ; store op5
+ vst1.u8 {d25}, [r8@64], r1 ; store op4
+ vst1.u8 {d26}, [r8@64], r1 ; store op3
+ vst1.u8 {d27}, [r8@64], r1 ; store op2
+ vst1.u8 {d18}, [r8@64], r1 ; store op1
+ vst1.u8 {d19}, [r8@64], r1 ; store op0
+ vst1.u8 {d20}, [r8@64], r1 ; store oq0
+ vst1.u8 {d21}, [r8@64], r1 ; store oq1
+ vst1.u8 {d22}, [r8@64], r1 ; store oq2
+ vst1.u8 {d23}, [r8@64], r1 ; store oq3
+ vst1.u8 {d1}, [r8@64], r1 ; store oq4
+ vst1.u8 {d2}, [r8@64], r1 ; store oq5
+ vst1.u8 {d3}, [r8@64], r1 ; store oq6
+
+h_next
+ add r0, r0, #8
+ subs r12, r12, #1
+ bne h_count
+
+ vpop {d8-d15}
+ pop {r4-r8, pc}
+
+ ENDP ; |vp9_mb_lpf_horizontal_edge_w_neon|
+
+; void vp9_mb_lpf_vertical_edge_w_neon(uint8_t *s, int p,
+; const uint8_t *blimit,
+; const uint8_t *limit,
+; const uint8_t *thresh)
+; r0 uint8_t *s,
+; r1 int p, /* pitch */
+; r2 const uint8_t *blimit,
+; r3 const uint8_t *limit,
+; sp const uint8_t *thresh
+|vp9_mb_lpf_vertical_edge_w_neon| PROC
+ push {r4-r8, lr}
+ vpush {d8-d15}
+ ldr r4, [sp, #88] ; load thresh
+
+ vld1.8 {d16[]}, [r2] ; load *blimit
+ vld1.8 {d17[]}, [r3] ; load *limit
+ vld1.8 {d18[]}, [r4] ; load *thresh
+
+ sub r8, r0, #8
+
+ vld1.8 {d0}, [r8@64], r1
+ vld1.8 {d8}, [r0@64], r1
+ vld1.8 {d1}, [r8@64], r1
+ vld1.8 {d9}, [r0@64], r1
+ vld1.8 {d2}, [r8@64], r1
+ vld1.8 {d10}, [r0@64], r1
+ vld1.8 {d3}, [r8@64], r1
+ vld1.8 {d11}, [r0@64], r1
+ vld1.8 {d4}, [r8@64], r1
+ vld1.8 {d12}, [r0@64], r1
+ vld1.8 {d5}, [r8@64], r1
+ vld1.8 {d13}, [r0@64], r1
+ vld1.8 {d6}, [r8@64], r1
+ vld1.8 {d14}, [r0@64], r1
+ vld1.8 {d7}, [r8@64], r1
+ vld1.8 {d15}, [r0@64], r1
+
+ sub r0, r0, r1, lsl #3
+
+ vtrn.32 q0, q2
+ vtrn.32 q1, q3
+ vtrn.32 q4, q6
+ vtrn.32 q5, q7
+
+ vtrn.16 q0, q1
+ vtrn.16 q2, q3
+ vtrn.16 q4, q5
+ vtrn.16 q6, q7
+
+ vtrn.8 d0, d1
+ vtrn.8 d2, d3
+ vtrn.8 d4, d5
+ vtrn.8 d6, d7
+
+ vtrn.8 d8, d9
+ vtrn.8 d10, d11
+ vtrn.8 d12, d13
+ vtrn.8 d14, d15
+
+ bl vp9_wide_mbfilter_neon
+
+ tst r7, #1
+ beq v_mbfilter
+
+ ; flat && mask were not set for any of the channels. Just store the values
+ ; from filter.
+ sub r8, r0, #2
+
+ vswp d23, d25
+
+ vst4.8 {d23[0], d24[0], d25[0], d26[0]}, [r8], r1
+ vst4.8 {d23[1], d24[1], d25[1], d26[1]}, [r8], r1
+ vst4.8 {d23[2], d24[2], d25[2], d26[2]}, [r8], r1
+ vst4.8 {d23[3], d24[3], d25[3], d26[3]}, [r8], r1
+ vst4.8 {d23[4], d24[4], d25[4], d26[4]}, [r8], r1
+ vst4.8 {d23[5], d24[5], d25[5], d26[5]}, [r8], r1
+ vst4.8 {d23[6], d24[6], d25[6], d26[6]}, [r8], r1
+ vst4.8 {d23[7], d24[7], d25[7], d26[7]}, [r8], r1
+
+ b v_end
+
+v_mbfilter
+ tst r7, #2
+ beq v_wide_mbfilter
+
+ ; flat2 was not set for any of the channels. Just store the values from
+ ; mbfilter.
+ sub r8, r0, #3
+
+ vst3.8 {d18[0], d19[0], d20[0]}, [r8], r1
+ vst3.8 {d21[0], d22[0], d23[0]}, [r0], r1
+ vst3.8 {d18[1], d19[1], d20[1]}, [r8], r1
+ vst3.8 {d21[1], d22[1], d23[1]}, [r0], r1
+ vst3.8 {d18[2], d19[2], d20[2]}, [r8], r1
+ vst3.8 {d21[2], d22[2], d23[2]}, [r0], r1
+ vst3.8 {d18[3], d19[3], d20[3]}, [r8], r1
+ vst3.8 {d21[3], d22[3], d23[3]}, [r0], r1
+ vst3.8 {d18[4], d19[4], d20[4]}, [r8], r1
+ vst3.8 {d21[4], d22[4], d23[4]}, [r0], r1
+ vst3.8 {d18[5], d19[5], d20[5]}, [r8], r1
+ vst3.8 {d21[5], d22[5], d23[5]}, [r0], r1
+ vst3.8 {d18[6], d19[6], d20[6]}, [r8], r1
+ vst3.8 {d21[6], d22[6], d23[6]}, [r0], r1
+ vst3.8 {d18[7], d19[7], d20[7]}, [r8], r1
+ vst3.8 {d21[7], d22[7], d23[7]}, [r0], r1
+
+ b v_end
+
+v_wide_mbfilter
+ sub r8, r0, #8
+
+ vtrn.32 d0, d26
+ vtrn.32 d16, d27
+ vtrn.32 d24, d18
+ vtrn.32 d25, d19
+
+ vtrn.16 d0, d24
+ vtrn.16 d16, d25
+ vtrn.16 d26, d18
+ vtrn.16 d27, d19
+
+ vtrn.8 d0, d16
+ vtrn.8 d24, d25
+ vtrn.8 d26, d27
+ vtrn.8 d18, d19
+
+ vtrn.32 d20, d1
+ vtrn.32 d21, d2
+ vtrn.32 d22, d3
+ vtrn.32 d23, d15
+
+ vtrn.16 d20, d22
+ vtrn.16 d21, d23
+ vtrn.16 d1, d3
+ vtrn.16 d2, d15
+
+ vtrn.8 d20, d21
+ vtrn.8 d22, d23
+ vtrn.8 d1, d2
+ vtrn.8 d3, d15
+
+ vst1.8 {d0}, [r8@64], r1
+ vst1.8 {d20}, [r0@64], r1
+ vst1.8 {d16}, [r8@64], r1
+ vst1.8 {d21}, [r0@64], r1
+ vst1.8 {d24}, [r8@64], r1
+ vst1.8 {d22}, [r0@64], r1
+ vst1.8 {d25}, [r8@64], r1
+ vst1.8 {d23}, [r0@64], r1
+ vst1.8 {d26}, [r8@64], r1
+ vst1.8 {d1}, [r0@64], r1
+ vst1.8 {d27}, [r8@64], r1
+ vst1.8 {d2}, [r0@64], r1
+ vst1.8 {d18}, [r8@64], r1
+ vst1.8 {d3}, [r0@64], r1
+ vst1.8 {d19}, [r8@64], r1
+ vst1.8 {d15}, [r0@64], r1
+
+v_end
+ vpop {d8-d15}
+ pop {r4-r8, pc}
+
+ ENDP ; |vp9_mb_lpf_vertical_edge_w_neon|
+
+; void vp9_wide_mbfilter_neon();
+; This is a helper function for the loopfilters. The individual functions do the
+; necessary load, transpose (if necessary) and store.
+;
+; r0-r3 PRESERVE
+; d16 blimit
+; d17 limit
+; d18 thresh
+; d0 p7
+; d1 p6
+; d2 p5
+; d3 p4
+; d4 p3
+; d5 p2
+; d6 p1
+; d7 p0
+; d8 q0
+; d9 q1
+; d10 q2
+; d11 q3
+; d12 q4
+; d13 q5
+; d14 q6
+; d15 q7
+|vp9_wide_mbfilter_neon| PROC
+ mov r7, #0
+
+ ; filter_mask
+ vabd.u8 d19, d4, d5 ; abs(p3 - p2)
+ vabd.u8 d20, d5, d6 ; abs(p2 - p1)
+ vabd.u8 d21, d6, d7 ; abs(p1 - p0)
+ vabd.u8 d22, d9, d8 ; abs(q1 - q0)
+ vabd.u8 d23, d10, d9 ; abs(q2 - q1)
+ vabd.u8 d24, d11, d10 ; abs(q3 - q2)
+
+ ; only compare the largest value to limit
+ vmax.u8 d19, d19, d20 ; max(abs(p3 - p2), abs(p2 - p1))
+ vmax.u8 d20, d21, d22 ; max(abs(p1 - p0), abs(q1 - q0))
+ vmax.u8 d23, d23, d24 ; max(abs(q2 - q1), abs(q3 - q2))
+ vmax.u8 d19, d19, d20
+
+ vabd.u8 d24, d7, d8 ; abs(p0 - q0)
+
+ vmax.u8 d19, d19, d23
+
+ vabd.u8 d23, d6, d9 ; a = abs(p1 - q1)
+ vqadd.u8 d24, d24, d24 ; b = abs(p0 - q0) * 2
+
+ ; compare against limit: d19 = (limit >= max(abs()))
+ vcge.u8 d19, d17, d19
+
+ ; flatmask4
+ vabd.u8 d25, d7, d5 ; abs(p0 - p2)
+ vabd.u8 d26, d8, d10 ; abs(q0 - q2)
+ vabd.u8 d27, d4, d7 ; abs(p3 - p0)
+ vabd.u8 d28, d11, d8 ; abs(q3 - q0)
+
+ ; only compare the largest value to thresh
+ vmax.u8 d25, d25, d26 ; max(abs(p0 - p2), abs(q0 - q2))
+ vmax.u8 d26, d27, d28 ; max(abs(p3 - p0), abs(q3 - q0))
+ vmax.u8 d25, d25, d26
+ vmax.u8 d20, d20, d25
+
+ vshr.u8 d23, d23, #1 ; a = a / 2
+ vqadd.u8 d24, d24, d23 ; a = b + a
+
+ vmov.u8 d30, #1
+ vcge.u8 d24, d16, d24 ; (a > blimit * 2 + limit) * -1
+
+ vcge.u8 d20, d30, d20 ; flat
+
+ vand d19, d19, d24 ; mask
+
+ ; hevmask
+ vcgt.u8 d21, d21, d18 ; (abs(p1 - p0) > thresh)*-1
+ vcgt.u8 d22, d22, d18 ; (abs(q1 - q0) > thresh)*-1
+ vorr d21, d21, d22 ; hev
+
+ vand d16, d20, d19 ; flat && mask
+ vmov r5, r6, d16
+ orrs r5, r5, r6 ; Check for 0
+ orreq r7, r7, #1 ; Only do filter branch
+
+ ; flatmask5(1, p7, p6, p5, p4, p0, q0, q4, q5, q6, q7)
+ vabd.u8 d22, d3, d7 ; abs(p4 - p0)
+ vabd.u8 d23, d12, d8 ; abs(q4 - q0)
+ vabd.u8 d24, d7, d2 ; abs(p0 - p5)
+ vabd.u8 d25, d8, d13 ; abs(q0 - q5)
+ vabd.u8 d26, d1, d7 ; abs(p6 - p0)
+ vabd.u8 d27, d14, d8 ; abs(q6 - q0)
+ vabd.u8 d28, d0, d7 ; abs(p7 - p0)
+ vabd.u8 d29, d15, d8 ; abs(q7 - q0)
+
+ ; only compare the largest value to thresh
+ vmax.u8 d22, d22, d23 ; max(abs(p4 - p0), abs(q4 - q0))
+ vmax.u8 d23, d24, d25 ; max(abs(p0 - p5), abs(q0 - q5))
+ vmax.u8 d24, d26, d27 ; max(abs(p6 - p0), abs(q6 - q0))
+ vmax.u8 d25, d28, d29 ; max(abs(p7 - p0), abs(q7 - q0))
+
+ vmax.u8 d26, d22, d23
+ vmax.u8 d27, d24, d25
+ vmax.u8 d23, d26, d27
+
+ vcge.u8 d18, d30, d23 ; flat2
+
+ vmov.u8 d22, #0x80
+
+ vand d17, d18, d16 ; flat2 && flat && mask
+ vmov r5, r6, d17
+ orrs r5, r5, r6 ; Check for 0
+ orreq r7, r7, #2 ; Only do mbfilter branch
+
+ ; mbfilter() function
+
+ ; filter() function
+ ; convert to signed
+ veor d23, d8, d22 ; qs0
+ veor d24, d7, d22 ; ps0
+ veor d25, d6, d22 ; ps1
+ veor d26, d9, d22 ; qs1
+
+ vmov.u8 d27, #3
+
+ vsub.s8 d28, d23, d24 ; ( qs0 - ps0)
+
+ vqsub.s8 d29, d25, d26 ; filter = clamp(ps1-qs1)
+
+ vmull.s8 q15, d28, d27 ; 3 * ( qs0 - ps0)
+
+ vand d29, d29, d21 ; filter &= hev
+
+ vaddw.s8 q15, q15, d29 ; filter + 3 * (qs0 - ps0)
+
+ vmov.u8 d29, #4
+
+ ; filter = clamp(filter + 3 * ( qs0 - ps0))
+ vqmovn.s16 d28, q15
+
+ vand d28, d28, d19 ; filter &= mask
+
+ vqadd.s8 d30, d28, d27 ; filter2 = clamp(filter+3)
+ vqadd.s8 d29, d28, d29 ; filter1 = clamp(filter+4)
+ vshr.s8 d30, d30, #3 ; filter2 >>= 3
+ vshr.s8 d29, d29, #3 ; filter1 >>= 3
+
+
+ vqadd.s8 d24, d24, d30 ; op0 = clamp(ps0 + filter2)
+ vqsub.s8 d23, d23, d29 ; oq0 = clamp(qs0 - filter1)
+
+ ; outer tap adjustments: ++filter1 >> 1
+ vrshr.s8 d29, d29, #1
+ vbic d29, d29, d21 ; filter &= ~hev
+
+ vqadd.s8 d25, d25, d29 ; op1 = clamp(ps1 + filter)
+ vqsub.s8 d26, d26, d29 ; oq1 = clamp(qs1 - filter)
+
+ veor d24, d24, d22 ; *f_op0 = u^0x80
+ veor d23, d23, d22 ; *f_oq0 = u^0x80
+ veor d25, d25, d22 ; *f_op1 = u^0x80
+ veor d26, d26, d22 ; *f_oq1 = u^0x80
+
+ tst r7, #1
+ bxne lr
+
+ ; mbfilter flat && mask branch
+ ; TODO(fgalligan): Can I decrease the cycles shifting to consecutive d's
+ ; and using vbit on the q's?
+ vmov.u8 d29, #2
+ vaddl.u8 q15, d7, d8 ; op2 = p0 + q0
+ vmlal.u8 q15, d4, d27 ; op2 = p0 + q0 + p3 * 3
+ vmlal.u8 q15, d5, d29 ; op2 = p0 + q0 + p3 * 3 + p2 * 2
+ vaddw.u8 q15, d6 ; op2 = p1 + p0 + q0 + p3 * 3 + p2 * 2
+ vqrshrn.u16 d18, q15, #3 ; r_op2
+
+ vsubw.u8 q15, d4 ; op1 = op2 - p3
+ vsubw.u8 q15, d5 ; op1 -= p2
+ vaddw.u8 q15, d6 ; op1 += p1
+ vaddw.u8 q15, d9 ; op1 += q1
+ vqrshrn.u16 d19, q15, #3 ; r_op1
+
+ vsubw.u8 q15, d4 ; op0 = op1 - p3
+ vsubw.u8 q15, d6 ; op0 -= p1
+ vaddw.u8 q15, d7 ; op0 += p0
+ vaddw.u8 q15, d10 ; op0 += q2
+ vqrshrn.u16 d20, q15, #3 ; r_op0
+
+ vsubw.u8 q15, d4 ; oq0 = op0 - p3
+ vsubw.u8 q15, d7 ; oq0 -= p0
+ vaddw.u8 q15, d8 ; oq0 += q0
+ vaddw.u8 q15, d11 ; oq0 += q3
+ vqrshrn.u16 d21, q15, #3 ; r_oq0
+
+ vsubw.u8 q15, d5 ; oq1 = oq0 - p2
+ vsubw.u8 q15, d8 ; oq1 -= q0
+ vaddw.u8 q15, d9 ; oq1 += q1
+ vaddw.u8 q15, d11 ; oq1 += q3
+ vqrshrn.u16 d22, q15, #3 ; r_oq1
+
+ vsubw.u8 q15, d6 ; oq2 = oq0 - p1
+ vsubw.u8 q15, d9 ; oq2 -= q1
+ vaddw.u8 q15, d10 ; oq2 += q2
+ vaddw.u8 q15, d11 ; oq2 += q3
+ vqrshrn.u16 d27, q15, #3 ; r_oq2
+
+ ; Filter does not set op2 or oq2, so use p2 and q2.
+ vbif d18, d5, d16 ; t_op2 |= p2 & ~(flat & mask)
+ vbif d19, d25, d16 ; t_op1 |= f_op1 & ~(flat & mask)
+ vbif d20, d24, d16 ; t_op0 |= f_op0 & ~(flat & mask)
+ vbif d21, d23, d16 ; t_oq0 |= f_oq0 & ~(flat & mask)
+ vbif d22, d26, d16 ; t_oq1 |= f_oq1 & ~(flat & mask)
+
+ vbit d23, d27, d16 ; t_oq2 |= r_oq2 & (flat & mask)
+ vbif d23, d10, d16 ; t_oq2 |= q2 & ~(flat & mask)
+
+ tst r7, #2
+ bxne lr
+
+ ; wide_mbfilter flat2 && flat && mask branch
+ vmov.u8 d16, #7
+ vaddl.u8 q15, d7, d8 ; op6 = p0 + q0
+ vmlal.u8 q15, d0, d16 ; op6 += p7 * 7
+ vmlal.u8 q15, d1, d29 ; op6 += p6 * 2
+ vaddw.u8 q15, d2 ; op6 += p5
+ vaddw.u8 q15, d3 ; op6 += p4
+ vaddw.u8 q15, d4 ; op6 += p3
+ vaddw.u8 q15, d5 ; op6 += p2
+ vaddw.u8 q15, d6 ; op6 += p1
+ vqrshrn.u16 d16, q15, #4 ; w_op6
+
+ vsubw.u8 q15, d0 ; op5 = op6 - p7
+ vsubw.u8 q15, d1 ; op5 -= p6
+ vaddw.u8 q15, d2 ; op5 += p5
+ vaddw.u8 q15, d9 ; op5 += q1
+ vqrshrn.u16 d24, q15, #4 ; w_op5
+
+ vsubw.u8 q15, d0 ; op4 = op5 - p7
+ vsubw.u8 q15, d2 ; op4 -= p5
+ vaddw.u8 q15, d3 ; op4 += p4
+ vaddw.u8 q15, d10 ; op4 += q2
+ vqrshrn.u16 d25, q15, #4 ; w_op4
+
+ vsubw.u8 q15, d0 ; op3 = op4 - p7
+ vsubw.u8 q15, d3 ; op3 -= p4
+ vaddw.u8 q15, d4 ; op3 += p3
+ vaddw.u8 q15, d11 ; op3 += q3
+ vqrshrn.u16 d26, q15, #4 ; w_op3
+
+ vsubw.u8 q15, d0 ; op2 = op3 - p7
+ vsubw.u8 q15, d4 ; op2 -= p3
+ vaddw.u8 q15, d5 ; op2 += p2
+ vaddw.u8 q15, d12 ; op2 += q4
+ vqrshrn.u16 d27, q15, #4 ; w_op2
+
+ vbif d27, d18, d17 ; op2 |= t_op2 & ~(f2 & f & m)
+
+ vsubw.u8 q15, d0 ; op1 = op2 - p7
+ vsubw.u8 q15, d5 ; op1 -= p2
+ vaddw.u8 q15, d6 ; op1 += p1
+ vaddw.u8 q15, d13 ; op1 += q5
+ vqrshrn.u16 d18, q15, #4 ; w_op1
+
+ vbif d18, d19, d17 ; op1 |= t_op1 & ~(f2 & f & m)
+
+ vsubw.u8 q15, d0 ; op0 = op1 - p7
+ vsubw.u8 q15, d6 ; op0 -= p1
+ vaddw.u8 q15, d7 ; op0 += p0
+ vaddw.u8 q15, d14 ; op0 += q6
+ vqrshrn.u16 d19, q15, #4 ; w_op0
+
+ vbif d19, d20, d17 ; op0 |= t_op0 & ~(f2 & f & m)
+
+ vsubw.u8 q15, d0 ; oq0 = op0 - p7
+ vsubw.u8 q15, d7 ; oq0 -= p0
+ vaddw.u8 q15, d8 ; oq0 += q0
+ vaddw.u8 q15, d15 ; oq0 += q7
+ vqrshrn.u16 d20, q15, #4 ; w_oq0
+
+ vbif d20, d21, d17 ; oq0 |= t_oq0 & ~(f2 & f & m)
+
+ vsubw.u8 q15, d1 ; oq1 = oq0 - p6
+ vsubw.u8 q15, d8 ; oq1 -= q0
+ vaddw.u8 q15, d9 ; oq1 += q1
+ vaddw.u8 q15, d15 ; oq1 += q7
+ vqrshrn.u16 d21, q15, #4 ; w_oq1
+
+ vbif d21, d22, d17 ; oq1 |= t_oq1 & ~(f2 & f & m)
+
+ vsubw.u8 q15, d2 ; oq2 = oq1 - p5
+ vsubw.u8 q15, d9 ; oq2 -= q1
+ vaddw.u8 q15, d10 ; oq2 += q2
+ vaddw.u8 q15, d15 ; oq2 += q7
+ vqrshrn.u16 d22, q15, #4 ; w_oq2
+
+ vbif d22, d23, d17 ; oq2 |= t_oq2 & ~(f2 & f & m)
+
+ vsubw.u8 q15, d3 ; oq3 = oq2 - p4
+ vsubw.u8 q15, d10 ; oq3 -= q2
+ vaddw.u8 q15, d11 ; oq3 += q3
+ vaddw.u8 q15, d15 ; oq3 += q7
+ vqrshrn.u16 d23, q15, #4 ; w_oq3
+
+ vbif d16, d1, d17 ; op6 |= p6 & ~(f2 & f & m)
+
+ vsubw.u8 q15, d4 ; oq4 = oq3 - p3
+ vsubw.u8 q15, d11 ; oq4 -= q3
+ vaddw.u8 q15, d12 ; oq4 += q4
+ vaddw.u8 q15, d15 ; oq4 += q7
+ vqrshrn.u16 d1, q15, #4 ; w_oq4
+
+ vbif d24, d2, d17 ; op5 |= p5 & ~(f2 & f & m)
+
+ vsubw.u8 q15, d5 ; oq5 = oq4 - p2
+ vsubw.u8 q15, d12 ; oq5 -= q4
+ vaddw.u8 q15, d13 ; oq5 += q5
+ vaddw.u8 q15, d15 ; oq5 += q7
+ vqrshrn.u16 d2, q15, #4 ; w_oq5
+
+ vbif d25, d3, d17 ; op4 |= p4 & ~(f2 & f & m)
+
+ vsubw.u8 q15, d6 ; oq6 = oq5 - p1
+ vsubw.u8 q15, d13 ; oq6 -= q5
+ vaddw.u8 q15, d14 ; oq6 += q6
+ vaddw.u8 q15, d15 ; oq6 += q7
+ vqrshrn.u16 d3, q15, #4 ; w_oq6
+
+ vbif d26, d4, d17 ; op3 |= p3 & ~(f2 & f & m)
+ vbif d23, d11, d17 ; oq3 |= q3 & ~(f2 & f & m)
+ vbif d1, d12, d17 ; oq4 |= q4 & ~(f2 & f & m)
+ vbif d2, d13, d17 ; oq5 |= q5 & ~(f2 & f & m)
+ vbif d3, d14, d17 ; oq6 |= q6 & ~(f2 & f & m)
+
+ bx lr
+ ENDP ; |vp9_wide_mbfilter_neon|
+
+ END
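
The wide_mbfilter branch above computes its 15-tap outputs with a sliding window: q15 holds a running 16-bit sum, each step subtracts the sample leaving the window and adds the one entering it (vsubw/vaddw), and vqrshrn #4 produces ROUND_POWER_OF_TWO(sum, 4). A C sketch of the first few taps, matching the asm comments (the helper name is ours):

    #include <stdint.h>

    #define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

    /* p[0]=p0..p[7]=p7, q[0]=q0..q[7]=q7, op[0]=op0..op[6]=op6 */
    static void wide_filter_sums_sketch(const uint8_t *p, const uint8_t *q,
                                        uint8_t *op) {
      int sum = p[7] * 7 + p[6] * 2 + p[5] + p[4] +
                p[3] + p[2] + p[1] + p[0] + q[0];  /* op6 seed */
      op[6] = ROUND_POWER_OF_TWO(sum, 4);
      sum += q[1] + p[5] - p[7] - p[6];            /* slide for op5 */
      op[5] = ROUND_POWER_OF_TWO(sum, 4);
      sum += q[2] + p[4] - p[7] - p[5];            /* slide for op4 */
      op[4] = ROUND_POWER_OF_TWO(sum, 4);
      /* ...and so on through op3..op0 and oq0..oq6, always dropping the
       * oldest sample and adding the next one (q7 repeats at the end). */
    }
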
diff --git a/libvpx/vp9/common/arm/neon/vp9_short_idct8x8_add_neon.asm b/libvpx/vp9/common/arm/neon/vp9_short_idct8x8_add_neon.asm
index 8e4aadac2..f82966577 100644
--- a/libvpx/vp9/common/arm/neon/vp9_short_idct8x8_add_neon.asm
+++ b/libvpx/vp9/common/arm/neon/vp9_short_idct8x8_add_neon.asm
@@ -22,8 +22,8 @@
MACRO
IDCT8x8_1D
; stage 1
- vdup.16 d0, r3; ; duplicate cospi_28_64
- vdup.16 d1, r4; ; duplicate cospi_4_64
+ vdup.16 d0, r3 ; duplicate cospi_28_64
+ vdup.16 d1, r4 ; duplicate cospi_4_64
; input[1] * cospi_28_64
vmull.s16 q2, d18, d0
@@ -57,8 +57,8 @@
vqrshrn.s32 d14, q2, #14 ; >> 14
vqrshrn.s32 d15, q3, #14 ; >> 14
- vdup.16 d0, r5; ; duplicate cospi_12_64
- vdup.16 d1, r6; ; duplicate cospi_20_64
+ vdup.16 d0, r5 ; duplicate cospi_12_64
+ vdup.16 d1, r6 ; duplicate cospi_20_64
; input[5] * cospi_12_64
vmull.s16 q2, d26, d0
@@ -93,7 +93,7 @@
vqrshrn.s32 d13, q1, #14 ; >> 14
; stage 2 & stage 3 - even half
- vdup.16 d0, r7; ; duplicate cospi_16_64
+ vdup.16 d0, r7 ; duplicate cospi_16_64
; input[0] * cospi_16_64
vmull.s16 q2, d16, d0
@@ -128,8 +128,8 @@
vqrshrn.s32 d23, q3, #14 ; >> 14
; input[1] * cospi_24_64 - input[3] * cospi_8_64
- vdup.16 d0, r8; ; duplicate cospi_24_64
- vdup.16 d1, r9; ; duplicate cospi_8_64
+ vdup.16 d0, r8 ; duplicate cospi_24_64
+ vdup.16 d1, r9 ; duplicate cospi_8_64
; input[1] * cospi_24_64
vmull.s16 q2, d20, d0
@@ -176,7 +176,7 @@
vadd.s16 q7, q7, q6 ; step2[7] = step1[6] + step1[7]
; stage 3 -odd half
- vdup.16 d16, r7; ; duplicate cospi_16_64
+ vdup.16 d16, r7 ; duplicate cospi_16_64
; step2[6] * cospi_16_64
vmull.s16 q9, d28, d16
@@ -211,14 +211,14 @@
vqrshrn.s32 d13, q10, #14 ; >> 14
; stage 4
- vadd.s16 q8, q0, q7; ; output[0] = step1[0] + step1[7];
- vadd.s16 q9, q1, q6; ; output[1] = step1[1] + step1[6];
- vadd.s16 q10, q2, q5; ; output[2] = step1[2] + step1[5];
- vadd.s16 q11, q3, q4; ; output[3] = step1[3] + step1[4];
- vsub.s16 q12, q3, q4; ; output[4] = step1[3] - step1[4];
- vsub.s16 q13, q2, q5; ; output[5] = step1[2] - step1[5];
- vsub.s16 q14, q1, q6; ; output[6] = step1[1] - step1[6];
- vsub.s16 q15, q0, q7; ; output[7] = step1[0] - step1[7];
+ vadd.s16 q8, q0, q7 ; output[0] = step1[0] + step1[7];
+ vadd.s16 q9, q1, q6 ; output[1] = step1[1] + step1[6];
+ vadd.s16 q10, q2, q5 ; output[2] = step1[2] + step1[5];
+ vadd.s16 q11, q3, q4 ; output[3] = step1[3] + step1[4];
+ vsub.s16 q12, q3, q4 ; output[4] = step1[3] - step1[4];
+ vsub.s16 q13, q2, q5 ; output[5] = step1[2] - step1[5];
+ vsub.s16 q14, q1, q6 ; output[6] = step1[1] - step1[6];
+ vsub.s16 q15, q0, q7 ; output[7] = step1[0] - step1[7];
MEND
; Transpose an 8x8 16-bit data matrix. Data is loaded in q8-q15.
@@ -310,14 +310,14 @@
mov r0, r1
; load destination data
- vld1.u8 {d0}, [r1], r2
- vld1.u8 {d1}, [r1], r2
- vld1.s16 {d2}, [r1], r2
- vld1.s16 {d3}, [r1], r2
- vld1.s16 {d4}, [r1], r2
- vld1.s16 {d5}, [r1], r2
- vld1.s16 {d6}, [r1], r2
- vld1.s16 {d7}, [r1]
+ vld1.64 {d0}, [r1], r2
+ vld1.64 {d1}, [r1], r2
+ vld1.64 {d2}, [r1], r2
+ vld1.64 {d3}, [r1], r2
+ vld1.64 {d4}, [r1], r2
+ vld1.64 {d5}, [r1], r2
+ vld1.64 {d6}, [r1], r2
+ vld1.64 {d7}, [r1]
; ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * dest_stride + i]
vaddw.u8 q8, q8, d0
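
The destination loads above feed the reconstruction step named in the comment; in C it is a rounded, clipped add of the inverse-transform output into the destination (a sketch; clip_pixel mirrors the usual clamp-to-[0,255] helper):

    #include <stdint.h>

    #define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

    static uint8_t clip_pixel(int val) {
      return (uint8_t)(val > 255 ? 255 : (val < 0 ? 0 : val));
    }

    static void add_column(const int16_t *temp_out, uint8_t *dest,
                           int dest_stride, int i, int rows) {
      int j;
      for (j = 0; j < rows; ++j)
        dest[j * dest_stride + i] = clip_pixel(
            ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * dest_stride + i]);
    }
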
diff --git a/libvpx/vp9/common/vp9_blockd.h b/libvpx/vp9/common/vp9_blockd.h
index 129711412..f68c5c6ea 100644
--- a/libvpx/vp9/common/vp9_blockd.h
+++ b/libvpx/vp9/common/vp9_blockd.h
@@ -26,9 +26,6 @@
#include "vp9/common/vp9_treecoder.h"
#define BLOCK_SIZE_GROUPS 4
-
-#define PREDICTION_PROBS 3
-
#define MBSKIP_CONTEXTS 3
/* Segment Feature Masks */
@@ -164,6 +161,11 @@ typedef struct {
union b_mode_info bmi[4];
} MODE_INFO;
+static int is_inter_block(const MB_MODE_INFO *mbmi) {
+ return mbmi->ref_frame[0] > INTRA_FRAME;
+}
+
+
enum mv_precision {
MV_PRECISION_Q3,
MV_PRECISION_Q4
@@ -286,22 +288,22 @@ typedef struct macroblockd {
static INLINE unsigned char *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE_TYPE subsize) {
switch (subsize) {
- case BLOCK_SIZE_SB64X64:
- case BLOCK_SIZE_SB64X32:
- case BLOCK_SIZE_SB32X64:
- case BLOCK_SIZE_SB32X32:
+ case BLOCK_64X64:
+ case BLOCK_64X32:
+ case BLOCK_32X64:
+ case BLOCK_32X32:
return &xd->sb_index;
- case BLOCK_SIZE_SB32X16:
- case BLOCK_SIZE_SB16X32:
- case BLOCK_SIZE_MB16X16:
+ case BLOCK_32X16:
+ case BLOCK_16X32:
+ case BLOCK_16X16:
return &xd->mb_index;
- case BLOCK_SIZE_SB16X8:
- case BLOCK_SIZE_SB8X16:
- case BLOCK_SIZE_SB8X8:
+ case BLOCK_16X8:
+ case BLOCK_8X16:
+ case BLOCK_8X8:
return &xd->b_index;
- case BLOCK_SIZE_SB8X4:
- case BLOCK_SIZE_SB4X8:
- case BLOCK_SIZE_AB4X4:
+ case BLOCK_8X4:
+ case BLOCK_4X8:
+ case BLOCK_4X4:
return &xd->ab_index;
default:
assert(0);
@@ -315,7 +317,7 @@ static INLINE void update_partition_context(MACROBLOCKD *xd,
const int bsl = b_width_log2(sb_size), bs = (1 << bsl) / 2;
const int bwl = b_width_log2(sb_type);
const int bhl = b_height_log2(sb_type);
- const int boffset = b_width_log2(BLOCK_SIZE_SB64X64) - bsl;
+ const int boffset = b_width_log2(BLOCK_64X64) - bsl;
const char pcval0 = ~(0xe << boffset);
const char pcval1 = ~(0xf << boffset);
const char pcvalue[2] = {pcval0, pcval1};
@@ -333,7 +335,7 @@ static INLINE int partition_plane_context(MACROBLOCKD *xd,
BLOCK_SIZE_TYPE sb_type) {
int bsl = mi_width_log2(sb_type), bs = 1 << bsl;
int above = 0, left = 0, i;
- int boffset = mi_width_log2(BLOCK_SIZE_SB64X64) - bsl;
+ int boffset = mi_width_log2(BLOCK_64X64) - bsl;
assert(mi_width_log2(sb_type) == mi_height_log2(sb_type));
assert(bsl >= 0);
@@ -366,10 +368,10 @@ static INLINE TX_TYPE get_tx_type_4x4(PLANE_TYPE plane_type,
if (plane_type != PLANE_TYPE_Y_WITH_DC ||
xd->lossless ||
- mbmi->ref_frame[0] != INTRA_FRAME)
+ is_inter_block(mbmi))
return DCT_DCT;
- return mode2txfm_map[mbmi->sb_type < BLOCK_SIZE_SB8X8 ?
+ return mode2txfm_map[mbmi->sb_type < BLOCK_8X8 ?
mi->bmi[ib].as_mode : mbmi->mode];
}
@@ -496,16 +498,16 @@ static INLINE void foreach_transformed_block_in_plane(
// it to 4x4 block sizes.
if (xd->mb_to_right_edge < 0)
max_blocks_wide +=
- + (xd->mb_to_right_edge >> (5 + xd->plane[plane].subsampling_x));
+ (xd->mb_to_right_edge >> (5 + xd->plane[plane].subsampling_x));
if (xd->mb_to_bottom_edge < 0)
max_blocks_high +=
- + (xd->mb_to_bottom_edge >> (5 + xd->plane[plane].subsampling_y));
+ (xd->mb_to_bottom_edge >> (5 + xd->plane[plane].subsampling_y));
i = 0;
// Unlike the normal case - in here we have to keep track of the
// row and column of the blocks we use so that we know if we are in
- // the unrestricted motion border..
+ // the unrestricted motion border.
for (r = 0; r < (1 << sh); r += (1 << tx_size)) {
for (c = 0; c < (1 << sw); c += (1 << tx_size)) {
if (r < max_blocks_high && c < max_blocks_wide)
@@ -563,8 +565,8 @@ static INLINE void foreach_predicted_block_in_plane(
// size of the predictor to use.
int pred_w, pred_h;
- if (xd->mode_info_context->mbmi.sb_type < BLOCK_SIZE_SB8X8) {
- assert(bsize == BLOCK_SIZE_SB8X8);
+ if (xd->mode_info_context->mbmi.sb_type < BLOCK_8X8) {
+ assert(bsize == BLOCK_8X8);
pred_w = 0;
pred_h = 0;
} else {
@@ -689,46 +691,39 @@ static void extend_for_intra(MACROBLOCKD* const xd, int plane, int block,
}
}
static void set_contexts_on_border(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize,
- int plane, int ss_tx_size, int eob, int aoff,
- int loff, ENTROPY_CONTEXT *A,
- ENTROPY_CONTEXT *L) {
- const int bw = b_width_log2(bsize), bh = b_height_log2(bsize);
- const int sw = bw - xd->plane[plane].subsampling_x;
- const int sh = bh - xd->plane[plane].subsampling_y;
- int mi_blocks_wide = 1 << sw;
- int mi_blocks_high = 1 << sh;
- int tx_size_in_blocks = (1 << ss_tx_size);
+ int plane, int tx_size_in_blocks,
+ int eob, int aoff, int loff,
+ ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L) {
+ struct macroblockd_plane *pd = &xd->plane[plane];
int above_contexts = tx_size_in_blocks;
int left_contexts = tx_size_in_blocks;
+ int mi_blocks_wide = 1 << plane_block_width_log2by4(bsize, pd);
+ int mi_blocks_high = 1 << plane_block_height_log2by4(bsize, pd);
int pt;
// xd->mb_to_right_edge is in units of pixels * 8. This converts
// it to 4x4 block sizes.
- if (xd->mb_to_right_edge < 0) {
- mi_blocks_wide += (xd->mb_to_right_edge
- >> (5 + xd->plane[plane].subsampling_x));
- }
+ if (xd->mb_to_right_edge < 0)
+ mi_blocks_wide += (xd->mb_to_right_edge >> (5 + pd->subsampling_x));
// this code attempts to avoid copying into contexts that are outside
// our border. Any blocks that do are set to 0...
if (above_contexts + aoff > mi_blocks_wide)
above_contexts = mi_blocks_wide - aoff;
- if (xd->mb_to_bottom_edge < 0) {
- mi_blocks_high += (xd->mb_to_bottom_edge
- >> (5 + xd->plane[plane].subsampling_y));
- }
- if (left_contexts + loff > mi_blocks_high) {
+ if (xd->mb_to_bottom_edge < 0)
+ mi_blocks_high += (xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
+
+ if (left_contexts + loff > mi_blocks_high)
left_contexts = mi_blocks_high - loff;
- }
for (pt = 0; pt < above_contexts; pt++)
A[pt] = eob > 0;
- for (pt = above_contexts; pt < (1 << ss_tx_size); pt++)
+ for (pt = above_contexts; pt < tx_size_in_blocks; pt++)
A[pt] = 0;
for (pt = 0; pt < left_contexts; pt++)
L[pt] = eob > 0;
- for (pt = left_contexts; pt < (1 << ss_tx_size); pt++)
+ for (pt = left_contexts; pt < tx_size_in_blocks; pt++)
L[pt] = 0;
}
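
The `>> (5 + subsampling)` conversion used in both hunks above is easier to verify with the units written out: xd->mb_to_right_edge is stored in pixels * 8, and one 4x4 block spans 4 pixels, i.e. 32 of those units, so a shift by 5 yields whole 4x4 blocks; chroma subsampling halves the plane and adds one more shift. A minimal sketch (the helper name is illustrative, not part of the tree):

    static int edge_to_4x4_blocks(int mb_to_edge, int subsampling) {
      // mb_to_edge is in (pixels * 8); one 4x4 block = 4 * 8 = 32 units.
      return mb_to_edge >> (5 + subsampling);  // e.g. -64 -> -2 blocks
    }
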
diff --git a/libvpx/vp9/common/vp9_common_data.c b/libvpx/vp9/common/vp9_common_data.c
index dee44ec63..fdf37e46a 100644
--- a/libvpx/vp9/common/vp9_common_data.c
+++ b/libvpx/vp9/common/vp9_common_data.c
@@ -31,6 +31,14 @@ const int mi_height_log2_lookup[BLOCK_SIZE_TYPES] =
const int num_8x8_blocks_high_lookup[BLOCK_SIZE_TYPES] =
{1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8};
+// MIN(3, MIN(b_width_log2(bsize), b_height_log2(bsize)))
+const int size_group_lookup[BLOCK_SIZE_TYPES] =
+ {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3};
+
+const int num_pels_log2_lookup[BLOCK_SIZE_TYPES] =
+ {4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12};
+
const PARTITION_TYPE partition_lookup[][BLOCK_SIZE_TYPES] = {
{ // 4X4
// 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64
@@ -40,25 +48,25 @@ const PARTITION_TYPE partition_lookup[][BLOCK_SIZE_TYPES] = {
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
PARTITION_INVALID
}, { // 8X8
- // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64
+ // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64
PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE,
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID
}, { // 16X16
- // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64
+ // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID,
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
PARTITION_INVALID, PARTITION_INVALID
}, { // 32X32
- // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64
+ // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT,
PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID,
PARTITION_INVALID, PARTITION_INVALID
}, { // 64X64
- // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64
+ // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ,
@@ -68,29 +76,29 @@ const PARTITION_TYPE partition_lookup[][BLOCK_SIZE_TYPES] = {
const BLOCK_SIZE_TYPE subsize_lookup[PARTITION_TYPES][BLOCK_SIZE_TYPES] = {
{ // PARTITION_NONE
- BLOCK_SIZE_AB4X4, BLOCK_SIZE_SB4X8, BLOCK_SIZE_SB8X4,
- BLOCK_SIZE_SB8X8, BLOCK_SIZE_SB8X16, BLOCK_SIZE_SB16X8,
- BLOCK_SIZE_MB16X16, BLOCK_SIZE_SB16X32, BLOCK_SIZE_SB32X16,
- BLOCK_SIZE_SB32X32, BLOCK_SIZE_SB32X64, BLOCK_SIZE_SB64X32,
- BLOCK_SIZE_SB64X64,
+ BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+ BLOCK_8X8, BLOCK_8X16, BLOCK_16X8,
+ BLOCK_16X16, BLOCK_16X32, BLOCK_32X16,
+ BLOCK_32X32, BLOCK_32X64, BLOCK_64X32,
+ BLOCK_64X64,
}, { // PARTITION_HORZ
BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
- BLOCK_SIZE_SB8X4, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
- BLOCK_SIZE_SB16X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
- BLOCK_SIZE_SB32X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
- BLOCK_SIZE_SB64X32,
+ BLOCK_8X4, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
+ BLOCK_16X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
+ BLOCK_32X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
+ BLOCK_64X32,
}, { // PARTITION_VERT
BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
- BLOCK_SIZE_SB4X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
- BLOCK_SIZE_SB8X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
- BLOCK_SIZE_SB16X32, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
- BLOCK_SIZE_SB32X64,
+ BLOCK_4X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
+ BLOCK_8X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
+ BLOCK_16X32, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
+ BLOCK_32X64,
}, { // PARTITION_SPLIT
BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
- BLOCK_SIZE_AB4X4, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
- BLOCK_SIZE_SB8X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
- BLOCK_SIZE_MB16X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
- BLOCK_SIZE_SB32X32,
+ BLOCK_4X4, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
+ BLOCK_8X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
+ BLOCK_16X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
+ BLOCK_32X32,
}
};
@@ -108,14 +116,9 @@ const TX_SIZE max_uv_txsize_lookup[BLOCK_SIZE_TYPES] = {
};
const BLOCK_SIZE_TYPE bsize_from_dim_lookup[5][5] = {
- {BLOCK_SIZE_AB4X4, BLOCK_SIZE_SB4X8, BLOCK_SIZE_SB4X8,
- BLOCK_SIZE_SB4X8, BLOCK_SIZE_SB4X8},
- {BLOCK_SIZE_SB8X4, BLOCK_SIZE_SB8X8, BLOCK_SIZE_SB8X16,
- BLOCK_SIZE_SB8X16, BLOCK_SIZE_SB8X16},
- {BLOCK_SIZE_SB16X8, BLOCK_SIZE_SB16X8, BLOCK_SIZE_MB16X16,
- BLOCK_SIZE_SB16X32, BLOCK_SIZE_SB16X32},
- {BLOCK_SIZE_SB32X16, BLOCK_SIZE_SB32X16, BLOCK_SIZE_SB32X16,
- BLOCK_SIZE_SB32X32, BLOCK_SIZE_SB32X64},
- {BLOCK_SIZE_SB64X32, BLOCK_SIZE_SB64X32, BLOCK_SIZE_SB64X32,
- BLOCK_SIZE_SB64X32, BLOCK_SIZE_SB64X64}
+ { BLOCK_4X4, BLOCK_4X8, BLOCK_4X8, BLOCK_4X8, BLOCK_4X8 },
+ { BLOCK_8X4, BLOCK_8X8, BLOCK_8X16, BLOCK_8X16, BLOCK_8X16 },
+ { BLOCK_16X8, BLOCK_16X8, BLOCK_16X16, BLOCK_16X32, BLOCK_16X32 },
+ { BLOCK_32X16, BLOCK_32X16, BLOCK_32X16, BLOCK_32X32, BLOCK_32X64 },
+ { BLOCK_64X32, BLOCK_64X32, BLOCK_64X32, BLOCK_64X32, BLOCK_64X64 }
};
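
The new tables in this file follow directly from the per-block log2 dimensions (measured in 4x4 units). A hedged derivation sketch, with helper names that are illustrative only:

    // bwl/bhl stand for b_width_log2(bsize)/b_height_log2(bsize).
    static int size_group(int bwl, int bhl) {
      const int m = bwl < bhl ? bwl : bhl;
      return m < 3 ? m : 3;              // MIN(3, MIN(bwl, bhl))
    }
    static int num_pels_log2(int bwl, int bhl) {
      return bwl + bhl + 4;              // a 4x4 block holds 2^4 pels
    }
    // e.g. BLOCK_16X8: bwl = 2, bhl = 1 -> size_group 1, num_pels_log2 7,
    // matching the corresponding entries in the tables above.
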
diff --git a/libvpx/vp9/common/vp9_common_data.h b/libvpx/vp9/common/vp9_common_data.h
index 8b0f8a500..bc8c01a77 100644
--- a/libvpx/vp9/common/vp9_common_data.h
+++ b/libvpx/vp9/common/vp9_common_data.h
@@ -21,10 +21,9 @@ extern const int num_8x8_blocks_wide_lookup[BLOCK_SIZE_TYPES];
extern const int num_8x8_blocks_high_lookup[BLOCK_SIZE_TYPES];
extern const int num_4x4_blocks_high_lookup[BLOCK_SIZE_TYPES];
extern const int num_4x4_blocks_wide_lookup[BLOCK_SIZE_TYPES];
-extern const PARTITION_TYPE
- partition_lookup[][BLOCK_SIZE_TYPES];
-
-
+extern const int size_group_lookup[BLOCK_SIZE_TYPES];
+extern const int num_pels_log2_lookup[BLOCK_SIZE_TYPES];
+extern const PARTITION_TYPE partition_lookup[][BLOCK_SIZE_TYPES];
extern const BLOCK_SIZE_TYPE subsize_lookup[PARTITION_TYPES][BLOCK_SIZE_TYPES];
extern const TX_SIZE max_txsize_lookup[BLOCK_SIZE_TYPES];
extern const TX_SIZE max_uv_txsize_lookup[BLOCK_SIZE_TYPES];
diff --git a/libvpx/vp9/common/vp9_entropy.c b/libvpx/vp9/common/vp9_entropy.c
index 0ad0dbccd..df3a9fed5 100644
--- a/libvpx/vp9/common/vp9_entropy.c
+++ b/libvpx/vp9/common/vp9_entropy.c
@@ -73,7 +73,7 @@ DECLARE_ALIGNED(16, const int16_t, vp9_row_scan_4x4[16]) = {
13, 11, 14, 15,
};
-DECLARE_ALIGNED(64, const int16_t, vp9_default_scan_8x8[64]) = {
+DECLARE_ALIGNED(16, const int16_t, vp9_default_scan_8x8[64]) = {
0, 8, 1, 16, 9, 2, 17, 24,
10, 3, 18, 25, 32, 11, 4, 26,
33, 19, 40, 12, 34, 27, 5, 41,
@@ -419,7 +419,7 @@ static void init_bit_trees() {
init_bit_tree(cat6, 14);
}
-vp9_extra_bit vp9_extra_bits[12] = {
+const vp9_extra_bit vp9_extra_bits[12] = {
{ 0, 0, 0, 0},
{ 0, 0, 0, 1},
{ 0, 0, 0, 2},
@@ -437,14 +437,10 @@ vp9_extra_bit vp9_extra_bits[12] = {
#include "vp9/common/vp9_default_coef_probs.h"
void vp9_default_coef_probs(VP9_COMMON *pc) {
- vpx_memcpy(pc->fc.coef_probs[TX_4X4], default_coef_probs_4x4,
- sizeof(pc->fc.coef_probs[TX_4X4]));
- vpx_memcpy(pc->fc.coef_probs[TX_8X8], default_coef_probs_8x8,
- sizeof(pc->fc.coef_probs[TX_8X8]));
- vpx_memcpy(pc->fc.coef_probs[TX_16X16], default_coef_probs_16x16,
- sizeof(pc->fc.coef_probs[TX_16X16]));
- vpx_memcpy(pc->fc.coef_probs[TX_32X32], default_coef_probs_32x32,
- sizeof(pc->fc.coef_probs[TX_32X32]));
+ vp9_copy(pc->fc.coef_probs[TX_4X4], default_coef_probs_4x4);
+ vp9_copy(pc->fc.coef_probs[TX_8X8], default_coef_probs_8x8);
+ vp9_copy(pc->fc.coef_probs[TX_16X16], default_coef_probs_16x16);
+ vp9_copy(pc->fc.coef_probs[TX_32X32], default_coef_probs_32x32);
}
// Neighborhood 5-tuples for various scans and blocksizes,
@@ -613,17 +609,17 @@ void vp9_coef_tree_initialize() {
#define COEF_COUNT_SAT_AFTER_KEY 24
#define COEF_MAX_UPDATE_FACTOR_AFTER_KEY 128
-static void adapt_coef_probs(VP9_COMMON *cm, TX_SIZE txfm_size,
- int count_sat, int update_factor) {
+static void adapt_coef_probs(VP9_COMMON *cm, TX_SIZE tx_size,
+ unsigned int count_sat,
+ unsigned int update_factor) {
FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx];
- vp9_coeff_probs_model *dst_coef_probs = cm->fc.coef_probs[txfm_size];
- vp9_coeff_probs_model *pre_coef_probs = pre_fc->coef_probs[txfm_size];
- vp9_coeff_count_model *coef_counts = cm->counts.coef[txfm_size];
+ vp9_coeff_probs_model *dst_coef_probs = cm->fc.coef_probs[tx_size];
+ vp9_coeff_probs_model *pre_coef_probs = pre_fc->coef_probs[tx_size];
+ vp9_coeff_count_model *coef_counts = cm->counts.coef[tx_size];
unsigned int (*eob_branch_count)[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS] =
- cm->counts.eob_branch[txfm_size];
- int t, i, j, k, l, count;
- int factor;
+ cm->counts.eob_branch[tx_size];
+ int t, i, j, k, l;
unsigned int branch_ct[UNCONSTRAINED_NODES][2];
vp9_prob coef_probs[UNCONSTRAINED_NODES];
int entropy_nodes_adapt = UNCONSTRAINED_NODES;
@@ -634,29 +630,23 @@ static void adapt_coef_probs(VP9_COMMON *cm, TX_SIZE txfm_size,
for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
if (l >= 3 && k == 0)
continue;
- vp9_tree_probs_from_distribution(
- vp9_coefmodel_tree,
- coef_probs, branch_ct,
- coef_counts[i][j][k][l], 0);
+ vp9_tree_probs_from_distribution(vp9_coefmodel_tree, coef_probs,
+ branch_ct, coef_counts[i][j][k][l],
+ 0);
branch_ct[0][1] = eob_branch_count[i][j][k][l] - branch_ct[0][0];
coef_probs[0] = get_binary_prob(branch_ct[0][0], branch_ct[0][1]);
- for (t = 0; t < entropy_nodes_adapt; ++t) {
- count = branch_ct[t][0] + branch_ct[t][1];
- count = count > count_sat ? count_sat : count;
- factor = (update_factor * count / count_sat);
- dst_coef_probs[i][j][k][l][t] =
- weighted_prob(pre_coef_probs[i][j][k][l][t],
- coef_probs[t], factor);
- }
+ for (t = 0; t < entropy_nodes_adapt; ++t)
+ dst_coef_probs[i][j][k][l][t] = merge_probs(
+ pre_coef_probs[i][j][k][l][t], coef_probs[t],
+ branch_ct[t], count_sat, update_factor);
}
}
void vp9_adapt_coef_probs(VP9_COMMON *cm) {
TX_SIZE t;
- int count_sat;
- int update_factor; /* denominator 256 */
+ unsigned int count_sat, update_factor;
- if ((cm->frame_type == KEY_FRAME) || cm->intra_only) {
+ if (cm->frame_type == KEY_FRAME || cm->intra_only) {
update_factor = COEF_MAX_UPDATE_FACTOR_KEY;
count_sat = COEF_COUNT_SAT_KEY;
} else if (cm->last_frame_type == KEY_FRAME) {
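
merge_probs() collapses the deleted count/factor arithmetic into a single call. A sketch of the equivalent computation, reconstructed from the removed loop (assuming weighted_prob() is the usual libvpx rounding blend; the sketch name is illustrative):

    static vp9_prob merge_probs_sketch(vp9_prob pre_prob, vp9_prob new_prob,
                                       const unsigned int ct[2],
                                       unsigned int count_sat,
                                       unsigned int update_factor) {
      unsigned int count = ct[0] + ct[1];
      unsigned int factor;
      count = count > count_sat ? count_sat : count;
      factor = update_factor * count / count_sat;
      // weighted_prob(a, b, f) == ROUND_POWER_OF_TWO(a * (256 - f) + b * f, 8)
      return ROUND_POWER_OF_TWO(pre_prob * (256 - factor) + new_prob * factor,
                                8);
    }
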
diff --git a/libvpx/vp9/common/vp9_entropy.h b/libvpx/vp9/common/vp9_entropy.h
index 4ea727ff4..861c0786c 100644
--- a/libvpx/vp9/common/vp9_entropy.h
+++ b/libvpx/vp9/common/vp9_entropy.h
@@ -50,7 +50,7 @@ typedef struct {
int base_val;
} vp9_extra_bit;
-extern vp9_extra_bit vp9_extra_bits[12]; /* indexed by token value */
+extern const vp9_extra_bit vp9_extra_bits[12]; /* indexed by token value */
#define MAX_PROB 255
#define DCT_MAX_VALUE 16384
@@ -80,7 +80,6 @@ extern vp9_extra_bit vp9_extra_bits[12]; /* indexed by token value */
coefficient band (and since zigzag positions 0, 1, and 2 are in
distinct bands). */
-/*# define DC_TOKEN_CONTEXTS 3*/ /* 00, 0!0, !0!0 */
#define PREV_COEF_CONTEXTS 6
// #define ENTROPY_STATS
@@ -102,7 +101,7 @@ extern DECLARE_ALIGNED(16, const int16_t, vp9_default_scan_4x4[16]);
extern DECLARE_ALIGNED(16, const int16_t, vp9_col_scan_4x4[16]);
extern DECLARE_ALIGNED(16, const int16_t, vp9_row_scan_4x4[16]);
-extern DECLARE_ALIGNED(64, const int16_t, vp9_default_scan_8x8[64]);
+extern DECLARE_ALIGNED(16, const int16_t, vp9_default_scan_8x8[64]);
extern DECLARE_ALIGNED(16, const int16_t, vp9_col_scan_8x8[64]);
extern DECLARE_ALIGNED(16, const int16_t, vp9_row_scan_8x8[64]);
@@ -119,7 +118,7 @@ extern DECLARE_ALIGNED(16, int16_t, vp9_default_iscan_4x4[16]);
extern DECLARE_ALIGNED(16, int16_t, vp9_col_iscan_4x4[16]);
extern DECLARE_ALIGNED(16, int16_t, vp9_row_iscan_4x4[16]);
-extern DECLARE_ALIGNED(64, int16_t, vp9_default_iscan_8x8[64]);
+extern DECLARE_ALIGNED(16, int16_t, vp9_default_iscan_8x8[64]);
extern DECLARE_ALIGNED(16, int16_t, vp9_col_iscan_8x8[64]);
extern DECLARE_ALIGNED(16, int16_t, vp9_row_iscan_8x8[64]);
diff --git a/libvpx/vp9/common/vp9_entropymode.c b/libvpx/vp9/common/vp9_entropymode.c
index ca188e438..768e5f523 100644
--- a/libvpx/vp9/common/vp9_entropymode.c
+++ b/libvpx/vp9/common/vp9_entropymode.c
@@ -356,53 +356,15 @@ void vp9_entropy_mode_init() {
vp9_inter_mode_tree, NEARESTMV);
}
-void vp9_accum_mv_refs(VP9_COMMON *pc,
- MB_PREDICTION_MODE m,
- const int context) {
- unsigned int (*inter_mode_counts)[VP9_INTER_MODES - 1][2] =
- pc->counts.inter_mode;
-
- if (m == ZEROMV) {
- ++inter_mode_counts[context][0][0];
- } else {
- ++inter_mode_counts[context][0][1];
- if (m == NEARESTMV) {
- ++inter_mode_counts[context][1][0];
- } else {
- ++inter_mode_counts[context][1][1];
- if (m == NEARMV) {
- ++inter_mode_counts[context][2][0];
- } else {
- ++inter_mode_counts[context][2][1];
- }
- }
- }
-}
-
#define COUNT_SAT 20
#define MAX_UPDATE_FACTOR 128
-static int update_ct(vp9_prob pre_prob, vp9_prob prob,
- unsigned int ct[2]) {
- const int count = MIN(ct[0] + ct[1], COUNT_SAT);
- const int factor = MAX_UPDATE_FACTOR * count / COUNT_SAT;
- return weighted_prob(pre_prob, prob, factor);
+static int update_ct(vp9_prob pre_prob, vp9_prob prob, unsigned int ct[2]) {
+ return merge_probs(pre_prob, prob, ct, COUNT_SAT, MAX_UPDATE_FACTOR);
}
static int update_ct2(vp9_prob pre_prob, unsigned int ct[2]) {
- return update_ct(pre_prob, get_binary_prob(ct[0], ct[1]), ct);
-}
-
-void vp9_adapt_mode_context(VP9_COMMON *pc) {
- int i, j;
- FRAME_CONTEXT *const fc = &pc->fc;
- FRAME_CONTEXT *const pre_fc = &pc->frame_contexts[pc->frame_context_idx];
- FRAME_COUNTS *const counts = &pc->counts;
-
- for (j = 0; j < INTER_MODE_CONTEXTS; j++)
- for (i = 0; i < VP9_INTER_MODES - 1; i++)
- fc->inter_mode_probs[j][i] = update_ct2(pre_fc->inter_mode_probs[j][i],
- counts->inter_mode[j][i]);
+ return merge_probs2(pre_prob, ct, COUNT_SAT, MAX_UPDATE_FACTOR);
}
static void update_mode_probs(int n_modes,
@@ -440,6 +402,11 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
fc->single_ref_prob[i][j] = update_ct2(pre_fc->single_ref_prob[i][j],
counts->single_ref[i][j]);
+ for (i = 0; i < INTER_MODE_CONTEXTS; i++)
+ update_mode_probs(VP9_INTER_MODES, vp9_inter_mode_tree,
+ counts->inter_mode[i], pre_fc->inter_mode_probs[i],
+ fc->inter_mode_probs[i], NEARESTMV);
+
for (i = 0; i < BLOCK_SIZE_GROUPS; i++)
update_mode_probs(VP9_INTRA_MODES, vp9_intra_mode_tree,
counts->y_mode[i], pre_fc->y_mode_prob[i],
@@ -466,25 +433,25 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
if (cm->tx_mode == TX_MODE_SELECT) {
int j;
- unsigned int branch_ct_8x8p[TX_SIZE_MAX_SB - 3][2];
- unsigned int branch_ct_16x16p[TX_SIZE_MAX_SB - 2][2];
- unsigned int branch_ct_32x32p[TX_SIZE_MAX_SB - 1][2];
+ unsigned int branch_ct_8x8p[TX_SIZES - 3][2];
+ unsigned int branch_ct_16x16p[TX_SIZES - 2][2];
+ unsigned int branch_ct_32x32p[TX_SIZES - 1][2];
for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
tx_counts_to_branch_counts_8x8(counts->tx.p8x8[i], branch_ct_8x8p);
- for (j = 0; j < TX_SIZE_MAX_SB - 3; ++j)
+ for (j = 0; j < TX_SIZES - 3; ++j)
fc->tx_probs.p8x8[i][j] = update_ct2(pre_fc->tx_probs.p8x8[i][j],
branch_ct_8x8p[j]);
tx_counts_to_branch_counts_16x16(counts->tx.p16x16[i],
branch_ct_16x16p);
- for (j = 0; j < TX_SIZE_MAX_SB - 2; ++j)
+ for (j = 0; j < TX_SIZES - 2; ++j)
fc->tx_probs.p16x16[i][j] = update_ct2(pre_fc->tx_probs.p16x16[i][j],
branch_ct_16x16p[j]);
tx_counts_to_branch_counts_32x32(counts->tx.p32x32[i],
branch_ct_32x32p);
- for (j = 0; j < TX_SIZE_MAX_SB - 1; ++j)
+ for (j = 0; j < TX_SIZES - 1; ++j)
fc->tx_probs.p32x32[i][j] = update_ct2(pre_fc->tx_probs.p32x32[i][j],
branch_ct_32x32p[j]);
}
@@ -495,22 +462,24 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
counts->mbskip[i]);
}
-static void set_default_lf_deltas(MACROBLOCKD *xd) {
- xd->lf.mode_ref_delta_enabled = 1;
- xd->lf.mode_ref_delta_update = 1;
+static void set_default_lf_deltas(struct loopfilter *lf) {
+ lf->mode_ref_delta_enabled = 1;
+ lf->mode_ref_delta_update = 1;
- xd->lf.ref_deltas[INTRA_FRAME] = 1;
- xd->lf.ref_deltas[LAST_FRAME] = 0;
- xd->lf.ref_deltas[GOLDEN_FRAME] = -1;
- xd->lf.ref_deltas[ALTREF_FRAME] = -1;
+ lf->ref_deltas[INTRA_FRAME] = 1;
+ lf->ref_deltas[LAST_FRAME] = 0;
+ lf->ref_deltas[GOLDEN_FRAME] = -1;
+ lf->ref_deltas[ALTREF_FRAME] = -1;
- xd->lf.mode_deltas[0] = 0;
- xd->lf.mode_deltas[1] = 0;
+ lf->mode_deltas[0] = 0;
+ lf->mode_deltas[1] = 0;
}
void vp9_setup_past_independence(VP9_COMMON *cm, MACROBLOCKD *xd) {
// Reset the segment feature data to the default stats:
// Features disabled, 0, with delta coding (Default state).
+ struct loopfilter *const lf = &xd->lf;
+
int i;
vp9_clearall_segfeatures(&xd->seg);
xd->seg.abs_delta = SEGMENT_DELTADATA;
@@ -518,12 +487,12 @@ void vp9_setup_past_independence(VP9_COMMON *cm, MACROBLOCKD *xd) {
vpx_memset(cm->last_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols));
// Reset the mode ref deltas for loop filter
- vp9_zero(xd->lf.last_ref_deltas);
- vp9_zero(xd->lf.last_mode_deltas);
- set_default_lf_deltas(xd);
+ vp9_zero(lf->last_ref_deltas);
+ vp9_zero(lf->last_mode_deltas);
+ set_default_lf_deltas(lf);
// To force update of the sharpness
- xd->lf.last_sharpness_level = -1;
+ lf->last_sharpness_level = -1;
vp9_default_coef_probs(cm);
vp9_init_mbmode_probs(cm);
diff --git a/libvpx/vp9/common/vp9_entropymode.h b/libvpx/vp9/common/vp9_entropymode.h
index 8c14e7e17..17a7c2634 100644
--- a/libvpx/vp9/common/vp9_entropymode.h
+++ b/libvpx/vp9/common/vp9_entropymode.h
@@ -24,15 +24,15 @@
struct VP9Common;
struct tx_probs {
- vp9_prob p32x32[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB - 1];
- vp9_prob p16x16[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB - 2];
- vp9_prob p8x8[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB - 3];
+ vp9_prob p32x32[TX_SIZE_CONTEXTS][TX_SIZES - 1];
+ vp9_prob p16x16[TX_SIZE_CONTEXTS][TX_SIZES - 2];
+ vp9_prob p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 3];
};
struct tx_counts {
- unsigned int p32x32[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB];
- unsigned int p16x16[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB - 1];
- unsigned int p8x8[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB - 2];
+ unsigned int p32x32[TX_SIZE_CONTEXTS][TX_SIZES];
+ unsigned int p16x16[TX_SIZE_CONTEXTS][TX_SIZES - 1];
+ unsigned int p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 2];
};
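
With TX_SIZES == 4, the array widths above encode one binary-tree probability per decision: a block whose largest transform is NxN chooses among the sizes up to NxN, which needs one fewer probability than there are choices.

    //   p8x8  : choose from {4x4, 8x8}                -> TX_SIZES - 3 == 1 prob
    //   p16x16: choose from {4x4, 8x8, 16x16}         -> TX_SIZES - 2 == 2 probs
    //   p32x32: choose from {4x4, 8x8, 16x16, 32x32}  -> TX_SIZES - 1 == 3 probs
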
extern const vp9_prob vp9_kf_uv_mode_prob[VP9_INTRA_MODES][VP9_INTRA_MODES - 1];
@@ -61,18 +61,12 @@ extern struct vp9_token vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS];
void vp9_entropy_mode_init();
-int vp9_mv_cont(const int_mv *l, const int_mv *a);
-
void vp9_setup_past_independence(struct VP9Common *cm, MACROBLOCKD *xd);
void vp9_init_mbmode_probs(struct VP9Common *x);
-void vp9_adapt_mode_context(struct VP9Common *pc);
-
void vp9_adapt_mode_probs(struct VP9Common *);
-void vp9_accum_mv_refs(struct VP9Common *pc, MB_PREDICTION_MODE m, int context);
-
void tx_counts_to_branch_counts_32x32(unsigned int *tx_count_32x32p,
unsigned int (*ct_32x32p)[2]);
void tx_counts_to_branch_counts_16x16(unsigned int *tx_count_16x16p,
diff --git a/libvpx/vp9/common/vp9_entropymv.c b/libvpx/vp9/common/vp9_entropymv.c
index 343b6241d..6cfc34697 100644
--- a/libvpx/vp9/common/vp9_entropymv.c
+++ b/libvpx/vp9/common/vp9_entropymv.c
@@ -16,7 +16,7 @@
#define MV_MAX_UPDATE_FACTOR 128
/* Integer pel reference mv threshold for use of high-precision 1/8 mv */
-#define COMPANDED_MVREF_THRESH 8
+#define COMPANDED_MVREF_THRESH 8
const vp9_tree_index vp9_mv_joint_tree[2 * MV_JOINTS - 2] = {
-MV_JOINT_ZERO, 2,
@@ -107,12 +107,6 @@ int vp9_get_mv_mag(MV_CLASS_TYPE c, int offset) {
return mv_class_base(c) + offset;
}
-static void inc_mv_component_count(int v, nmv_component_counts *comp_counts,
- int incr) {
- assert (v != 0);
- comp_counts->mvcount[MV_MAX + v] += incr;
-}
-
static void inc_mv_component(int v, nmv_component_counts *comp_counts,
int incr, int usehp) {
int s, z, c, o, d, e, f;
@@ -164,25 +158,19 @@ static void counts_to_context(nmv_component_counts *mvcomp, int usehp) {
}
}
-void vp9_inc_mv(const MV *mv, nmv_context_counts *mvctx) {
+void vp9_inc_mv(const MV *mv, nmv_context_counts *counts) {
const MV_JOINT_TYPE j = vp9_get_mv_joint(mv);
- mvctx->joints[j]++;
+ ++counts->joints[j];
+
if (mv_joint_vertical(j))
- inc_mv_component_count(mv->row, &mvctx->comps[0], 1);
+ ++counts->comps[0].mvcount[MV_MAX + mv->row];
if (mv_joint_horizontal(j))
- inc_mv_component_count(mv->col, &mvctx->comps[1], 1);
+ ++counts->comps[1].mvcount[MV_MAX + mv->col];
}
-static void adapt_prob(vp9_prob *dest, vp9_prob prep, unsigned int ct[2]) {
- const int count = MIN(ct[0] + ct[1], MV_COUNT_SAT);
- if (count) {
- const vp9_prob newp = get_binary_prob(ct[0], ct[1]);
- const int factor = MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT;
- *dest = weighted_prob(prep, newp, factor);
- } else {
- *dest = prep;
- }
+static vp9_prob adapt_prob(vp9_prob prep, const unsigned int ct[2]) {
+ return merge_probs2(prep, ct, MV_COUNT_SAT, MV_MAX_UPDATE_FACTOR);
}
void vp9_counts_process(nmv_context_counts *nmv_count, int usehp) {
@@ -195,31 +183,22 @@ static unsigned int adapt_probs(unsigned int i,
vp9_prob this_probs[],
const vp9_prob last_probs[],
const unsigned int num_events[]) {
- vp9_prob this_prob;
- const uint32_t left = tree[i] <= 0
+
+ const unsigned int left = tree[i] <= 0
? num_events[-tree[i]]
: adapt_probs(tree[i], tree, this_probs, last_probs, num_events);
- const uint32_t right = tree[i + 1] <= 0
+ const unsigned int right = tree[i + 1] <= 0
? num_events[-tree[i + 1]]
: adapt_probs(tree[i + 1], tree, this_probs, last_probs, num_events);
-
- uint32_t weight = left + right;
- if (weight) {
- this_prob = get_binary_prob(left, right);
- weight = weight > MV_COUNT_SAT ? MV_COUNT_SAT : weight;
- this_prob = weighted_prob(last_probs[i >> 1], this_prob,
- MV_MAX_UPDATE_FACTOR * weight / MV_COUNT_SAT);
- } else {
- this_prob = last_probs[i >> 1];
- }
- this_probs[i >> 1] = this_prob;
+ const unsigned int ct[2] = { left, right };
+ this_probs[i >> 1] = adapt_prob(last_probs[i >> 1], ct);
return left + right;
}
-void vp9_adapt_mv_probs(VP9_COMMON *cm, int usehp) {
+void vp9_adapt_mv_probs(VP9_COMMON *cm, int allow_hp) {
int i, j;
FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx];
@@ -228,36 +207,32 @@ void vp9_adapt_mv_probs(VP9_COMMON *cm, int usehp) {
nmv_context *pre_ctx = &pre_fc->nmvc;
nmv_context_counts *cts = &cm->counts.mv;
- vp9_counts_process(cts, usehp);
+ vp9_counts_process(cts, allow_hp);
adapt_probs(0, vp9_mv_joint_tree, ctx->joints, pre_ctx->joints, cts->joints);
for (i = 0; i < 2; ++i) {
- adapt_prob(&ctx->comps[i].sign, pre_ctx->comps[i].sign, cts->comps[i].sign);
+ ctx->comps[i].sign = adapt_prob(pre_ctx->comps[i].sign, cts->comps[i].sign);
adapt_probs(0, vp9_mv_class_tree, ctx->comps[i].classes,
pre_ctx->comps[i].classes, cts->comps[i].classes);
adapt_probs(0, vp9_mv_class0_tree, ctx->comps[i].class0,
pre_ctx->comps[i].class0, cts->comps[i].class0);
for (j = 0; j < MV_OFFSET_BITS; ++j)
- adapt_prob(&ctx->comps[i].bits[j], pre_ctx->comps[i].bits[j],
- cts->comps[i].bits[j]);
- }
+ ctx->comps[i].bits[j] = adapt_prob(pre_ctx->comps[i].bits[j],
+ cts->comps[i].bits[j]);
- for (i = 0; i < 2; ++i) {
for (j = 0; j < CLASS0_SIZE; ++j)
adapt_probs(0, vp9_mv_fp_tree, ctx->comps[i].class0_fp[j],
pre_ctx->comps[i].class0_fp[j], cts->comps[i].class0_fp[j]);
adapt_probs(0, vp9_mv_fp_tree, ctx->comps[i].fp, pre_ctx->comps[i].fp,
cts->comps[i].fp);
- }
- if (usehp) {
- for (i = 0; i < 2; ++i) {
- adapt_prob(&ctx->comps[i].class0_hp, pre_ctx->comps[i].class0_hp,
- cts->comps[i].class0_hp);
- adapt_prob(&ctx->comps[i].hp, pre_ctx->comps[i].hp, cts->comps[i].hp);
+ if (allow_hp) {
+ ctx->comps[i].class0_hp = adapt_prob(pre_ctx->comps[i].class0_hp,
+ cts->comps[i].class0_hp);
+ ctx->comps[i].hp = adapt_prob(pre_ctx->comps[i].hp, cts->comps[i].hp);
}
}
}
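
Judging from the deleted adapt_prob() body above, merge_probs2() is the counts-first variant of merge_probs(): it derives the candidate probability from the branch counts before blending. A hedged sketch (the real definition lives elsewhere in the tree):

    static vp9_prob merge_probs2_sketch(vp9_prob pre_prob,
                                        const unsigned int ct[2],
                                        unsigned int count_sat,
                                        unsigned int update_factor) {
      // get_binary_prob(n0, n1) derives a prob from the two branch counts.
      return merge_probs(pre_prob, get_binary_prob(ct[0], ct[1]),
                         ct, count_sat, update_factor);
    }
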
diff --git a/libvpx/vp9/common/vp9_enums.h b/libvpx/vp9/common/vp9_enums.h
index 86f0d0bfd..3208b7270 100644
--- a/libvpx/vp9/common/vp9_enums.h
+++ b/libvpx/vp9/common/vp9_enums.h
@@ -54,7 +54,7 @@ typedef enum {
TX_8X8 = 1, // 8x8 dct transform
TX_16X16 = 2, // 16x16 dct transform
TX_32X32 = 3, // 32x32 dct transform
- TX_SIZE_MAX_SB, // Number of transforms available to SBs
+ TX_SIZES
} TX_SIZE;
typedef enum {
@@ -63,7 +63,7 @@ typedef enum {
ALLOW_16X16 = 2,
ALLOW_32X32 = 3,
TX_MODE_SELECT = 4,
- NB_TXFM_MODES = 5,
+ TX_MODES = 5,
} TX_MODE;
typedef enum {
diff --git a/libvpx/vp9/common/vp9_extend.c b/libvpx/vp9/common/vp9_extend.c
index 95ec59061..d8496c4f2 100644
--- a/libvpx/vp9/common/vp9_extend.c
+++ b/libvpx/vp9/common/vp9_extend.c
@@ -8,9 +8,11 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "vp9/common/vp9_extend.h"
#include "vpx_mem/vpx_mem.h"
+#include "vp9/common/vp9_common.h"
+#include "vp9/common/vp9_extend.h"
+
static void copy_and_extend_plane(const uint8_t *src, int src_pitch,
uint8_t *dst, int dst_pitch,
int w, int h,
@@ -107,14 +109,14 @@ void vp9_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src,
const int src_y_offset = srcy * src->y_stride + srcx;
const int dst_y_offset = srcy * dst->y_stride + srcx;
- const int et_uv = (et_y + 1) >> 1;
- const int el_uv = (el_y + 1) >> 1;
- const int eb_uv = (eb_y + 1) >> 1;
- const int er_uv = (er_y + 1) >> 1;
+ const int et_uv = ROUND_POWER_OF_TWO(et_y, 1);
+ const int el_uv = ROUND_POWER_OF_TWO(el_y, 1);
+ const int eb_uv = ROUND_POWER_OF_TWO(eb_y, 1);
+ const int er_uv = ROUND_POWER_OF_TWO(er_y, 1);
const int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1);
const int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1);
- const int srch_uv = (srch + 1) >> 1;
- const int srcw_uv = (srcw + 1) >> 1;
+ const int srch_uv = ROUND_POWER_OF_TWO(srch, 1);
+ const int srcw_uv = ROUND_POWER_OF_TWO(srcw, 1);
copy_and_extend_plane(src->y_buffer + src_y_offset, src->y_stride,
dst->y_buffer + dst_y_offset, dst->y_stride,
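
The replacements above are behavior-preserving, assuming the usual libvpx definition of the macro:

    #define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))
    // so ROUND_POWER_OF_TWO(x, 1) expands to (x + 1) >> 1, exactly the old code.
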
diff --git a/libvpx/vp9/common/vp9_findnearmv.c b/libvpx/vp9/common/vp9_findnearmv.c
index 643b229a6..3af8b8d21 100644
--- a/libvpx/vp9/common/vp9_findnearmv.c
+++ b/libvpx/vp9/common/vp9_findnearmv.c
@@ -14,8 +14,9 @@
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_sadmxn.h"
-static void lower_mv_precision(int_mv *mv, int usehp) {
- if (!usehp || !vp9_use_mv_hp(&mv->as_mv)) {
+static void lower_mv_precision(int_mv *mv, int allow_hp) {
+ const int use_hp = allow_hp && vp9_use_mv_hp(&mv->as_mv);
+ if (!use_hp) {
if (mv->as_mv.row & 1)
mv->as_mv.row += (mv->as_mv.row > 0 ? -1 : 1);
if (mv->as_mv.col & 1)
@@ -32,7 +33,7 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd,
// Make sure all the candidates are properly clamped etc
for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) {
lower_mv_precision(&mvlist[i], xd->allow_high_precision_mv);
- clamp_mv2(&mvlist[i], xd);
+ clamp_mv2(&mvlist[i].as_mv, xd);
}
*nearest = mvlist[0];
*near = mvlist[1];
@@ -41,7 +42,8 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd,
void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
int_mv *dst_nearest,
int_mv *dst_near,
- int block_idx, int ref_idx) {
+ int block_idx, int ref_idx,
+ int mi_row, int mi_col) {
int_mv dst_list[MAX_MV_REF_CANDIDATES];
int_mv mv_list[MAX_MV_REF_CANDIDATES];
MODE_INFO *mi = xd->mode_info_context;
@@ -53,7 +55,8 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
vp9_find_mv_refs_idx(cm, xd, xd->mode_info_context,
xd->prev_mode_info_context,
mbmi->ref_frame[ref_idx],
- mv_list, cm->ref_frame_sign_bias, block_idx);
+ mv_list, cm->ref_frame_sign_bias, block_idx,
+ mi_row, mi_col);
dst_list[1].as_int = 0;
if (block_idx == 0) {
diff --git a/libvpx/vp9/common/vp9_findnearmv.h b/libvpx/vp9/common/vp9_findnearmv.h
index b0fa505b5..e5221ed67 100644
--- a/libvpx/vp9/common/vp9_findnearmv.h
+++ b/libvpx/vp9/common/vp9_findnearmv.h
@@ -29,31 +29,19 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd,
int_mv *near);
// TODO(jingning): this mv clamping function should be block size dependent.
-static void clamp_mv(int_mv *mv,
- int mb_to_left_edge,
- int mb_to_right_edge,
- int mb_to_top_edge,
- int mb_to_bottom_edge) {
- mv->as_mv.col = clamp(mv->as_mv.col, mb_to_left_edge, mb_to_right_edge);
- mv->as_mv.row = clamp(mv->as_mv.row, mb_to_top_edge, mb_to_bottom_edge);
-}
-
-static int clamp_mv2(int_mv *mv, const MACROBLOCKD *xd) {
- int_mv tmp_mv;
- tmp_mv.as_int = mv->as_int;
- clamp_mv(mv,
- xd->mb_to_left_edge - LEFT_TOP_MARGIN,
- xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
- xd->mb_to_top_edge - LEFT_TOP_MARGIN,
- xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
- return tmp_mv.as_int != mv->as_int;
+static void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
+ clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN,
+ xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
+ xd->mb_to_top_edge - LEFT_TOP_MARGIN,
+ xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
}
void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *pc,
MACROBLOCKD *xd,
int_mv *dst_nearest,
int_mv *dst_near,
- int block_idx, int ref_idx);
+ int block_idx, int ref_idx,
+ int mi_row, int mi_col);
static MB_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) {
// FIXME(rbultje, jingning): temporary hack because jenkins doesn't
@@ -62,7 +50,7 @@ static MB_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) {
/* On L edge, get from MB to left of us */
--cur_mb;
- if (cur_mb->mbmi.ref_frame[0] != INTRA_FRAME) {
+ if (is_inter_block(&cur_mb->mbmi)) {
return DC_PRED;
} else if (cur_mb->mbmi.sb_type < BLOCK_SIZE_SB8X8) {
return ((cur_mb->bmi + 1 + b)->as_mode);
@@ -80,7 +68,7 @@ static MB_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb,
/* On top edge, get from MB above us */
cur_mb -= mi_stride;
- if (cur_mb->mbmi.ref_frame[0] != INTRA_FRAME) {
+ if (is_inter_block(&cur_mb->mbmi)) {
return DC_PRED;
} else if (cur_mb->mbmi.sb_type < BLOCK_SIZE_SB8X8) {
return ((cur_mb->bmi + 2 + b)->as_mode);
diff --git a/libvpx/vp9/common/vp9_idct.c b/libvpx/vp9/common/vp9_idct.c
index a95560a55..a2245259e 100644
--- a/libvpx/vp9/common/vp9_idct.c
+++ b/libvpx/vp9/common/vp9_idct.c
@@ -225,6 +225,19 @@ void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
}
}
+void vp9_short_idct8x8_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
+ int i, j;
+ int a1;
+ int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
+ out = dct_const_round_shift(out * cospi_16_64);
+ a1 = ROUND_POWER_OF_TWO(out, 5);
+ for (j = 0; j < 8; ++j) {
+ for (i = 0; i < 8; ++i)
+ dest[i] = clip_pixel(dest[i] + a1);
+ dest += dest_stride;
+ }
+}
+
static void iadst4_1d(int16_t *input, int16_t *output) {
int s0, s1, s2, s3, s4, s5, s6, s7;
@@ -433,12 +446,6 @@ void vp9_short_idct10_8x8_add_c(int16_t *input, uint8_t *dest,
}
}
-void vp9_short_idct1_8x8_c(int16_t *input, int16_t *output) {
- int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
- out = dct_const_round_shift(out * cospi_16_64);
- output[0] = ROUND_POWER_OF_TWO(out, 5);
-}
-
static void idct16_1d(int16_t *input, int16_t *output) {
int16_t step1[16], step2[16];
int temp1, temp2;
@@ -857,10 +864,18 @@ void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest,
}
}
-void vp9_short_idct1_16x16_c(int16_t *input, int16_t *output) {
+void vp9_short_idct16x16_1_add_c(int16_t *input, uint8_t *dest,
+ int dest_stride) {
+ int i, j;
+ int a1;
int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
out = dct_const_round_shift(out * cospi_16_64);
- output[0] = ROUND_POWER_OF_TWO(out, 6);
+ a1 = ROUND_POWER_OF_TWO(out, 6);
+ for (j = 0; j < 16; ++j) {
+ for (i = 0; i < 16; ++i)
+ dest[i] = clip_pixel(dest[i] + a1);
+ dest += dest_stride;
+ }
}
static void idct32_1d(int16_t *input, int16_t *output) {
@@ -1259,29 +1274,3 @@ void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output) {
out = dct_const_round_shift(out * cospi_16_64);
output[0] = ROUND_POWER_OF_TWO(out, 6);
}
-
-void vp9_short_idct10_32x32_add_c(int16_t *input, uint8_t *dest,
- int dest_stride) {
- int16_t out[32 * 32] = { 0 };
- int16_t *outptr = out;
- int i, j;
- int16_t temp_in[32], temp_out[32];
-
- // First transform rows. Since all non-zero dct coefficients are in
- // upper-left 4x4 area, we only need to calculate first 4 rows here.
- for (i = 0; i < 4; ++i) {
- idct32_1d(input, outptr);
- input += 32;
- outptr += 32;
- }
-
- // Columns
- for (i = 0; i < 32; ++i) {
- for (j = 0; j < 32; ++j)
- temp_in[j] = out[j * 32 + i];
- idct32_1d(temp_in, temp_out);
- for (j = 0; j < 32; ++j)
- dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
- + dest[j * dest_stride + i]);
- }
-}
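
A quick numeric check of the new DC-only paths: with cospi_16_64 = 11585 (about (1/sqrt(2)) << 14) and dct_const_round_shift(x) rounding by 2^14, input[0] = 1024 gives:

    //   out = (1024 * 11585 + 8192) >> 14 = 724
    //   out = ( 724 * 11585 + 8192) >> 14 = 512
    //   a1  = (512 + 16) >> 5             = 16    (~ input[0] / 64)

so roughly input[0] / 64 is added to every pixel of the 8x8 block; the 16x16 variant shifts by 6 instead of 5.
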
diff --git a/libvpx/vp9/common/vp9_loopfilter.c b/libvpx/vp9/common/vp9_loopfilter.c
index 5498b1717..66df62753 100644
--- a/libvpx/vp9/common/vp9_loopfilter.c
+++ b/libvpx/vp9/common/vp9_loopfilter.c
@@ -16,6 +16,12 @@
#include "vp9/common/vp9_seg_common.h"
+struct loop_filter_info {
+ const uint8_t *mblim;
+ const uint8_t *lim;
+ const uint8_t *hev_thr;
+};
+
static void lf_init_lut(loop_filter_info_n *lfi) {
lfi->mode_lf_lut[DC_PRED] = 0;
lfi->mode_lf_lut[D45_PRED] = 0;
@@ -73,13 +79,14 @@ void vp9_loop_filter_init(VP9_COMMON *cm, struct loopfilter *lf) {
void vp9_loop_filter_frame_init(VP9_COMMON *const cm, MACROBLOCKD *const xd,
int default_filt_lvl) {
- int seg;
+ int seg_id;
// n_shift is the multiplier for lf_deltas
// the multiplier is 1 when filter_lvl is between 0 and 31;
// 2 when filter_lvl is between 32 and 63
const int n_shift = default_filt_lvl >> 5;
loop_filter_info_n *const lfi = &cm->lf_info;
- struct loopfilter *lf = &xd->lf;
+ struct loopfilter *const lf = &xd->lf;
+ struct segmentation *const seg = &xd->seg;
// update limits if sharpness has changed
if (lf->last_sharpness_level != lf->sharpness_level) {
@@ -87,13 +94,13 @@ void vp9_loop_filter_frame_init(VP9_COMMON *const cm, MACROBLOCKD *const xd,
lf->last_sharpness_level = lf->sharpness_level;
}
- for (seg = 0; seg < MAX_SEGMENTS; seg++) {
+ for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) {
int lvl_seg = default_filt_lvl, ref, mode, intra_lvl;
// Set the baseline filter values for each segment
- if (vp9_segfeature_active(&xd->seg, seg, SEG_LVL_ALT_LF)) {
- const int data = vp9_get_segdata(&xd->seg, seg, SEG_LVL_ALT_LF);
- lvl_seg = xd->seg.abs_delta == SEGMENT_ABSDATA
+ if (vp9_segfeature_active(&xd->seg, seg_id, SEG_LVL_ALT_LF)) {
+ const int data = vp9_get_segdata(seg, seg_id, SEG_LVL_ALT_LF);
+ lvl_seg = seg->abs_delta == SEGMENT_ABSDATA
? data
: clamp(default_filt_lvl + data, 0, MAX_LOOP_FILTER);
}
@@ -101,18 +108,18 @@ void vp9_loop_filter_frame_init(VP9_COMMON *const cm, MACROBLOCKD *const xd,
if (!lf->mode_ref_delta_enabled) {
// we could get rid of this if we assume that deltas are set to
// zero when not in use; encoder always uses deltas
- vpx_memset(lfi->lvl[seg][0], lvl_seg, 4 * 4);
+ vpx_memset(lfi->lvl[seg_id][0], lvl_seg, 4 * 4);
continue;
}
intra_lvl = lvl_seg + (lf->ref_deltas[INTRA_FRAME] << n_shift);
- lfi->lvl[seg][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER);
+ lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER);
for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref)
for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
const int inter_lvl = lvl_seg + (lf->ref_deltas[ref] << n_shift)
+ (lf->mode_deltas[mode] << n_shift);
- lfi->lvl[seg][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER);
+ lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER);
}
}
}
@@ -256,7 +263,7 @@ static void filter_block_plane(VP9_COMMON *const cm,
// Determine the vertical edges that need filtering
for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) {
const int skip_this = mi[c].mbmi.mb_skip_coeff
- && mi[c].mbmi.ref_frame[0] != INTRA_FRAME;
+ && is_inter_block(&mi[c].mbmi);
// left edge of current unit is block/partition edge -> no skip
const int block_edge_left = b_width_log2(mi[c].mbmi.sb_type) ?
!(c & ((1 << (b_width_log2(mi[c].mbmi.sb_type)-1)) - 1)) : 1;
@@ -376,3 +383,11 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd,
vp9_loop_filter_rows(cm->frame_to_show, cm, xd,
0, cm->mi_rows, y_only);
}
+
+int vp9_loop_filter_worker(void *arg1, void *arg2) {
+ LFWorkerData *const lf_data = (LFWorkerData*)arg1;
+ (void)arg2;
+ vp9_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, &lf_data->xd,
+ lf_data->start, lf_data->stop, lf_data->y_only);
+ return 1;
+}
diff --git a/libvpx/vp9/common/vp9_loopfilter.h b/libvpx/vp9/common/vp9_loopfilter.h
index e59cc6485..5fc909495 100644
--- a/libvpx/vp9/common/vp9_loopfilter.h
+++ b/libvpx/vp9/common/vp9_loopfilter.h
@@ -35,13 +35,6 @@ typedef struct {
uint8_t mode_lf_lut[MB_MODE_COUNT];
} loop_filter_info_n;
-struct loop_filter_info {
- const uint8_t *mblim;
- const uint8_t *lim;
- const uint8_t *hev_thr;
-};
-
-
/* assorted loopfilter functions which get used elsewhere */
struct VP9Common;
struct macroblockd;
@@ -64,4 +57,18 @@ void vp9_loop_filter_frame(struct VP9Common *cm,
void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer,
struct VP9Common *cm, struct macroblockd *xd,
int start, int stop, int y_only);
+
+typedef struct LoopFilterWorkerData {
+ const YV12_BUFFER_CONFIG *frame_buffer;
+ struct VP9Common *cm;
+ struct macroblockd xd; // TODO(jzern): most of this is unnecessary to the
+ // loopfilter. the planes are necessary as their state
+ // is changed during decode.
+ int start;
+ int stop;
+ int y_only;
+} LFWorkerData;
+
+// Operates on the rows described by LFWorkerData passed as 'arg1'.
+int vp9_loop_filter_worker(void *arg1, void *arg2);
#endif // VP9_COMMON_VP9_LOOPFILTER_H_
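
The worker deliberately carries all of its state in its argument, so running it on another thread only requires filling in an LFWorkerData. A minimal usage sketch (the wrapper function and field values are illustrative, not part of the patch):

    static void filter_frame_via_worker(VP9_COMMON *cm, MACROBLOCKD *xd) {
      LFWorkerData lf_data;
      lf_data.frame_buffer = cm->frame_to_show;
      lf_data.cm = cm;
      lf_data.xd = *xd;            // copied: plane state mutates while filtering
      lf_data.start = 0;
      lf_data.stop = cm->mi_rows;  // whole frame, as vp9_loop_filter_frame does
      lf_data.y_only = 0;
      vp9_loop_filter_worker(&lf_data, NULL);  // second argument is unused
    }
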
diff --git a/libvpx/vp9/common/vp9_mv.h b/libvpx/vp9/common/vp9_mv.h
index a095258be..31a79b984 100644
--- a/libvpx/vp9/common/vp9_mv.h
+++ b/libvpx/vp9/common/vp9_mv.h
@@ -13,6 +13,8 @@
#include "vpx/vpx_integer.h"
+#include "vp9/common/vp9_common.h"
+
typedef struct {
int16_t row;
int16_t col;
@@ -28,4 +30,10 @@ typedef struct {
int32_t col;
} MV32;
+static void clamp_mv(MV *mv, int min_col, int max_col,
+ int min_row, int max_row) {
+ mv->col = clamp(mv->col, min_col, max_col);
+ mv->row = clamp(mv->row, min_row, max_row);
+}
+
#endif // VP9_COMMON_VP9_MV_H_
diff --git a/libvpx/vp9/common/vp9_mvref_common.c b/libvpx/vp9/common/vp9_mvref_common.c
index ae009b0ff..3b72f41c2 100644
--- a/libvpx/vp9/common/vp9_mvref_common.c
+++ b/libvpx/vp9/common/vp9_mvref_common.c
@@ -11,6 +11,65 @@
#include "vp9/common/vp9_mvref_common.h"
#define MVREF_NEIGHBOURS 8
+
+typedef enum {
+ BOTH_ZERO = 0,
+ ZERO_PLUS_PREDICTED = 1,
+ BOTH_PREDICTED = 2,
+ NEW_PLUS_NON_INTRA = 3,
+ BOTH_NEW = 4,
+ INTRA_PLUS_NON_INTRA = 5,
+ BOTH_INTRA = 6,
+ INVALID_CASE = 9
+} motion_vector_context;
+
+// This is used to figure out a context for the ref blocks. The code flattens
+// an array that would have 3 possible counts (0, 1 & 2) for 3 choices by
+// adding 9 for each intra block, 3 for each zero mv and 1 for each new
+// motion vector. This single number is then converted into a context
+// with a single lookup (counter_to_context).
+static const int mode_2_counter[MB_MODE_COUNT] = {
+ 9, // DC_PRED
+ 9, // V_PRED
+ 9, // H_PRED
+ 9, // D45_PRED
+ 9, // D135_PRED
+ 9, // D117_PRED
+ 9, // D153_PRED
+ 9, // D27_PRED
+ 9, // D63_PRED
+ 9, // TM_PRED
+ 0, // NEARESTMV
+ 0, // NEARMV
+ 3, // ZEROMV
+ 1, // NEWMV
+};
+
+// There are 3^3 different combinations of 3 counts that can be either 0, 1 or
+// 2. However, the actual count can never be greater than 2, so the highest
+// counter we need is 18. 9 is an invalid counter that's never used.
+static const int counter_to_context[19] = {
+ BOTH_PREDICTED, // 0
+ NEW_PLUS_NON_INTRA, // 1
+ BOTH_NEW, // 2
+ ZERO_PLUS_PREDICTED, // 3
+ NEW_PLUS_NON_INTRA, // 4
+ INVALID_CASE, // 5
+ BOTH_ZERO, // 6
+ INVALID_CASE, // 7
+ INVALID_CASE, // 8
+ INTRA_PLUS_NON_INTRA, // 9
+ INTRA_PLUS_NON_INTRA, // 10
+ INVALID_CASE, // 11
+ INTRA_PLUS_NON_INTRA, // 12
+ INVALID_CASE, // 13
+ INVALID_CASE, // 14
+ INVALID_CASE, // 15
+ INVALID_CASE, // 16
+ INVALID_CASE, // 17
+ BOTH_INTRA // 18
+};
+
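
A few worked examples of the flattening, using the two tables above:

    //   NEARMV  + NEWMV   -> 0 + 1 = 1  -> counter_to_context[1]  = NEW_PLUS_NON_INTRA
    //   NEWMV   + ZEROMV  -> 1 + 3 = 4  -> counter_to_context[4]  = NEW_PLUS_NON_INTRA
    //   DC_PRED + NEARMV  -> 9 + 0 = 9  -> counter_to_context[9]  = INTRA_PLUS_NON_INTRA
    //   DC_PRED + TM_PRED -> 9 + 9 = 18 -> counter_to_context[18] = BOTH_INTRA
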
static const int mv_ref_blocks[BLOCK_SIZE_TYPES][MVREF_NEIGHBOURS][2] = {
// SB4X4
{{0, -1}, {-1, 0}, {-1, -1}, {0, -2}, {-2, 0}, {-1, -2}, {-2, -1}, {-2, -2}},
@@ -39,263 +98,212 @@ static const int mv_ref_blocks[BLOCK_SIZE_TYPES][MVREF_NEIGHBOURS][2] = {
// SB64X64
{{3, -1}, {-1, 3}, {4, -1}, {-1, 4}, {-1, -1}, {0, -1}, {-1, 0}, {6, -1}}
};
+
+static const int idx_n_column_to_subblock[4][2] = {
+ {1, 2},
+ {1, 3},
+ {3, 2},
+ {3, 3}
+};
+
// clamp_mv_ref
#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units
static void clamp_mv_ref(const MACROBLOCKD *xd, int_mv *mv) {
- mv->as_mv.col = clamp(mv->as_mv.col, xd->mb_to_left_edge - MV_BORDER,
- xd->mb_to_right_edge + MV_BORDER);
- mv->as_mv.row = clamp(mv->as_mv.row, xd->mb_to_top_edge - MV_BORDER,
- xd->mb_to_bottom_edge + MV_BORDER);
-}
-
-// Gets a candidate reference motion vector from the given mode info
-// structure if one exists that matches the given reference frame.
-static int get_matching_candidate(const MODE_INFO *candidate_mi,
- MV_REFERENCE_FRAME ref_frame,
- int_mv *c_mv, int block_idx) {
- if (ref_frame == candidate_mi->mbmi.ref_frame[0]) {
- if (block_idx >= 0 && candidate_mi->mbmi.sb_type < BLOCK_SIZE_SB8X8)
- c_mv->as_int = candidate_mi->bmi[block_idx].as_mv[0].as_int;
- else
- c_mv->as_int = candidate_mi->mbmi.mv[0].as_int;
- } else if (ref_frame == candidate_mi->mbmi.ref_frame[1]) {
- if (block_idx >= 0 && candidate_mi->mbmi.sb_type < BLOCK_SIZE_SB8X8)
- c_mv->as_int = candidate_mi->bmi[block_idx].as_mv[1].as_int;
- else
- c_mv->as_int = candidate_mi->mbmi.mv[1].as_int;
- } else {
- return 0;
- }
-
- return 1;
+ clamp_mv(&mv->as_mv, xd->mb_to_left_edge - MV_BORDER,
+ xd->mb_to_right_edge + MV_BORDER,
+ xd->mb_to_top_edge - MV_BORDER,
+ xd->mb_to_bottom_edge + MV_BORDER);
}
-// Gets candidate reference motion vector(s) from the given mode info
-// structure if they exists and do NOT match the given reference frame.
-static void get_non_matching_candidates(const MODE_INFO *candidate_mi,
- MV_REFERENCE_FRAME ref_frame,
- MV_REFERENCE_FRAME *c_ref_frame,
- int_mv *c_mv,
- MV_REFERENCE_FRAME *c2_ref_frame,
- int_mv *c2_mv) {
-
- c_mv->as_int = 0;
- c2_mv->as_int = 0;
- *c_ref_frame = INTRA_FRAME;
- *c2_ref_frame = INTRA_FRAME;
-
- // If first candidate not valid neither will be.
- if (candidate_mi->mbmi.ref_frame[0] > INTRA_FRAME) {
- // First candidate
- if (candidate_mi->mbmi.ref_frame[0] != ref_frame) {
- *c_ref_frame = candidate_mi->mbmi.ref_frame[0];
- c_mv->as_int = candidate_mi->mbmi.mv[0].as_int;
- }
-
- // Second candidate
- if ((candidate_mi->mbmi.ref_frame[1] > INTRA_FRAME) &&
- (candidate_mi->mbmi.ref_frame[1] != ref_frame) &&
- (candidate_mi->mbmi.mv[1].as_int != candidate_mi->mbmi.mv[0].as_int)) {
- *c2_ref_frame = candidate_mi->mbmi.ref_frame[1];
- c2_mv->as_int = candidate_mi->mbmi.mv[1].as_int;
- }
- }
+// This function returns either the sub-block mv or the whole block's mv,
+// depending on whether block_size < 8x8 and check_sub_blocks is set.
+static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate,
+ int check_sub_blocks, int which_mv,
+ int search_col, int block_idx) {
+ return (check_sub_blocks && candidate->mbmi.sb_type < BLOCK_SIZE_SB8X8
+ ? candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]]
+ .as_mv[which_mv]
+ : candidate->mbmi.mv[which_mv]);
}
// Performs mv sign inversion if indicated by the reference frame combination.
-static void scale_mv(MACROBLOCKD *xd, MV_REFERENCE_FRAME this_ref_frame,
- MV_REFERENCE_FRAME candidate_ref_frame,
- int_mv *candidate_mv, int *ref_sign_bias) {
+static INLINE int_mv scale_mv(const MODE_INFO *candidate, const int which_mv,
+ const MV_REFERENCE_FRAME this_ref_frame,
+ const int *ref_sign_bias) {
+ int_mv return_mv = candidate->mbmi.mv[which_mv];
// Sign inversion where appropriate.
- if (ref_sign_bias[candidate_ref_frame] != ref_sign_bias[this_ref_frame]) {
- candidate_mv->as_mv.row = -candidate_mv->as_mv.row;
- candidate_mv->as_mv.col = -candidate_mv->as_mv.col;
+ if (ref_sign_bias[candidate->mbmi.ref_frame[which_mv]] !=
+ ref_sign_bias[this_ref_frame]) {
+ return_mv.as_mv.row *= -1;
+ return_mv.as_mv.col *= -1;
}
+ return return_mv;
}
-// Add a candidate mv.
-// Discard if it has already been seen.
-static void add_candidate_mv(int_mv *mv_list, int *mv_scores,
- int *candidate_count, int_mv candidate_mv,
- int weight) {
- if (*candidate_count == 0) {
- mv_list[0].as_int = candidate_mv.as_int;
- mv_scores[0] = weight;
- *candidate_count += 1;
- } else if ((*candidate_count == 1) &&
- (candidate_mv.as_int != mv_list[0].as_int)) {
- mv_list[1].as_int = candidate_mv.as_int;
- mv_scores[1] = weight;
- *candidate_count += 1;
+// This macro adds a motion vector to the mv_ref list if it isn't
+// already in the list. If it is the second motion vector, it will also
+// skip all additional processing and jump to Done!
+#define ADD_MV_REF_LIST(MV) \
+ if (refmv_count) { \
+ if ((MV).as_int != mv_ref_list[0].as_int) { \
+ mv_ref_list[refmv_count] = (MV); \
+ goto Done; \
+ } \
+ } else { \
+ mv_ref_list[refmv_count++] = (MV); \
+ }
+
+// For each of the candidate's reference frames that differs from ours,
+// is not INTRA, and is not a duplicate mv, scale the mv and add it to
+// our list.
+#define IF_DIFF_REF_FRAME_ADD_MV(CANDIDATE) \
+ if ((CANDIDATE)->mbmi.ref_frame[0] != ref_frame) { \
+ ADD_MV_REF_LIST(scale_mv((CANDIDATE), 0, ref_frame, ref_sign_bias)); \
+ } \
+ if ((CANDIDATE)->mbmi.ref_frame[1] != ref_frame && \
+ (CANDIDATE)->mbmi.ref_frame[1] > INTRA_FRAME && \
+ (CANDIDATE)->mbmi.mv[1].as_int != (CANDIDATE)->mbmi.mv[0].as_int) { \
+ ADD_MV_REF_LIST(scale_mv((CANDIDATE), 1, ref_frame, ref_sign_bias)); \
}
+
+// Checks that the given mi_row, mi_col and search point
+// are inside the borders of the tile.
+static INLINE int is_inside(const int mi_col, const int mi_row,
+ const int cur_tile_mi_col_start,
+ const int cur_tile_mi_col_end, const int mi_rows,
+ const int (*mv_ref_search)[2], int idx) {
+ int mi_search_col;
+  const int mi_search_row = mi_row + mv_ref_search[idx][1];
+
+ // Check that the candidate is within the border. We only need to check
+ // the left side because all the positive right side ones are for blocks that
+ // are large enough to support the + value they have within their border.
+ if (mi_search_row < 0)
+ return 0;
+
+ mi_search_col = mi_col + mv_ref_search[idx][0];
+ if (mi_search_col < cur_tile_mi_col_start)
+ return 0;
+
+ return 1;
}
// This function searches the neighbourhood of a given MB/SB
// to try and find candidate reference vectors.
-//
void vp9_find_mv_refs_idx(VP9_COMMON *cm, MACROBLOCKD *xd, MODE_INFO *here,
- MODE_INFO *lf_here, MV_REFERENCE_FRAME ref_frame,
- int_mv *mv_ref_list, int *ref_sign_bias,
- int block_idx) {
- int i;
- MODE_INFO *candidate_mi;
- MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;
- int_mv c_refmv;
- int_mv c2_refmv;
- MV_REFERENCE_FRAME c_ref_frame;
- MV_REFERENCE_FRAME c2_ref_frame;
- int candidate_scores[MAX_MV_REF_CANDIDATES] = { 0 };
+ const MODE_INFO *lf_here,
+ const MV_REFERENCE_FRAME ref_frame,
+ int_mv *mv_ref_list, const int *ref_sign_bias,
+ const int block_idx,
+ const int mi_row, const int mi_col) {
+ int idx;
+ MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
int refmv_count = 0;
const int (*mv_ref_search)[2] = mv_ref_blocks[mbmi->sb_type];
- const int mi_col = get_mi_col(xd);
- const int mi_row = get_mi_row(xd);
- int intra_count = 0;
- int zero_count = 0;
- int newmv_count = 0;
- int x_idx = 0, y_idx = 0;
-
- // Blank the reference vector lists and other local structures.
- vpx_memset(mv_ref_list, 0, sizeof(int_mv) * MAX_MV_REF_CANDIDATES);
-
- if (mbmi->sb_type < BLOCK_SIZE_SB8X8) {
- x_idx = block_idx & 1;
- y_idx = block_idx >> 1;
- }
-
- // We first scan for candidate vectors that match the current reference frame
- // Look at nearest neigbours
- for (i = 0; i < 2; ++i) {
- const int mi_search_col = mi_col + mv_ref_search[i][0];
- const int mi_search_row = mi_row + mv_ref_search[i][1];
- if ((mi_search_col >= cm->cur_tile_mi_col_start) &&
- (mi_search_col < cm->cur_tile_mi_col_end) &&
- (mi_search_row >= 0) && (mi_search_row < cm->mi_rows)) {
- int b;
-
- candidate_mi = here + mv_ref_search[i][0] +
- (mv_ref_search[i][1] * xd->mode_info_stride);
-
- if (block_idx >= 0) {
- if (mv_ref_search[i][0])
- b = 1 + y_idx * 2;
- else
- b = 2 + x_idx;
- } else {
- b = -1;
- }
- if (get_matching_candidate(candidate_mi, ref_frame, &c_refmv, b)) {
- add_candidate_mv(mv_ref_list, candidate_scores,
- &refmv_count, c_refmv, 16);
+ const MODE_INFO *candidate;
+ const int check_sub_blocks = block_idx >= 0;
+ int different_ref_found = 0;
+ int context_counter = 0;
+
+ // Blank the reference vector list
+ vpx_memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES);
+
+  // The nearest 2 blocks are treated differently:
+  // if the size < 8x8 we get the mv from the bmi substructure,
+  // and we also need to keep a mode count.
+ for (idx = 0; idx < 2; ++idx) {
+ if (!is_inside(mi_col, mi_row, cm->cur_tile_mi_col_start,
+ cm->cur_tile_mi_col_end, cm->mi_rows, mv_ref_search, idx))
+ continue;
+
+ candidate = here + mv_ref_search[idx][0]
+ + mv_ref_search[idx][1] * xd->mode_info_stride;
+
+ // Keep counts for entropy encoding.
+ context_counter += mode_2_counter[candidate->mbmi.mode];
+
+ // Check if the candidate comes from the same reference frame.
+ if (candidate->mbmi.ref_frame[0] == ref_frame) {
+ ADD_MV_REF_LIST(get_sub_block_mv(candidate, check_sub_blocks, 0,
+ mv_ref_search[idx][0], block_idx));
+ different_ref_found = candidate->mbmi.ref_frame[1] != ref_frame;
+ } else {
+ different_ref_found = 1;
+ if (candidate->mbmi.ref_frame[1] == ref_frame) {
+ // Add second motion vector if it has the same ref_frame.
+ ADD_MV_REF_LIST(get_sub_block_mv(candidate, check_sub_blocks, 1,
+ mv_ref_search[idx][0], block_idx));
}
-
- // Count number of neihgbours coded intra and zeromv
- intra_count += (candidate_mi->mbmi.mode < NEARESTMV);
- zero_count += (candidate_mi->mbmi.mode == ZEROMV);
- newmv_count += (candidate_mi->mbmi.mode >= NEWMV);
}
}
- // More distant neigbours
- for (i = 2; (i < MVREF_NEIGHBOURS) &&
- (refmv_count < MAX_MV_REF_CANDIDATES); ++i) {
- const int mi_search_col = mi_col + mv_ref_search[i][0];
- const int mi_search_row = mi_row + mv_ref_search[i][1];
- if ((mi_search_col >= cm->cur_tile_mi_col_start) &&
- (mi_search_col < cm->cur_tile_mi_col_end) &&
- (mi_search_row >= 0) && (mi_search_row < cm->mi_rows)) {
- candidate_mi = here + mv_ref_search[i][0] +
- (mv_ref_search[i][1] * xd->mode_info_stride);
-
- if (get_matching_candidate(candidate_mi, ref_frame, &c_refmv, -1)) {
- add_candidate_mv(mv_ref_list, candidate_scores,
- &refmv_count, c_refmv, 16);
+ // Check the rest of the neighbors in much the same way
+ // as before except we don't need to keep track of sub blocks or
+ // mode counts.
+ for (; idx < MVREF_NEIGHBOURS; ++idx) {
+ if (!is_inside(mi_col, mi_row, cm->cur_tile_mi_col_start,
+ cm->cur_tile_mi_col_end, cm->mi_rows, mv_ref_search, idx))
+ continue;
+
+ candidate = here + mv_ref_search[idx][0]
+ + mv_ref_search[idx][1] * xd->mode_info_stride;
+
+ if (candidate->mbmi.ref_frame[0] == ref_frame) {
+ ADD_MV_REF_LIST(candidate->mbmi.mv[0]);
+ different_ref_found = candidate->mbmi.ref_frame[1] != ref_frame;
+ } else {
+ different_ref_found = 1;
+ if (candidate->mbmi.ref_frame[1] == ref_frame) {
+ ADD_MV_REF_LIST(candidate->mbmi.mv[1]);
}
}
}
- // Look in the last frame if it exists
- if (lf_here && (refmv_count < MAX_MV_REF_CANDIDATES)) {
- candidate_mi = lf_here;
- if (get_matching_candidate(candidate_mi, ref_frame, &c_refmv, -1)) {
- add_candidate_mv(mv_ref_list, candidate_scores,
- &refmv_count, c_refmv, 16);
+ // Check the last frame's mode and mv info.
+ if (lf_here != NULL) {
+ if (lf_here->mbmi.ref_frame[0] == ref_frame) {
+ ADD_MV_REF_LIST(lf_here->mbmi.mv[0]);
+ } else if (lf_here->mbmi.ref_frame[1] == ref_frame) {
+ ADD_MV_REF_LIST(lf_here->mbmi.mv[1]);
}
}
- // If we have not found enough candidates consider ones where the
- // reference frame does not match. Break out when we have
- // MAX_MV_REF_CANDIDATES candidates.
- // Look first at spatial neighbours
- for (i = 0; (i < MVREF_NEIGHBOURS) &&
- (refmv_count < MAX_MV_REF_CANDIDATES); ++i) {
- const int mi_search_col = mi_col + mv_ref_search[i][0];
- const int mi_search_row = mi_row + mv_ref_search[i][1];
- if ((mi_search_col >= cm->cur_tile_mi_col_start) &&
- (mi_search_col < cm->cur_tile_mi_col_end) &&
- (mi_search_row >= 0) && (mi_search_row < cm->mi_rows)) {
- candidate_mi = here + mv_ref_search[i][0] +
- (mv_ref_search[i][1] * xd->mode_info_stride);
-
- get_non_matching_candidates(candidate_mi, ref_frame,
- &c_ref_frame, &c_refmv,
- &c2_ref_frame, &c2_refmv);
-
- if (c_ref_frame != INTRA_FRAME) {
- scale_mv(xd, ref_frame, c_ref_frame, &c_refmv, ref_sign_bias);
- add_candidate_mv(mv_ref_list, candidate_scores,
- &refmv_count, c_refmv, 1);
- }
+  // Since we couldn't find 2 mvs from the same reference frame,
+ // go back through the neighbors and find motion vectors from
+ // different reference frames.
+ if (different_ref_found) {
+ for (idx = 0; idx < MVREF_NEIGHBOURS; ++idx) {
+ if (!is_inside(mi_col, mi_row, cm->cur_tile_mi_col_start,
+ cm->cur_tile_mi_col_end, cm->mi_rows, mv_ref_search, idx))
+ continue;
- if (c2_ref_frame != INTRA_FRAME) {
- scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias);
- add_candidate_mv(mv_ref_list, candidate_scores,
- &refmv_count, c2_refmv, 1);
- }
- }
- }
+ candidate = here + mv_ref_search[idx][0]
+ + mv_ref_search[idx][1] * xd->mode_info_stride;
- // Look at the last frame if it exists
- if (lf_here && (refmv_count < MAX_MV_REF_CANDIDATES)) {
- candidate_mi = lf_here;
- get_non_matching_candidates(candidate_mi, ref_frame,
- &c_ref_frame, &c_refmv,
- &c2_ref_frame, &c2_refmv);
-
- if (c_ref_frame != INTRA_FRAME) {
- scale_mv(xd, ref_frame, c_ref_frame, &c_refmv, ref_sign_bias);
- add_candidate_mv(mv_ref_list, candidate_scores,
- &refmv_count, c_refmv, 1);
- }
+ // If the candidate is INTRA we don't want to consider its mv.
+ if (candidate->mbmi.ref_frame[0] == INTRA_FRAME)
+ continue;
- if (c2_ref_frame != INTRA_FRAME) {
- scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias);
- add_candidate_mv(mv_ref_list, candidate_scores,
- &refmv_count, c2_refmv, 1);
+ IF_DIFF_REF_FRAME_ADD_MV(candidate);
}
}
- if (!intra_count) {
- if (!newmv_count) {
- // 0 = both zero mv
- // 1 = one zero mv + one a predicted mv
- // 2 = two predicted mvs
- mbmi->mb_mode_context[ref_frame] = 2 - zero_count;
- } else {
- // 3 = one predicted/zero and one new mv
- // 4 = two new mvs
- mbmi->mb_mode_context[ref_frame] = 2 + newmv_count;
- }
- } else {
- // 5 = one intra neighbour + x
- // 6 = two intra neighbours
- mbmi->mb_mode_context[ref_frame] = 4 + intra_count;
+  // Since we still don't have a candidate, we'll try the last frame.
+ if (lf_here != NULL && lf_here->mbmi.ref_frame[0] != INTRA_FRAME) {
+ IF_DIFF_REF_FRAME_ADD_MV(lf_here);
}
+ Done:
+
+ mbmi->mb_mode_context[ref_frame] = counter_to_context[context_counter];
+
// Clamp vectors
- for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) {
- clamp_mv_ref(xd, &mv_ref_list[i]);
+ for (idx = 0; idx < MAX_MV_REF_CANDIDATES; ++idx) {
+ clamp_mv_ref(xd, &mv_ref_list[idx]);
}
}
+
+#undef ADD_MV_REF_LIST
+#undef IF_DIFF_REF_FRAME_ADD_MV
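
For readers tracing the control flow: with MAX_MV_REF_CANDIDATES == 2 the goto in ADD_MV_REF_LIST fires exactly when the list fills. A hedged expansion of one use site (illustration only, not compiled):

    // ADD_MV_REF_LIST(candidate->mbmi.mv[0]) expands roughly to:
    if (refmv_count) {                                     // slot 0 taken
      if (candidate->mbmi.mv[0].as_int != mv_ref_list[0].as_int) {
        mv_ref_list[refmv_count] = candidate->mbmi.mv[0];  // fills slot 1
        goto Done;                                         // list full: stop
      }
    } else {
      mv_ref_list[refmv_count++] = candidate->mbmi.mv[0];  // first candidate
    }
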
diff --git a/libvpx/vp9/common/vp9_mvref_common.h b/libvpx/vp9/common/vp9_mvref_common.h
index 7290f10ab..c5f89eb57 100644
--- a/libvpx/vp9/common/vp9_mvref_common.h
+++ b/libvpx/vp9/common/vp9_mvref_common.h
@@ -17,11 +17,13 @@
void vp9_find_mv_refs_idx(VP9_COMMON *cm,
MACROBLOCKD *xd,
MODE_INFO *here,
- MODE_INFO *lf_here,
- MV_REFERENCE_FRAME ref_frame,
+ const MODE_INFO *lf_here,
+ const MV_REFERENCE_FRAME ref_frame,
int_mv *mv_ref_list,
- int *ref_sign_bias,
- int block_idx);
+ const int *ref_sign_bias,
+ const int block_idx,
+ const int mi_row,
+ const int mi_col);
static INLINE void vp9_find_mv_refs(VP9_COMMON *cm,
MACROBLOCKD *xd,
@@ -29,9 +31,10 @@ static INLINE void vp9_find_mv_refs(VP9_COMMON *cm,
MODE_INFO *lf_here,
MV_REFERENCE_FRAME ref_frame,
int_mv *mv_ref_list,
- int *ref_sign_bias) {
+ int *ref_sign_bias,
+ int mi_row, int mi_col) {
vp9_find_mv_refs_idx(cm, xd, here, lf_here, ref_frame,
- mv_ref_list, ref_sign_bias, -1);
+ mv_ref_list, ref_sign_bias, -1, mi_row, mi_col);
}
#endif // VP9_COMMON_VP9_MVREF_COMMON_H_
diff --git a/libvpx/vp9/common/vp9_onyxc_int.h b/libvpx/vp9/common/vp9_onyxc_int.h
index f31f24b26..152a93293 100644
--- a/libvpx/vp9/common/vp9_onyxc_int.h
+++ b/libvpx/vp9/common/vp9_onyxc_int.h
@@ -42,7 +42,7 @@ typedef struct frame_contexts {
vp9_prob uv_mode_prob[VP9_INTRA_MODES][VP9_INTRA_MODES - 1];
vp9_prob partition_prob[NUM_FRAME_TYPES][NUM_PARTITION_CONTEXTS]
[PARTITION_TYPES - 1];
- vp9_coeff_probs_model coef_probs[TX_SIZE_MAX_SB][BLOCK_TYPES];
+ vp9_coeff_probs_model coef_probs[TX_SIZES][BLOCK_TYPES];
vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1]
[VP9_SWITCHABLE_FILTERS - 1];
vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1];
@@ -59,12 +59,12 @@ typedef struct {
unsigned int y_mode[BLOCK_SIZE_GROUPS][VP9_INTRA_MODES];
unsigned int uv_mode[VP9_INTRA_MODES][VP9_INTRA_MODES];
unsigned int partition[NUM_PARTITION_CONTEXTS][PARTITION_TYPES];
- vp9_coeff_count_model coef[TX_SIZE_MAX_SB][BLOCK_TYPES];
- unsigned int eob_branch[TX_SIZE_MAX_SB][BLOCK_TYPES][REF_TYPES]
+ vp9_coeff_count_model coef[TX_SIZES][BLOCK_TYPES];
+ unsigned int eob_branch[TX_SIZES][BLOCK_TYPES][REF_TYPES]
[COEF_BANDS][PREV_COEF_CONTEXTS];
unsigned int switchable_interp[VP9_SWITCHABLE_FILTERS + 1]
[VP9_SWITCHABLE_FILTERS];
- unsigned int inter_mode[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1][2];
+ unsigned int inter_mode[INTER_MODE_CONTEXTS][VP9_INTER_MODES];
unsigned int intra_inter[INTRA_INTER_CONTEXTS][2];
unsigned int comp_inter[COMP_INTER_CONTEXTS][2];
unsigned int single_ref[REF_CONTEXTS][2][2];
@@ -240,8 +240,7 @@ static INLINE void set_partition_seg_context(VP9_COMMON *cm, MACROBLOCKD *xd,
xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK);
}
-static int check_bsize_coverage(VP9_COMMON *cm, MACROBLOCKD *xd,
- int mi_row, int mi_col,
+static int check_bsize_coverage(VP9_COMMON *cm, int mi_row, int mi_col,
BLOCK_SIZE_TYPE bsize) {
int bsl = mi_width_log2(bsize), bs = 1 << bsl;
int ms = bs / 2;
@@ -278,14 +277,6 @@ static void set_mi_row_col(VP9_COMMON *cm, MACROBLOCKD *xd,
xd->right_available = (mi_col + bw < cm->cur_tile_mi_col_end);
}
-static int get_mi_row(const MACROBLOCKD *xd) {
- return ((-xd->mb_to_top_edge) >> (3 + LOG2_MI_SIZE));
-}
-
-static int get_mi_col(const MACROBLOCKD *xd) {
- return ((-xd->mb_to_left_edge) >> (3 + LOG2_MI_SIZE));
-}
-
static int get_token_alloc(int mb_rows, int mb_cols) {
return mb_rows * mb_cols * (48 * 16 + 4);
}
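The deleted get_mi_row()/get_mi_col() helpers recovered the mi position from the distance-to-edge fields, which set_mi_row_col() keeps in 1/8-pel units; now that mi_row/mi_col are threaded through explicitly (see the vp9_mvref_common.h change above), the round trip is unnecessary. A worked trace of the deleted arithmetic, assuming MI_SIZE == 8 (so LOG2_MI_SIZE == 3) and mb_to_top_edge == -((mi_row * MI_SIZE) << 3):

    /* mi_row = 5:
     *   mb_to_top_edge = -((5 * 8) << 3) = -320          (1/8-pel units)
     *   (-mb_to_top_edge) >> (3 + LOG2_MI_SIZE)
     *                  = 320 >> 6        = 5             -> mi_row again */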
diff --git a/libvpx/vp9/common/vp9_pred_common.c b/libvpx/vp9/common/vp9_pred_common.c
index e8bcdea82..795962a71 100644
--- a/libvpx/vp9/common/vp9_pred_common.c
+++ b/libvpx/vp9/common/vp9_pred_common.c
@@ -55,34 +55,28 @@ unsigned char vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd) {
}
// Returns a context number for the given MB prediction signal
unsigned char vp9_get_pred_context_intra_inter(const MACROBLOCKD *xd) {
- int pred_context;
const MODE_INFO *const mi = xd->mode_info_context;
const MB_MODE_INFO *const above_mbmi = &mi[-xd->mode_info_stride].mbmi;
const MB_MODE_INFO *const left_mbmi = &mi[-1].mbmi;
const int left_in_image = xd->left_available && left_mbmi->mb_in_image;
const int above_in_image = xd->up_available && above_mbmi->mb_in_image;
- // Note:
- // The mode info data structure has a one element border above and to the
- // left of the entries correpsonding to real macroblocks.
- // The prediction flags in these dummy entries are initialised to 0.
- if (above_in_image && left_in_image) { // both edges available
- if (left_mbmi->ref_frame[0] == INTRA_FRAME &&
- above_mbmi->ref_frame[0] == INTRA_FRAME) { // intra/intra (3)
- pred_context = 3;
- } else { // intra/inter (1) or inter/inter (0)
- pred_context = left_mbmi->ref_frame[0] == INTRA_FRAME ||
- above_mbmi->ref_frame[0] == INTRA_FRAME;
- }
- } else if (above_in_image || left_in_image) { // one edge available
- const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
+ const int left_intra = !is_inter_block(left_mbmi);
+ const int above_intra = !is_inter_block(above_mbmi);
- // inter: 0, intra: 2
- pred_context = 2 * (edge_mbmi->ref_frame[0] == INTRA_FRAME);
- } else {
- pred_context = 0;
- }
- assert(pred_context >= 0 && pred_context < INTRA_INTER_CONTEXTS);
- return pred_context;
+ // The mode info data structure has a one element border above and to the
+ // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialized to 0.
+ // 0 - inter/inter, inter/--, --/inter, --/--
+ // 1 - intra/inter, inter/intra
+ // 2 - intra/--, --/intra
+ // 3 - intra/intra
+ if (above_in_image && left_in_image) // both edges available
+ return left_intra && above_intra ? 3
+ : left_intra || above_intra;
+ else if (above_in_image || left_in_image) // one edge available
+ return 2 * (above_in_image ? above_intra : left_intra);
+ else
+ return 0;
}
// Returns a context number for the given MB prediction signal
unsigned char vp9_get_pred_context_comp_inter_inter(const VP9_COMMON *cm,
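The rewritten vp9_get_pred_context_intra_inter() collapses the old branch ladder into the 0..3 mapping its new comment lists. Laid out as a table over the two neighbours (derived from the code above):

    /*   left:        inter  intra  unavailable
     * above:
     *   inter          0      1        0
     *   intra          1      3        2
     *   unavailable    0      2        0    (both unavailable -> 0) */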
diff --git a/libvpx/vp9/common/vp9_pred_common.h b/libvpx/vp9/common/vp9_pred_common.h
index e4b6575e3..238290b41 100644
--- a/libvpx/vp9/common/vp9_pred_common.h
+++ b/libvpx/vp9/common/vp9_pred_common.h
@@ -110,9 +110,9 @@ unsigned char vp9_get_pred_context_tx_size(const MACROBLOCKD *xd);
static const vp9_prob *get_tx_probs(BLOCK_SIZE_TYPE bsize, uint8_t context,
const struct tx_probs *tx_probs) {
- if (bsize < BLOCK_SIZE_MB16X16)
+ if (bsize < BLOCK_16X16)
return tx_probs->p8x8[context];
- else if (bsize < BLOCK_SIZE_SB32X32)
+ else if (bsize < BLOCK_32X32)
return tx_probs->p16x16[context];
else
return tx_probs->p32x32[context];
@@ -127,9 +127,9 @@ static const vp9_prob *get_tx_probs2(const MACROBLOCKD *xd,
static void update_tx_counts(BLOCK_SIZE_TYPE bsize, uint8_t context,
TX_SIZE tx_size, struct tx_counts *tx_counts) {
- if (bsize >= BLOCK_SIZE_SB32X32)
+ if (bsize >= BLOCK_32X32)
tx_counts->p32x32[context][tx_size]++;
- else if (bsize >= BLOCK_SIZE_MB16X16)
+ else if (bsize >= BLOCK_16X16)
tx_counts->p16x16[context][tx_size]++;
else
tx_counts->p8x8[context][tx_size]++;
diff --git a/libvpx/vp9/common/vp9_reconinter.c b/libvpx/vp9/common/vp9_reconinter.c
index 63e5646ad..0b65e0610 100644
--- a/libvpx/vp9/common/vp9_reconinter.c
+++ b/libvpx/vp9/common/vp9_reconinter.c
@@ -197,14 +197,14 @@ void vp9_setup_interp_filters(MACROBLOCKD *xd,
void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
- const int_mv *src_mv,
+ const MV *src_mv,
const struct scale_factors *scale,
int w, int h, int weight,
const struct subpix_fn_table *subpix,
enum mv_precision precision) {
const MV32 mv = precision == MV_PRECISION_Q4
- ? scale->scale_mv_q4(&src_mv->as_mv, scale)
- : scale->scale_mv_q3_to_q4(&src_mv->as_mv, scale);
+ ? scale->scale_mv_q4(src_mv, scale)
+ : scale->scale_mv_q3_to_q4(src_mv, scale);
const int subpel_x = mv.col & 15;
const int subpel_y = mv.row & 15;
@@ -220,45 +220,44 @@ static INLINE int round_mv_comp_q4(int value) {
return (value < 0 ? value - 2 : value + 2) / 4;
}
-static int mi_mv_pred_row_q4(MACROBLOCKD *mb, int idx) {
- const int temp = mb->mode_info_context->bmi[0].as_mv[idx].as_mv.row +
- mb->mode_info_context->bmi[1].as_mv[idx].as_mv.row +
- mb->mode_info_context->bmi[2].as_mv[idx].as_mv.row +
- mb->mode_info_context->bmi[3].as_mv[idx].as_mv.row;
- return round_mv_comp_q4(temp);
+static MV mi_mv_pred_q4(const MODE_INFO *mi, int idx) {
+ MV res = { round_mv_comp_q4(mi->bmi[0].as_mv[idx].as_mv.row +
+ mi->bmi[1].as_mv[idx].as_mv.row +
+ mi->bmi[2].as_mv[idx].as_mv.row +
+ mi->bmi[3].as_mv[idx].as_mv.row),
+ round_mv_comp_q4(mi->bmi[0].as_mv[idx].as_mv.col +
+ mi->bmi[1].as_mv[idx].as_mv.col +
+ mi->bmi[2].as_mv[idx].as_mv.col +
+ mi->bmi[3].as_mv[idx].as_mv.col) };
+ return res;
}
-static int mi_mv_pred_col_q4(MACROBLOCKD *mb, int idx) {
- const int temp = mb->mode_info_context->bmi[0].as_mv[idx].as_mv.col +
- mb->mode_info_context->bmi[1].as_mv[idx].as_mv.col +
- mb->mode_info_context->bmi[2].as_mv[idx].as_mv.col +
- mb->mode_info_context->bmi[3].as_mv[idx].as_mv.col;
- return round_mv_comp_q4(temp);
-}
+
// TODO(jkoleszar): yet another mv clamping function :-(
MV clamp_mv_to_umv_border_sb(const MV *src_mv,
int bwl, int bhl, int ss_x, int ss_y,
int mb_to_left_edge, int mb_to_top_edge,
int mb_to_right_edge, int mb_to_bottom_edge) {
- /* If the MV points so far into the UMV border that no visible pixels
- * are used for reconstruction, the subpel part of the MV can be
- * discarded and the MV limited to 16 pixels with equivalent results.
- */
+ // If the MV points so far into the UMV border that no visible pixels
+ // are used for reconstruction, the subpel part of the MV can be
+ // discarded and the MV limited to 16 pixels with equivalent results.
const int spel_left = (VP9_INTERP_EXTEND + (4 << bwl)) << 4;
const int spel_right = spel_left - (1 << 4);
const int spel_top = (VP9_INTERP_EXTEND + (4 << bhl)) << 4;
const int spel_bottom = spel_top - (1 << 4);
- MV clamped_mv;
-
+ MV clamped_mv = {
+ src_mv->row << (1 - ss_y),
+ src_mv->col << (1 - ss_x)
+ };
assert(ss_x <= 1);
assert(ss_y <= 1);
- clamped_mv.col = clamp(src_mv->col << (1 - ss_x),
- (mb_to_left_edge << (1 - ss_x)) - spel_left,
- (mb_to_right_edge << (1 - ss_x)) + spel_right);
- clamped_mv.row = clamp(src_mv->row << (1 - ss_y),
- (mb_to_top_edge << (1 - ss_y)) - spel_top,
- (mb_to_bottom_edge << (1 - ss_y)) + spel_bottom);
+
+ clamp_mv(&clamped_mv, (mb_to_left_edge << (1 - ss_x)) - spel_left,
+ (mb_to_right_edge << (1 - ss_x)) + spel_right,
+ (mb_to_top_edge << (1 - ss_y)) - spel_top,
+ (mb_to_bottom_edge << (1 - ss_y)) + spel_bottom);
+
return clamped_mv;
}
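The spel_* bounds are in 1/16-pel units (the trailing << 4). A worked example, assuming VP9_INTERP_EXTEND == 4 and an 8-pixel-wide luma block (bwl == 1, no subsampling):

    /* spel_left  = (4 + (4 << 1)) << 4 = (4 + 8) * 16 = 192  -> 12 pixels
     * spel_right = 192 - (1 << 4)      = 176               -> 11 pixels
     * i.e. the clamp lets the MV reach 12px past the left/top border but
     * one pixel less past the right/bottom; beyond that only extended
     * border pixels would be sampled, so clamping changes nothing. */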
@@ -280,15 +279,14 @@ static void build_inter_predictors(int plane, int block,
const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
const int x = 4 * (block & ((1 << bwl) - 1)), y = 4 * (block >> bwl);
- const int use_second_ref = xd->mode_info_context->mbmi.ref_frame[1] > 0;
+ const MODE_INFO *const mi = xd->mode_info_context;
+ const int use_second_ref = mi->mbmi.ref_frame[1] > 0;
int which_mv;
assert(x < (4 << bwl));
assert(y < (4 << bhl));
- assert(xd->mode_info_context->mbmi.sb_type < BLOCK_SIZE_SB8X8 ||
- 4 << pred_w == (4 << bwl));
- assert(xd->mode_info_context->mbmi.sb_type < BLOCK_SIZE_SB8X8 ||
- 4 << pred_h == (4 << bhl));
+ assert(mi->mbmi.sb_type < BLOCK_SIZE_SB8X8 || 4 << pred_w == (4 << bwl));
+ assert(mi->mbmi.sb_type < BLOCK_SIZE_SB8X8 || 4 << pred_h == (4 << bhl));
for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
// source
@@ -301,44 +299,30 @@ static void build_inter_predictors(int plane, int block,
// dest
uint8_t *const dst = arg->dst[plane] + arg->dst_stride[plane] * y + x;
- // motion vector
- const MV *mv;
- MV split_chroma_mv;
- int_mv clamped_mv;
-
- if (xd->mode_info_context->mbmi.sb_type < BLOCK_SIZE_SB8X8) {
- if (plane == 0) {
- mv = &xd->mode_info_context->bmi[block].as_mv[which_mv].as_mv;
- } else {
- // TODO(jkoleszar): All chroma MVs in SPLITMV mode are taken as the
- // same MV (the average of the 4 luma MVs) but we could do something
- // smarter for non-4:2:0. Just punt for now, pending the changes to get
- // rid of SPLITMV mode entirely.
- split_chroma_mv.row = mi_mv_pred_row_q4(xd, which_mv);
- split_chroma_mv.col = mi_mv_pred_col_q4(xd, which_mv);
- mv = &split_chroma_mv;
- }
- } else {
- mv = &xd->mode_info_context->mbmi.mv[which_mv].as_mv;
- }
-
- /* TODO(jkoleszar): This clamping is done in the incorrect place for the
- * scaling case. It needs to be done on the scaled MV, not the pre-scaling
- * MV. Note however that it performs the subsampling aware scaling so
- * that the result is always q4.
- */
- clamped_mv.as_mv = clamp_mv_to_umv_border_sb(mv, bwl, bhl,
- xd->plane[plane].subsampling_x,
- xd->plane[plane].subsampling_y,
- xd->mb_to_left_edge,
- xd->mb_to_top_edge,
- xd->mb_to_right_edge,
- xd->mb_to_bottom_edge);
+ // TODO(jkoleszar): All chroma MVs in SPLITMV mode are taken as the
+ // same MV (the average of the 4 luma MVs) but we could do something
+ // smarter for non-4:2:0. Just punt for now, pending the changes to get
+ // rid of SPLITMV mode entirely.
+ const MV mv = mi->mbmi.sb_type < BLOCK_SIZE_SB8X8
+ ? (plane == 0 ? mi->bmi[block].as_mv[which_mv].as_mv
+ : mi_mv_pred_q4(mi, which_mv))
+ : mi->mbmi.mv[which_mv].as_mv;
+
+ // TODO(jkoleszar): This clamping is done in the incorrect place for the
+ // scaling case. It needs to be done on the scaled MV, not the pre-scaling
+ // MV. Note however that it performs the subsampling aware scaling so
+ // that the result is always q4.
+ const MV res_mv = clamp_mv_to_umv_border_sb(&mv, bwl, bhl,
+ xd->plane[plane].subsampling_x,
+ xd->plane[plane].subsampling_y,
+ xd->mb_to_left_edge,
+ xd->mb_to_top_edge,
+ xd->mb_to_right_edge,
+ xd->mb_to_bottom_edge);
scale->set_scaled_offsets(scale, arg->y + y, arg->x + x);
-
vp9_build_inter_predictor(pre, pre_stride,
dst, arg->dst_stride[plane],
- &clamped_mv, &xd->scale_factor[which_mv],
+ &res_mv, &xd->scale_factor[which_mv],
4 << pred_w, 4 << pred_h, which_mv,
&xd->subpix, MV_PRECISION_Q4);
}
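mi_mv_pred_q4() feeds round_mv_comp_q4(), whose +/-2 bias makes the divide-by-4 round to nearest symmetrically around zero (C integer division truncates toward zero). For example:

    /* round_mv_comp_q4( 5) == ( 5 + 2) / 4 ==  1
     * round_mv_comp_q4( 6) == ( 6 + 2) / 4 ==  2
     * round_mv_comp_q4(-6) == (-6 - 2) / 4 == -2  (not -1, as a plain /4 would give) */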
@@ -400,7 +384,7 @@ void vp9_setup_scale_factors(VP9_COMMON *cm, int i) {
const int ref = cm->active_ref_idx[i];
struct scale_factors *const sf = &cm->active_ref_scale[i];
if (ref >= NUM_YV12_BUFFERS) {
- memset(sf, 0, sizeof(*sf));
+ vp9_zero(*sf);
} else {
YV12_BUFFER_CONFIG *const fb = &cm->yv12_fb[ref];
vp9_setup_scale_factors_for_frame(sf,
diff --git a/libvpx/vp9/common/vp9_reconinter.h b/libvpx/vp9/common/vp9_reconinter.h
index e37750dea..6ec7323e1 100644
--- a/libvpx/vp9/common/vp9_reconinter.h
+++ b/libvpx/vp9/common/vp9_reconinter.h
@@ -39,7 +39,7 @@ void vp9_setup_scale_factors_for_frame(struct scale_factors *scale,
void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
- const int_mv *mv_q3,
+ const MV *mv_q3,
const struct scale_factors *scale,
int w, int h, int do_avg,
const struct subpix_fn_table *subpix,
diff --git a/libvpx/vp9/common/vp9_rtcd_defs.sh b/libvpx/vp9/common/vp9_rtcd_defs.sh
index c357ef62a..6bb3cb888 100644
--- a/libvpx/vp9/common/vp9_rtcd_defs.sh
+++ b/libvpx/vp9/common/vp9_rtcd_defs.sh
@@ -7,9 +7,7 @@ cat <<EOF
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_enums.h"
-struct loop_filter_info;
struct macroblockd;
-struct loop_filter_info;
/* Encoder forward decls */
struct macroblock;
@@ -22,7 +20,11 @@ EOF
}
forward_decls vp9_common_forward_decls
-[ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2
+# x86inc.asm doesn't work if pic is enabled on 32-bit platforms, so no assembly.
+[ "$CONFIG_USE_X86INC" = "yes" ] && mmx_x86inc=mmx && sse2_x86inc=sse2 && ssse3_x86inc=ssse3
+
+# This variable is for functions that are 64-bit only.
+[ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2 && ssse3_x86_64=ssse3
#
# Dequant
@@ -47,7 +49,7 @@ prototype void vp9_d27_predictor_4x4 "uint8_t *ypred_ptr, ptrdiff_t y_stride, ui
specialize vp9_d27_predictor_4x4
prototype void vp9_d45_predictor_4x4 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
-specialize vp9_d45_predictor_4x4
+specialize vp9_d45_predictor_4x4 ssse3
prototype void vp9_d63_predictor_4x4 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
specialize vp9_d63_predictor_4x4
@@ -86,7 +88,7 @@ prototype void vp9_d27_predictor_8x8 "uint8_t *ypred_ptr, ptrdiff_t y_stride, ui
specialize vp9_d27_predictor_8x8
prototype void vp9_d45_predictor_8x8 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
-specialize vp9_d45_predictor_8x8
+specialize vp9_d45_predictor_8x8 ssse3
prototype void vp9_d63_predictor_8x8 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
specialize vp9_d63_predictor_8x8
@@ -125,7 +127,7 @@ prototype void vp9_d27_predictor_16x16 "uint8_t *ypred_ptr, ptrdiff_t y_stride,
specialize vp9_d27_predictor_16x16
prototype void vp9_d45_predictor_16x16 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
-specialize vp9_d45_predictor_16x16
+specialize vp9_d45_predictor_16x16 ssse3
prototype void vp9_d63_predictor_16x16 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
specialize vp9_d63_predictor_16x16
@@ -164,7 +166,7 @@ prototype void vp9_d27_predictor_32x32 "uint8_t *ypred_ptr, ptrdiff_t y_stride,
specialize vp9_d27_predictor_32x32
prototype void vp9_d45_predictor_32x32 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
-specialize vp9_d45_predictor_32x32
+specialize vp9_d45_predictor_32x32 ssse3
prototype void vp9_d63_predictor_32x32 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
specialize vp9_d63_predictor_32x32
@@ -214,7 +216,7 @@ fi
# Loopfilter
#
prototype void vp9_mb_lpf_vertical_edge_w "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"
-specialize vp9_mb_lpf_vertical_edge_w sse2
+specialize vp9_mb_lpf_vertical_edge_w sse2 neon
prototype void vp9_mbloop_filter_vertical_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
specialize vp9_mbloop_filter_vertical_edge sse2 neon
@@ -223,7 +225,7 @@ prototype void vp9_loop_filter_vertical_edge "uint8_t *s, int pitch, const uint8
specialize vp9_loop_filter_vertical_edge mmx neon
prototype void vp9_mb_lpf_horizontal_edge_w "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
-specialize vp9_mb_lpf_horizontal_edge_w sse2
+specialize vp9_mb_lpf_horizontal_edge_w sse2 neon
prototype void vp9_mbloop_filter_horizontal_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
specialize vp9_mbloop_filter_horizontal_edge sse2 neon
@@ -265,10 +267,10 @@ specialize vp9_blend_b
# Sub Pixel Filters
#
prototype void vp9_convolve_copy "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve_copy sse2
+specialize vp9_convolve_copy $sse2_x86inc
prototype void vp9_convolve_avg "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve_avg sse2
+specialize vp9_convolve_avg $sse2_x86inc
prototype void vp9_convolve8 "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
specialize vp9_convolve8 ssse3 neon
@@ -297,14 +299,17 @@ specialize vp9_short_idct4x4_1_add sse2
prototype void vp9_short_idct4x4_add "int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct4x4_add sse2
+prototype void vp9_short_idct8x8_1_add "int16_t *input, uint8_t *dest, int dest_stride"
+specialize vp9_short_idct8x8_1_add sse2
+
prototype void vp9_short_idct8x8_add "int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct8x8_add sse2 neon
prototype void vp9_short_idct10_8x8_add "int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct10_8x8_add sse2
-prototype void vp9_short_idct1_8x8 "int16_t *input, int16_t *output"
-specialize vp9_short_idct1_8x8
+prototype void vp9_short_idct16x16_1_add "int16_t *input, uint8_t *dest, int dest_stride"
+specialize vp9_short_idct16x16_1_add sse2
prototype void vp9_short_idct16x16_add "int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct16x16_add sse2
@@ -312,18 +317,12 @@ specialize vp9_short_idct16x16_add sse2
prototype void vp9_short_idct10_16x16_add "int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct10_16x16_add sse2
-prototype void vp9_short_idct1_16x16 "int16_t *input, int16_t *output"
-specialize vp9_short_idct1_16x16
-
prototype void vp9_short_idct32x32_add "int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct32x32_add sse2
prototype void vp9_short_idct1_32x32 "int16_t *input, int16_t *output"
specialize vp9_short_idct1_32x32
-prototype void vp9_short_idct10_32x32_add "int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_short_idct10_32x32_add
-
prototype void vp9_short_iht4x4_add "int16_t *input, uint8_t *dest, int dest_stride, int tx_type"
specialize vp9_short_iht4x4_add sse2
@@ -702,12 +701,10 @@ specialize vp9_get_mb_ss mmx sse2
# ENCODEMB INVOKE
prototype int64_t vp9_block_error "int16_t *coeff, int16_t *dqcoeff, intptr_t block_size, int64_t *ssz"
-specialize vp9_block_error sse2
+specialize vp9_block_error $sse2_x86inc
prototype void vp9_subtract_block "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"
-specialize vp9_subtract_block sse2
-
-[ $arch = "x86_64" ] && ssse3_x86_64=ssse3
+specialize vp9_subtract_block $sse2_x86inc
prototype void vp9_quantize_b "int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, int16_t *zbin_ptr, int16_t *round_ptr, int16_t *quant_ptr, int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"
specialize vp9_quantize_b $ssse3_x86_64
@@ -719,13 +716,11 @@ specialize vp9_quantize_b_32x32 $ssse3_x86_64
# Structured Similarity (SSIM)
#
if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then
- [ $arch = "x86_64" ] && sse2_on_x86_64=sse2
-
prototype void vp9_ssim_parms_8x8 "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"
- specialize vp9_ssim_parms_8x8 $sse2_on_x86_64
+ specialize vp9_ssim_parms_8x8 $sse2_x86_64
prototype void vp9_ssim_parms_16x16 "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"
- specialize vp9_ssim_parms_16x16 $sse2_on_x86_64
+ specialize vp9_ssim_parms_16x16 $sse2_x86_64
fi
# fdct functions
diff --git a/libvpx/vp9/common/vp9_treecoder.h b/libvpx/vp9/common/vp9_treecoder.h
index ebcd4116f..31182c35c 100644
--- a/libvpx/vp9/common/vp9_treecoder.h
+++ b/libvpx/vp9/common/vp9_treecoder.h
@@ -79,4 +79,22 @@ static INLINE vp9_prob weighted_prob(int prob1, int prob2, int factor) {
return ROUND_POWER_OF_TWO(prob1 * (256 - factor) + prob2 * factor, 8);
}
+static INLINE vp9_prob merge_probs(vp9_prob pre_prob, vp9_prob prob,
+ const unsigned int ct[2],
+ unsigned int count_sat,
+ unsigned int max_update_factor) {
+ const unsigned int count = MIN(ct[0] + ct[1], count_sat);
+ const unsigned int factor = max_update_factor * count / count_sat;
+ return weighted_prob(pre_prob, prob, factor);
+}
+
+static INLINE vp9_prob merge_probs2(vp9_prob pre_prob,
+ const unsigned int ct[2],
+ unsigned int count_sat,
+ unsigned int max_update_factor) {
+ return merge_probs(pre_prob, get_binary_prob(ct[0], ct[1]), ct, count_sat,
+ max_update_factor);
+}
+
+
#endif // VP9_COMMON_VP9_TREECODER_H_
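merge_probs() saturates the branch counts and scales the update factor linearly before blending through weighted_prob(). A worked example with hypothetical numbers:

    /* pre_prob = 128, prob = 192, ct = {30, 10},
     * count_sat = 20, max_update_factor = 128:
     *   count  = MIN(30 + 10, 20) = 20                  (saturated)
     *   factor = 128 * 20 / 20    = 128
     *   result = ROUND_POWER_OF_TWO(128 * (256 - 128) + 192 * 128, 8)
     *          = (16384 + 24576 + 128) >> 8 = 160       (midway, since factor == 128) */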
diff --git a/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c b/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c
index a1e14b482..8f740f412 100644
--- a/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -523,9 +523,9 @@ void vp9_short_iht4x4_add_sse2(int16_t *input, uint8_t *dest, int stride,
{ \
__m128i d0 = _mm_loadl_epi64((__m128i *)(dest)); \
d0 = _mm_unpacklo_epi8(d0, zero); \
- in_x = _mm_add_epi16(in_x, d0); \
- in_x = _mm_packus_epi16(in_x, in_x); \
- _mm_storel_epi64((__m128i *)(dest), in_x); \
+ d0 = _mm_add_epi16(in_x, d0); \
+ d0 = _mm_packus_epi16(d0, d0); \
+ _mm_storel_epi64((__m128i *)(dest), d0); \
dest += stride; \
}
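The macro fix matters because the DC-only functions added below reuse the same in_x register (dc_value) for all eight rows; accumulating into d0 leaves it untouched, where the old version clobbered it after the first row. A scalar model of one RECON_AND_STORE step (a sketch; the helper name is hypothetical, and the real macro also advances dest by one stride):

    #include <stdint.h>

    static void recon_and_store_row(uint8_t *dest, const int16_t *in) {
      int i;
      for (i = 0; i < 8; ++i) {          /* 8 pixels per row */
        const int v = dest[i] + in[i];   /* add residual to prediction */
        dest[i] = (uint8_t)(v < 0 ? 0 : v > 255 ? 255 : v);  /* packus saturation */
      }
    }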
@@ -597,6 +597,27 @@ void vp9_short_idct8x8_add_sse2(int16_t *input, uint8_t *dest, int stride) {
RECON_AND_STORE(dest, in7);
}
+void vp9_short_idct8x8_1_add_sse2(int16_t *input, uint8_t *dest, int stride) {
+ __m128i dc_value;
+ const __m128i zero = _mm_setzero_si128();
+ int a;
+
+ a = dct_const_round_shift(input[0] * cospi_16_64);
+ a = dct_const_round_shift(a * cospi_16_64);
+ a = ROUND_POWER_OF_TWO(a, 5);
+
+ dc_value = _mm_set1_epi16(a);
+
+ RECON_AND_STORE(dest, dc_value);
+ RECON_AND_STORE(dest, dc_value);
+ RECON_AND_STORE(dest, dc_value);
+ RECON_AND_STORE(dest, dc_value);
+ RECON_AND_STORE(dest, dc_value);
+ RECON_AND_STORE(dest, dc_value);
+ RECON_AND_STORE(dest, dc_value);
+ RECON_AND_STORE(dest, dc_value);
+}
+
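vp9_short_idct8x8_1_add_sse2() handles the DC-only case: with only input[0] nonzero, both 1-D passes of the 8x8 inverse DCT reduce to a multiply by cospi_16_64, and the final ROUND_POWER_OF_TWO(a, 5) matches the full transform's output shift. A scalar sketch of the same computation, using the dct_const_round_shift/cospi_16_64 helpers the SSE2 version also uses:

    static void short_idct8x8_1_add_sketch(const int16_t *input,
                                           uint8_t *dest, int stride) {
      int r, c;
      int a = dct_const_round_shift(input[0] * cospi_16_64);  /* pass 1 */
      a = dct_const_round_shift(a * cospi_16_64);             /* pass 2 */
      a = ROUND_POWER_OF_TWO(a, 5);                           /* final shift */
      for (r = 0; r < 8; ++r, dest += stride)
        for (c = 0; c < 8; ++c) {
          const int v = dest[c] + a;                          /* add DC, clamp */
          dest[c] = (uint8_t)(v < 0 ? 0 : v > 255 ? 255 : v);
        }
    }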
// perform 8x8 transpose
static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) {
const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]);
@@ -1449,6 +1470,38 @@ void vp9_short_idct16x16_add_sse2(int16_t *input, uint8_t *dest, int stride) {
}
}
+void vp9_short_idct16x16_1_add_sse2(int16_t *input, uint8_t *dest, int stride) {
+ __m128i dc_value;
+ const __m128i zero = _mm_setzero_si128();
+ int a, i;
+
+ a = dct_const_round_shift(input[0] * cospi_16_64);
+ a = dct_const_round_shift(a * cospi_16_64);
+ a = ROUND_POWER_OF_TWO(a, 6);
+
+ dc_value = _mm_set1_epi16(a);
+
+ for (i = 0; i < 2; ++i) {
+ RECON_AND_STORE(dest, dc_value);
+ RECON_AND_STORE(dest, dc_value);
+ RECON_AND_STORE(dest, dc_value);
+ RECON_AND_STORE(dest, dc_value);
+ RECON_AND_STORE(dest, dc_value);
+ RECON_AND_STORE(dest, dc_value);
+ RECON_AND_STORE(dest, dc_value);
+ RECON_AND_STORE(dest, dc_value);
+ RECON_AND_STORE(dest, dc_value);
+ RECON_AND_STORE(dest, dc_value);
+ RECON_AND_STORE(dest, dc_value);
+ RECON_AND_STORE(dest, dc_value);
+ RECON_AND_STORE(dest, dc_value);
+ RECON_AND_STORE(dest, dc_value);
+ RECON_AND_STORE(dest, dc_value);
+ RECON_AND_STORE(dest, dc_value);
+ dest += 8 - (stride * 16);
+ }
+}
+
static INLINE void array_transpose_16x16(__m128i *res0, __m128i *res1) {
__m128i tbuf[8];
array_transpose_8x8(res0, res0);
@@ -2760,6 +2813,12 @@ void vp9_short_idct10_16x16_add_sse2(int16_t *input, uint8_t *dest,
}
}
+#define LOAD_DQCOEFF(reg, input) \
+ { \
+ reg = _mm_load_si128((__m128i *) input); \
+ input += 8; \
+ } \
+
void vp9_short_idct32x32_add_sse2(int16_t *input, uint8_t *dest, int stride) {
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1<<5);
@@ -2827,48 +2886,126 @@ void vp9_short_idct32x32_add_sse2(int16_t *input, uint8_t *dest, int stride) {
stp2_23, stp2_24, stp2_25, stp2_26, stp2_27, stp2_28, stp2_29,
stp2_30, stp2_31;
__m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
- int i, j;
+ int i, j, i32;
+ __m128i zero_idx[16];
+ int zero_flag[2];
// We work on an 8x32 block each time, and loop 8 times for 2-D 32x32 idct.
for (i = 0; i < 8; i++) {
+ i32 = (i << 5);
if (i < 4) {
// First 1-D idct
// Load input data.
- in0 = _mm_load_si128((__m128i *)input);
- in8 = _mm_load_si128((__m128i *)(input + 8 * 1));
- in16 = _mm_load_si128((__m128i *)(input + 8 * 2));
- in24 = _mm_load_si128((__m128i *)(input + 8 * 3));
- in1 = _mm_load_si128((__m128i *)(input + 8 * 4));
- in9 = _mm_load_si128((__m128i *)(input + 8 * 5));
- in17 = _mm_load_si128((__m128i *)(input + 8 * 6));
- in25 = _mm_load_si128((__m128i *)(input + 8 * 7));
- in2 = _mm_load_si128((__m128i *)(input + 8 * 8));
- in10 = _mm_load_si128((__m128i *)(input + 8 * 9));
- in18 = _mm_load_si128((__m128i *)(input + 8 * 10));
- in26 = _mm_load_si128((__m128i *)(input + 8 * 11));
- in3 = _mm_load_si128((__m128i *)(input + 8 * 12));
- in11 = _mm_load_si128((__m128i *)(input + 8 * 13));
- in19 = _mm_load_si128((__m128i *)(input + 8 * 14));
- in27 = _mm_load_si128((__m128i *)(input + 8 * 15));
-
- in4 = _mm_load_si128((__m128i *)(input + 8 * 16));
- in12 = _mm_load_si128((__m128i *)(input + 8 * 17));
- in20 = _mm_load_si128((__m128i *)(input + 8 * 18));
- in28 = _mm_load_si128((__m128i *)(input + 8 * 19));
- in5 = _mm_load_si128((__m128i *)(input + 8 * 20));
- in13 = _mm_load_si128((__m128i *)(input + 8 * 21));
- in21 = _mm_load_si128((__m128i *)(input + 8 * 22));
- in29 = _mm_load_si128((__m128i *)(input + 8 * 23));
- in6 = _mm_load_si128((__m128i *)(input + 8 * 24));
- in14 = _mm_load_si128((__m128i *)(input + 8 * 25));
- in22 = _mm_load_si128((__m128i *)(input + 8 * 26));
- in30 = _mm_load_si128((__m128i *)(input + 8 * 27));
- in7 = _mm_load_si128((__m128i *)(input + 8 * 28));
- in15 = _mm_load_si128((__m128i *)(input + 8 * 29));
- in23 = _mm_load_si128((__m128i *)(input + 8 * 30));
- in31 = _mm_load_si128((__m128i *)(input + 8 * 31));
-
- input += 256;
+ LOAD_DQCOEFF(in0, input);
+ LOAD_DQCOEFF(in8, input);
+ LOAD_DQCOEFF(in16, input);
+ LOAD_DQCOEFF(in24, input);
+ LOAD_DQCOEFF(in1, input);
+ LOAD_DQCOEFF(in9, input);
+ LOAD_DQCOEFF(in17, input);
+ LOAD_DQCOEFF(in25, input);
+ LOAD_DQCOEFF(in2, input);
+ LOAD_DQCOEFF(in10, input);
+ LOAD_DQCOEFF(in18, input);
+ LOAD_DQCOEFF(in26, input);
+ LOAD_DQCOEFF(in3, input);
+ LOAD_DQCOEFF(in11, input);
+ LOAD_DQCOEFF(in19, input);
+ LOAD_DQCOEFF(in27, input);
+
+ LOAD_DQCOEFF(in4, input);
+ LOAD_DQCOEFF(in12, input);
+ LOAD_DQCOEFF(in20, input);
+ LOAD_DQCOEFF(in28, input);
+ LOAD_DQCOEFF(in5, input);
+ LOAD_DQCOEFF(in13, input);
+ LOAD_DQCOEFF(in21, input);
+ LOAD_DQCOEFF(in29, input);
+ LOAD_DQCOEFF(in6, input);
+ LOAD_DQCOEFF(in14, input);
+ LOAD_DQCOEFF(in22, input);
+ LOAD_DQCOEFF(in30, input);
+ LOAD_DQCOEFF(in7, input);
+ LOAD_DQCOEFF(in15, input);
+ LOAD_DQCOEFF(in23, input);
+ LOAD_DQCOEFF(in31, input);
+
+ // checking if all entries are zero
+ zero_idx[0] = _mm_or_si128(in0, in1);
+ zero_idx[1] = _mm_or_si128(in2, in3);
+ zero_idx[2] = _mm_or_si128(in4, in5);
+ zero_idx[3] = _mm_or_si128(in6, in7);
+ zero_idx[4] = _mm_or_si128(in8, in9);
+ zero_idx[5] = _mm_or_si128(in10, in11);
+ zero_idx[6] = _mm_or_si128(in12, in13);
+ zero_idx[7] = _mm_or_si128(in14, in15);
+ zero_idx[8] = _mm_or_si128(in16, in17);
+ zero_idx[9] = _mm_or_si128(in18, in19);
+ zero_idx[10] = _mm_or_si128(in20, in21);
+ zero_idx[11] = _mm_or_si128(in22, in23);
+ zero_idx[12] = _mm_or_si128(in24, in25);
+ zero_idx[13] = _mm_or_si128(in26, in27);
+ zero_idx[14] = _mm_or_si128(in28, in29);
+ zero_idx[15] = _mm_or_si128(in30, in31);
+
+ zero_idx[0] = _mm_or_si128(zero_idx[0], zero_idx[1]);
+ zero_idx[1] = _mm_or_si128(zero_idx[2], zero_idx[3]);
+ zero_idx[2] = _mm_or_si128(zero_idx[4], zero_idx[5]);
+ zero_idx[3] = _mm_or_si128(zero_idx[6], zero_idx[7]);
+ zero_idx[4] = _mm_or_si128(zero_idx[8], zero_idx[9]);
+ zero_idx[5] = _mm_or_si128(zero_idx[10], zero_idx[11]);
+ zero_idx[6] = _mm_or_si128(zero_idx[12], zero_idx[13]);
+ zero_idx[7] = _mm_or_si128(zero_idx[14], zero_idx[15]);
+
+ zero_idx[8] = _mm_or_si128(zero_idx[0], zero_idx[1]);
+ zero_idx[9] = _mm_or_si128(zero_idx[2], zero_idx[3]);
+ zero_idx[10] = _mm_or_si128(zero_idx[4], zero_idx[5]);
+ zero_idx[11] = _mm_or_si128(zero_idx[6], zero_idx[7]);
+ zero_idx[12] = _mm_or_si128(zero_idx[8], zero_idx[9]);
+ zero_idx[13] = _mm_or_si128(zero_idx[10], zero_idx[11]);
+ zero_idx[14] = _mm_or_si128(zero_idx[12], zero_idx[13]);
+
+ zero_idx[0] = _mm_unpackhi_epi64(zero_idx[14], zero_idx[14]);
+ zero_idx[1] = _mm_or_si128(zero_idx[0], zero_idx[14]);
+ zero_idx[2] = _mm_srli_epi64(zero_idx[1], 32);
+ zero_flag[0] = _mm_cvtsi128_si32(zero_idx[1]);
+ zero_flag[1] = _mm_cvtsi128_si32(zero_idx[2]);
+
+ if (!zero_flag[0] && !zero_flag[1]) {
+ col[i32 + 0] = _mm_setzero_si128();
+ col[i32 + 1] = _mm_setzero_si128();
+ col[i32 + 2] = _mm_setzero_si128();
+ col[i32 + 3] = _mm_setzero_si128();
+ col[i32 + 4] = _mm_setzero_si128();
+ col[i32 + 5] = _mm_setzero_si128();
+ col[i32 + 6] = _mm_setzero_si128();
+ col[i32 + 7] = _mm_setzero_si128();
+ col[i32 + 8] = _mm_setzero_si128();
+ col[i32 + 9] = _mm_setzero_si128();
+ col[i32 + 10] = _mm_setzero_si128();
+ col[i32 + 11] = _mm_setzero_si128();
+ col[i32 + 12] = _mm_setzero_si128();
+ col[i32 + 13] = _mm_setzero_si128();
+ col[i32 + 14] = _mm_setzero_si128();
+ col[i32 + 15] = _mm_setzero_si128();
+ col[i32 + 16] = _mm_setzero_si128();
+ col[i32 + 17] = _mm_setzero_si128();
+ col[i32 + 18] = _mm_setzero_si128();
+ col[i32 + 19] = _mm_setzero_si128();
+ col[i32 + 20] = _mm_setzero_si128();
+ col[i32 + 21] = _mm_setzero_si128();
+ col[i32 + 22] = _mm_setzero_si128();
+ col[i32 + 23] = _mm_setzero_si128();
+ col[i32 + 24] = _mm_setzero_si128();
+ col[i32 + 25] = _mm_setzero_si128();
+ col[i32 + 26] = _mm_setzero_si128();
+ col[i32 + 27] = _mm_setzero_si128();
+ col[i32 + 28] = _mm_setzero_si128();
+ col[i32 + 29] = _mm_setzero_si128();
+ col[i32 + 30] = _mm_setzero_si128();
+ col[i32 + 31] = _mm_setzero_si128();
+ continue;
+ }
// Transpose 32x8 block to 8x32 block
TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
@@ -3239,38 +3376,38 @@ void vp9_short_idct32x32_add_sse2(int16_t *input, uint8_t *dest, int stride) {
// final stage
if (i < 4) {
// 1_D: Store 32 intermediate results for each 8x32 block.
- col[i * 32 + 0] = _mm_add_epi16(stp1_0, stp1_31);
- col[i * 32 + 1] = _mm_add_epi16(stp1_1, stp1_30);
- col[i * 32 + 2] = _mm_add_epi16(stp1_2, stp1_29);
- col[i * 32 + 3] = _mm_add_epi16(stp1_3, stp1_28);
- col[i * 32 + 4] = _mm_add_epi16(stp1_4, stp1_27);
- col[i * 32 + 5] = _mm_add_epi16(stp1_5, stp1_26);
- col[i * 32 + 6] = _mm_add_epi16(stp1_6, stp1_25);
- col[i * 32 + 7] = _mm_add_epi16(stp1_7, stp1_24);
- col[i * 32 + 8] = _mm_add_epi16(stp1_8, stp1_23);
- col[i * 32 + 9] = _mm_add_epi16(stp1_9, stp1_22);
- col[i * 32 + 10] = _mm_add_epi16(stp1_10, stp1_21);
- col[i * 32 + 11] = _mm_add_epi16(stp1_11, stp1_20);
- col[i * 32 + 12] = _mm_add_epi16(stp1_12, stp1_19);
- col[i * 32 + 13] = _mm_add_epi16(stp1_13, stp1_18);
- col[i * 32 + 14] = _mm_add_epi16(stp1_14, stp1_17);
- col[i * 32 + 15] = _mm_add_epi16(stp1_15, stp1_16);
- col[i * 32 + 16] = _mm_sub_epi16(stp1_15, stp1_16);
- col[i * 32 + 17] = _mm_sub_epi16(stp1_14, stp1_17);
- col[i * 32 + 18] = _mm_sub_epi16(stp1_13, stp1_18);
- col[i * 32 + 19] = _mm_sub_epi16(stp1_12, stp1_19);
- col[i * 32 + 20] = _mm_sub_epi16(stp1_11, stp1_20);
- col[i * 32 + 21] = _mm_sub_epi16(stp1_10, stp1_21);
- col[i * 32 + 22] = _mm_sub_epi16(stp1_9, stp1_22);
- col[i * 32 + 23] = _mm_sub_epi16(stp1_8, stp1_23);
- col[i * 32 + 24] = _mm_sub_epi16(stp1_7, stp1_24);
- col[i * 32 + 25] = _mm_sub_epi16(stp1_6, stp1_25);
- col[i * 32 + 26] = _mm_sub_epi16(stp1_5, stp1_26);
- col[i * 32 + 27] = _mm_sub_epi16(stp1_4, stp1_27);
- col[i * 32 + 28] = _mm_sub_epi16(stp1_3, stp1_28);
- col[i * 32 + 29] = _mm_sub_epi16(stp1_2, stp1_29);
- col[i * 32 + 30] = _mm_sub_epi16(stp1_1, stp1_30);
- col[i * 32 + 31] = _mm_sub_epi16(stp1_0, stp1_31);
+ col[i32 + 0] = _mm_add_epi16(stp1_0, stp1_31);
+ col[i32 + 1] = _mm_add_epi16(stp1_1, stp1_30);
+ col[i32 + 2] = _mm_add_epi16(stp1_2, stp1_29);
+ col[i32 + 3] = _mm_add_epi16(stp1_3, stp1_28);
+ col[i32 + 4] = _mm_add_epi16(stp1_4, stp1_27);
+ col[i32 + 5] = _mm_add_epi16(stp1_5, stp1_26);
+ col[i32 + 6] = _mm_add_epi16(stp1_6, stp1_25);
+ col[i32 + 7] = _mm_add_epi16(stp1_7, stp1_24);
+ col[i32 + 8] = _mm_add_epi16(stp1_8, stp1_23);
+ col[i32 + 9] = _mm_add_epi16(stp1_9, stp1_22);
+ col[i32 + 10] = _mm_add_epi16(stp1_10, stp1_21);
+ col[i32 + 11] = _mm_add_epi16(stp1_11, stp1_20);
+ col[i32 + 12] = _mm_add_epi16(stp1_12, stp1_19);
+ col[i32 + 13] = _mm_add_epi16(stp1_13, stp1_18);
+ col[i32 + 14] = _mm_add_epi16(stp1_14, stp1_17);
+ col[i32 + 15] = _mm_add_epi16(stp1_15, stp1_16);
+ col[i32 + 16] = _mm_sub_epi16(stp1_15, stp1_16);
+ col[i32 + 17] = _mm_sub_epi16(stp1_14, stp1_17);
+ col[i32 + 18] = _mm_sub_epi16(stp1_13, stp1_18);
+ col[i32 + 19] = _mm_sub_epi16(stp1_12, stp1_19);
+ col[i32 + 20] = _mm_sub_epi16(stp1_11, stp1_20);
+ col[i32 + 21] = _mm_sub_epi16(stp1_10, stp1_21);
+ col[i32 + 22] = _mm_sub_epi16(stp1_9, stp1_22);
+ col[i32 + 23] = _mm_sub_epi16(stp1_8, stp1_23);
+ col[i32 + 24] = _mm_sub_epi16(stp1_7, stp1_24);
+ col[i32 + 25] = _mm_sub_epi16(stp1_6, stp1_25);
+ col[i32 + 26] = _mm_sub_epi16(stp1_5, stp1_26);
+ col[i32 + 27] = _mm_sub_epi16(stp1_4, stp1_27);
+ col[i32 + 28] = _mm_sub_epi16(stp1_3, stp1_28);
+ col[i32 + 29] = _mm_sub_epi16(stp1_2, stp1_29);
+ col[i32 + 30] = _mm_sub_epi16(stp1_1, stp1_30);
+ col[i32 + 31] = _mm_sub_epi16(stp1_0, stp1_31);
} else {
const __m128i zero = _mm_setzero_si128();
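The zero_idx OR-reduction above folds all 32 coefficient vectors of an 8x32 strip into two 32-bit flags; if both are zero, the whole strip transforms to zero, so the column buffer is cleared and the 1-D pass skipped. A scalar equivalent of the test (sketch):

    static int strip_is_all_zero(const int16_t *coeffs) {  /* 8 * 32 values */
      int16_t acc = 0;
      int i;
      for (i = 0; i < 8 * 32; ++i)
        acc |= coeffs[i];          /* OR every coefficient together */
      return acc == 0;             /* nonzero anywhere -> run the idct */
    }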
diff --git a/libvpx/vp9/common/x86/vp9_intrapred_ssse3.asm b/libvpx/vp9/common/x86/vp9_intrapred_ssse3.asm
index bc8ed5c1f..8ba26f310 100644
--- a/libvpx/vp9/common/x86/vp9_intrapred_ssse3.asm
+++ b/libvpx/vp9/common/x86/vp9_intrapred_ssse3.asm
@@ -10,6 +10,31 @@
%include "third_party/x86inc/x86inc.asm"
+SECTION_RODATA
+
+pb_1: times 16 db 1
+pw_2: times 8 dw 2
+pb_7m1: times 8 db 7, -1
+pb_15: times 16 db 15
+
+sh_b01234577: db 0, 1, 2, 3, 4, 5, 7, 7
+sh_b12345677: db 1, 2, 3, 4, 5, 6, 7, 7
+sh_b23456777: db 2, 3, 4, 5, 6, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0
+sh_b0123456777777777: db 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7
+sh_b1234567777777777: db 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+sh_b2345677777777777: db 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+sh_b2w01234577: db 0, -1, 1, -1, 2, -1, 3, -1, 4, -1, 5, -1, 7, -1, 7, -1
+sh_b2w12345677: db 1, -1, 2, -1, 3, -1, 4, -1, 5, -1, 6, -1, 7, -1, 7, -1
+sh_b2w23456777: db 2, -1, 3, -1, 4, -1, 5, -1, 6, -1, 7, -1, 7, -1, 7, -1
+sh_b2w01234567: db 0, -1, 1, -1, 2, -1, 3, -1, 4, -1, 5, -1, 6, -1, 7, -1
+sh_b2w12345678: db 1, -1, 2, -1, 3, -1, 4, -1, 5, -1, 6, -1, 7, -1, 8, -1
+sh_b2w23456789: db 2, -1, 3, -1, 4, -1, 5, -1, 6, -1, 7, -1, 8, -1, 9, -1
+sh_b2w89abcdef: db 8, -1, 9, -1, 10, -1, 11, -1, 12, -1, 13, -1, 14, -1, 15, -1
+sh_b2w9abcdeff: db 9, -1, 10, -1, 11, -1, 12, -1, 13, -1, 14, -1, 15, -1, 15, -1
+sh_b2wabcdefff: db 10, -1, 11, -1, 12, -1, 13, -1, 14, -1, 15, -1, 15, -1, 15, -1
+sh_b123456789abcdeff: db 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15
+sh_b23456789abcdefff: db 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 15
+
SECTION .text
INIT_MMX ssse3
@@ -85,3 +110,182 @@ cglobal h_predictor_32x32, 2, 4, 3, dst, stride, line, left
inc lineq
jnz .loop
REP_RET
+
+INIT_MMX ssse3
+cglobal d45_predictor_4x4, 3, 3, 4, dst, stride, above
+ movq m0, [aboveq]
+ pshufb m2, m0, [sh_b23456777]
+ pshufb m1, m0, [sh_b01234577]
+ pshufb m0, [sh_b12345677]
+ pavgb m3, m2, m1
+ pxor m2, m1
+ pand m2, [pb_1]
+ psubb m3, m2
+ pavgb m0, m3
+
+ ; store 4 lines
+ movd [dstq ], m0
+ psrlq m0, 8
+ movd [dstq+strideq], m0
+ lea dstq, [dstq+strideq*2]
+ psrlq m0, 8
+ movd [dstq ], m0
+ psrlq m0, 8
+ movd [dstq+strideq], m0
+ RET
+
+INIT_MMX ssse3
+cglobal d45_predictor_8x8, 3, 3, 4, dst, stride, above
+ movq m0, [aboveq]
+ mova m1, [sh_b12345677]
+ DEFINE_ARGS dst, stride, stride3, line
+ lea stride3q, [strideq*3]
+ pshufb m2, m0, [sh_b23456777]
+ pavgb m3, m2, m0
+ pxor m2, m0
+ pshufb m0, m1
+ pand m2, [pb_1]
+ psubb m3, m2
+ pavgb m0, m3
+
+ ; store 4 lines
+ movq [dstq ], m0
+ pshufb m0, m1
+ movq [dstq+strideq ], m0
+ pshufb m0, m1
+ movq [dstq+strideq*2], m0
+ pshufb m0, m1
+ movq [dstq+stride3q ], m0
+ pshufb m0, m1
+ lea dstq, [dstq+strideq*4]
+
+ ; store next 4 lines
+ movq [dstq ], m0
+ pshufb m0, m1
+ movq [dstq+strideq ], m0
+ pshufb m0, m1
+ movq [dstq+strideq*2], m0
+ pshufb m0, m1
+ movq [dstq+stride3q ], m0
+ RET
+
+INIT_XMM ssse3
+cglobal d45_predictor_16x16, 3, 5, 4, dst, stride, above, dst8, line
+ mova m0, [aboveq]
+ DEFINE_ARGS dst, stride, stride3, dst8, line
+ lea stride3q, [strideq*3]
+ lea dst8q, [dstq+strideq*8]
+ mova m1, [sh_b123456789abcdeff]
+ pshufb m2, m0, [sh_b23456789abcdefff]
+ pavgb m3, m2, m0
+ pxor m2, m0
+ pshufb m0, m1
+ pand m2, [pb_1]
+ psubb m3, m2
+ pavgb m0, m3
+
+ ; first 4 lines and first half of 3rd 4 lines
+ mov lined, 2
+.loop:
+ mova [dstq ], m0
+ movhps [dst8q ], m0
+ pshufb m0, m1
+ mova [dstq +strideq ], m0
+ movhps [dst8q+strideq ], m0
+ pshufb m0, m1
+ mova [dstq +strideq*2 ], m0
+ movhps [dst8q+strideq*2 ], m0
+ pshufb m0, m1
+ mova [dstq +stride3q ], m0
+ movhps [dst8q+stride3q ], m0
+ pshufb m0, m1
+ lea dstq, [dstq +strideq*4]
+ lea dst8q, [dst8q+strideq*4]
+ dec lined
+ jnz .loop
+
+ ; bottom-right 8x8 block
+ movhps [dstq +8], m0
+ movhps [dstq+strideq +8], m0
+ movhps [dstq+strideq*2+8], m0
+ movhps [dstq+stride3q +8], m0
+ lea dstq, [dstq+strideq*4]
+ movhps [dstq +8], m0
+ movhps [dstq+strideq +8], m0
+ movhps [dstq+strideq*2+8], m0
+ movhps [dstq+stride3q +8], m0
+ RET
+
+INIT_XMM ssse3
+cglobal d45_predictor_32x32, 3, 5, 7, dst, stride, above, dst16, line
+ mova m0, [aboveq]
+ mova m4, [aboveq+16]
+ DEFINE_ARGS dst, stride, stride3, dst16, line
+ lea stride3q, [strideq*3]
+ lea dst16q, [dstq +strideq*8]
+ lea dst16q, [dst16q+strideq*8]
+ mova m1, [sh_b123456789abcdeff]
+ pshufb m2, m4, [sh_b23456789abcdefff]
+ pavgb m3, m2, m4
+ pxor m2, m4
+ palignr m5, m4, m0, 1
+ palignr m6, m4, m0, 2
+ pshufb m4, m1
+ pand m2, [pb_1]
+ psubb m3, m2
+ pavgb m4, m3
+ pavgb m3, m0, m6
+ pxor m0, m6
+ pand m0, [pb_1]
+ psubb m3, m0
+ pavgb m5, m3
+
+ ; write 4x4 lines (and the first half of the second 4x4 lines)
+ mov lined, 4
+.loop:
+ mova [dstq ], m5
+ mova [dstq +16], m4
+ mova [dst16q ], m4
+ palignr m3, m4, m5, 1
+ pshufb m4, m1
+ mova [dstq +strideq ], m3
+ mova [dstq +strideq +16], m4
+ mova [dst16q+strideq ], m4
+ palignr m5, m4, m3, 1
+ pshufb m4, m1
+ mova [dstq +strideq*2 ], m5
+ mova [dstq +strideq*2+16], m4
+ mova [dst16q+strideq*2 ], m4
+ palignr m3, m4, m5, 1
+ pshufb m4, m1
+ mova [dstq +stride3q ], m3
+ mova [dstq +stride3q +16], m4
+ mova [dst16q+stride3q ], m4
+ palignr m5, m4, m3, 1
+ pshufb m4, m1
+ lea dstq, [dstq +strideq*4]
+ lea dst16q, [dst16q+strideq*4]
+ dec lined
+ jnz .loop
+
+ ; write second half of second 4x4 lines
+ mova [dstq +16], m4
+ mova [dstq +strideq +16], m4
+ mova [dstq +strideq*2+16], m4
+ mova [dstq +stride3q +16], m4
+ lea dstq, [dstq +strideq*4]
+ mova [dstq +16], m4
+ mova [dstq +strideq +16], m4
+ mova [dstq +strideq*2+16], m4
+ mova [dstq +stride3q +16], m4
+ lea dstq, [dstq +strideq*4]
+ mova [dstq +16], m4
+ mova [dstq +strideq +16], m4
+ mova [dstq +strideq*2+16], m4
+ mova [dstq +stride3q +16], m4
+ lea dstq, [dstq +strideq*4]
+ mova [dstq +16], m4
+ mova [dstq +strideq +16], m4
+ mova [dstq +strideq*2+16], m4
+ mova [dstq +stride3q +16], m4
+ RET
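All four d45 predictors above build the 3-tap smoothing filter (a + 2*b + c + 2) >> 2 out of byte averages: pavgb rounds up, so the (a ^ c) & 1 term (the pxor/pand/psubb sequence) cancels the inner average's extra carry. A scalar model of the trick (sketch):

    #include <stdint.h>

    static uint8_t avg3(uint8_t a, uint8_t b, uint8_t c) {
      /* pavgb(a, c) computes (a + c + 1) >> 1; subtracting (a ^ c) & 1
       * turns that into floor((a + c) / 2), so the outer average yields
       * exactly (a + 2*b + c + 2) >> 2 for all byte inputs. */
      const uint8_t ac = (uint8_t)(((a + c + 1) >> 1) - ((a ^ c) & 1));
      return (uint8_t)((ac + b + 1) >> 1);
    }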
diff --git a/libvpx/vp9/decoder/vp9_decodemv.c b/libvpx/vp9/decoder/vp9_decodemv.c
index 6f0044a4a..a3e2ad39d 100644
--- a/libvpx/vp9/decoder/vp9_decodemv.c
+++ b/libvpx/vp9/decoder/vp9_decodemv.c
@@ -30,8 +30,12 @@ static MB_PREDICTION_MODE read_intra_mode(vp9_reader *r, const vp9_prob *p) {
return (MB_PREDICTION_MODE)treed_read(r, vp9_intra_mode_tree, p);
}
-static MB_PREDICTION_MODE read_inter_mode(vp9_reader *r, const vp9_prob *p) {
- return (MB_PREDICTION_MODE)treed_read(r, vp9_inter_mode_tree, p);
+static MB_PREDICTION_MODE read_inter_mode(VP9_COMMON *cm, vp9_reader *r,
+ uint8_t context) {
+ MB_PREDICTION_MODE mode = treed_read(r, vp9_inter_mode_tree,
+ cm->fc.inter_mode_probs[context]);
+ ++cm->counts.inter_mode[context][inter_mode_offset(mode)];
+ return mode;
}
static int read_segment_id(vp9_reader *r, const struct segmentation *seg) {
@@ -43,9 +47,9 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd,
const uint8_t context = vp9_get_pred_context_tx_size(xd);
const vp9_prob *tx_probs = get_tx_probs(bsize, context, &cm->fc.tx_probs);
TX_SIZE tx_size = vp9_read(r, tx_probs[0]);
- if (tx_size != TX_4X4 && bsize >= BLOCK_SIZE_MB16X16) {
+ if (tx_size != TX_4X4 && bsize >= BLOCK_16X16) {
tx_size += vp9_read(r, tx_probs[1]);
- if (tx_size != TX_8X8 && bsize >= BLOCK_SIZE_SB32X32)
+ if (tx_size != TX_8X8 && bsize >= BLOCK_32X32)
tx_size += vp9_read(r, tx_probs[2]);
}
@@ -54,18 +58,18 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd,
}
static TX_SIZE read_tx_size(VP9D_COMP *pbi, TX_MODE tx_mode,
- BLOCK_SIZE_TYPE bsize, int select_cond,
+ BLOCK_SIZE_TYPE bsize, int allow_select,
vp9_reader *r) {
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
- if (tx_mode == TX_MODE_SELECT && bsize >= BLOCK_SIZE_SB8X8 && select_cond)
+ if (allow_select && tx_mode == TX_MODE_SELECT && bsize >= BLOCK_8X8)
return read_selected_tx_size(cm, xd, bsize, r);
- else if (tx_mode >= ALLOW_32X32 && bsize >= BLOCK_SIZE_SB32X32)
+ else if (tx_mode >= ALLOW_32X32 && bsize >= BLOCK_32X32)
return TX_32X32;
- else if (tx_mode >= ALLOW_16X16 && bsize >= BLOCK_SIZE_MB16X16)
+ else if (tx_mode >= ALLOW_16X16 && bsize >= BLOCK_16X16)
return TX_16X16;
- else if (tx_mode >= ALLOW_8X8 && bsize >= BLOCK_SIZE_SB8X8)
+ else if (tx_mode >= ALLOW_8X8 && bsize >= BLOCK_8X8)
return TX_8X8;
else
return TX_4X4;
@@ -146,8 +150,8 @@ static uint8_t read_skip_coeff(VP9D_COMP *pbi, int segment_id, vp9_reader *r) {
return skip_coeff;
}
-static void read_intra_mode_info(VP9D_COMP *pbi, MODE_INFO *m,
- int mi_row, int mi_col, vp9_reader *r) {
+static void read_intra_frame_mode_info(VP9D_COMP *pbi, MODE_INFO *m,
+ int mi_row, int mi_col, vp9_reader *r) {
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
MB_MODE_INFO *const mbmi = &m->mbmi;
@@ -158,6 +162,7 @@ static void read_intra_mode_info(VP9D_COMP *pbi, MODE_INFO *m,
mbmi->mb_skip_coeff = read_skip_coeff(pbi, mbmi->segment_id, r);
mbmi->txfm_size = read_tx_size(pbi, cm->tx_mode, bsize, 1, r);
mbmi->ref_frame[0] = INTRA_FRAME;
+ mbmi->ref_frame[1] = NONE;
if (bsize >= BLOCK_SIZE_SB8X8) {
const MB_PREDICTION_MODE A = above_block_mode(m, 0, mis);
@@ -166,12 +171,12 @@ static void read_intra_mode_info(VP9D_COMP *pbi, MODE_INFO *m,
mbmi->mode = read_intra_mode(r, vp9_kf_y_mode_prob[A][L]);
} else {
// Only 4x4, 4x8, 8x4 blocks
- const int bw = 1 << b_width_log2(bsize);
- const int bh = 1 << b_height_log2(bsize);
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2
+ const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; // 1 or 2
int idx, idy;
- for (idy = 0; idy < 2; idy += bh) {
- for (idx = 0; idx < 2; idx += bw) {
+ for (idy = 0; idy < 2; idy += num_4x4_h) {
+ for (idx = 0; idx < 2; idx += num_4x4_w) {
const int ib = idy * 2 + idx;
const MB_PREDICTION_MODE A = above_block_mode(m, ib, mis);
const MB_PREDICTION_MODE L = (xd->left_available || idx) ?
@@ -179,9 +184,9 @@ static void read_intra_mode_info(VP9D_COMP *pbi, MODE_INFO *m,
const MB_PREDICTION_MODE b_mode = read_intra_mode(r,
vp9_kf_y_mode_prob[A][L]);
m->bmi[ib].as_mode = b_mode;
- if (bh == 2)
+ if (num_4x4_h == 2)
m->bmi[ib + 2].as_mode = b_mode;
- if (bw == 2)
+ if (num_4x4_w == 2)
m->bmi[ib + 1].as_mode = b_mode;
}
}
@@ -228,16 +233,16 @@ static int read_mv_component(vp9_reader *r,
static INLINE void read_mv(vp9_reader *r, MV *mv, const MV *ref,
const nmv_context *ctx,
- nmv_context_counts *counts, int usehp) {
+ nmv_context_counts *counts, int allow_hp) {
const MV_JOINT_TYPE j = treed_read(r, vp9_mv_joint_tree, ctx->joints);
+ const int use_hp = allow_hp && vp9_use_mv_hp(ref);
MV diff = {0, 0};
- usehp = usehp && vp9_use_mv_hp(ref);
if (mv_joint_vertical(j))
- diff.row = read_mv_component(r, &ctx->comps[0], usehp);
+ diff.row = read_mv_component(r, &ctx->comps[0], use_hp);
if (mv_joint_horizontal(j))
- diff.col = read_mv_component(r, &ctx->comps[1], usehp);
+ diff.col = read_mv_component(r, &ctx->comps[1], use_hp);
vp9_inc_mv(&diff, counts);
@@ -245,29 +250,30 @@ static INLINE void read_mv(vp9_reader *r, MV *mv, const MV *ref,
mv->col = ref->col + diff.col;
}
-static void update_mv(vp9_reader *r, vp9_prob *p, vp9_prob upd_p) {
- if (vp9_read(r, upd_p))
+static void update_mv(vp9_reader *r, vp9_prob *p) {
+ if (vp9_read(r, VP9_NMV_UPDATE_PROB))
*p = (vp9_read_literal(r, 7) << 1) | 1;
}
-static void read_mv_probs(vp9_reader *r, nmv_context *mvc, int usehp) {
+static void read_mv_probs(vp9_reader *r, nmv_context *mvc, int allow_hp) {
int i, j, k;
for (j = 0; j < MV_JOINTS - 1; ++j)
- update_mv(r, &mvc->joints[j], VP9_NMV_UPDATE_PROB);
+ update_mv(r, &mvc->joints[j]);
for (i = 0; i < 2; ++i) {
nmv_component *const comp = &mvc->comps[i];
- update_mv(r, &comp->sign, VP9_NMV_UPDATE_PROB);
+ update_mv(r, &comp->sign);
+
for (j = 0; j < MV_CLASSES - 1; ++j)
- update_mv(r, &comp->classes[j], VP9_NMV_UPDATE_PROB);
+ update_mv(r, &comp->classes[j]);
for (j = 0; j < CLASS0_SIZE - 1; ++j)
- update_mv(r, &comp->class0[j], VP9_NMV_UPDATE_PROB);
+ update_mv(r, &comp->class0[j]);
for (j = 0; j < MV_OFFSET_BITS; ++j)
- update_mv(r, &comp->bits[j], VP9_NMV_UPDATE_PROB);
+ update_mv(r, &comp->bits[j]);
}
for (i = 0; i < 2; ++i) {
@@ -275,23 +281,23 @@ static void read_mv_probs(vp9_reader *r, nmv_context *mvc, int usehp) {
for (j = 0; j < CLASS0_SIZE; ++j)
for (k = 0; k < 3; ++k)
- update_mv(r, &comp->class0_fp[j][k], VP9_NMV_UPDATE_PROB);
+ update_mv(r, &comp->class0_fp[j][k]);
for (j = 0; j < 3; ++j)
- update_mv(r, &comp->fp[j], VP9_NMV_UPDATE_PROB);
+ update_mv(r, &comp->fp[j]);
}
- if (usehp) {
+ if (allow_hp) {
for (i = 0; i < 2; ++i) {
- update_mv(r, &mvc->comps[i].class0_hp, VP9_NMV_UPDATE_PROB);
- update_mv(r, &mvc->comps[i].hp, VP9_NMV_UPDATE_PROB);
+ update_mv(r, &mvc->comps[i].class0_hp);
+ update_mv(r, &mvc->comps[i].hp);
}
}
}
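update_mv() now hard-codes VP9_NMV_UPDATE_PROB; when the update bit is set, the new probability arrives as a 7-bit literal and is restored to an odd 8-bit value, so a zero probability can never be signalled:

    /* literal 0   -> (0   << 1) | 1 ==   1
     * literal 64  -> (64  << 1) | 1 == 129
     * literal 127 -> (127 << 1) | 1 == 255 */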
// Read the reference frame
-static void read_ref_frame(VP9D_COMP *pbi, vp9_reader *r,
- int segment_id, MV_REFERENCE_FRAME ref_frame[2]) {
+static void read_ref_frames(VP9D_COMP *pbi, vp9_reader *r,
+ int segment_id, MV_REFERENCE_FRAME ref_frame[2]) {
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
FRAME_CONTEXT *const fc = &cm->fc;
@@ -320,18 +326,19 @@ static void read_ref_frame(VP9D_COMP *pbi, vp9_reader *r,
ref_frame[fix_ref_idx] = cm->comp_fixed_ref;
ref_frame[!fix_ref_idx] = cm->comp_var_ref[b];
} else {
- const int ref1_ctx = vp9_get_pred_context_single_ref_p1(xd);
- ref_frame[1] = NONE;
- if (vp9_read(r, fc->single_ref_prob[ref1_ctx][0])) {
- const int ref2_ctx = vp9_get_pred_context_single_ref_p2(xd);
- const int b = vp9_read(r, fc->single_ref_prob[ref2_ctx][1]);
- ref_frame[0] = b ? ALTREF_FRAME : GOLDEN_FRAME;
- counts->single_ref[ref1_ctx][0][1]++;
- counts->single_ref[ref2_ctx][1][b]++;
+ const int ctx0 = vp9_get_pred_context_single_ref_p1(xd);
+ const int bit0 = vp9_read(r, fc->single_ref_prob[ctx0][0]);
+ ++counts->single_ref[ctx0][0][bit0];
+ if (bit0) {
+ const int ctx1 = vp9_get_pred_context_single_ref_p2(xd);
+ const int bit1 = vp9_read(r, fc->single_ref_prob[ctx1][1]);
+ ref_frame[0] = bit1 ? ALTREF_FRAME : GOLDEN_FRAME;
+ ++counts->single_ref[ctx1][1][bit1];
} else {
ref_frame[0] = LAST_FRAME;
- counts->single_ref[ref1_ctx][0][0]++;
}
+
+ ref_frame[1] = NONE;
}
}
}
@@ -359,16 +366,6 @@ static INLINE COMPPREDMODE_TYPE read_comp_pred_mode(vp9_reader *r) {
return mode;
}
-static INLINE void assign_and_clamp_mv(int_mv *dst, const int_mv *src,
- int mb_to_left_edge,
- int mb_to_right_edge,
- int mb_to_top_edge,
- int mb_to_bottom_edge) {
- dst->as_int = src->as_int;
- clamp_mv(dst, mb_to_left_edge, mb_to_right_edge, mb_to_top_edge,
- mb_to_bottom_edge);
-}
-
static INLINE INTERPOLATIONFILTERTYPE read_switchable_filter_type(
VP9D_COMP *pbi, vp9_reader *r) {
VP9_COMMON *const cm = &pbi->common;
@@ -380,32 +377,35 @@ static INLINE INTERPOLATIONFILTERTYPE read_switchable_filter_type(
return vp9_switchable_interp[index];
}
-static void read_intra_block_modes(VP9D_COMP *pbi, MODE_INFO *mi,
- vp9_reader *r) {
+static void read_intra_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi,
+ vp9_reader *r) {
VP9_COMMON *const cm = &pbi->common;
MB_MODE_INFO *const mbmi = &mi->mbmi;
const BLOCK_SIZE_TYPE bsize = mi->mbmi.sb_type;
- const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
+
+ mbmi->ref_frame[0] = INTRA_FRAME;
+ mbmi->ref_frame[1] = NONE;
if (bsize >= BLOCK_SIZE_SB8X8) {
- const int size_group = MIN(3, MIN(bwl, bhl));
+ const int size_group = size_group_lookup[bsize];
mbmi->mode = read_intra_mode(r, cm->fc.y_mode_prob[size_group]);
cm->counts.y_mode[size_group][mbmi->mode]++;
} else {
// Only 4x4, 4x8, 8x4 blocks
- const int bw = 1 << bwl, bh = 1 << bhl;
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2
+ const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; // 1 or 2
int idx, idy;
- for (idy = 0; idy < 2; idy += bh) {
- for (idx = 0; idx < 2; idx += bw) {
+ for (idy = 0; idy < 2; idy += num_4x4_h) {
+ for (idx = 0; idx < 2; idx += num_4x4_w) {
const int ib = idy * 2 + idx;
const int b_mode = read_intra_mode(r, cm->fc.y_mode_prob[0]);
mi->bmi[ib].as_mode = b_mode;
cm->counts.y_mode[0][b_mode]++;
- if (bh == 2)
+ if (num_4x4_h == 2)
mi->bmi[ib + 2].as_mode = b_mode;
- if (bw == 2)
+ if (num_4x4_w == 2)
mi->bmi[ib + 1].as_mode = b_mode;
}
}
@@ -416,203 +416,197 @@ static void read_intra_block_modes(VP9D_COMP *pbi, MODE_INFO *mi,
cm->counts.uv_mode[mbmi->mode][mbmi->uv_mode]++;
}
-static MV_REFERENCE_FRAME read_reference_frame(VP9D_COMP *pbi, int segment_id,
- vp9_reader *r) {
+static int read_is_inter_block(VP9D_COMP *pbi, int segment_id, vp9_reader *r) {
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
- MV_REFERENCE_FRAME ref;
- if (!vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_REF_FRAME)) {
- const int ctx = vp9_get_pred_context_intra_inter(xd);
- ref = (MV_REFERENCE_FRAME)
- vp9_read(r, vp9_get_pred_prob_intra_inter(cm, xd));
- cm->counts.intra_inter[ctx][ref != INTRA_FRAME]++;
+ if (vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_REF_FRAME)) {
+ return vp9_get_segdata(&xd->seg, segment_id, SEG_LVL_REF_FRAME) !=
+ INTRA_FRAME;
} else {
- ref = (MV_REFERENCE_FRAME) vp9_get_segdata(&xd->seg, segment_id,
- SEG_LVL_REF_FRAME) != INTRA_FRAME;
+ const int ctx = vp9_get_pred_context_intra_inter(xd);
+ const int is_inter = vp9_read(r, vp9_get_pred_prob_intra_inter(cm, xd));
+ ++cm->counts.intra_inter[ctx][is_inter];
+ return is_inter;
}
- return ref;
}
-static void read_inter_mode_info(VP9D_COMP *pbi, MODE_INFO *mi,
- int mi_row, int mi_col, vp9_reader *r) {
+static void read_inter_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi,
+ int mi_row, int mi_col, vp9_reader *r) {
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
nmv_context *const nmvc = &cm->fc.nmvc;
MB_MODE_INFO *const mbmi = &mi->mbmi;
-
int_mv *const mv0 = &mbmi->mv[0];
int_mv *const mv1 = &mbmi->mv[1];
- const BLOCK_SIZE_TYPE bsize = mi->mbmi.sb_type;
- const int bw = 1 << b_width_log2(bsize);
- const int bh = 1 << b_height_log2(bsize);
-
- int idx, idy;
+ const BLOCK_SIZE_TYPE bsize = mbmi->sb_type;
+ const int allow_hp = xd->allow_high_precision_mv;
- mbmi->segment_id = read_inter_segment_id(pbi, mi_row, mi_col, r);
- mbmi->mb_skip_coeff = read_skip_coeff(pbi, mbmi->segment_id, r);
- mbmi->ref_frame[0] = read_reference_frame(pbi, mbmi->segment_id, r);
- mbmi->ref_frame[1] = NONE;
- mbmi->txfm_size = read_tx_size(pbi, cm->tx_mode, bsize,
- (!mbmi->mb_skip_coeff || mbmi->ref_frame[0] == INTRA_FRAME), r);
+ int_mv nearest, nearby, best_mv;
+ int_mv nearest_second, nearby_second, best_mv_second;
+ uint8_t inter_mode_ctx;
+ MV_REFERENCE_FRAME ref0, ref1;
- if (mbmi->ref_frame[0] != INTRA_FRAME) {
- int_mv nearest, nearby, best_mv;
- int_mv nearest_second, nearby_second, best_mv_second;
- vp9_prob *mv_ref_p;
- MV_REFERENCE_FRAME ref0, ref1;
+ read_ref_frames(pbi, r, mbmi->segment_id, mbmi->ref_frame);
+ ref0 = mbmi->ref_frame[0];
+ ref1 = mbmi->ref_frame[1];
- read_ref_frame(pbi, r, mbmi->segment_id, mbmi->ref_frame);
- ref0 = mbmi->ref_frame[0];
- ref1 = mbmi->ref_frame[1];
+ vp9_find_mv_refs(cm, xd, mi, xd->prev_mode_info_context,
+ ref0, mbmi->ref_mvs[ref0], cm->ref_frame_sign_bias,
+ mi_row, mi_col);
- vp9_find_mv_refs(cm, xd, mi, xd->prev_mode_info_context,
- ref0, mbmi->ref_mvs[ref0], cm->ref_frame_sign_bias);
+ inter_mode_ctx = mbmi->mb_mode_context[ref0];
- mv_ref_p = cm->fc.inter_mode_probs[mbmi->mb_mode_context[ref0]];
+ if (vp9_segfeature_active(&xd->seg, mbmi->segment_id, SEG_LVL_SKIP))
+ mbmi->mode = ZEROMV;
+ else if (bsize >= BLOCK_SIZE_SB8X8)
+ mbmi->mode = read_inter_mode(cm, r, inter_mode_ctx);
- if (vp9_segfeature_active(&xd->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
- mbmi->mode = ZEROMV;
- } else if (bsize >= BLOCK_SIZE_SB8X8) {
- mbmi->mode = read_inter_mode(r, mv_ref_p);
- vp9_accum_mv_refs(cm, mbmi->mode, mbmi->mb_mode_context[ref0]);
- }
- mbmi->uv_mode = DC_PRED;
+ mbmi->uv_mode = DC_PRED;
- // nearest, nearby
- if (bsize < BLOCK_SIZE_SB8X8 || mbmi->mode != ZEROMV) {
- vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref0], &nearest, &nearby);
- best_mv.as_int = mbmi->ref_mvs[ref0][0].as_int;
- }
+ // nearest, nearby
+ if (bsize < BLOCK_SIZE_SB8X8 || mbmi->mode != ZEROMV) {
+ vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref0], &nearest, &nearby);
+ best_mv.as_int = mbmi->ref_mvs[ref0][0].as_int;
+ }
- mbmi->interp_filter = cm->mcomp_filter_type == SWITCHABLE
- ? read_switchable_filter_type(pbi, r)
- : cm->mcomp_filter_type;
+ mbmi->interp_filter = cm->mcomp_filter_type == SWITCHABLE
+ ? read_switchable_filter_type(pbi, r)
+ : cm->mcomp_filter_type;
- if (ref1 > INTRA_FRAME) {
- vp9_find_mv_refs(cm, xd, mi, xd->prev_mode_info_context,
- ref1, mbmi->ref_mvs[ref1], cm->ref_frame_sign_bias);
+ if (ref1 > INTRA_FRAME) {
+ vp9_find_mv_refs(cm, xd, mi, xd->prev_mode_info_context,
+ ref1, mbmi->ref_mvs[ref1], cm->ref_frame_sign_bias,
+ mi_row, mi_col);
- if (bsize < BLOCK_SIZE_SB8X8 || mbmi->mode != ZEROMV) {
- vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref1],
- &nearest_second, &nearby_second);
- best_mv_second.as_int = mbmi->ref_mvs[ref1][0].as_int;
- }
+ if (bsize < BLOCK_SIZE_SB8X8 || mbmi->mode != ZEROMV) {
+ vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref1],
+ &nearest_second, &nearby_second);
+ best_mv_second.as_int = mbmi->ref_mvs[ref1][0].as_int;
}
+ }
+ if (bsize < BLOCK_SIZE_SB8X8) {
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2
+ const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; // 1 or 2
+ int idx, idy;
+ for (idy = 0; idy < 2; idy += num_4x4_h) {
+ for (idx = 0; idx < 2; idx += num_4x4_w) {
+ int_mv blockmv, secondmv;
+ const int j = idy * 2 + idx;
+ const int b_mode = read_inter_mode(cm, r, inter_mode_ctx);
- if (mbmi->sb_type < BLOCK_SIZE_SB8X8) {
- for (idy = 0; idy < 2; idy += bh) {
- for (idx = 0; idx < 2; idx += bw) {
- int_mv blockmv, secondmv;
- const int j = idy * 2 + idx;
- const int blockmode = read_inter_mode(r, mv_ref_p);
+ if (b_mode == NEARESTMV || b_mode == NEARMV) {
+ vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest, &nearby, j, 0,
+ mi_row, mi_col);
- vp9_accum_mv_refs(cm, blockmode, mbmi->mb_mode_context[ref0]);
- if (blockmode == NEARESTMV || blockmode == NEARMV) {
- vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest, &nearby, j, 0);
- if (ref1 > 0)
- vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest_second,
- &nearby_second, j, 1);
- }
-
- switch (blockmode) {
- case NEWMV:
- read_mv(r, &blockmv.as_mv, &best_mv.as_mv, nmvc,
- &cm->counts.mv, xd->allow_high_precision_mv);
-
- if (ref1 > 0)
- read_mv(r, &secondmv.as_mv, &best_mv_second.as_mv, nmvc,
- &cm->counts.mv, xd->allow_high_precision_mv);
- break;
- case NEARESTMV:
- blockmv.as_int = nearest.as_int;
- if (ref1 > 0)
- secondmv.as_int = nearest_second.as_int;
- break;
- case NEARMV:
- blockmv.as_int = nearby.as_int;
- if (ref1 > 0)
- secondmv.as_int = nearby_second.as_int;
- break;
- case ZEROMV:
- blockmv.as_int = 0;
- if (ref1 > 0)
- secondmv.as_int = 0;
- break;
- default:
- assert(!"Invalid inter mode value");
- }
- mi->bmi[j].as_mv[0].as_int = blockmv.as_int;
if (ref1 > 0)
- mi->bmi[j].as_mv[1].as_int = secondmv.as_int;
-
- if (bh == 2)
- mi->bmi[j + 2] = mi->bmi[j];
- if (bw == 2)
- mi->bmi[j + 1] = mi->bmi[j];
- mi->mbmi.mode = blockmode;
+ vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest_second,
+ &nearby_second, j, 1,
+ mi_row, mi_col);
}
- }
- mv0->as_int = mi->bmi[3].as_mv[0].as_int;
- mv1->as_int = mi->bmi[3].as_mv[1].as_int;
- } else {
- const int mb_to_top_edge = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
- const int mb_to_bottom_edge = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
- const int mb_to_left_edge = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
- const int mb_to_right_edge = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
-
- switch (mbmi->mode) {
- case NEARMV:
- // Clip "next_nearest" so that it does not extend to far out of image
- assign_and_clamp_mv(mv0, &nearby, mb_to_left_edge,
- mb_to_right_edge,
- mb_to_top_edge,
- mb_to_bottom_edge);
- if (ref1 > 0)
- assign_and_clamp_mv(mv1, &nearby_second, mb_to_left_edge,
- mb_to_right_edge,
- mb_to_top_edge,
- mb_to_bottom_edge);
- break;
-
- case NEARESTMV:
- // Clip "next_nearest" so that it does not extend to far out of image
- assign_and_clamp_mv(mv0, &nearest, mb_to_left_edge,
- mb_to_right_edge,
- mb_to_top_edge,
- mb_to_bottom_edge);
- if (ref1 > 0)
- assign_and_clamp_mv(mv1, &nearest_second, mb_to_left_edge,
- mb_to_right_edge,
- mb_to_top_edge,
- mb_to_bottom_edge);
- break;
-
- case ZEROMV:
- mv0->as_int = 0;
- if (ref1 > 0)
- mv1->as_int = 0;
- break;
+ switch (b_mode) {
+ case NEWMV:
+ read_mv(r, &blockmv.as_mv, &best_mv.as_mv, nmvc,
+ &cm->counts.mv, allow_hp);
- case NEWMV:
- read_mv(r, &mv0->as_mv, &best_mv.as_mv, nmvc, &cm->counts.mv,
- xd->allow_high_precision_mv);
- if (ref1 > 0)
- read_mv(r, &mv1->as_mv, &best_mv_second.as_mv, nmvc,
- &cm->counts.mv, xd->allow_high_precision_mv);
- break;
- default:
- assert(!"Invalid inter mode value");
+ if (ref1 > 0)
+ read_mv(r, &secondmv.as_mv, &best_mv_second.as_mv, nmvc,
+ &cm->counts.mv, allow_hp);
+ break;
+ case NEARESTMV:
+ blockmv.as_int = nearest.as_int;
+ if (ref1 > 0)
+ secondmv.as_int = nearest_second.as_int;
+ break;
+ case NEARMV:
+ blockmv.as_int = nearby.as_int;
+ if (ref1 > 0)
+ secondmv.as_int = nearby_second.as_int;
+ break;
+ case ZEROMV:
+ blockmv.as_int = 0;
+ if (ref1 > 0)
+ secondmv.as_int = 0;
+ break;
+ default:
+ assert(!"Invalid inter mode value");
+ }
+ mi->bmi[j].as_mv[0].as_int = blockmv.as_int;
+ if (ref1 > 0)
+ mi->bmi[j].as_mv[1].as_int = secondmv.as_int;
+
+ if (num_4x4_h == 2)
+ mi->bmi[j + 2] = mi->bmi[j];
+ if (num_4x4_w == 2)
+ mi->bmi[j + 1] = mi->bmi[j];
+ mi->mbmi.mode = b_mode;
}
}
+
+ mv0->as_int = mi->bmi[3].as_mv[0].as_int;
+ mv1->as_int = mi->bmi[3].as_mv[1].as_int;
} else {
- mv0->as_int = 0; // required for left and above block mv
- read_intra_block_modes(pbi, mi, r);
+ switch (mbmi->mode) {
+ case NEARMV:
+ mv0->as_int = nearby.as_int;
+ clamp_mv2(&mv0->as_mv, xd);
+
+ if (ref1 > 0) {
+ mv1->as_int = nearby_second.as_int;
+ clamp_mv2(&mv1->as_mv, xd);
+ }
+ break;
+
+ case NEARESTMV:
+ mv0->as_int = nearest.as_int;
+ clamp_mv2(&mv0->as_mv, xd);
+
+ if (ref1 > 0) {
+ mv1->as_int = nearest_second.as_int;
+ clamp_mv2(&mv1->as_mv, xd);
+ }
+ break;
+
+ case ZEROMV:
+ mv0->as_int = 0;
+ if (ref1 > 0)
+ mv1->as_int = 0;
+ break;
+
+ case NEWMV:
+ read_mv(r, &mv0->as_mv, &best_mv.as_mv, nmvc, &cm->counts.mv, allow_hp);
+ if (ref1 > 0)
+ read_mv(r, &mv1->as_mv, &best_mv_second.as_mv, nmvc, &cm->counts.mv,
+ allow_hp);
+ break;
+ default:
+ assert(!"Invalid inter mode value");
+ }
}
}
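
Editor's note: the NEARMV/NEARESTMV arms above now clamp through clamp_mv2() instead of passing explicit edge margins to assign_and_clamp_mv(). A minimal standalone sketch of the clamping idea follows; the MV struct, clamp helpers and bounds here are illustrative stand-ins, not the libvpx types.

#include <stdio.h>

typedef struct { int row, col; } MV;  /* 1/8-pel units, as in VP9 */

static int clamp_int(int v, int lo, int hi) {
  return v < lo ? lo : (v > hi ? hi : v);
}

/* Keep the vector inside the frame plus a small border so that motion
 * compensation never reads outside the padded reference buffer. */
static void clamp_mv(MV *mv, int min_col, int max_col,
                     int min_row, int max_row) {
  mv->col = clamp_int(mv->col, min_col, max_col);
  mv->row = clamp_int(mv->row, min_row, max_row);
}

int main(void) {
  MV mv = { -5000, 5000 };
  clamp_mv(&mv, -640, 640, -640, 640);  /* bounds are illustrative */
  printf("clamped to (%d, %d)\n", mv.row, mv.col);
  return 0;
}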
+static void read_inter_frame_mode_info(VP9D_COMP *pbi, MODE_INFO *mi,
+ int mi_row, int mi_col, vp9_reader *r) {
+ VP9_COMMON *const cm = &pbi->common;
+ MB_MODE_INFO *const mbmi = &mi->mbmi;
+ int inter_block;
+
+ mbmi->mv[0].as_int = 0;
+ mbmi->mv[1].as_int = 0;
+ mbmi->segment_id = read_inter_segment_id(pbi, mi_row, mi_col, r);
+ mbmi->mb_skip_coeff = read_skip_coeff(pbi, mbmi->segment_id, r);
+ inter_block = read_is_inter_block(pbi, mbmi->segment_id, r);
+ mbmi->txfm_size = read_tx_size(pbi, cm->tx_mode, mbmi->sb_type,
+ !mbmi->mb_skip_coeff || !inter_block, r);
+
+ if (inter_block)
+ read_inter_block_mode_info(pbi, mi, mi_row, mi_col, r);
+ else
+ read_intra_block_mode_info(pbi, mi, r);
+}
+
static void read_comp_pred(VP9_COMMON *cm, vp9_reader *r) {
int i;
@@ -690,9 +684,9 @@ void vp9_read_mode_info(VP9D_COMP* pbi, int mi_row, int mi_col, vp9_reader *r) {
int x, y;
if (cm->frame_type == KEY_FRAME || cm->intra_only)
- read_intra_mode_info(pbi, mi, mi_row, mi_col, r);
+ read_intra_frame_mode_info(pbi, mi, mi_row, mi_col, r);
else
- read_inter_mode_info(pbi, mi, mi_row, mi_col, r);
+ read_inter_frame_mode_info(pbi, mi, mi_row, mi_col, r);
for (y = 0; y < y_mis; y++)
for (x = !y; x < x_mis; x++)
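
Editor's note: the sub-8x8 loop in read_inter_block_mode_info() decodes one mode per covered 4x4 position and duplicates the result into the neighbouring bmi[] slots when the partition is only one unit wide or high. A self-contained sketch of just that replication pattern (grid and mode values are dummies):

#include <stdio.h>

int main(void) {
  int bmi[4] = { 0, 0, 0, 0 };  /* 2x2 grid of 4x4 units, raster order */
  const int num_4x4_w = 1;      /* e.g. BLOCK_4X8: each half is one    */
  const int num_4x4_h = 2;      /* unit wide and two units high        */
  int idx, idy, j, mode = 0;

  for (idy = 0; idy < 2; idy += num_4x4_h) {
    for (idx = 0; idx < 2; idx += num_4x4_w) {
      j = idy * 2 + idx;
      bmi[j] = ++mode;                          /* one decoded b_mode */
      if (num_4x4_h == 2) bmi[j + 2] = bmi[j];  /* copy to row below  */
      if (num_4x4_w == 2) bmi[j + 1] = bmi[j];  /* copy to the right  */
    }
  }
  for (j = 0; j < 4; j++) printf("bmi[%d] = %d\n", j, bmi[j]);
  return 0;
}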
diff --git a/libvpx/vp9/decoder/vp9_decodemv.h b/libvpx/vp9/decoder/vp9_decodemv.h
index 4073d9e04..462d2e398 100644
--- a/libvpx/vp9/decoder/vp9_decodemv.h
+++ b/libvpx/vp9/decoder/vp9_decodemv.h
@@ -12,6 +12,7 @@
#define VP9_DECODER_VP9_DECODEMV_H_
#include "vp9/decoder/vp9_onyxd_int.h"
+#include "vp9/decoder/vp9_dboolhuff.h"
void vp9_prepare_read_mode_info(VP9D_COMP* pbi, vp9_reader *r);
diff --git a/libvpx/vp9/decoder/vp9_decodframe.c b/libvpx/vp9/decoder/vp9_decodframe.c
index ffec8ea44..feb602402 100644
--- a/libvpx/vp9/decoder/vp9_decodframe.c
+++ b/libvpx/vp9/decoder/vp9_decodframe.c
@@ -31,8 +31,11 @@
#include "vp9/decoder/vp9_detokenize.h"
#include "vp9/decoder/vp9_decodemv.h"
#include "vp9/decoder/vp9_dsubexp.h"
+#include "vp9/decoder/vp9_idct_blk.h"
#include "vp9/decoder/vp9_onyxd_int.h"
#include "vp9/decoder/vp9_read_bit_buffer.h"
+#include "vp9/decoder/vp9_thread.h"
+#include "vp9/decoder/vp9_treereader.h"
static int read_be32(const uint8_t *p) {
return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
@@ -59,17 +62,17 @@ static void read_tx_probs(struct tx_probs *tx_probs, vp9_reader *r) {
int i, j;
for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
- for (j = 0; j < TX_SIZE_MAX_SB - 3; ++j)
+ for (j = 0; j < TX_SIZES - 3; ++j)
if (vp9_read(r, VP9_MODE_UPDATE_PROB))
vp9_diff_update_prob(r, &tx_probs->p8x8[i][j]);
for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
- for (j = 0; j < TX_SIZE_MAX_SB - 2; ++j)
+ for (j = 0; j < TX_SIZES - 2; ++j)
if (vp9_read(r, VP9_MODE_UPDATE_PROB))
vp9_diff_update_prob(r, &tx_probs->p16x16[i][j]);
for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
- for (j = 0; j < TX_SIZE_MAX_SB - 1; ++j)
+ for (j = 0; j < TX_SIZES - 1; ++j)
if (vp9_read(r, VP9_MODE_UPDATE_PROB))
vp9_diff_update_prob(r, &tx_probs->p32x32[i][j]);
}
@@ -138,8 +141,8 @@ static void decode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
const int mode = plane == 0 ? mi->mbmi.mode
: mi->mbmi.uv_mode;
- if (plane == 0 && mi->mbmi.sb_type < BLOCK_SIZE_SB8X8) {
- assert(bsize == BLOCK_SIZE_SB8X8);
+ if (plane == 0 && mi->mbmi.sb_type < BLOCK_8X8) {
+ assert(bsize == BLOCK_8X8);
b_mode = mi->bmi[raster_block].as_mode;
} else {
b_mode = mode;
@@ -223,7 +226,7 @@ static void decode_modes_b(VP9D_COMP *pbi, int mi_row, int mi_col,
vp9_reader *r, BLOCK_SIZE_TYPE bsize) {
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
- const int less8x8 = bsize < BLOCK_SIZE_SB8X8;
+ const int less8x8 = bsize < BLOCK_8X8;
MB_MODE_INFO *mbmi;
if (less8x8)
@@ -234,12 +237,12 @@ static void decode_modes_b(VP9D_COMP *pbi, int mi_row, int mi_col,
vp9_read_mode_info(pbi, mi_row, mi_col, r);
if (less8x8)
- bsize = BLOCK_SIZE_SB8X8;
+ bsize = BLOCK_8X8;
// Has to be called after set_offsets
mbmi = &xd->mode_info_context->mbmi;
- if (mbmi->ref_frame[0] == INTRA_FRAME) {
+ if (!is_inter_block(mbmi)) {
// Intra reconstruction
decode_tokens(pbi, bsize, r);
foreach_transformed_block(xd, bsize, decode_block_intra, xd);
@@ -280,12 +283,12 @@ static void decode_modes_sb(VP9D_COMP *pbi, int mi_row, int mi_col,
if (mi_row >= pc->mi_rows || mi_col >= pc->mi_cols)
return;
- if (bsize < BLOCK_SIZE_SB8X8) {
+ if (bsize < BLOCK_8X8) {
if (xd->ab_index != 0)
return;
} else {
int pl;
- const int idx = check_bsize_coverage(pc, xd, mi_row, mi_col, bsize);
+ const int idx = check_bsize_coverage(pc, mi_row, mi_col, bsize);
set_partition_seg_context(pc, xd, mi_row, mi_col);
pl = partition_plane_context(xd, bsize);
@@ -332,8 +335,8 @@ static void decode_modes_sb(VP9D_COMP *pbi, int mi_row, int mi_col,
}
// update partition context
- if (bsize >= BLOCK_SIZE_SB8X8 &&
- (bsize == BLOCK_SIZE_SB8X8 || partition != PARTITION_SPLIT)) {
+ if (bsize >= BLOCK_8X8 &&
+ (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) {
set_partition_seg_context(pc, xd, mi_row, mi_col);
update_partition_context(xd, subsize, bsize);
}
@@ -499,7 +502,7 @@ static INTERPOLATIONFILTERTYPE read_interp_filter_type(
: vp9_rb_read_literal(rb, 2);
}
-static void read_frame_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb,
+static void read_frame_size(struct vp9_read_bit_buffer *rb,
int *width, int *height) {
const int w = vp9_rb_read_literal(rb, 16) + 1;
const int h = vp9_rb_read_literal(rb, 16) + 1;
@@ -507,12 +510,11 @@ static void read_frame_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb,
*height = h;
}
-static void setup_display_size(VP9D_COMP *pbi, struct vp9_read_bit_buffer *rb) {
- VP9_COMMON *const cm = &pbi->common;
+static void setup_display_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
cm->display_width = cm->width;
cm->display_height = cm->height;
if (vp9_rb_read_bit(rb))
- read_frame_size(cm, rb, &cm->display_width, &cm->display_height);
+ read_frame_size(rb, &cm->display_width, &cm->display_height);
}
static void apply_frame_size(VP9D_COMP *pbi, int width, int height) {
@@ -548,10 +550,9 @@ static void apply_frame_size(VP9D_COMP *pbi, int width, int height) {
static void setup_frame_size(VP9D_COMP *pbi,
struct vp9_read_bit_buffer *rb) {
- VP9_COMMON *const cm = &pbi->common;
int width, height;
- read_frame_size(cm, rb, &width, &height);
- setup_display_size(pbi, rb);
+ read_frame_size(rb, &width, &height);
+ setup_display_size(&pbi->common, rb);
apply_frame_size(pbi, width, height);
}
@@ -572,21 +573,29 @@ static void setup_frame_size_with_refs(VP9D_COMP *pbi,
}
if (!found)
- read_frame_size(cm, rb, &width, &height);
+ read_frame_size(rb, &width, &height);
if (!width || !height)
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
"Referenced frame with invalid size");
- setup_display_size(pbi, rb);
+ setup_display_size(cm, rb);
apply_frame_size(pbi, width, height);
}
static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) {
+ const int num_threads = pbi->oxcf.max_threads;
VP9_COMMON *const pc = &pbi->common;
int mi_row, mi_col;
if (pbi->do_loopfilter_inline) {
+ if (num_threads > 1) {
+ LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
+ lf_data->frame_buffer = &pbi->common.yv12_fb[pbi->common.new_fb_idx];
+ lf_data->cm = pc;
+ lf_data->xd = pbi->mb;
+ lf_data->y_only = 0;
+ }
vp9_loop_filter_frame_init(pc, &pbi->mb, pbi->mb.lf.filter_level);
}
@@ -597,21 +606,37 @@ static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) {
vpx_memset(pc->left_seg_context, 0, sizeof(pc->left_seg_context));
for (mi_col = pc->cur_tile_mi_col_start; mi_col < pc->cur_tile_mi_col_end;
mi_col += MI_BLOCK_SIZE) {
- decode_modes_sb(pbi, mi_row, mi_col, r, BLOCK_SIZE_SB64X64);
+ decode_modes_sb(pbi, mi_row, mi_col, r, BLOCK_64X64);
}
if (pbi->do_loopfilter_inline) {
- YV12_BUFFER_CONFIG *const fb =
- &pbi->common.yv12_fb[pbi->common.new_fb_idx];
// delay the loopfilter by 1 macroblock row.
const int lf_start = mi_row - MI_BLOCK_SIZE;
if (lf_start < 0) continue;
- vp9_loop_filter_rows(fb, pc, &pbi->mb, lf_start, mi_row, 0);
+
+ if (num_threads > 1) {
+ LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
+
+ vp9_worker_sync(&pbi->lf_worker);
+ lf_data->start = lf_start;
+ lf_data->stop = mi_row;
+ pbi->lf_worker.hook = vp9_loop_filter_worker;
+ vp9_worker_launch(&pbi->lf_worker);
+ } else {
+ YV12_BUFFER_CONFIG *const fb =
+ &pbi->common.yv12_fb[pbi->common.new_fb_idx];
+ vp9_loop_filter_rows(fb, pc, &pbi->mb, lf_start, mi_row, 0);
+ }
}
}
if (pbi->do_loopfilter_inline) {
YV12_BUFFER_CONFIG *const fb = &pbi->common.yv12_fb[pbi->common.new_fb_idx];
+ if (num_threads > 1) {
+ // TODO(jzern): since the loop filter is delayed one mb row, this will be
+ // forced to wait for the last row scheduled in the for loop.
+ vp9_worker_sync(&pbi->lf_worker);
+ }
vp9_loop_filter_rows(fb, pc, &pbi->mb,
mi_row - MI_BLOCK_SIZE, pc->mi_rows, 0);
}
@@ -994,7 +1019,6 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
if (!keyframe && !pc->intra_only) {
vp9_adapt_mode_probs(pc);
- vp9_adapt_mode_context(pc);
vp9_adapt_mv_probs(pc, xd->allow_high_precision_mv);
}
}
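
Editor's note: the decode_tile() changes above interleave loop filtering with decoding. The filter trails decode by one superblock row, and with max_threads > 1 the trailing rows are handed to lf_worker (sync, update the LFWorkerData range, launch) while the main thread keeps decoding. A minimal single-threaded sketch of the same schedule, with illustrative stand-ins for the decode and filter steps and a row step of 1 for clarity:

#include <stdio.h>

static void decode_row(int row) { printf("decode row %d\n", row); }
static void filter_rows(int start, int stop) {
  printf("filter rows [%d, %d)\n", start, stop);
}

int main(void) {
  const int num_rows = 4;           /* superblock rows */
  int row;
  for (row = 0; row < num_rows; row++) {
    decode_row(row);                /* decode_modes_sb() over one row      */
    if (row - 1 >= 0)               /* filter lags one row behind decode;  */
      filter_rows(row - 1, row);    /* with a worker this is launch()ed    */
  }
  filter_rows(num_rows - 1, num_rows);  /* flush the last delayed row */
  return 0;
}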
diff --git a/libvpx/vp9/decoder/vp9_detokenize.c b/libvpx/vp9/decoder/vp9_detokenize.c
index 01c1db0b7..002164307 100644
--- a/libvpx/vp9/decoder/vp9_detokenize.c
+++ b/libvpx/vp9/decoder/vp9_detokenize.c
@@ -15,8 +15,10 @@
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_seg_common.h"
+#include "vp9/decoder/vp9_dboolhuff.h"
#include "vp9/decoder/vp9_detokenize.h"
#include "vp9/decoder/vp9_onyxd_int.h"
+#include "vp9/decoder/vp9_treereader.h"
#define EOB_CONTEXT_NODE 0
#define ZERO_CONTEXT_NODE 1
@@ -73,7 +75,7 @@ DECLARE_ALIGNED(16, extern const uint8_t,
#define WRITE_COEF_CONTINUE(val, token) \
{ \
qcoeff_ptr[scan[c]] = vp9_read_and_apply_sign(r, val) * \
- dq[c > 0] / (1 + (txfm_size == TX_32X32)); \
+ dq[c > 0] / (1 + (tx_size == TX_32X32)); \
INCREMENT_COUNT(token); \
c++; \
continue; \
@@ -88,33 +90,24 @@ DECLARE_ALIGNED(16, extern const uint8_t,
static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
vp9_reader *r, int block_idx,
PLANE_TYPE type, int seg_eob, int16_t *qcoeff_ptr,
- TX_SIZE txfm_size, const int16_t *dq,
+ TX_SIZE tx_size, const int16_t *dq,
ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L) {
FRAME_CONTEXT *const fc = &cm->fc;
FRAME_COUNTS *const counts = &cm->counts;
ENTROPY_CONTEXT above_ec, left_ec;
- int pt, c = 0;
- int band;
- vp9_prob (*coef_probs)[PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES];
+ const int ref = is_inter_block(&xd->mode_info_context->mbmi);
+ int band, pt, c = 0;
+ vp9_prob (*coef_probs)[PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES] =
+ fc->coef_probs[tx_size][type][ref];
vp9_prob coef_probs_full[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
- uint8_t load_map[COEF_BANDS][PREV_COEF_CONTEXTS] = {
- {0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0},
- };
-
+ uint8_t load_map[COEF_BANDS][PREV_COEF_CONTEXTS] = { { 0 } };
vp9_prob *prob;
- vp9_coeff_count_model *coef_counts;
- const int ref = xd->mode_info_context->mbmi.ref_frame[0] != INTRA_FRAME;
+ vp9_coeff_count_model *coef_counts = counts->coef[tx_size];
const int16_t *scan, *nb;
uint8_t token_cache[1024];
const uint8_t * band_translate;
- coef_probs = fc->coef_probs[txfm_size][type][ref];
- coef_counts = counts->coef[txfm_size];
- switch (txfm_size) {
+
+ switch (tx_size) {
default:
case TX_4X4: {
scan = get_scan_4x4(get_tx_type_4x4(type, xd, block_idx));
@@ -125,22 +118,22 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
}
case TX_8X8: {
scan = get_scan_8x8(get_tx_type_8x8(type, xd));
- above_ec = (A[0] + A[1]) != 0;
- left_ec = (L[0] + L[1]) != 0;
+ above_ec = !!*(uint16_t *)A;
+ left_ec = !!*(uint16_t *)L;
band_translate = vp9_coefband_trans_8x8plus;
break;
}
case TX_16X16: {
scan = get_scan_16x16(get_tx_type_16x16(type, xd));
- above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
- left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
+ above_ec = !!*(uint32_t *)A;
+ left_ec = !!*(uint32_t *)L;
band_translate = vp9_coefband_trans_8x8plus;
break;
}
case TX_32X32:
scan = vp9_default_scan_32x32;
- above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
- left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
+ above_ec = !!*(uint64_t *)A;
+ left_ec = !!*(uint64_t *)L;
band_translate = vp9_coefband_trans_8x8plus;
break;
}
@@ -157,7 +150,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
pt = get_coef_context(nb, token_cache, c);
band = get_coef_band(band_translate, c);
prob = coef_probs[band][pt];
- counts->eob_branch[txfm_size][type][ref][band][pt]++;
+ counts->eob_branch[tx_size][type][ref][band][pt]++;
if (!vp9_read(r, prob[EOB_CONTEXT_NODE]))
break;
@@ -276,7 +269,7 @@ static void decode_block(int plane, int block,
const int mod = bw - ss_tx_size - pd->subsampling_x;
const int aoff = (off & ((1 << mod) - 1)) << ss_tx_size;
const int loff = (off >> mod) << ss_tx_size;
-
+ const int tx_size_in_blocks = 1 << ss_tx_size;
ENTROPY_CONTEXT *A = pd->above_context + aoff;
ENTROPY_CONTEXT *L = pd->left_context + loff;
const int eob = decode_coefs(&arg->pbi->common, xd, arg->r, block,
@@ -285,10 +278,11 @@ static void decode_block(int plane, int block,
ss_tx_size, pd->dequant, A, L);
if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) {
- set_contexts_on_border(xd, bsize, plane, ss_tx_size, eob, aoff, loff, A, L);
+ set_contexts_on_border(xd, bsize, plane, tx_size_in_blocks, eob, aoff, loff,
+ A, L);
} else {
int pt;
- for (pt = 0; pt < (1 << ss_tx_size); pt++)
+ for (pt = 0; pt < tx_size_in_blocks; pt++)
A[pt] = L[pt] = eob > 0;
}
pd->eobs[block] = eob;
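
Editor's note: the above_ec/left_ec changes replace per-byte sums with single 16/32/64-bit loads. Both forms only ask whether any of the 2/4/8 context bytes is nonzero; the cast relies on the alignment of libvpx's ENTROPY_CONTEXT planes. A standalone sketch of the equivalence, using memcpy, which gives the same machine code without the strict-aliasing caveat of the direct cast:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Byte-loop form: any of n context bytes nonzero? */
static int any_nonzero(const uint8_t *p, int n) {
  int i, acc = 0;
  for (i = 0; i < n; i++) acc |= p[i];
  return acc != 0;
}

int main(void) {
  uint8_t ctx[8] = { 0, 0, 0, 1, 0, 0, 0, 0 };
  uint64_t v;
  memcpy(&v, ctx, sizeof(v));      /* wide-load form of !!*(uint64_t *)A */
  printf("byte loop: %d, wide load: %d\n", any_nonzero(ctx, 8), v != 0);
  return 0;
}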
diff --git a/libvpx/vp9/decoder/vp9_detokenize.h b/libvpx/vp9/decoder/vp9_detokenize.h
index d46b59635..f98fe8d4c 100644
--- a/libvpx/vp9/decoder/vp9_detokenize.h
+++ b/libvpx/vp9/decoder/vp9_detokenize.h
@@ -13,6 +13,7 @@
#define VP9_DECODER_VP9_DETOKENIZE_H_
#include "vp9/decoder/vp9_onyxd_int.h"
+#include "vp9/decoder/vp9_dboolhuff.h"
int vp9_decode_tokens(VP9D_COMP* pbi, vp9_reader *r, BLOCK_SIZE_TYPE bsize);
diff --git a/libvpx/vp9/decoder/vp9_idct_blk.c b/libvpx/vp9/decoder/vp9_idct_blk.c
index 0217919da..395e636b8 100644
--- a/libvpx/vp9/decoder/vp9_idct_blk.c
+++ b/libvpx/vp9/decoder/vp9_idct_blk.c
@@ -93,15 +93,11 @@ void vp9_idct_add_8x8_c(int16_t *input, uint8_t *dest, int stride, int eob) {
if (eob) {
if (eob == 1) {
// DC only DCT coefficient
- int16_t in = input[0];
- int16_t out;
-
- // Note: the idct1 will need to be modified accordingly whenever
- // vp9_short_idct8x8_c() is modified.
- vp9_short_idct1_8x8_c(&in, &out);
+ vp9_short_idct8x8_1_add(input, dest, stride);
input[0] = 0;
-
- vp9_add_constant_residual_8x8(out, dest, stride);
+ } else if (eob <= 10) {
+ vp9_short_idct10_8x8_add(input, dest, stride);
+ vpx_memset(input, 0, 128);
} else {
vp9_short_idct8x8_add(input, dest, stride);
vpx_memset(input, 0, 128);
@@ -127,14 +123,11 @@ void vp9_idct_add_16x16_c(int16_t *input, uint8_t *dest, int stride, int eob) {
if (eob) {
if (eob == 1) {
/* DC only DCT coefficient. */
- int16_t in = input[0];
- int16_t out;
- /* Note: the idct1 will need to be modified accordingly whenever
- * vp9_short_idct16x16() is modified. */
- vp9_short_idct1_16x16_c(&in, &out);
+ vp9_short_idct16x16_1_add(input, dest, stride);
input[0] = 0;
-
- vp9_add_constant_residual_16x16(out, dest, stride);
+ } else if (eob <= 10) {
+ vp9_short_idct10_16x16_add(input, dest, stride);
+ vpx_memset(input, 0, 512);
} else {
vp9_short_idct16x16_add(input, dest, stride);
vpx_memset(input, 0, 512);
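
Editor's note: both hunks above follow the same eob-driven dispatch: a DC-only residual add when eob == 1, a reduced transform when at most 10 coefficients are coded, and the full transform otherwise, clearing exactly the coefficients that were used (128 bytes is 8*8 int16_t; the 16x16 case clears 512). A sketch of that selection with the transform kernels stubbed out:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void idct_dc_only(void) { printf("DC-only residual add\n"); }
static void idct_partial(void) { printf("10-coefficient idct\n"); }
static void idct_full(void)    { printf("full 8x8 idct\n"); }

static void idct8x8_dispatch(int16_t input[64], int eob) {
  if (!eob) return;                /* nothing coded in this block */
  if (eob == 1) {
    idct_dc_only();
    input[0] = 0;                  /* only the DC term needs clearing */
  } else if (eob <= 10) {
    idct_partial();
    memset(input, 0, 64 * sizeof(*input));  /* the 128-byte memset */
  } else {
    idct_full();
    memset(input, 0, 64 * sizeof(*input));
  }
}

int main(void) {
  int16_t coeffs[64] = { 5 };
  idct8x8_dispatch(coeffs, 1);     /* takes the DC-only path */
  idct8x8_dispatch(coeffs, 0);     /* no-op */
  return 0;
}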
diff --git a/libvpx/vp9/decoder/vp9_onyxd_if.c b/libvpx/vp9/decoder/vp9_onyxd_if.c
index cb7292006..5a01dd790 100644
--- a/libvpx/vp9/decoder/vp9_onyxd_if.c
+++ b/libvpx/vp9/decoder/vp9_onyxd_if.c
@@ -8,9 +8,9 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-
-#include <stdio.h>
#include <assert.h>
+#include <limits.h>
+#include <stdio.h>
#include "vp9/common/vp9_onyxc_int.h"
#if CONFIG_POSTPROC
@@ -114,7 +114,7 @@ VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) {
if (!pbi)
return NULL;
- vpx_memset(pbi, 0, sizeof(VP9D_COMP));
+ vp9_zero(*pbi);
if (setjmp(pbi->common.error.jmp)) {
pbi->common.error.setjmp = 0;
@@ -141,6 +141,16 @@ VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) {
pbi->common.error.setjmp = 0;
pbi->decoded_key_frame = 0;
+ if (pbi->oxcf.max_threads > 1) {
+ vp9_worker_init(&pbi->lf_worker);
+ pbi->lf_worker.data1 = vpx_malloc(sizeof(LFWorkerData));
+ pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker;
+ if (pbi->lf_worker.data1 == NULL || !vp9_worker_reset(&pbi->lf_worker)) {
+ vp9_remove_decompressor(pbi);
+ return NULL;
+ }
+ }
+
return pbi;
}
@@ -154,6 +164,8 @@ void vp9_remove_decompressor(VP9D_PTR ptr) {
vpx_free(pbi->common.last_frame_seg_map);
vp9_remove_common(&pbi->common);
+ vp9_worker_end(&pbi->lf_worker);
+ vpx_free(pbi->lf_worker.data1);
vpx_free(pbi);
}
diff --git a/libvpx/vp9/decoder/vp9_onyxd_int.h b/libvpx/vp9/decoder/vp9_onyxd_int.h
index 476006616..a051971a1 100644
--- a/libvpx/vp9/decoder/vp9_onyxd_int.h
+++ b/libvpx/vp9/decoder/vp9_onyxd_int.h
@@ -14,10 +14,8 @@
#include "./vpx_config.h"
#include "vp9/common/vp9_onyxc_int.h"
-
-#include "vp9/decoder/vp9_idct_blk.h"
#include "vp9/decoder/vp9_onyxd.h"
-#include "vp9/decoder/vp9_treereader.h"
+#include "vp9/decoder/vp9_thread.h"
typedef struct VP9Decompressor {
DECLARE_ALIGNED(16, MACROBLOCKD, mb);
@@ -40,6 +38,7 @@ typedef struct VP9Decompressor {
int initial_height;
int do_loopfilter_inline; // apply loopfilter to available rows immediately
+ VP9Worker lf_worker;
} VP9D_COMP;
#endif // VP9_DECODER_VP9_TREEREADER_H_
diff --git a/libvpx/vp9/decoder/vp9_thread.c b/libvpx/vp9/decoder/vp9_thread.c
new file mode 100644
index 000000000..dc3b68196
--- /dev/null
+++ b/libvpx/vp9/decoder/vp9_thread.c
@@ -0,0 +1,248 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Multi-threaded worker
+//
+// Original source:
+// http://git.chromium.org/webm/libwebp.git
+// 100644 blob eff8f2a8c20095aade3c292b0e9292dac6cb3587 src/utils/thread.c
+
+
+#include <assert.h>
+#include <string.h> // for memset()
+#include "./vp9_thread.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#if CONFIG_MULTITHREAD
+
+#if defined(_WIN32)
+
+//------------------------------------------------------------------------------
+// simplistic pthread emulation layer
+
+#include <process.h>
+
+// _beginthreadex requires __stdcall
+#define THREADFN unsigned int __stdcall
+#define THREAD_RETURN(val) (unsigned int)((DWORD_PTR)val)
+
+static int pthread_create(pthread_t* const thread, const void* attr,
+ unsigned int (__stdcall *start)(void*), void* arg) {
+ (void)attr;
+ *thread = (pthread_t)_beginthreadex(NULL, /* void *security */
+ 0, /* unsigned stack_size */
+ start,
+ arg,
+ 0, /* unsigned initflag */
+ NULL); /* unsigned *thrdaddr */
+ if (*thread == NULL) return 1;
+ SetThreadPriority(*thread, THREAD_PRIORITY_ABOVE_NORMAL);
+ return 0;
+}
+
+static int pthread_join(pthread_t thread, void** value_ptr) {
+ (void)value_ptr;
+ return (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0 ||
+ CloseHandle(thread) == 0);
+}
+
+// Mutex
+static int pthread_mutex_init(pthread_mutex_t* const mutex, void* mutexattr) {
+ (void)mutexattr;
+ InitializeCriticalSection(mutex);
+ return 0;
+}
+
+static int pthread_mutex_lock(pthread_mutex_t* const mutex) {
+ EnterCriticalSection(mutex);
+ return 0;
+}
+
+static int pthread_mutex_unlock(pthread_mutex_t* const mutex) {
+ LeaveCriticalSection(mutex);
+ return 0;
+}
+
+static int pthread_mutex_destroy(pthread_mutex_t* const mutex) {
+ DeleteCriticalSection(mutex);
+ return 0;
+}
+
+// Condition
+static int pthread_cond_destroy(pthread_cond_t* const condition) {
+ int ok = 1;
+ ok &= (CloseHandle(condition->waiting_sem_) != 0);
+ ok &= (CloseHandle(condition->received_sem_) != 0);
+ ok &= (CloseHandle(condition->signal_event_) != 0);
+ return !ok;
+}
+
+static int pthread_cond_init(pthread_cond_t* const condition, void* cond_attr) {
+ (void)cond_attr;
+ condition->waiting_sem_ = CreateSemaphore(NULL, 0, 1, NULL);
+ condition->received_sem_ = CreateSemaphore(NULL, 0, 1, NULL);
+ condition->signal_event_ = CreateEvent(NULL, FALSE, FALSE, NULL);
+ if (condition->waiting_sem_ == NULL ||
+ condition->received_sem_ == NULL ||
+ condition->signal_event_ == NULL) {
+ pthread_cond_destroy(condition);
+ return 1;
+ }
+ return 0;
+}
+
+static int pthread_cond_signal(pthread_cond_t* const condition) {
+ int ok = 1;
+ if (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) {
+ // a thread is waiting in pthread_cond_wait: allow it to be notified
+ ok = SetEvent(condition->signal_event_);
+ // wait until the event is consumed so the signaler cannot consume
+ // the event via its own pthread_cond_wait.
+    ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) ==
+           WAIT_OBJECT_0);
+ }
+ return !ok;
+}
+
+static int pthread_cond_wait(pthread_cond_t* const condition,
+ pthread_mutex_t* const mutex) {
+ int ok;
+ // note that there is a consumer available so the signal isn't dropped in
+ // pthread_cond_signal
+ if (!ReleaseSemaphore(condition->waiting_sem_, 1, NULL))
+ return 1;
+ // now unlock the mutex so pthread_cond_signal may be issued
+ pthread_mutex_unlock(mutex);
+ ok = (WaitForSingleObject(condition->signal_event_, INFINITE) ==
+ WAIT_OBJECT_0);
+ ok &= ReleaseSemaphore(condition->received_sem_, 1, NULL);
+ pthread_mutex_lock(mutex);
+ return !ok;
+}
+
+#else // _WIN32
+# define THREADFN void*
+# define THREAD_RETURN(val) val
+#endif
+
+//------------------------------------------------------------------------------
+
+static THREADFN thread_loop(void *ptr) { // thread loop
+ VP9Worker* const worker = (VP9Worker*)ptr;
+ int done = 0;
+ while (!done) {
+ pthread_mutex_lock(&worker->mutex_);
+ while (worker->status_ == OK) { // wait in idling mode
+ pthread_cond_wait(&worker->condition_, &worker->mutex_);
+ }
+ if (worker->status_ == WORK) {
+ if (worker->hook) {
+ worker->had_error |= !worker->hook(worker->data1, worker->data2);
+ }
+ worker->status_ = OK;
+ } else if (worker->status_ == NOT_OK) { // finish the worker
+ done = 1;
+ }
+ // signal to the main thread that we're done (for Sync())
+ pthread_cond_signal(&worker->condition_);
+ pthread_mutex_unlock(&worker->mutex_);
+ }
+ return THREAD_RETURN(NULL); // Thread is finished
+}
+
+// main thread state control
+static void change_state(VP9Worker* const worker,
+ VP9WorkerStatus new_status) {
+ // no-op when attempting to change state on a thread that didn't come up
+ if (worker->status_ < OK) return;
+
+ pthread_mutex_lock(&worker->mutex_);
+ // wait for the worker to finish
+ while (worker->status_ != OK) {
+ pthread_cond_wait(&worker->condition_, &worker->mutex_);
+ }
+ // assign new status and release the working thread if needed
+ if (new_status != OK) {
+ worker->status_ = new_status;
+ pthread_cond_signal(&worker->condition_);
+ }
+ pthread_mutex_unlock(&worker->mutex_);
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+
+void vp9_worker_init(VP9Worker* const worker) {
+ memset(worker, 0, sizeof(*worker));
+ worker->status_ = NOT_OK;
+}
+
+int vp9_worker_sync(VP9Worker* const worker) {
+#if CONFIG_MULTITHREAD
+ change_state(worker, OK);
+#endif
+ assert(worker->status_ <= OK);
+ return !worker->had_error;
+}
+
+int vp9_worker_reset(VP9Worker* const worker) {
+ int ok = 1;
+ worker->had_error = 0;
+ if (worker->status_ < OK) {
+#if CONFIG_MULTITHREAD
+ if (pthread_mutex_init(&worker->mutex_, NULL) ||
+ pthread_cond_init(&worker->condition_, NULL)) {
+ return 0;
+ }
+ pthread_mutex_lock(&worker->mutex_);
+ ok = !pthread_create(&worker->thread_, NULL, thread_loop, worker);
+ if (ok) worker->status_ = OK;
+ pthread_mutex_unlock(&worker->mutex_);
+#else
+ worker->status_ = OK;
+#endif
+ } else if (worker->status_ > OK) {
+ ok = vp9_worker_sync(worker);
+ }
+ assert(!ok || (worker->status_ == OK));
+ return ok;
+}
+
+void vp9_worker_launch(VP9Worker* const worker) {
+#if CONFIG_MULTITHREAD
+ change_state(worker, WORK);
+#else
+ if (worker->hook)
+ worker->had_error |= !worker->hook(worker->data1, worker->data2);
+#endif
+}
+
+void vp9_worker_end(VP9Worker* const worker) {
+ if (worker->status_ >= OK) {
+#if CONFIG_MULTITHREAD
+ change_state(worker, NOT_OK);
+ pthread_join(worker->thread_, NULL);
+ pthread_mutex_destroy(&worker->mutex_);
+ pthread_cond_destroy(&worker->condition_);
+#else
+ worker->status_ = NOT_OK;
+#endif
+ }
+ assert(worker->status_ == NOT_OK);
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
diff --git a/libvpx/vp9/decoder/vp9_thread.h b/libvpx/vp9/decoder/vp9_thread.h
new file mode 100644
index 000000000..a8f7e046a
--- /dev/null
+++ b/libvpx/vp9/decoder/vp9_thread.h
@@ -0,0 +1,93 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Multi-threaded worker
+//
+// Original source:
+// http://git.chromium.org/webm/libwebp.git
+// 100644 blob 13a61a4c84194c3374080cbf03d881d3cd6af40d src/utils/thread.h
+
+
+#ifndef VP9_DECODER_VP9_THREAD_H_
+#define VP9_DECODER_VP9_THREAD_H_
+
+#include "vpx_config.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#if CONFIG_MULTITHREAD
+
+#if defined(_WIN32)
+
+#include <windows.h>
+typedef HANDLE pthread_t;
+typedef CRITICAL_SECTION pthread_mutex_t;
+typedef struct {
+ HANDLE waiting_sem_;
+ HANDLE received_sem_;
+ HANDLE signal_event_;
+} pthread_cond_t;
+
+#else
+
+#include <pthread.h>
+
+#endif /* _WIN32 */
+#endif /* CONFIG_MULTITHREAD */
+
+// State of the worker thread object
+typedef enum {
+ NOT_OK = 0, // object is unusable
+ OK, // ready to work
+ WORK // busy finishing the current task
+} VP9WorkerStatus;
+
+// Function to be called by the worker thread. Takes two opaque pointers as
+// arguments (data1 and data2), and should return false in case of error.
+typedef int (*VP9WorkerHook)(void*, void*);
+
+// Synchronization object used to launch a job in the worker thread
+typedef struct {
+#if CONFIG_MULTITHREAD
+ pthread_mutex_t mutex_;
+ pthread_cond_t condition_;
+ pthread_t thread_;
+#endif
+ VP9WorkerStatus status_;
+ VP9WorkerHook hook; // hook to call
+ void* data1; // first argument passed to 'hook'
+ void* data2; // second argument passed to 'hook'
+ int had_error; // return value of the last call to 'hook'
+} VP9Worker;
+
+// Must be called first, before any other method.
+void vp9_worker_init(VP9Worker* const worker);
+// Must be called to initialize the object and spawn the thread. Re-entrant;
+// the thread is only spawned on first use. Returns false in case of error.
+int vp9_worker_reset(VP9Worker* const worker);
+// Makes sure the previous work is finished. Returns true if worker->had_error
+// was not set and no error condition was triggered by the working thread.
+int vp9_worker_sync(VP9Worker* const worker);
+// Triggers the thread to call hook() with the data1 and data2 arguments.
+// hook, data1 and data2 can be changed at any time before calling this
+// function, but must not be changed afterward until the next call to
+// vp9_worker_sync().
+void vp9_worker_launch(VP9Worker* const worker);
+// Kills the thread and terminates the object. To use the object again,
+// vp9_worker_reset() must be called first.
+void vp9_worker_end(VP9Worker* const worker);
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
+
+#endif /* VP9_DECODER_VP9_THREAD_H_ */
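
Editor's note: taken together, the declarations above imply this lifecycle: init, reset (spawns the thread), then any number of launch/sync pairs, then end. A usage sketch follows; it would link against vp9_thread.c, the include path assumes the libvpx tree, and the hook and its payload are illustrative.

#include <stdio.h>
#include "vp9/decoder/vp9_thread.h"

static int do_work(void *data1, void *data2) {
  (void)data2;
  printf("working on %s\n", (const char *)data1);
  return 1;                          /* nonzero means success */
}

int main(void) {
  VP9Worker worker;
  vp9_worker_init(&worker);
  if (!vp9_worker_reset(&worker))    /* spawns the thread */
    return 1;
  worker.hook = do_work;
  worker.data1 = "job";
  worker.data2 = NULL;
  vp9_worker_launch(&worker);        /* hook runs asynchronously */
  if (!vp9_worker_sync(&worker))     /* wait for it to finish */
    fprintf(stderr, "hook reported an error\n");
  vp9_worker_end(&worker);           /* join and tear down */
  return 0;
}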
diff --git a/libvpx/vp9/decoder/vp9_treereader.h b/libvpx/vp9/decoder/vp9_treereader.h
index 4535688ea..710cc4cd0 100644
--- a/libvpx/vp9/decoder/vp9_treereader.h
+++ b/libvpx/vp9/decoder/vp9_treereader.h
@@ -15,7 +15,6 @@
#include "vp9/common/vp9_treecoder.h"
#include "vp9/decoder/vp9_dboolhuff.h"
-#define vp9_read_prob(r) ((vp9_prob)vp9_read_literal(r, 8))
#define vp9_read_and_apply_sign(r, value) (vp9_read_bit(r) ? -(value) : (value))
// Intent of tree data structure is to make decoding trivial.
diff --git a/libvpx/vp9/encoder/vp9_bitstream.c b/libvpx/vp9/encoder/vp9_bitstream.c
index ad0f6c531..98ef42074 100644
--- a/libvpx/vp9/encoder/vp9_bitstream.c
+++ b/libvpx/vp9/encoder/vp9_bitstream.c
@@ -44,16 +44,16 @@ unsigned __int64 Sectionbits[500];
int intra_mode_stats[VP9_INTRA_MODES]
[VP9_INTRA_MODES]
[VP9_INTRA_MODES];
-vp9_coeff_stats tree_update_hist[TX_SIZE_MAX_SB][BLOCK_TYPES];
+vp9_coeff_stats tree_update_hist[TX_SIZES][BLOCK_TYPES];
extern unsigned int active_section;
#endif
#ifdef MODE_STATS
-int64_t tx_count_32x32p_stats[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB];
-int64_t tx_count_16x16p_stats[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB - 1];
-int64_t tx_count_8x8p_stats[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB - 2];
+int64_t tx_count_32x32p_stats[TX_SIZE_CONTEXTS][TX_SIZES];
+int64_t tx_count_16x16p_stats[TX_SIZE_CONTEXTS][TX_SIZES - 1];
+int64_t tx_count_8x8p_stats[TX_SIZE_CONTEXTS][TX_SIZES - 2];
int64_t switchable_interp_stats[VP9_SWITCHABLE_FILTERS+1]
[VP9_SWITCHABLE_FILTERS];
@@ -70,17 +70,17 @@ void init_switchable_interp_stats() {
static void update_tx_count_stats(VP9_COMMON *cm) {
int i, j;
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
- for (j = 0; j < TX_SIZE_MAX_SB; j++) {
+ for (j = 0; j < TX_SIZES; j++) {
tx_count_32x32p_stats[i][j] += cm->fc.tx_count_32x32p[i][j];
}
}
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
- for (j = 0; j < TX_SIZE_MAX_SB - 1; j++) {
+ for (j = 0; j < TX_SIZES - 1; j++) {
tx_count_16x16p_stats[i][j] += cm->fc.tx_count_16x16p[i][j];
}
}
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
- for (j = 0; j < TX_SIZE_MAX_SB - 2; j++) {
+ for (j = 0; j < TX_SIZES - 2; j++) {
tx_count_8x8p_stats[i][j] += cm->fc.tx_count_8x8p[i][j];
}
}
@@ -103,30 +103,30 @@ void write_tx_count_stats() {
fclose(fp);
printf(
- "vp9_default_tx_count_32x32p[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB] = {\n");
+ "vp9_default_tx_count_32x32p[TX_SIZE_CONTEXTS][TX_SIZES] = {\n");
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
printf(" { ");
- for (j = 0; j < TX_SIZE_MAX_SB; j++) {
+ for (j = 0; j < TX_SIZES; j++) {
printf("%"PRId64", ", tx_count_32x32p_stats[i][j]);
}
printf("},\n");
}
printf("};\n");
printf(
- "vp9_default_tx_count_16x16p[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB-1] = {\n");
+ "vp9_default_tx_count_16x16p[TX_SIZE_CONTEXTS][TX_SIZES-1] = {\n");
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
printf(" { ");
- for (j = 0; j < TX_SIZE_MAX_SB - 1; j++) {
+ for (j = 0; j < TX_SIZES - 1; j++) {
printf("%"PRId64", ", tx_count_16x16p_stats[i][j]);
}
printf("},\n");
}
printf("};\n");
printf(
- "vp9_default_tx_count_8x8p[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB-2] = {\n");
+ "vp9_default_tx_count_8x8p[TX_SIZE_CONTEXTS][TX_SIZES-2] = {\n");
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
printf(" { ");
- for (j = 0; j < TX_SIZE_MAX_SB - 2; j++) {
+ for (j = 0; j < TX_SIZES - 2; j++) {
printf("%"PRId64", ", tx_count_8x8p_stats[i][j]);
}
printf("},\n");
@@ -169,7 +169,6 @@ void vp9_encode_unsigned_max(struct vp9_write_bit_buffer *wb,
static void update_mode(
vp9_writer *w,
int n,
- const struct vp9_token tok[/* n */],
vp9_tree tree,
vp9_prob Pnew[/* n-1 */],
vp9_prob Pcur[/* n-1 */],
@@ -194,20 +193,19 @@ static void update_mbintra_mode_probs(VP9_COMP* const cpi,
unsigned int bct[VP9_INTRA_MODES - 1][2];
for (j = 0; j < BLOCK_SIZE_GROUPS; j++)
- update_mode(bc, VP9_INTRA_MODES, vp9_intra_mode_encodings,
- vp9_intra_mode_tree, pnew,
+ update_mode(bc, VP9_INTRA_MODES, vp9_intra_mode_tree, pnew,
cm->fc.y_mode_prob[j], bct,
(unsigned int *)cpi->y_mode_count[j]);
}
-static void write_selected_txfm_size(const VP9_COMP *cpi, TX_SIZE tx_size,
- BLOCK_SIZE_TYPE bsize, vp9_writer *w) {
+static void write_selected_tx_size(const VP9_COMP *cpi, TX_SIZE tx_size,
+ BLOCK_SIZE_TYPE bsize, vp9_writer *w) {
const MACROBLOCKD *const xd = &cpi->mb.e_mbd;
const vp9_prob *tx_probs = get_tx_probs2(xd, &cpi->common.fc.tx_probs);
vp9_write(w, tx_size != TX_4X4, tx_probs[0]);
- if (bsize >= BLOCK_SIZE_MB16X16 && tx_size != TX_4X4) {
+ if (bsize >= BLOCK_16X16 && tx_size != TX_4X4) {
vp9_write(w, tx_size != TX_8X8, tx_probs[1]);
- if (bsize >= BLOCK_SIZE_SB32X32 && tx_size != TX_8X8)
+ if (bsize >= BLOCK_32X32 && tx_size != TX_8X8)
vp9_write(w, tx_size != TX_16X16, tx_probs[2]);
}
}
@@ -265,12 +263,17 @@ static void update_switchable_interp_probs(VP9_COMP *const cpi,
static void update_inter_mode_probs(VP9_COMMON *pc, vp9_writer* const bc) {
int i, j;
- for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
- for (j = 0; j < VP9_INTER_MODES - 1; j++) {
+ for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {
+ unsigned int branch_ct[VP9_INTER_MODES - 1][2];
+ vp9_prob new_prob[VP9_INTER_MODES - 1];
+
+ vp9_tree_probs_from_distribution(vp9_inter_mode_tree,
+ new_prob, branch_ct,
+ pc->counts.inter_mode[i], NEARESTMV);
+
+ for (j = 0; j < VP9_INTER_MODES - 1; ++j)
vp9_cond_prob_diff_update(bc, &pc->fc.inter_mode_probs[i][j],
- VP9_MODE_UPDATE_PROB,
- pc->counts.inter_mode[i][j]);
- }
+ VP9_MODE_UPDATE_PROB, branch_ct[j]);
}
}
@@ -393,8 +396,7 @@ static void encode_ref_frame(VP9_COMP *cpi, vp9_writer *bc) {
// the reference frame is fully coded by the segment
}
-static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
- vp9_writer *bc, int mi_row, int mi_col) {
+static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) {
VP9_COMMON *const pc = &cpi->common;
const nmv_context *nmvc = &pc->fc.nmvc;
MACROBLOCK *const x = &cpi->mb;
@@ -406,6 +408,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
const int segment_id = mi->segment_id;
int skip_coeff;
const BLOCK_SIZE_TYPE bsize = mi->sb_type;
+ const int allow_hp = xd->allow_high_precision_mv;
x->partition_info = x->pi + (m - pc->mi);
@@ -434,7 +437,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
if (bsize >= BLOCK_SIZE_SB8X8 && pc->tx_mode == TX_MODE_SELECT &&
!(rf != INTRA_FRAME &&
(skip_coeff || vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)))) {
- write_selected_txfm_size(cpi, mi->txfm_size, bsize, bc);
+ write_selected_tx_size(cpi, mi->txfm_size, bsize, bc);
}
if (rf == INTRA_FRAME) {
@@ -443,18 +446,17 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
#endif
if (bsize >= BLOCK_SIZE_SB8X8) {
- const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
- const int bsl = MIN(bwl, bhl);
- write_intra_mode(bc, mode, pc->fc.y_mode_prob[MIN(3, bsl)]);
+ write_intra_mode(bc, mode, pc->fc.y_mode_prob[size_group_lookup[bsize]]);
} else {
int idx, idy;
- int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
- int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
- for (idy = 0; idy < 2; idy += num_4x4_blocks_high)
+ const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
+ const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
+ for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
const MB_PREDICTION_MODE bm = m->bmi[idy * 2 + idx].as_mode;
write_intra_mode(bc, bm, pc->fc.y_mode_prob[0]);
}
+ }
}
write_intra_mode(bc, mi->uv_mode, pc->fc.uv_mode_prob[mode]);
} else {
@@ -470,7 +472,8 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
if (bsize >= BLOCK_SIZE_SB8X8) {
write_sb_mv_ref(bc, mode, mv_ref_p);
- vp9_accum_mv_refs(&cpi->common, mode, mi->mb_mode_context[rf]);
+ ++pc->counts.inter_mode[mi->mb_mode_context[rf]]
+ [inter_mode_offset(mode)];
}
}
@@ -487,8 +490,8 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
int j;
MB_PREDICTION_MODE blockmode;
int_mv blockmv;
- int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
- int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
+ const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
+ const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
int idx, idy;
for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
@@ -496,19 +499,21 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
blockmode = x->partition_info->bmi[j].mode;
blockmv = m->bmi[j].as_mv[0];
write_sb_mv_ref(bc, blockmode, mv_ref_p);
- vp9_accum_mv_refs(&cpi->common, blockmode, mi->mb_mode_context[rf]);
+ ++pc->counts.inter_mode[mi->mb_mode_context[rf]]
+ [inter_mode_offset(blockmode)];
+
if (blockmode == NEWMV) {
#ifdef ENTROPY_STATS
active_section = 11;
#endif
vp9_encode_mv(cpi, bc, &blockmv.as_mv, &mi->best_mv.as_mv,
- nmvc, xd->allow_high_precision_mv);
+ nmvc, allow_hp);
if (mi->ref_frame[1] > INTRA_FRAME)
vp9_encode_mv(cpi, bc,
&m->bmi[j].as_mv[1].as_mv,
&mi->best_second_mv.as_mv,
- nmvc, xd->allow_high_precision_mv);
+ nmvc, allow_hp);
}
}
}
@@ -516,21 +521,18 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
#ifdef ENTROPY_STATS
active_section = 5;
#endif
- vp9_encode_mv(cpi, bc,
- &mi->mv[0].as_mv, &mi->best_mv.as_mv,
- nmvc, xd->allow_high_precision_mv);
+ vp9_encode_mv(cpi, bc, &mi->mv[0].as_mv, &mi->best_mv.as_mv,
+ nmvc, allow_hp);
if (mi->ref_frame[1] > INTRA_FRAME)
- vp9_encode_mv(cpi, bc,
- &mi->mv[1].as_mv, &mi->best_second_mv.as_mv,
- nmvc, xd->allow_high_precision_mv);
+ vp9_encode_mv(cpi, bc, &mi->mv[1].as_mv, &mi->best_second_mv.as_mv,
+ nmvc, allow_hp);
}
}
}
-static void write_mb_modes_kf(const VP9_COMP *cpi,
- MODE_INFO *m,
- vp9_writer *bc, int mi_row, int mi_col) {
+static void write_mb_modes_kf(const VP9_COMP *cpi, MODE_INFO *m,
+ vp9_writer *bc) {
const VP9_COMMON *const c = &cpi->common;
const MACROBLOCKD *const xd = &cpi->mb.e_mbd;
const int ym = m->mbmi.mode;
@@ -543,7 +545,7 @@ static void write_mb_modes_kf(const VP9_COMP *cpi,
write_skip_coeff(cpi, segment_id, m, bc);
if (m->mbmi.sb_type >= BLOCK_SIZE_SB8X8 && c->tx_mode == TX_MODE_SELECT)
- write_selected_txfm_size(cpi, m->mbmi.txfm_size, m->mbmi.sb_type, bc);
+ write_selected_tx_size(cpi, m->mbmi.txfm_size, m->mbmi.sb_type, bc);
if (m->mbmi.sb_type >= BLOCK_SIZE_SB8X8) {
const MB_PREDICTION_MODE A = above_block_mode(m, 0, mis);
@@ -552,11 +554,11 @@ static void write_mb_modes_kf(const VP9_COMP *cpi,
write_intra_mode(bc, ym, vp9_kf_y_mode_prob[A][L]);
} else {
int idx, idy;
- int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[m->mbmi.sb_type];
- int num_4x4_blocks_high = num_4x4_blocks_high_lookup[m->mbmi.sb_type];
+ const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[m->mbmi.sb_type];
+ const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[m->mbmi.sb_type];
for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
- int i = idy * 2 + idx;
+ const int i = idy * 2 + idx;
const MB_PREDICTION_MODE A = above_block_mode(m, i, mis);
const MB_PREDICTION_MODE L = (xd->left_available || idx) ?
left_block_mode(m, i) : DC_PRED;
@@ -586,12 +588,12 @@ static void write_modes_b(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
1 << mi_height_log2(m->mbmi.sb_type),
mi_col, 1 << mi_width_log2(m->mbmi.sb_type));
if ((cm->frame_type == KEY_FRAME) || cm->intra_only) {
- write_mb_modes_kf(cpi, m, bc, mi_row, mi_col);
+ write_mb_modes_kf(cpi, m, bc);
#ifdef ENTROPY_STATS
active_section = 8;
#endif
} else {
- pack_inter_mode_mvs(cpi, m, bc, mi_row, mi_col);
+ pack_inter_mode_mvs(cpi, m, bc);
#ifdef ENTROPY_STATS
active_section = 1;
#endif
@@ -625,7 +627,7 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
if (bsize >= BLOCK_SIZE_SB8X8) {
int pl;
- const int idx = check_bsize_coverage(cm, xd, mi_row, mi_col, bsize);
+ const int idx = check_bsize_coverage(cm, mi_row, mi_col, bsize);
set_partition_seg_context(cm, xd, mi_row, mi_col);
pl = partition_plane_context(xd, bsize);
// encode the partition information
@@ -692,8 +694,7 @@ static void write_modes(VP9_COMP *cpi, vp9_writer* const bc,
vp9_zero(c->left_seg_context);
for (mi_col = c->cur_tile_mi_col_start; mi_col < c->cur_tile_mi_col_end;
mi_col += MI_BLOCK_SIZE, m += MI_BLOCK_SIZE)
- write_modes_sb(cpi, m, bc, tok, tok_end, mi_row, mi_col,
- BLOCK_SIZE_SB64X64);
+ write_modes_sb(cpi, m, bc, tok, tok_end, mi_row, mi_col, BLOCK_64X64);
}
}
@@ -726,12 +727,12 @@ static void print_prob_tree(vp9_coeff_probs *coef_probs, int block_types) {
fclose(f);
}
-static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE txfm_size) {
- vp9_coeff_probs_model *coef_probs = cpi->frame_coef_probs[txfm_size];
- vp9_coeff_count *coef_counts = cpi->coef_counts[txfm_size];
+static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size) {
+ vp9_coeff_probs_model *coef_probs = cpi->frame_coef_probs[tx_size];
+ vp9_coeff_count *coef_counts = cpi->coef_counts[tx_size];
unsigned int (*eob_branch_ct)[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS] =
- cpi->common.counts.eob_branch[txfm_size];
- vp9_coeff_stats *coef_branch_ct = cpi->frame_branch_ct[txfm_size];
+ cpi->common.counts.eob_branch[tx_size];
+ vp9_coeff_stats *coef_branch_ct = cpi->frame_branch_ct[tx_size];
vp9_prob full_probs[ENTROPY_NODES];
int i, j, k, l;
@@ -756,9 +757,9 @@ static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE txfm_size) {
if (!cpi->dummy_packing) {
int t;
for (t = 0; t < MAX_ENTROPY_TOKENS; ++t)
- context_counters[txfm_size][i][j][k][l][t] +=
+ context_counters[tx_size][i][j][k][l][t] +=
coef_counts[i][j][k][l][t];
- context_counters[txfm_size][i][j][k][l][MAX_ENTROPY_TOKENS] +=
+ context_counters[tx_size][i][j][k][l][MAX_ENTROPY_TOKENS] +=
eob_branch_ct[i][j][k][l];
}
#endif
@@ -1036,15 +1037,15 @@ static void encode_txfm_probs(VP9_COMP *cpi, vp9_writer *w) {
// Probabilities
if (cm->tx_mode == TX_MODE_SELECT) {
int i, j;
- unsigned int ct_8x8p[TX_SIZE_MAX_SB - 3][2];
- unsigned int ct_16x16p[TX_SIZE_MAX_SB - 2][2];
- unsigned int ct_32x32p[TX_SIZE_MAX_SB - 1][2];
+ unsigned int ct_8x8p[TX_SIZES - 3][2];
+ unsigned int ct_16x16p[TX_SIZES - 2][2];
+ unsigned int ct_32x32p[TX_SIZES - 1][2];
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
tx_counts_to_branch_counts_8x8(cm->counts.tx.p8x8[i],
ct_8x8p);
- for (j = 0; j < TX_SIZE_MAX_SB - 3; j++)
+ for (j = 0; j < TX_SIZES - 3; j++)
vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p8x8[i][j],
VP9_MODE_UPDATE_PROB, ct_8x8p[j]);
}
@@ -1052,14 +1053,14 @@ static void encode_txfm_probs(VP9_COMP *cpi, vp9_writer *w) {
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
tx_counts_to_branch_counts_16x16(cm->counts.tx.p16x16[i],
ct_16x16p);
- for (j = 0; j < TX_SIZE_MAX_SB - 2; j++)
+ for (j = 0; j < TX_SIZES - 2; j++)
vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p16x16[i][j],
VP9_MODE_UPDATE_PROB, ct_16x16p[j]);
}
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
tx_counts_to_branch_counts_32x32(cm->counts.tx.p32x32[i], ct_32x32p);
- for (j = 0; j < TX_SIZE_MAX_SB - 1; j++)
+ for (j = 0; j < TX_SIZES - 1; j++)
vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p32x32[i][j],
VP9_MODE_UPDATE_PROB, ct_32x32p[j]);
}
@@ -1422,7 +1423,7 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) {
for (i = 0; i < NUM_PARTITION_CONTEXTS; ++i) {
vp9_prob pnew[PARTITION_TYPES - 1];
unsigned int bct[PARTITION_TYPES - 1][2];
- update_mode(&header_bc, PARTITION_TYPES, vp9_partition_encodings,
+ update_mode(&header_bc, PARTITION_TYPES,
vp9_partition_tree, pnew,
fc->partition_prob[cm->frame_type][i], bct,
(unsigned int *)cpi->partition_count[i]);
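
Editor's note: update_inter_mode_probs() now derives per-branch counts from the full mode distribution via vp9_tree_probs_from_distribution() instead of storing branch counts directly. A self-contained sketch of that leaf-to-branch conversion on a vp9-style tree, where non-positive entries are leaves storing -token and positive entries index the next node pair; the tree and counts below are dummies, not the real inter-mode tables:

#include <stdio.h>

typedef signed char vp9_tree_index;

static unsigned sum_leaves(const vp9_tree_index *tree, int i,
                           const unsigned *counts) {
  return tree[i] <= 0 ? counts[-tree[i]]
                      : sum_leaves(tree, tree[i], counts) +
                        sum_leaves(tree, tree[i] + 1, counts);
}

int main(void) {
  /* Four tokens 0..3 on a right-leaning tree, like the inter modes. */
  static const vp9_tree_index tree[6] = { 0, 2, -1, 4, -2, -3 };
  static const unsigned counts[4] = { 10, 5, 3, 2 };
  int n;
  for (n = 0; n < 6; n += 2)   /* one (0-branch, 1-branch) pair per node */
    printf("node %d: ct[0] = %u, ct[1] = %u\n", n / 2,
           sum_leaves(tree, n, counts), sum_leaves(tree, n + 1, counts));
  return 0;
}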
diff --git a/libvpx/vp9/encoder/vp9_block.h b/libvpx/vp9/encoder/vp9_block.h
index 4b49b17a2..3e377cf6f 100644
--- a/libvpx/vp9/encoder/vp9_block.h
+++ b/libvpx/vp9/encoder/vp9_block.h
@@ -47,7 +47,7 @@ typedef struct {
int hybrid_pred_diff;
int comp_pred_diff;
int single_pred_diff;
- int64_t txfm_rd_diff[NB_TXFM_MODES];
+ int64_t tx_rd_diff[TX_MODES];
int64_t best_filter_diff[VP9_SWITCHABLE_FILTERS + 1];
// Bit flag for each mode whether it has high error in comparison to others.
@@ -72,6 +72,11 @@ struct macroblock_plane {
int16_t zbin_extra;
};
+/* The [2] dimension is for whether we skip the EOB node (i.e. if previous
+ * coefficient in this block was zero) or not. */
+typedef unsigned int vp9_coeff_cost[BLOCK_TYPES][REF_TYPES][COEF_BANDS][2]
+ [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
+
typedef struct macroblock MACROBLOCK;
struct macroblock {
struct macroblock_plane plane[MAX_MB_PLANE];
@@ -97,6 +102,7 @@ struct macroblock {
int mv_best_ref_index[MAX_REF_FRAMES];
unsigned int max_mv_context[MAX_REF_FRAMES];
+ unsigned int source_variance;
int nmvjointcost[MV_JOINTS];
int nmvcosts[2][MV_VALS];
@@ -133,7 +139,7 @@ struct macroblock {
unsigned char *active_ptr;
// note that token_costs is the cost when eob node is skipped
- vp9_coeff_count token_costs[TX_SIZE_MAX_SB][BLOCK_TYPES][2];
+ vp9_coeff_cost token_costs[TX_SIZES];
int optimize;
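
Editor's note: the vp9_coeff_cost comment above is the key to the re-dimensioned token_costs table. The extra [2] axis holds two cost variants, with index 1 used when the previous coefficient in scan order was zero, so the EOB branch cannot occur there and its cost is excluded. A toy sketch of selecting between the two; dimensions and values are stand-ins:

#include <stdio.h>

enum { BANDS = 2, CONTEXTS = 3, TOKENS = 4 };

/* [band][skip_eob][context][token], mirroring the [2] axis above */
static unsigned cost[BANDS][2][CONTEXTS][TOKENS];

int main(void) {
  const int band = 1, ctx = 2, token = 3;
  int prev_coeff_was_zero;
  cost[band][0][ctx][token] = 9;   /* includes the EOB branch cost */
  cost[band][1][ctx][token] = 7;   /* EOB branch skipped           */
  for (prev_coeff_was_zero = 0; prev_coeff_was_zero < 2;
       prev_coeff_was_zero++)
    printf("cost = %u\n", cost[band][prev_coeff_was_zero][ctx][token]);
  return 0;
}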
diff --git a/libvpx/vp9/encoder/vp9_encodeframe.c b/libvpx/vp9/encoder/vp9_encodeframe.c
index 798adc1f3..66eae41da 100644
--- a/libvpx/vp9/encoder/vp9_encodeframe.c
+++ b/libvpx/vp9/encoder/vp9_encodeframe.c
@@ -60,11 +60,28 @@ static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x);
* Eventually this should be replaced by custom no-reference routines,
* which will be faster.
*/
-static const uint8_t VP9_VAR_OFFS[16] = {128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128};
+static const uint8_t VP9_VAR_OFFS[64] = {
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128
+};
+
+static unsigned int get_sb_variance(VP9_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE_TYPE bs) {
+ unsigned int var, sse;
+ var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf,
+ x->plane[0].src.stride,
+ VP9_VAR_OFFS, 0, &sse);
+ return var >> num_pels_log2_lookup[bs];
+}
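
Editor's note: get_sb_variance() feeds the all-128 VP9_VAR_OFFS buffer as the "reference", so the variance kernel returns sum((s-128)^2) - (sum(s-128))^2 / n, i.e. n times the source variance, and the final shift by num_pels_log2_lookup normalizes it to per-pixel variance. A plain-C sketch of the same computation for one 8x8 block with synthetic pixel data:

#include <stdint.h>
#include <stdio.h>

int main(void) {
  uint8_t src[64];
  int i, sum = 0;
  unsigned int sse = 0, nvar;
  for (i = 0; i < 64; i++) src[i] = (uint8_t)(120 + (i & 15));
  for (i = 0; i < 64; i++) {
    const int d = src[i] - 128;    /* VP9_VAR_OFFS is all 128s */
    sum += d;
    sse += d * d;
  }
  nvar = sse - (unsigned int)(((int64_t)sum * sum) >> 6);  /* n = 64 */
  printf("per-pixel source variance: %u\n", nvar >> 6);
  return 0;
}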
// Original activity measure from Tim T's code.
-static unsigned int tt_activity_measure(VP9_COMP *cpi, MACROBLOCK *x) {
+static unsigned int tt_activity_measure(MACROBLOCK *x) {
unsigned int act;
unsigned int sse;
/* TODO: This could also be done over smaller areas (8x8), but that would
@@ -106,7 +123,7 @@ static unsigned int mb_activity_measure(VP9_COMP *cpi, MACROBLOCK *x,
mb_activity = alt_activity_measure(cpi, x, use_dc_pred);
} else {
// Original activity measure from Tim T's code.
- mb_activity = tt_activity_measure(cpi, x);
+ mb_activity = tt_activity_measure(x);
}
if (mb_activity < VP9_ACTIVITY_AVG_MIN)
@@ -323,8 +340,8 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
int mb_mode_index = ctx->best_mode_index;
const int mis = cpi->common.mode_info_stride;
- const int mi_height = num_8x8_blocks_high_lookup[bsize];
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
assert(mi->mbmi.mode < MB_MODE_COUNT);
assert(mb_mode_index < MAX_MODES);
@@ -345,13 +362,13 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
}
// FIXME(rbultje) I'm pretty sure this should go to the end of this block
// (i.e. after the output_enabled)
- if (bsize < BLOCK_SIZE_SB32X32) {
- if (bsize < BLOCK_SIZE_MB16X16)
- ctx->txfm_rd_diff[ALLOW_16X16] = ctx->txfm_rd_diff[ALLOW_8X8];
- ctx->txfm_rd_diff[ALLOW_32X32] = ctx->txfm_rd_diff[ALLOW_16X16];
+ if (bsize < BLOCK_32X32) {
+ if (bsize < BLOCK_16X16)
+ ctx->tx_rd_diff[ALLOW_16X16] = ctx->tx_rd_diff[ALLOW_8X8];
+ ctx->tx_rd_diff[ALLOW_32X32] = ctx->tx_rd_diff[ALLOW_16X16];
}
- if (mbmi->ref_frame[0] != INTRA_FRAME && mbmi->sb_type < BLOCK_SIZE_SB8X8) {
+ if (is_inter_block(mbmi) && mbmi->sb_type < BLOCK_8X8) {
*x->partition_info = ctx->partition_info;
mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
@@ -362,9 +379,8 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
return;
if (!vp9_segfeature_active(&xd->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
- for (i = 0; i < NB_TXFM_MODES; i++) {
- cpi->rd_tx_select_diff[i] += ctx->txfm_rd_diff[i];
- }
+ for (i = 0; i < TX_MODES; i++)
+ cpi->rd_tx_select_diff[i] += ctx->tx_rd_diff[i];
}
if (cpi->common.frame_type == KEY_FRAME) {
@@ -395,7 +411,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
} else {
// Note how often each mode chosen as best
cpi->mode_chosen_counts[mb_mode_index]++;
- if (mbmi->ref_frame[0] != INTRA_FRAME
+ if (is_inter_block(mbmi)
&& (mbmi->sb_type < BLOCK_SIZE_SB8X8 || mbmi->mode == NEWMV)) {
int_mv best_mv, best_second_mv;
const MV_REFERENCE_FRAME rf1 = mbmi->ref_frame[0];
@@ -465,6 +481,7 @@ static void set_offsets(VP9_COMP *cpi, int mi_row, int mi_col,
const int mb_row = mi_row >> 1;
const int mb_col = mi_col >> 1;
const int idx_map = mb_row * cm->mb_cols + mb_col;
+ const struct segmentation *const seg = &xd->seg;
int i;
// entropy context structures
@@ -514,16 +531,16 @@ static void set_offsets(VP9_COMP *cpi, int mi_row, int mi_col,
x->rdmult = cpi->RDMULT;
/* segment ID */
- if (xd->seg.enabled) {
- uint8_t *map = xd->seg.update_map ? cpi->segmentation_map
- : cm->last_frame_seg_map;
+ if (seg->enabled) {
+ uint8_t *map = seg->update_map ? cpi->segmentation_map
+ : cm->last_frame_seg_map;
mbmi->segment_id = vp9_get_segment_id(cm, map, bsize, mi_row, mi_col);
vp9_mb_init_quantizer(cpi, x);
- if (xd->seg.enabled && cpi->seg0_cnt > 0
- && !vp9_segfeature_active(&xd->seg, 0, SEG_LVL_REF_FRAME)
- && vp9_segfeature_active(&xd->seg, 1, SEG_LVL_REF_FRAME)) {
+ if (seg->enabled && cpi->seg0_cnt > 0
+ && !vp9_segfeature_active(seg, 0, SEG_LVL_REF_FRAME)
+ && vp9_segfeature_active(seg, 1, SEG_LVL_REF_FRAME)) {
cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt;
} else {
const int y = mb_row & ~3;
@@ -537,8 +554,11 @@ static void set_offsets(VP9_COMP *cpi, int mi_row, int mi_col,
cpi->seg0_progress = ((y * mb_cols + x * 4 + p32 + p16 + tile_progress)
<< 16) / cm->MBs;
}
+
+ x->encode_breakout = cpi->segment_encode_breakout[mbmi->segment_id];
} else {
mbmi->segment_id = 0;
+ x->encode_breakout = cpi->oxcf.encode_breakout;
}
}
@@ -552,12 +572,17 @@ static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col,
x->rd_search = 1;
- if (bsize < BLOCK_SIZE_SB8X8)
+ if (bsize < BLOCK_SIZE_SB8X8) {
+ // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
+ // there is nothing to be done.
if (xd->ab_index != 0)
return;
+ }
set_offsets(cpi, mi_row, mi_col, bsize);
xd->mode_info_context->mbmi.sb_type = bsize;
+
+ x->source_variance = get_sb_variance(cpi, x, bsize);
if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
vp9_activity_masking(cpi, x);
@@ -571,12 +596,12 @@ static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col,
bsize, ctx, best_rd);
}
-static void update_stats(VP9_COMP *cpi, int mi_row, int mi_col) {
- VP9_COMMON * const cm = &cpi->common;
- MACROBLOCK * const x = &cpi->mb;
- MACROBLOCKD * const xd = &x->e_mbd;
+static void update_stats(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &cpi->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *mi = xd->mode_info_context;
- MB_MODE_INFO * const mbmi = &mi->mbmi;
+ MB_MODE_INFO *const mbmi = &mi->mbmi;
if (cm->frame_type != KEY_FRAME) {
const int seg_ref_active = vp9_segfeature_active(&xd->seg, mbmi->segment_id,
@@ -612,38 +637,38 @@ static void update_stats(VP9_COMP *cpi, int mi_row, int mi_col) {
}
// TODO(jingning): the variables used here are a little complicated. Need further
-// refactoring on organizing the the temporary buffers, when recursive
+// refactoring on organizing the temporary buffers, when recursive
// partition down to 4x4 block size is enabled.
static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x,
BLOCK_SIZE_TYPE bsize) {
MACROBLOCKD * const xd = &x->e_mbd;
switch (bsize) {
- case BLOCK_SIZE_SB64X64:
+ case BLOCK_64X64:
return &x->sb64_context;
- case BLOCK_SIZE_SB64X32:
+ case BLOCK_64X32:
return &x->sb64x32_context[xd->sb_index];
- case BLOCK_SIZE_SB32X64:
+ case BLOCK_32X64:
return &x->sb32x64_context[xd->sb_index];
- case BLOCK_SIZE_SB32X32:
+ case BLOCK_32X32:
return &x->sb32_context[xd->sb_index];
- case BLOCK_SIZE_SB32X16:
+ case BLOCK_32X16:
return &x->sb32x16_context[xd->sb_index][xd->mb_index];
- case BLOCK_SIZE_SB16X32:
+ case BLOCK_16X32:
return &x->sb16x32_context[xd->sb_index][xd->mb_index];
- case BLOCK_SIZE_MB16X16:
+ case BLOCK_16X16:
return &x->mb_context[xd->sb_index][xd->mb_index];
- case BLOCK_SIZE_SB16X8:
+ case BLOCK_16X8:
return &x->sb16x8_context[xd->sb_index][xd->mb_index][xd->b_index];
- case BLOCK_SIZE_SB8X16:
+ case BLOCK_8X16:
return &x->sb8x16_context[xd->sb_index][xd->mb_index][xd->b_index];
- case BLOCK_SIZE_SB8X8:
+ case BLOCK_8X8:
return &x->sb8x8_context[xd->sb_index][xd->mb_index][xd->b_index];
- case BLOCK_SIZE_SB8X4:
+ case BLOCK_8X4:
return &x->sb8x4_context[xd->sb_index][xd->mb_index][xd->b_index];
- case BLOCK_SIZE_SB4X8:
+ case BLOCK_4X8:
return &x->sb4x8_context[xd->sb_index][xd->mb_index][xd->b_index];
- case BLOCK_SIZE_AB4X4:
+ case BLOCK_4X4:
return &x->ab4x4_context[xd->sb_index][xd->mb_index][xd->b_index];
default:
assert(0);
@@ -655,13 +680,13 @@ static BLOCK_SIZE_TYPE *get_sb_partitioning(MACROBLOCK *x,
BLOCK_SIZE_TYPE bsize) {
MACROBLOCKD *xd = &x->e_mbd;
switch (bsize) {
- case BLOCK_SIZE_SB64X64:
+ case BLOCK_64X64:
return &x->sb64_partitioning;
- case BLOCK_SIZE_SB32X32:
+ case BLOCK_32X32:
return &x->sb_partitioning[xd->sb_index];
- case BLOCK_SIZE_MB16X16:
+ case BLOCK_16X16:
return &x->mb_partitioning[xd->sb_index][xd->mb_index];
- case BLOCK_SIZE_SB8X8:
+ case BLOCK_8X8:
return &x->b_partitioning[xd->sb_index][xd->mb_index][xd->b_index];
default:
assert(0);
@@ -674,12 +699,12 @@ static void restore_context(VP9_COMP *cpi, int mi_row, int mi_col,
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
BLOCK_SIZE_TYPE bsize) {
- VP9_COMMON * const cm = &cpi->common;
- MACROBLOCK * const x = &cpi->mb;
- MACROBLOCKD * const xd = &x->e_mbd;
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &cpi->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
int p;
- int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
- int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
+ const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
+ const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
int mi_width = num_8x8_blocks_wide_lookup[bsize];
int mi_height = num_8x8_blocks_high_lookup[bsize];
for (p = 0; p < MAX_MB_PLANE; p++) {
@@ -705,12 +730,12 @@ static void save_context(VP9_COMP *cpi, int mi_row, int mi_col,
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
BLOCK_SIZE_TYPE bsize) {
- VP9_COMMON * const cm = &cpi->common;
- MACROBLOCK * const x = &cpi->mb;
- MACROBLOCKD * const xd = &x->e_mbd;
+ const VP9_COMMON *const cm = &cpi->common;
+ const MACROBLOCK *const x = &cpi->mb;
+ const MACROBLOCKD *const xd = &x->e_mbd;
int p;
- int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
- int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
+ const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
+ const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
int mi_width = num_8x8_blocks_wide_lookup[bsize];
int mi_height = num_8x8_blocks_high_lookup[bsize];
@@ -746,15 +771,18 @@ static void encode_b(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, int mi_col,
if (sub_index != -1)
*(get_sb_index(xd, bsize)) = sub_index;
- if (bsize < BLOCK_SIZE_SB8X8)
+ if (bsize < BLOCK_SIZE_SB8X8) {
+ // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
+ // there is nothing to be done.
if (xd->ab_index > 0)
return;
+ }
set_offsets(cpi, mi_row, mi_col, bsize);
update_state(cpi, get_block_context(x, bsize), bsize, output_enabled);
encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize);
if (output_enabled) {
- update_stats(cpi, mi_row, mi_col);
+ update_stats(cpi);
(*tp)->token = EOSB_TOKEN;
(*tp)++;
@@ -776,7 +804,7 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, int mi_col,
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
- c1 = BLOCK_SIZE_AB4X4;
+ c1 = BLOCK_4X4;
if (bsize >= BLOCK_SIZE_SB8X8) {
set_partition_seg_context(cm, xd, mi_row, mi_col);
pl = partition_plane_context(xd, bsize);
@@ -858,7 +886,7 @@ static void set_block_size(VP9_COMMON * const cm, MODE_INFO *m,
int bhl = b_height_log2(bsize);
int bsl = (bwl > bhl ? bwl : bhl);
- int bs = (1 << bsl) / 2; //
+ int bs = (1 << bsl) / 2; // Block size in units of 8 pels.
MODE_INFO *m2 = m + mi_row * mis + mi_col;
for (row = 0; row < bs; row++) {
for (col = 0; col < bs; col++) {
@@ -906,28 +934,28 @@ typedef enum {
static void tree_to_node(void *data, BLOCK_SIZE_TYPE block_size, vt_node *node) {
int i;
switch (block_size) {
- case BLOCK_SIZE_SB64X64: {
+ case BLOCK_64X64: {
v64x64 *vt = (v64x64 *) data;
node->vt = &vt->vt;
for (i = 0; i < 4; i++)
node->split[i] = &vt->split[i].vt.none;
break;
}
- case BLOCK_SIZE_SB32X32: {
+ case BLOCK_32X32: {
v32x32 *vt = (v32x32 *) data;
node->vt = &vt->vt;
for (i = 0; i < 4; i++)
node->split[i] = &vt->split[i].vt.none;
break;
}
- case BLOCK_SIZE_MB16X16: {
+ case BLOCK_16X16: {
v16x16 *vt = (v16x16 *) data;
node->vt = &vt->vt;
for (i = 0; i < 4; i++)
node->split[i] = &vt->split[i].vt.none;
break;
}
- case BLOCK_SIZE_SB8X8: {
+ case BLOCK_8X8: {
v8x8 *vt = (v8x8 *) data;
node->vt = &vt->vt;
for (i = 0; i < 4; i++)
@@ -1066,8 +1094,7 @@ static void choose_partitioning(VP9_COMP *cpi, MODE_INFO *m, int mi_row,
int dp;
int pixels_wide = 64, pixels_high = 64;
- vpx_memset(&vt, 0, sizeof(vt));
-
+ vp9_zero(vt);
set_offsets(cpi, mi_row, mi_col, BLOCK_SIZE_SB64X64);
if (xd->mb_to_right_edge < 0)
@@ -1087,7 +1114,8 @@ static void choose_partitioning(VP9_COMP *cpi, MODE_INFO *m, int mi_row,
dp = 64;
if (cm->frame_type != KEY_FRAME) {
int_mv nearest_mv, near_mv;
- YV12_BUFFER_CONFIG *ref_fb = &cm->yv12_fb[0];
+ const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, LAST_FRAME)];
+ YV12_BUFFER_CONFIG *ref_fb = &cm->yv12_fb[idx];
YV12_BUFFER_CONFIG *second_ref_fb = NULL;
setup_pre_planes(xd, 0, ref_fb, mi_row, mi_col,
@@ -1103,7 +1131,6 @@ static void choose_partitioning(VP9_COMP *cpi, MODE_INFO *m, int mi_row,
vp9_build_inter_predictors_sby(xd, mi_row, mi_col, BLOCK_SIZE_SB64X64);
d = xd->plane[0].dst.buf;
dp = xd->plane[0].dst.stride;
-
}
// Fill in the entire tree of 8x8 variances for splits.
@@ -1130,32 +1157,32 @@ static void choose_partitioning(VP9_COMP *cpi, MODE_INFO *m, int mi_row,
// values.
for (i = 0; i < 4; i++) {
for (j = 0; j < 4; j++) {
- fill_variance_tree(&vt.split[i].split[j], BLOCK_SIZE_MB16X16);
+ fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16);
}
- fill_variance_tree(&vt.split[i], BLOCK_SIZE_SB32X32);
+ fill_variance_tree(&vt.split[i], BLOCK_32X32);
}
- fill_variance_tree(&vt, BLOCK_SIZE_SB64X64);
+ fill_variance_tree(&vt, BLOCK_64X64);
// Now go through the entire structure, splitting every block size until
// we get to one that's got a variance lower than our threshold, or we
// hit 8x8.
- if (!set_vt_partitioning(cpi, &vt, m, BLOCK_SIZE_SB64X64, mi_row, mi_col,
+ if (!set_vt_partitioning(cpi, &vt, m, BLOCK_64X64, mi_row, mi_col,
4)) {
for (i = 0; i < 4; ++i) {
const int x32_idx = ((i & 1) << 2);
const int y32_idx = ((i >> 1) << 2);
- if (!set_vt_partitioning(cpi, &vt.split[i], m, BLOCK_SIZE_SB32X32,
+ if (!set_vt_partitioning(cpi, &vt.split[i], m, BLOCK_32X32,
(mi_row + y32_idx), (mi_col + x32_idx), 2)) {
for (j = 0; j < 4; ++j) {
const int x16_idx = ((j & 1) << 1);
const int y16_idx = ((j >> 1) << 1);
if (!set_vt_partitioning(cpi, &vt.split[i].split[j], m,
- BLOCK_SIZE_MB16X16,
+ BLOCK_16X16,
(mi_row + y32_idx + y16_idx),
(mi_col + x32_idx + x16_idx), 1)) {
for (k = 0; k < 4; ++k) {
const int x8_idx = (k & 1);
const int y8_idx = (k >> 1);
- set_block_size(cm, m, BLOCK_SIZE_SB8X8, mis,
+ set_block_size(cm, m, BLOCK_8X8, mis,
(mi_row + y32_idx + y16_idx + y8_idx),
(mi_col + x32_idx + x16_idx + x8_idx));
}
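
The cascade above is a top-down walk of the precomputed variance tree: each square block is kept whole when set_vt_partitioning() finds its variance below threshold, and is otherwise split into four quadrants that are tested in turn, bottoming out at 8x8. A minimal self-contained sketch of that control flow (the struct, the threshold, and set_size() are illustrative stand-ins, not libvpx API):

typedef struct vtree {
  unsigned int variance;   /* variance of this square block of pixels */
  struct vtree *split[4];  /* four quadrants; unused at the 8x8 leaves */
} vtree;

/* Keep the block whole if it is smooth enough, otherwise recurse into
 * its quadrants; the block edge halves at each level, stopping at 8. */
static void assign_partition(const vtree *vt, int size, int row, int col,
                             unsigned int threshold,
                             void (*set_size)(int size, int row, int col)) {
  int i;
  if (size == 8 || vt->variance < threshold) {
    set_size(size, row, col);
    return;
  }
  for (i = 0; i < 4; ++i)
    assign_partition(vt->split[i], size / 2,
                     row + (i >> 1) * (size / 2), col + (i & 1) * (size / 2),
                     threshold, set_size);
}
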
@@ -1165,6 +1192,7 @@ static void choose_partitioning(VP9_COMP *cpi, MODE_INFO *m, int mi_row,
}
}
}
+
static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize,
int *rate, int64_t *dist, int do_recon) {
@@ -1173,8 +1201,8 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
MACROBLOCKD *xd = &cpi->mb.e_mbd;
const int mis = cm->mode_info_stride;
int bsl = b_width_log2(bsize);
- int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
- int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
+ const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
+ const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
int ms = num_4x4_blocks_wide / 2;
int mh = num_4x4_blocks_high / 2;
int bss = (1 << bsl) / 4;
@@ -1191,7 +1219,7 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
int64_t none_dist = INT_MAX;
int chosen_rate = INT_MAX;
int64_t chosen_dist = INT_MAX;
- BLOCK_SIZE_TYPE sub_subsize = BLOCK_SIZE_AB4X4;
+ BLOCK_SIZE_TYPE sub_subsize = BLOCK_4X4;
int splits_below = 0;
BLOCK_SIZE_TYPE bs_type = m->mbmi.sb_type;
@@ -1203,6 +1231,8 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
subsize = get_subsize(bsize, partition);
if (bsize < BLOCK_SIZE_SB8X8) {
+ // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
+ // there is nothing to be done.
if (xd->ab_index != 0) {
*rate = 0;
*dist = 0;
@@ -1213,6 +1243,10 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
}
save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
+ x->fast_ms = 0;
+ x->pred_mv.as_int = 0;
+ x->subblock_ref = 0;
+
if (cpi->sf.adjust_partitioning_from_last_frame) {
// Check if any of the sub blocks are further split.
if (partition == PARTITION_SPLIT && subsize > BLOCK_SIZE_SB8X8) {
@@ -1422,9 +1456,59 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
*dist = chosen_dist;
}
+static BLOCK_SIZE_TYPE min_partition_size[BLOCK_SIZE_TYPES] =
+ { BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4,
+ BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, BLOCK_8X8,
+ BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16 };
+static BLOCK_SIZE_TYPE max_partition_size[BLOCK_SIZE_TYPES] =
+ { BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16,
+ BLOCK_32X32, BLOCK_32X32, BLOCK_32X32, BLOCK_64X64,
+ BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64 };
+
+
+// Look at neighboring blocks and set a min and max partition size based on
+// what they chose.
+static void rd_auto_partition_range(VP9_COMP *cpi,
+ BLOCK_SIZE_TYPE * min_block_size,
+ BLOCK_SIZE_TYPE * max_block_size) {
+ MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+ const MODE_INFO *const mi = xd->mode_info_context;
+ const MB_MODE_INFO *const above_mbmi = &mi[-xd->mode_info_stride].mbmi;
+ const MB_MODE_INFO *const left_mbmi = &mi[-1].mbmi;
+ const int left_in_image = xd->left_available && left_mbmi->mb_in_image;
+ const int above_in_image = xd->up_available && above_mbmi->mb_in_image;
+
+ // Frequency check
+ if (cpi->sf.auto_min_max_partition_count <= 0) {
+ cpi->sf.auto_min_max_partition_count =
+ cpi->sf.auto_min_max_partition_interval;
+ *min_block_size = BLOCK_4X4;
+ *max_block_size = BLOCK_64X64;
+ return;
+ } else {
+ --cpi->sf.auto_min_max_partition_count;
+ }
+
+ // Check for edge cases
+ if (!left_in_image && !above_in_image) {
+ *min_block_size = BLOCK_4X4;
+ *max_block_size = BLOCK_64X64;
+ } else if (!left_in_image) {
+ *min_block_size = min_partition_size[above_mbmi->sb_type];
+ *max_block_size = max_partition_size[above_mbmi->sb_type];
+ } else if (!above_in_image) {
+ *min_block_size = min_partition_size[left_mbmi->sb_type];
+ *max_block_size = max_partition_size[left_mbmi->sb_type];
+ } else {
+ *min_block_size =
+ min_partition_size[MIN(left_mbmi->sb_type, above_mbmi->sb_type)];
+ *max_block_size =
+ max_partition_size[MAX(left_mbmi->sb_type, above_mbmi->sb_type)];
+ }
+}
// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
-// unlikely to be selected depending on previously rate-distortion optimization
+// unlikely to be selected depending on previous rate-distortion optimization
// results, for encoding speed-up.
static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
int mi_col, BLOCK_SIZE_TYPE bsize, int *rate,
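
rd_auto_partition_range() above turns the neighbors' decisions into a search window: min_partition_size[] and max_partition_size[] map a neighbor's chosen block size to a conservative lower and upper bound, and rd_pick_partition() then skips PARTITION_SPLIT below the minimum and PARTITION_NONE above the maximum. A rough sketch of the same idea with sizes in pixels (the one-step-around-the-neighbor rule is illustrative; the real tables are hand-tuned per BLOCK_SIZE_TYPE):

/* Derive a [min, max] partition-size window from the left/above
 * neighbors' chosen sizes; fall back to the full 4..64 range when no
 * neighbor is available. Sizes are pixel edges, not enum values. */
static void partition_window(int left_size, int above_size,
                             int left_ok, int above_ok,
                             int *min_size, int *max_size) {
  int lo, hi;
  if (!left_ok && !above_ok) {
    *min_size = 4;
    *max_size = 64;
    return;
  }
  if (!left_ok)  left_size  = above_size;
  if (!above_ok) above_size = left_size;
  lo = left_size < above_size ? left_size : above_size;
  hi = left_size > above_size ? left_size : above_size;
  *min_size = lo / 2 < 4  ? 4  : lo / 2;   /* one step below the smaller */
  *max_size = hi * 2 > 64 ? 64 : hi * 2;   /* one step above the larger */
}
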
@@ -1444,20 +1528,22 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
(void) *tp_orig;
- if (bsize < BLOCK_SIZE_SB8X8)
+ if (bsize < BLOCK_SIZE_SB8X8) {
+ // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
+ // there is nothing to be done.
if (xd->ab_index != 0) {
*rate = 0;
*dist = 0;
return;
}
+ }
assert(mi_height_log2(bsize) == mi_width_log2(bsize));
save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
// PARTITION_SPLIT
- if (!cpi->sf.use_partitions_greater_than
- || (cpi->sf.use_partitions_greater_than
- && bsize > cpi->sf.greater_than_block_size)) {
+ if (!cpi->sf.auto_min_max_partition_size ||
+ bsize >= cpi->sf.min_partition_size) {
if (bsize > BLOCK_SIZE_SB8X8) {
int r4 = 0;
int64_t d4 = 0, sum_rd = 0;
@@ -1500,41 +1586,39 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
}
}
+ // Use 4 subblocks' motion estimation results to speed up current
+ // partition's checking.
x->fast_ms = 0;
x->pred_mv.as_int = 0;
x->subblock_ref = 0;
- // Use 4 subblocks' motion estimation results to speed up current
- // partition's checking.
- if (cpi->sf.using_small_partition_info) {
+ if (cpi->sf.using_small_partition_info &&
+ (!cpi->sf.auto_min_max_partition_size ||
+ (bsize <= cpi->sf.max_partition_size &&
+ bsize >= cpi->sf.min_partition_size))) {
      // Only use 8x8 result for non-HD videos.
// int use_8x8 = (MIN(cpi->common.width, cpi->common.height) < 720) ? 1 : 0;
int use_8x8 = 1;
if (cm->frame_type && !cpi->is_src_frame_alt_ref &&
- ((use_8x8 && bsize == BLOCK_SIZE_MB16X16) ||
- bsize == BLOCK_SIZE_SB32X32 || bsize == BLOCK_SIZE_SB64X64)) {
+ ((use_8x8 && bsize == BLOCK_16X16) ||
+ bsize == BLOCK_32X32 || bsize == BLOCK_64X64)) {
int ref0 = 0, ref1 = 0, ref2 = 0, ref3 = 0;
+ PICK_MODE_CONTEXT *block_context = NULL;
- if (bsize == BLOCK_SIZE_MB16X16) {
- ref0 = x->sb8x8_context[xd->sb_index][xd->mb_index][0].mic.mbmi.
- ref_frame[0];
- ref1 = x->sb8x8_context[xd->sb_index][xd->mb_index][1].mic.mbmi.
- ref_frame[0];
- ref2 = x->sb8x8_context[xd->sb_index][xd->mb_index][2].mic.mbmi.
- ref_frame[0];
- ref3 = x->sb8x8_context[xd->sb_index][xd->mb_index][3].mic.mbmi.
- ref_frame[0];
- } else if (bsize == BLOCK_SIZE_SB32X32) {
- ref0 = x->mb_context[xd->sb_index][0].mic.mbmi.ref_frame[0];
- ref1 = x->mb_context[xd->sb_index][1].mic.mbmi.ref_frame[0];
- ref2 = x->mb_context[xd->sb_index][2].mic.mbmi.ref_frame[0];
- ref3 = x->mb_context[xd->sb_index][3].mic.mbmi.ref_frame[0];
+ if (bsize == BLOCK_16X16) {
+ block_context = x->sb8x8_context[xd->sb_index][xd->mb_index];
+ } else if (bsize == BLOCK_32X32) {
+ block_context = x->mb_context[xd->sb_index];
} else if (bsize == BLOCK_SIZE_SB64X64) {
- ref0 = x->sb32_context[0].mic.mbmi.ref_frame[0];
- ref1 = x->sb32_context[1].mic.mbmi.ref_frame[0];
- ref2 = x->sb32_context[2].mic.mbmi.ref_frame[0];
- ref3 = x->sb32_context[3].mic.mbmi.ref_frame[0];
+ block_context = x->sb32_context;
+ }
+
+ if (block_context) {
+ ref0 = block_context[0].mic.mbmi.ref_frame[0];
+ ref1 = block_context[1].mic.mbmi.ref_frame[0];
+ ref2 = block_context[2].mic.mbmi.ref_frame[0];
+ ref3 = block_context[3].mic.mbmi.ref_frame[0];
}
// Currently, only consider 4 inter ref frames.
@@ -1544,42 +1628,14 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
int d01, d23, d02, d13; // motion vector distance between 2 blocks
// Get each subblock's motion vectors.
- if (bsize == BLOCK_SIZE_MB16X16) {
- mvr0 = x->sb8x8_context[xd->sb_index][xd->mb_index][0].mic.mbmi.mv[0].
- as_mv.row;
- mvc0 = x->sb8x8_context[xd->sb_index][xd->mb_index][0].mic.mbmi.mv[0].
- as_mv.col;
- mvr1 = x->sb8x8_context[xd->sb_index][xd->mb_index][1].mic.mbmi.mv[0].
- as_mv.row;
- mvc1 = x->sb8x8_context[xd->sb_index][xd->mb_index][1].mic.mbmi.mv[0].
- as_mv.col;
- mvr2 = x->sb8x8_context[xd->sb_index][xd->mb_index][2].mic.mbmi.mv[0].
- as_mv.row;
- mvc2 = x->sb8x8_context[xd->sb_index][xd->mb_index][2].mic.mbmi.mv[0].
- as_mv.col;
- mvr3 = x->sb8x8_context[xd->sb_index][xd->mb_index][3].mic.mbmi.mv[0].
- as_mv.row;
- mvc3 = x->sb8x8_context[xd->sb_index][xd->mb_index][3].mic.mbmi.mv[0].
- as_mv.col;
- } else if (bsize == BLOCK_SIZE_SB32X32) {
- mvr0 = x->mb_context[xd->sb_index][0].mic.mbmi.mv[0].as_mv.row;
- mvc0 = x->mb_context[xd->sb_index][0].mic.mbmi.mv[0].as_mv.col;
- mvr1 = x->mb_context[xd->sb_index][1].mic.mbmi.mv[0].as_mv.row;
- mvc1 = x->mb_context[xd->sb_index][1].mic.mbmi.mv[0].as_mv.col;
- mvr2 = x->mb_context[xd->sb_index][2].mic.mbmi.mv[0].as_mv.row;
- mvc2 = x->mb_context[xd->sb_index][2].mic.mbmi.mv[0].as_mv.col;
- mvr3 = x->mb_context[xd->sb_index][3].mic.mbmi.mv[0].as_mv.row;
- mvc3 = x->mb_context[xd->sb_index][3].mic.mbmi.mv[0].as_mv.col;
- } else if (bsize == BLOCK_SIZE_SB64X64) {
- mvr0 = x->sb32_context[0].mic.mbmi.mv[0].as_mv.row;
- mvc0 = x->sb32_context[0].mic.mbmi.mv[0].as_mv.col;
- mvr1 = x->sb32_context[1].mic.mbmi.mv[0].as_mv.row;
- mvc1 = x->sb32_context[1].mic.mbmi.mv[0].as_mv.col;
- mvr2 = x->sb32_context[2].mic.mbmi.mv[0].as_mv.row;
- mvc2 = x->sb32_context[2].mic.mbmi.mv[0].as_mv.col;
- mvr3 = x->sb32_context[3].mic.mbmi.mv[0].as_mv.row;
- mvc3 = x->sb32_context[3].mic.mbmi.mv[0].as_mv.col;
- }
+ mvr0 = block_context[0].mic.mbmi.mv[0].as_mv.row;
+ mvc0 = block_context[0].mic.mbmi.mv[0].as_mv.col;
+ mvr1 = block_context[1].mic.mbmi.mv[0].as_mv.row;
+ mvc1 = block_context[1].mic.mbmi.mv[0].as_mv.col;
+ mvr2 = block_context[2].mic.mbmi.mv[0].as_mv.row;
+ mvc2 = block_context[2].mic.mbmi.mv[0].as_mv.col;
+ mvr3 = block_context[3].mic.mbmi.mv[0].as_mv.row;
+ mvc3 = block_context[3].mic.mbmi.mv[0].as_mv.col;
// Adjust sign if ref is alt_ref
if (cm->ref_frame_sign_bias[ref0]) {
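
The ref* and mvr*/mvc* values gathered above feed a consistency test: if the four subblocks agree on the reference frame and their motion vectors nearly coincide, the merged partition can seed its search from the shared vector (x->pred_mv) instead of starting cold. A sketch of that kind of agreement check, with an illustrative distance threshold (the real code grades agreement through the d01/d23/d02/d13 distances rather than a single cutoff):

#include <stdlib.h>

/* Return 1 and the averaged vector when all four subblocks picked the
 * same reference and their MVs are within 'tol' of the first one. */
static int subblocks_agree(const int ref[4], const int mvr[4],
                           const int mvc[4], int tol,
                           int *pred_row, int *pred_col) {
  int i;
  for (i = 1; i < 4; ++i)
    if (ref[i] != ref[0] ||
        abs(mvr[i] - mvr[0]) + abs(mvc[i] - mvc[0]) > tol)
      return 0;
  *pred_row = (mvr[0] + mvr[1] + mvr[2] + mvr[3]) / 4;
  *pred_col = (mvc[0] + mvc[1] + mvc[2] + mvc[3]) / 4;
  return 1;
}
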
@@ -1631,9 +1687,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
}
}
- if (!cpi->sf.use_partitions_less_than
- || (cpi->sf.use_partitions_less_than
- && bsize <= cpi->sf.less_than_block_size)) {
+ if (!cpi->sf.auto_min_max_partition_size ||
+ bsize <= cpi->sf.max_partition_size) {
int larger_is_better = 0;
// PARTITION_NONE
if ((mi_row + (ms >> 1) < cm->mi_rows) &&
@@ -1804,8 +1859,7 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
}
// Examines 64x64 block and chooses a best reference frame
-static void rd_pick_reference_frame(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
- int mi_col, int *rate, int64_t *dist) {
+static void rd_pick_reference_frame(VP9_COMP *cpi, int mi_row, int mi_col) {
VP9_COMMON * const cm = &cpi->common;
MACROBLOCK * const x = &cpi->mb;
MACROBLOCKD * const xd = &x->e_mbd;
@@ -1836,23 +1890,7 @@ static void rd_pick_reference_frame(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
cpi->set_ref_frame_mask = 0;
}
- *rate = r;
- *dist = d;
- // RDCOST(x->rdmult, x->rddiv, r, d)
-
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, BLOCK_SIZE_SB64X64);
-
- /*if (srate < INT_MAX && sdist < INT_MAX)
- encode_sb(cpi, tp, mi_row, mi_col, 1, BLOCK_SIZE_SB64X64);
-
- if (bsize == BLOCK_SIZE_SB64X64) {
- assert(tp_orig < *tp);
- assert(srate < INT_MAX);
- assert(sdist < INT_MAX);
- } else {
- assert(tp_orig == *tp);
- }
- */
}
static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp,
@@ -1877,10 +1915,8 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp,
else
cpi->unused_mode_skip_mask = 0xFFFFFFFFFFFFFE00;
- if (cpi->sf.reference_masking) {
- rd_pick_reference_frame(cpi, tp, mi_row, mi_col,
- &dummy_rate, &dummy_dist);
- }
+ if (cpi->sf.reference_masking)
+ rd_pick_reference_frame(cpi, mi_row, mi_col);
if (cpi->sf.partition_by_variance || cpi->sf.use_lastframe_partitioning ||
cpi->sf.use_one_partition_size_always ) {
@@ -1888,6 +1924,7 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp,
MODE_INFO *m = cm->mi + idx_str;
MODE_INFO *p = cm->prev_mi + idx_str;
+ cpi->mb.source_variance = UINT_MAX;
if (cpi->sf.use_one_partition_size_always) {
set_offsets(cpi, mi_row, mi_col, BLOCK_SIZE_SB64X64);
set_partitioning(cpi, m, cpi->sf.always_this_block_size);
@@ -1904,6 +1941,12 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp,
|| cpi->common.show_frame == 0
|| cpi->common.frame_type == KEY_FRAME
|| cpi->is_src_frame_alt_ref) {
+      // If required, set upper and lower partition size limits
+ if (cpi->sf.auto_min_max_partition_size) {
+ rd_auto_partition_range(cpi,
+ &cpi->sf.min_partition_size,
+ &cpi->sf.max_partition_size);
+ }
rd_pick_partition(cpi, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64,
&dummy_rate, &dummy_dist, 1, INT64_MAX);
} else {
@@ -1913,6 +1956,12 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp,
}
}
} else {
+      // If required, set upper and lower partition size limits
+ if (cpi->sf.auto_min_max_partition_size) {
+ rd_auto_partition_range(cpi, &cpi->sf.min_partition_size,
+ &cpi->sf.max_partition_size);
+ }
+
rd_pick_partition(cpi, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64,
&dummy_rate, &dummy_dist, 1, INT64_MAX);
}
@@ -2086,7 +2135,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
}
vpx_usec_timer_mark(&emr_timer);
- cpi->time_encode_mb_row += vpx_usec_timer_elapsed(&emr_timer);
+ cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer);
}
if (cpi->sf.skip_encode_sb) {
@@ -2203,13 +2252,13 @@ static void reset_skip_txfm_size_sb(VP9_COMP *cpi, MODE_INFO *mi,
int n;
assert(bwl < bsl && bhl < bsl);
- if (bsize == BLOCK_SIZE_SB64X64) {
- subsize = BLOCK_SIZE_SB32X32;
- } else if (bsize == BLOCK_SIZE_SB32X32) {
- subsize = BLOCK_SIZE_MB16X16;
+ if (bsize == BLOCK_64X64) {
+ subsize = BLOCK_32X32;
+ } else if (bsize == BLOCK_32X32) {
+ subsize = BLOCK_16X16;
} else {
- assert(bsize == BLOCK_SIZE_MB16X16);
- subsize = BLOCK_SIZE_SB8X8;
+ assert(bsize == BLOCK_16X16);
+ subsize = BLOCK_8X8;
}
for (n = 0; n < 4; n++) {
@@ -2267,7 +2316,7 @@ static void select_tx_mode(VP9_COMP *cpi) {
} else {
unsigned int total = 0;
int i;
- for (i = 0; i < TX_SIZE_MAX_SB; ++i)
+ for (i = 0; i < TX_SIZES; ++i)
total += cpi->txfm_stepdown_count[i];
if (total) {
double fraction = (double)cpi->txfm_stepdown_count[0] / total;
@@ -2376,12 +2425,12 @@ void vp9_encode_frame(VP9_COMP *cpi) {
(cpi->rd_filter_threshes[frame_type][i] + diff) / 2;
}
- for (i = 0; i < NB_TXFM_MODES; ++i) {
+ for (i = 0; i < TX_MODES; ++i) {
int64_t pd = cpi->rd_tx_select_diff[i];
int diff;
if (i == TX_MODE_SELECT)
pd -= RDCOST(cpi->mb.rdmult, cpi->mb.rddiv,
- 2048 * (TX_SIZE_MAX_SB - 1), 0);
+ 2048 * (TX_SIZES - 1), 0);
diff = (int) (pd / cpi->common.MBs);
cpi->rd_tx_select_threshes[frame_type][i] += diff;
cpi->rd_tx_select_threshes[frame_type][i] /= 2;
@@ -2527,7 +2576,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
// Increase zbin size to suppress noise
cpi->zbin_mode_boost = 0;
if (cpi->zbin_mode_boost_enabled) {
- if (mbmi->ref_frame[0] != INTRA_FRAME) {
+ if (is_inter_block(mbmi)) {
if (mbmi->mode == ZEROMV) {
if (mbmi->ref_frame[0] != LAST_FRAME)
cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
@@ -2600,7 +2649,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
if (output_enabled) {
if (cm->tx_mode == TX_MODE_SELECT &&
mbmi->sb_type >= BLOCK_SIZE_SB8X8 &&
- !(mbmi->ref_frame[0] != INTRA_FRAME &&
+ !(is_inter_block(mbmi) &&
(mbmi->mb_skip_coeff ||
vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_SKIP)))) {
const uint8_t context = vp9_get_pred_context_tx_size(xd);
@@ -2609,14 +2658,14 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
int x, y;
TX_SIZE sz = (cm->tx_mode == TX_MODE_SELECT) ? TX_32X32 : cm->tx_mode;
// The new intra coding scheme requires no change of transform size
- if (mi->mbmi.ref_frame[0] != INTRA_FRAME) {
- if (sz == TX_32X32 && bsize < BLOCK_SIZE_SB32X32)
+ if (is_inter_block(&mi->mbmi)) {
+ if (sz == TX_32X32 && bsize < BLOCK_32X32)
sz = TX_16X16;
- if (sz == TX_16X16 && bsize < BLOCK_SIZE_MB16X16)
+ if (sz == TX_16X16 && bsize < BLOCK_16X16)
sz = TX_8X8;
- if (sz == TX_8X8 && bsize < BLOCK_SIZE_SB8X8)
+ if (sz == TX_8X8 && bsize < BLOCK_8X8)
sz = TX_4X4;
- } else if (bsize >= BLOCK_SIZE_SB8X8) {
+ } else if (bsize >= BLOCK_8X8) {
sz = mbmi->txfm_size;
} else {
sz = TX_4X4;
diff --git a/libvpx/vp9/encoder/vp9_encodeintra.c b/libvpx/vp9/encoder/vp9_encodeintra.c
index d49e53258..edbd2d909 100644
--- a/libvpx/vp9/encoder/vp9_encodeintra.c
+++ b/libvpx/vp9/encoder/vp9_encodeintra.c
@@ -21,7 +21,7 @@ int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) {
x->skip_encode = 0;
mbmi->mode = DC_PRED;
mbmi->ref_frame[0] = INTRA_FRAME;
- mbmi->txfm_size = use_16x16_pred ? (mbmi->sb_type >= BLOCK_SIZE_MB16X16 ?
+ mbmi->txfm_size = use_16x16_pred ? (mbmi->sb_type >= BLOCK_16X16 ?
TX_16X16 : TX_8X8) : TX_4X4;
vp9_encode_intra_block_y(&cpi->common, x, mbmi->sb_type);
return vp9_get_mb_ss(x->plane[0].src_diff);
diff --git a/libvpx/vp9/encoder/vp9_encodemb.c b/libvpx/vp9/encoder/vp9_encodemb.c
index 66e35a991..40b0a4e5a 100644
--- a/libvpx/vp9/encoder/vp9_encodemb.c
+++ b/libvpx/vp9/encoder/vp9_encodemb.c
@@ -47,6 +47,27 @@ static void inverse_transform_b_4x4_add(MACROBLOCKD *xd, int eob,
xd->inv_txm4x4_add(dqcoeff, dest, stride);
}
+static void inverse_transform_b_8x8_add(int eob,
+ int16_t *dqcoeff, uint8_t *dest,
+ int stride) {
+ if (eob <= 1)
+ vp9_short_idct8x8_1_add(dqcoeff, dest, stride);
+ else if (eob <= 10)
+ vp9_short_idct10_8x8_add(dqcoeff, dest, stride);
+ else
+ vp9_short_idct8x8_add(dqcoeff, dest, stride);
+}
+
+static void inverse_transform_b_16x16_add(int eob,
+ int16_t *dqcoeff, uint8_t *dest,
+ int stride) {
+ if (eob <= 1)
+ vp9_short_idct16x16_1_add(dqcoeff, dest, stride);
+ else if (eob <= 10)
+ vp9_short_idct10_16x16_add(dqcoeff, dest, stride);
+ else
+ vp9_short_idct16x16_add(dqcoeff, dest, stride);
+}
static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int plane) {
struct macroblock_plane *const p = &x->plane[plane];
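
The two helpers added above pick an inverse transform by the end-of-block position: eob <= 1 means only the DC coefficient survived quantization, eob <= 10 means the nonzero coefficients all sit in the first few scan positions (the low-frequency corner), and anything larger takes the full transform. The DC-only case is cheap because the 2-D inverse transform of a lone DC coefficient is a constant, so the kernel reduces to one clipped add per pixel; an illustrative stand-alone version (the real vp9_short_idct*_1_add kernels also fold in the transform's internal rounding shifts):

#include <stdint.h>

/* Add a single reconstructed DC value to every pixel of a size x size
 * block, clipping to [0, 255]; illustrative only. */
static void idct_dc_only_add(int dc, uint8_t *dest, int stride, int size) {
  int r, c;
  for (r = 0; r < size; ++r, dest += stride)
    for (c = 0; c < size; ++c) {
      const int v = dest[c] + dc;
      dest[c] = (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
    }
}
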
@@ -120,12 +141,12 @@ static int trellis_get_coeff_context(const int16_t *scan,
return pt;
}
-static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
+static void optimize_b(MACROBLOCK *mb,
int plane, int block, BLOCK_SIZE_TYPE bsize,
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
TX_SIZE tx_size) {
- const int ref = mb->e_mbd.mode_info_context->mbmi.ref_frame[0] != INTRA_FRAME;
MACROBLOCKD *const xd = &mb->e_mbd;
+ const int ref = is_inter_block(&xd->mode_info_context->mbmi);
vp9_token_state tokens[1025][2];
unsigned best_index[1025][2];
const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff,
@@ -214,10 +235,10 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
band = get_coef_band(band_translate, i + 1);
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
rate0 +=
- mb->token_costs[tx_size][type][ref][0][band][pt]
+ mb->token_costs[tx_size][type][ref][band][0][pt]
[tokens[next][0].token];
rate1 +=
- mb->token_costs[tx_size][type][ref][0][band][pt]
+ mb->token_costs[tx_size][type][ref][band][0][pt]
[tokens[next][1].token];
}
UPDATE_RD_COST();
@@ -265,12 +286,12 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
band = get_coef_band(band_translate, i + 1);
if (t0 != DCT_EOB_TOKEN) {
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
- rate0 += mb->token_costs[tx_size][type][ref][!x][band][pt]
+ rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt]
[tokens[next][0].token];
}
if (t1 != DCT_EOB_TOKEN) {
pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache);
- rate1 += mb->token_costs[tx_size][type][ref][!x][band][pt]
+ rate1 += mb->token_costs[tx_size][type][ref][band][!x][pt]
[tokens[next][1].token];
}
}
@@ -303,12 +324,12 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
/* Update the cost of each path if we're past the EOB token. */
if (t0 != DCT_EOB_TOKEN) {
tokens[next][0].rate +=
- mb->token_costs[tx_size][type][ref][1][band][0][t0];
+ mb->token_costs[tx_size][type][ref][band][1][0][t0];
tokens[next][0].token = ZERO_TOKEN;
}
if (t1 != DCT_EOB_TOKEN) {
tokens[next][1].rate +=
- mb->token_costs[tx_size][type][ref][1][band][0][t1];
+ mb->token_costs[tx_size][type][ref][band][1][0][t1];
tokens[next][1].token = ZERO_TOKEN;
}
best_index[i][0] = best_index[i][1] = 0;
@@ -325,8 +346,8 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
error1 = tokens[next][1].error;
t0 = tokens[next][0].token;
t1 = tokens[next][1].token;
- rate0 += mb->token_costs[tx_size][type][ref][0][band][pt][t0];
- rate1 += mb->token_costs[tx_size][type][ref][0][band][pt][t1];
+ rate0 += mb->token_costs[tx_size][type][ref][band][0][pt][t0];
+ rate1 += mb->token_costs[tx_size][type][ref][band][0][pt][t1];
UPDATE_RD_COST();
best = rd_cost1 < rd_cost0;
final_eob = i0 - 1;
@@ -351,7 +372,7 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
}
void vp9_optimize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
- int ss_txfrm_size, VP9_COMMON *cm, MACROBLOCK *mb,
+ int ss_txfrm_size, MACROBLOCK *mb,
struct optimize_ctx *ctx) {
MACROBLOCKD *const xd = &mb->e_mbd;
int x, y;
@@ -359,51 +380,61 @@ void vp9_optimize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
// find current entropy context
txfrm_block_to_raster_xy(xd, bsize, plane, block, ss_txfrm_size, &x, &y);
- optimize_b(cm, mb, plane, block, bsize,
+ optimize_b(mb, plane, block, bsize,
&ctx->ta[plane][x], &ctx->tl[plane][y], ss_txfrm_size / 2);
}
static void optimize_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
int ss_txfrm_size, void *arg) {
const struct encode_b_args* const args = arg;
- vp9_optimize_b(plane, block, bsize, ss_txfrm_size, args->cm, args->x,
- args->ctx);
+ vp9_optimize_b(plane, block, bsize, ss_txfrm_size, args->x, args->ctx);
}
-void vp9_optimize_init(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize,
- struct optimize_ctx *ctx) {
- int p;
-
- for (p = 0; p < MAX_MB_PLANE; p++) {
- const struct macroblockd_plane* const plane = &xd->plane[p];
- const int bwl = b_width_log2(bsize) - plane->subsampling_x;
- const int bhl = b_height_log2(bsize) - plane->subsampling_y;
- const MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
- const TX_SIZE tx_size = p ? get_uv_tx_size(mbmi)
- : mbmi->txfm_size;
- int i, j;
-
- for (i = 0; i < 1 << bwl; i += 1 << tx_size) {
- int c = 0;
- ctx->ta[p][i] = 0;
- for (j = 0; j < 1 << tx_size && !c; j++) {
- c = ctx->ta[p][i] |= plane->above_context[i + j];
- }
- }
- for (i = 0; i < 1 << bhl; i += 1 << tx_size) {
- int c = 0;
- ctx->tl[p][i] = 0;
- for (j = 0; j < 1 << tx_size && !c; j++) {
- c = ctx->tl[p][i] |= plane->left_context[i + j];
- }
- }
+void optimize_init_b(int plane, BLOCK_SIZE_TYPE bsize, void *arg) {
+ const struct encode_b_args* const args = arg;
+ const MACROBLOCKD *xd = &args->x->e_mbd;
+ const struct macroblockd_plane* const pd = &xd->plane[plane];
+ const int bwl = b_width_log2(bsize) - pd->subsampling_x;
+ const int bhl = b_height_log2(bsize) - pd->subsampling_y;
+ const int bw = 1 << bwl, bh = 1 << bhl;
+ const MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
+ const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi) : mbmi->txfm_size;
+ int i;
+
+ switch (tx_size) {
+ case TX_4X4:
+ vpx_memcpy(args->ctx->ta[plane], pd->above_context,
+ sizeof(ENTROPY_CONTEXT) * bw);
+ vpx_memcpy(args->ctx->tl[plane], pd->left_context,
+ sizeof(ENTROPY_CONTEXT) * bh);
+ break;
+ case TX_8X8:
+ for (i = 0; i < bw; i += 2)
+ args->ctx->ta[plane][i] = !!*(uint16_t *)&pd->above_context[i];
+ for (i = 0; i < bh; i += 2)
+ args->ctx->tl[plane][i] = !!*(uint16_t *)&pd->left_context[i];
+ break;
+ case TX_16X16:
+ for (i = 0; i < bw; i += 4)
+ args->ctx->ta[plane][i] = !!*(uint32_t *)&pd->above_context[i];
+ for (i = 0; i < bh; i += 4)
+ args->ctx->tl[plane][i] = !!*(uint32_t *)&pd->left_context[i];
+ break;
+ case TX_32X32:
+ for (i = 0; i < bw; i += 8)
+ args->ctx->ta[plane][i] = !!*(uint64_t *)&pd->above_context[i];
+ for (i = 0; i < bh; i += 8)
+ args->ctx->tl[plane][i] = !!*(uint64_t *)&pd->left_context[i];
+ break;
+ default:
+ assert(0);
}
}
void vp9_optimize_sby(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
struct optimize_ctx ctx;
struct encode_b_args arg = {cm, x, &ctx};
- vp9_optimize_init(&x->e_mbd, bsize, &ctx);
+ optimize_init_b(0, bsize, &arg);
foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0, optimize_block, &arg);
}
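
optimize_init_b() above exploits the layout of the entropy contexts: they are stored as one char per 4x4 column/row, while a transform block only needs a single per-block flag, so the TX_8X8/TX_16X16/TX_32X32 cases test 2, 4, or 8 adjacent contexts at once by reading them as a single 16-, 32-, or 64-bit word and collapsing it with !!. A portable equivalent, assuming libvpx's typedef of ENTROPY_CONTEXT as char, is a plain OR-reduction:

typedef char ENTROPY_CONTEXT;  /* as in vp9_entropy.h */

/* Collapse each run of 'step' per-4x4 contexts (step = 1 << tx_size)
 * into one any-nonzero flag, written at the run's first position. */
static void merge_contexts(const ENTROPY_CONTEXT *src, ENTROPY_CONTEXT *dst,
                           int n, int step) {
  int i, j;
  for (i = 0; i < n; i += step) {
    ENTROPY_CONTEXT any = 0;
    for (j = 0; j < step; ++j)
      any |= src[i + j];
    dst[i] = !!any;
  }
}

The wide-load version trades this inner loop for one read, at the cost of type-punning through casts.
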
@@ -411,7 +442,10 @@ void vp9_optimize_sbuv(VP9_COMMON *const cm, MACROBLOCK *x,
BLOCK_SIZE_TYPE bsize) {
struct optimize_ctx ctx;
struct encode_b_args arg = {cm, x, &ctx};
- vp9_optimize_init(&x->e_mbd, bsize, &ctx);
+ int i;
+ for (i = 1; i < MAX_MB_PLANE; ++i)
+ optimize_init_b(i, bsize, &arg);
+
foreach_transformed_block_uv(&x->e_mbd, bsize, optimize_block, &arg);
}
@@ -504,7 +538,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
xform_quant(plane, block, bsize, ss_txfrm_size, arg);
if (x->optimize)
- vp9_optimize_b(plane, block, bsize, ss_txfrm_size, args->cm, x, args->ctx);
+ vp9_optimize_b(plane, block, bsize, ss_txfrm_size, x, args->ctx);
if (x->skip_encode)
return;
@@ -516,10 +550,12 @@ static void encode_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
vp9_short_idct32x32_add(dqcoeff, dst, pd->dst.stride);
break;
case TX_16X16:
- vp9_short_idct16x16_add(dqcoeff, dst, pd->dst.stride);
+ inverse_transform_b_16x16_add(pd->eobs[block], dqcoeff, dst,
+ pd->dst.stride);
break;
case TX_8X8:
- vp9_short_idct8x8_add(dqcoeff, dst, pd->dst.stride);
+ inverse_transform_b_8x8_add(pd->eobs[block], dqcoeff, dst,
+ pd->dst.stride);
break;
case TX_4X4:
// this is like vp9_short_idct4x4 but has a special case around eob<=1
@@ -553,7 +589,7 @@ void vp9_encode_sby(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
vp9_subtract_sby(x, bsize);
if (x->optimize)
- vp9_optimize_init(xd, bsize, &ctx);
+ optimize_init_b(0, bsize, &arg);
foreach_transformed_block_in_plane(xd, bsize, 0, encode_block, &arg);
}
@@ -564,8 +600,11 @@ void vp9_encode_sbuv(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
struct encode_b_args arg = {cm, x, &ctx};
vp9_subtract_sbuv(x, bsize);
- if (x->optimize)
- vp9_optimize_init(xd, bsize, &ctx);
+ if (x->optimize) {
+ int i;
+ for (i = 1; i < MAX_MB_PLANE; ++i)
+ optimize_init_b(i, bsize, &arg);
+ }
foreach_transformed_block_uv(xd, bsize, encode_block, &arg);
}
@@ -576,8 +615,12 @@ void vp9_encode_sb(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
struct encode_b_args arg = {cm, x, &ctx};
vp9_subtract_sb(x, bsize);
- if (x->optimize)
- vp9_optimize_init(xd, bsize, &ctx);
+
+ if (x->optimize) {
+ int i;
+ for (i = 0; i < MAX_MB_PLANE; ++i)
+ optimize_init_b(i, bsize, &arg);
+ }
foreach_transformed_block(xd, bsize, encode_block, &arg);
}
@@ -610,7 +653,7 @@ void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
// if (x->optimize)
// vp9_optimize_b(plane, block, bsize, ss_txfrm_size,
- // args->cm, x, args->ctx);
+ // x, args->ctx);
switch (tx_size) {
case TX_32X32:
@@ -661,7 +704,7 @@ void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
pd->dequant, p->zbin_extra, eob, scan, iscan);
if (!x->skip_encode && *eob) {
if (tx_type == DCT_DCT)
- vp9_short_idct16x16_add(dqcoeff, dst, pd->dst.stride);
+ inverse_transform_b_16x16_add(*eob, dqcoeff, dst, pd->dst.stride);
else
vp9_short_iht16x16_add(dqcoeff, dst, pd->dst.stride, tx_type);
}
@@ -690,7 +733,7 @@ void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
pd->dequant, p->zbin_extra, eob, scan, iscan);
if (!x->skip_encode && *eob) {
if (tx_type == DCT_DCT)
- vp9_short_idct8x8_add(dqcoeff, dst, pd->dst.stride);
+ inverse_transform_b_8x8_add(*eob, dqcoeff, dst, pd->dst.stride);
else
vp9_short_iht8x8_add(dqcoeff, dst, pd->dst.stride, tx_type);
}
@@ -699,11 +742,11 @@ void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
tx_type = get_tx_type_4x4(pd->plane_type, xd, block);
scan = get_scan_4x4(tx_type);
iscan = get_iscan_4x4(tx_type);
- if (mbmi->sb_type < BLOCK_SIZE_SB8X8 && plane == 0) {
+ if (mbmi->sb_type < BLOCK_8X8 && plane == 0)
mode = xd->mode_info_context->bmi[block].as_mode;
- } else {
+ else
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
- }
+
xoff = 4 * (block & twmask);
yoff = 4 * (block >> twl);
dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
@@ -725,8 +768,7 @@ void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
// this is like vp9_short_idct4x4 but has a special case around eob<=1
// which is significant (not just an optimization) for the lossless
// case.
- inverse_transform_b_4x4_add(xd, *eob, dqcoeff,
- dst, pd->dst.stride);
+ inverse_transform_b_4x4_add(xd, *eob, dqcoeff, dst, pd->dst.stride);
else
vp9_short_iht4x4_add(dqcoeff, dst, pd->dst.stride, tx_type);
}
diff --git a/libvpx/vp9/encoder/vp9_encodemb.h b/libvpx/vp9/encoder/vp9_encodemb.h
index defaa48a3..f647fd979 100644
--- a/libvpx/vp9/encoder/vp9_encodemb.h
+++ b/libvpx/vp9/encoder/vp9_encodemb.h
@@ -33,10 +33,8 @@ struct encode_b_args {
struct optimize_ctx *ctx;
};
-void vp9_optimize_init(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize,
- struct optimize_ctx *ctx);
void vp9_optimize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
- int ss_txfrm_size, VP9_COMMON *cm, MACROBLOCK *x,
+ int ss_txfrm_size, MACROBLOCK *x,
struct optimize_ctx *ctx);
void vp9_optimize_sby(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
void vp9_optimize_sbuv(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
diff --git a/libvpx/vp9/encoder/vp9_encodemv.c b/libvpx/vp9/encoder/vp9_encodemv.c
index 2f5e16ccf..1c6fa3a3d 100644
--- a/libvpx/vp9/encoder/vp9_encodemv.c
+++ b/libvpx/vp9/encoder/vp9_encodemv.c
@@ -478,7 +478,7 @@ void vp9_update_nmv_count(VP9_COMP *cpi, MACROBLOCK *x,
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
int idx, idy;
- if (mbmi->sb_type < BLOCK_SIZE_SB8X8) {
+ if (mbmi->sb_type < BLOCK_8X8) {
PARTITION_INFO *pi = x->partition_info;
for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
diff --git a/libvpx/vp9/encoder/vp9_firstpass.c b/libvpx/vp9/encoder/vp9_firstpass.c
index ec2e361ee..6ba2a4fc9 100644
--- a/libvpx/vp9/encoder/vp9_firstpass.c
+++ b/libvpx/vp9/encoder/vp9_firstpass.c
@@ -347,17 +347,17 @@ static void zz_motion_search(VP9_COMP *cpi, MACROBLOCK *x, YV12_BUFFER_CONFIG *r
xd->plane[0].pre[0].buf = recon_buffer->y_buffer + recon_yoffset;
switch (xd->mode_info_context->mbmi.sb_type) {
- case BLOCK_SIZE_SB8X8:
+ case BLOCK_8X8:
vp9_mse8x8(x->plane[0].src.buf, x->plane[0].src.stride,
xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride,
(unsigned int *)(best_motion_err));
break;
- case BLOCK_SIZE_SB16X8:
+ case BLOCK_16X8:
vp9_mse16x8(x->plane[0].src.buf, x->plane[0].src.stride,
xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride,
(unsigned int *)(best_motion_err));
break;
- case BLOCK_SIZE_SB8X16:
+ case BLOCK_8X16:
vp9_mse8x16(x->plane[0].src.buf, x->plane[0].src.stride,
xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride,
(unsigned int *)(best_motion_err));
@@ -403,13 +403,13 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
// override the default variance function to use MSE
switch (xd->mode_info_context->mbmi.sb_type) {
- case BLOCK_SIZE_SB8X8:
+ case BLOCK_8X8:
v_fn_ptr.vf = vp9_mse8x8;
break;
- case BLOCK_SIZE_SB16X8:
+ case BLOCK_16X8:
v_fn_ptr.vf = vp9_mse16x8;
break;
- case BLOCK_SIZE_SB8X16:
+ case BLOCK_8X16:
v_fn_ptr.vf = vp9_mse8x16;
break;
default:
@@ -549,15 +549,15 @@ void vp9_first_pass(VP9_COMP *cpi) {
if (mb_col * 2 + 1 < cm->mi_cols) {
if (mb_row * 2 + 1 < cm->mi_rows) {
- xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_MB16X16;
+ xd->mode_info_context->mbmi.sb_type = BLOCK_16X16;
} else {
- xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_SB16X8;
+ xd->mode_info_context->mbmi.sb_type = BLOCK_16X8;
}
} else {
if (mb_row * 2 + 1 < cm->mi_rows) {
- xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_SB8X16;
+ xd->mode_info_context->mbmi.sb_type = BLOCK_8X16;
} else {
- xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_SB8X8;
+ xd->mode_info_context->mbmi.sb_type = BLOCK_8X8;
}
}
xd->mode_info_context->mbmi.ref_frame[0] = INTRA_FRAME;
@@ -1282,7 +1282,6 @@ static int detect_flash(VP9_COMP *cpi, int offset) {
// Update the motion related elements to the GF arf boost calculation
static void accumulate_frame_motion_stats(
- VP9_COMP *cpi,
FIRSTPASS_STATS *this_frame,
double *this_frame_mv_in_out,
double *mv_in_out_accumulator,
@@ -1377,7 +1376,7 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset,
break;
// Update the motion related elements to the boost calculation
- accumulate_frame_motion_stats(cpi, &this_frame,
+ accumulate_frame_motion_stats(&this_frame,
&this_frame_mv_in_out, &mv_in_out_accumulator,
&abs_mv_in_out_accumulator, &mv_ratio_accumulator);
@@ -1413,7 +1412,7 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset,
break;
// Update the motion related elements to the boost calculation
- accumulate_frame_motion_stats(cpi, &this_frame,
+ accumulate_frame_motion_stats(&this_frame,
&this_frame_mv_in_out, &mv_in_out_accumulator,
&abs_mv_in_out_accumulator, &mv_ratio_accumulator);
@@ -1665,7 +1664,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
flash_detected = detect_flash(cpi, 0);
// Update the motion related elements to the boost calculation
- accumulate_frame_motion_stats(cpi, &next_frame,
+ accumulate_frame_motion_stats(&next_frame,
&this_frame_mv_in_out, &mv_in_out_accumulator,
&abs_mv_in_out_accumulator, &mv_ratio_accumulator);
@@ -2139,8 +2138,7 @@ void vp9_second_pass(VP9_COMP *cpi) {
adjust_active_maxq(cpi->active_worst_quality, tmp_q);
}
#endif
-
- vpx_memset(&this_frame, 0, sizeof(FIRSTPASS_STATS));
+ vp9_zero(this_frame);
if (EOF == input_stats(cpi, &this_frame))
return;
@@ -2318,7 +2316,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
double kf_group_coded_err = 0.0;
double recent_loop_decay[8] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
- vpx_memset(&next_frame, 0, sizeof(next_frame)); // assure clean
+ vp9_zero(next_frame);
vp9_clear_system_state(); // __asm emms;
start_position = cpi->twopass.stats_in;
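
vp9_zero, which replaces the explicit vpx_memset()/memset() calls in this file and in vp9_encodeframe.c and vp9_mbgraph.c, is a sizeof-based wrapper, so the byte count can never drift out of sync with the object's type. A sketch of the usual definition (the real macro lives in vp9/common/vp9_common.h):

#define vp9_zero(dest) vpx_memset(&(dest), 0, sizeof(dest))
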
diff --git a/libvpx/vp9/encoder/vp9_mbgraph.c b/libvpx/vp9/encoder/vp9_mbgraph.c
index 7d6db071d..154d31af6 100644
--- a/libvpx/vp9/encoder/vp9_mbgraph.c
+++ b/libvpx/vp9/encoder/vp9_mbgraph.c
@@ -63,7 +63,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
}
vp9_set_mbmode_and_mvs(x, NEWMV, dst_mv);
- vp9_build_inter_predictors_sby(xd, mb_row, mb_col, BLOCK_SIZE_MB16X16);
+ vp9_build_inter_predictors_sby(xd, mb_row, mb_col, BLOCK_16X16);
best_err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
xd->plane[0].dst.buf, xd->plane[0].dst.stride,
INT_MAX);
@@ -77,9 +77,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
return best_err;
}
-static int do_16x16_motion_search(VP9_COMP *cpi,
- int_mv *ref_mv, int_mv *dst_mv,
- int buf_mb_y_offset, int mb_y_offset,
+static int do_16x16_motion_search(VP9_COMP *cpi, int_mv *ref_mv, int_mv *dst_mv,
int mb_row, int mb_col) {
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -118,9 +116,7 @@ static int do_16x16_motion_search(VP9_COMP *cpi,
return err;
}
-static int do_16x16_zerozero_search(VP9_COMP *cpi,
- int_mv *dst_mv,
- int buf_mb_y_offset, int mb_y_offset) {
+static int do_16x16_zerozero_search(VP9_COMP *cpi, int_mv *dst_mv) {
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
unsigned int err;
@@ -210,7 +206,6 @@ static void update_mbgraph_mb_stats
g_motion_error = do_16x16_motion_search(cpi,
prev_golden_ref_mv,
&stats->ref[GOLDEN_FRAME].m.mv,
- mb_y_offset, gld_y_offset,
mb_row, mb_col);
stats->ref[GOLDEN_FRAME].err = g_motion_error;
} else {
@@ -224,8 +219,7 @@ static void update_mbgraph_mb_stats
xd->plane[0].pre[0].buf = alt_ref->y_buffer + mb_y_offset;
xd->plane[0].pre[0].stride = alt_ref->y_stride;
a_motion_error = do_16x16_zerozero_search(cpi,
- &stats->ref[ALTREF_FRAME].m.mv,
- mb_y_offset, arf_y_offset);
+ &stats->ref[ALTREF_FRAME].m.mv);
stats->ref[ALTREF_FRAME].err = a_motion_error;
} else {
@@ -248,8 +242,7 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi,
int_mv arf_top_mv, gld_top_mv;
MODE_INFO mi_local;
- // Make sure the mi context starts in a consistent state.
- memset(&mi_local, 0, sizeof(mi_local));
+ vp9_zero(mi_local);
// Set up limit values for motion vectors to prevent them extending outside the UMV borders
arf_top_mv.as_int = 0;
@@ -262,7 +255,7 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi,
xd->plane[0].pre[0].stride = buf->y_stride;
xd->plane[1].dst.stride = buf->uv_stride;
xd->mode_info_context = &mi_local;
- mi_local.mbmi.sb_type = BLOCK_SIZE_MB16X16;
+ mi_local.mbmi.sb_type = BLOCK_16X16;
mi_local.mbmi.ref_frame[0] = LAST_FRAME;
mi_local.mbmi.ref_frame[1] = NONE;
diff --git a/libvpx/vp9/encoder/vp9_mcomp.c b/libvpx/vp9/encoder/vp9_mcomp.c
index 0be98913e..88beee791 100644
--- a/libvpx/vp9/encoder/vp9_mcomp.c
+++ b/libvpx/vp9/encoder/vp9_mcomp.c
@@ -58,7 +58,7 @@ int vp9_init_search_range(VP9_COMP *cpi, int size) {
}
int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2],
- int weight, int ishp) {
+ int weight) {
MV v;
v.row = mv->as_mv.row - ref->as_mv.row;
v.col = mv->as_mv.col - ref->as_mv.col;
@@ -68,7 +68,7 @@ int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2],
}
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2],
- int error_per_bit, int ishp) {
+ int error_per_bit) {
if (mvcost) {
MV v;
v.row = mv->as_mv.row - ref->as_mv.row;
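
With the ishp flag gone (sub-pel precision is now decided at the call sites via xd->allow_high_precision_mv && vp9_use_mv_hp()), mv_err_cost() reduces to a table lookup on the MV difference. A self-contained sketch of that shape, where the joint classifier mirrors the MV_JOINT_* ordering, the tables are assumed pre-offset so negative row/col index correctly, and the plain multiply stands in for the real function's rounding and shifting:

#include <stdint.h>

typedef struct { int16_t row, col; } MV;  /* mirrors vp9/common/vp9_mv.h */

/* Joint class of an MV difference, matching the MV_JOINT_* ordering:
 * 0 both zero, 1 col only, 2 row only, 3 both nonzero. */
static int mv_joint_class(const MV *v) {
  return (v->row != 0) * 2 + (v->col != 0);
}

/* Cost an MV difference: joint-class cost plus per-component table
 * costs, scaled from bits to the RD error domain. Illustrative only. */
static int mv_err_cost_sketch(const MV *diff, const int *joint_cost,
                              int *comp_cost[2], int error_per_bit) {
  const int bits = joint_cost[mv_joint_class(diff)] +
                   comp_cost[0][diff->row] + comp_cost[1][diff->col];
  return bits * error_per_bit;
}
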
@@ -269,7 +269,6 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x,
int maxc, minc, maxr, minr;
int y_stride;
int offset;
- int usehp = xd->allow_high_precision_mv;
uint8_t *y = xd->plane[0].pre[0].buf +
(bestmv->as_mv.row) * xd->plane[0].pre[0].stride +
@@ -300,8 +299,7 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x,
// calculate central point error
besterr = vfp->vf(y, y_stride, z, src_stride, sse1);
*distortion = besterr;
- besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost,
- error_per_bit, xd->allow_high_precision_mv);
+ besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
  // TODO: Each subsequent iteration checks at least one point in common
  // with the last iteration; this could be 2 (if diag selected).
@@ -371,13 +369,7 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x,
tc = bc;
}
- if (xd->allow_high_precision_mv) {
- usehp = vp9_use_mv_hp(&ref_mv->as_mv);
- } else {
- usehp = 0;
- }
-
- if (usehp) {
+ if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv)) {
hstep >>= 1;
while (--eighthiters) {
CHECK_BETTER(left, tr, tc - hstep);
@@ -451,7 +443,6 @@ int vp9_find_best_sub_pixel_comp(MACROBLOCK *x,
int maxc, minc, maxr, minr;
int y_stride;
int offset;
- int usehp = xd->allow_high_precision_mv;
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
uint8_t *y = xd->plane[0].pre[0].buf +
@@ -490,8 +481,7 @@ int vp9_find_best_sub_pixel_comp(MACROBLOCK *x,
comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);
besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
*distortion = besterr;
- besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost,
- error_per_bit, xd->allow_high_precision_mv);
+ besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
  // Each subsequent iteration checks at least one point in common
  // with the last iteration; this could be 2 (if diag selected).
@@ -561,13 +551,7 @@ int vp9_find_best_sub_pixel_comp(MACROBLOCK *x,
tc = bc;
}
- if (xd->allow_high_precision_mv) {
- usehp = vp9_use_mv_hp(&ref_mv->as_mv);
- } else {
- usehp = 0;
- }
-
- if (usehp) {
+ if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv)) {
hstep >>= 1;
while (--eighthiters) {
CHECK_BETTER(left, tr, tc - hstep);
@@ -638,7 +622,6 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x,
int thismse;
int y_stride;
MACROBLOCKD *xd = &x->e_mbd;
- int usehp = xd->allow_high_precision_mv;
uint8_t *y = xd->plane[0].pre[0].buf +
(bestmv->as_mv.row) * xd->plane[0].pre[0].stride +
@@ -654,15 +637,14 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x,
// calculate central point error
bestmse = vfp->vf(y, y_stride, z, src_stride, sse1);
*distortion = bestmse;
- bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit,
- xd->allow_high_precision_mv);
+ bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
// go left then right and check error
this_mv.as_mv.row = startmv.as_mv.row;
this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, src_stride, &sse);
- left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
- xd->allow_high_precision_mv);
+ left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
+ error_per_bit);
if (left < bestmse) {
*bestmv = this_mv;
@@ -674,7 +656,7 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x,
this_mv.as_mv.col += 8;
thismse = vfp->svf_halfpix_h(y, y_stride, z, src_stride, &sse);
right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
- error_per_bit, xd->allow_high_precision_mv);
+ error_per_bit);
if (right < bestmse) {
*bestmv = this_mv;
@@ -687,8 +669,7 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x,
this_mv.as_mv.col = startmv.as_mv.col;
this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, src_stride, &sse);
- up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
- xd->allow_high_precision_mv);
+ up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
if (up < bestmse) {
*bestmv = this_mv;
@@ -699,8 +680,8 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x,
this_mv.as_mv.row += 8;
thismse = vfp->svf_halfpix_v(y, y_stride, z, src_stride, &sse);
- down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
- xd->allow_high_precision_mv);
+ down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
+ error_per_bit);
if (down < bestmse) {
*bestmv = this_mv;
@@ -742,8 +723,8 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x,
break;
}
- diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
- xd->allow_high_precision_mv);
+ diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
+ error_per_bit);
if (diag < bestmse) {
*bestmv = this_mv;
@@ -784,8 +765,8 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x,
src_stride, &sse);
}
- left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
- xd->allow_high_precision_mv);
+ left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
+ error_per_bit);
if (left < bestmse) {
*bestmv = this_mv;
@@ -799,7 +780,7 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x,
SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
z, src_stride, &sse);
right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
- error_per_bit, xd->allow_high_precision_mv);
+ error_per_bit);
if (right < bestmse) {
*bestmv = this_mv;
@@ -822,8 +803,7 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x,
z, src_stride, &sse);
}
- up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
- xd->allow_high_precision_mv);
+ up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
if (up < bestmse) {
*bestmv = this_mv;
@@ -835,8 +815,9 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x,
this_mv.as_mv.row += 4;
thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
z, src_stride, &sse);
- down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
- xd->allow_high_precision_mv);
+ down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
+ error_per_bit);
+
if (down < bestmse) {
*bestmv = this_mv;
@@ -923,8 +904,8 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x,
break;
}
- diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
- xd->allow_high_precision_mv);
+ diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
+ error_per_bit);
if (diag < bestmse) {
*bestmv = this_mv;
@@ -933,12 +914,7 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x,
*sse1 = sse;
}
- if (x->e_mbd.allow_high_precision_mv) {
- usehp = vp9_use_mv_hp(&ref_mv->as_mv);
- } else {
- usehp = 0;
- }
- if (!usehp)
+ if (!(xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv)))
return bestmse;
/* Now do 1/8th pixel */
@@ -968,8 +944,8 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x,
z, src_stride, &sse);
}
- left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
- xd->allow_high_precision_mv);
+ left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
+ error_per_bit);
if (left < bestmse) {
*bestmv = this_mv;
@@ -982,7 +958,7 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x,
thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
z, src_stride, &sse);
right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
- error_per_bit, xd->allow_high_precision_mv);
+ error_per_bit);
if (right < bestmse) {
*bestmv = this_mv;
@@ -1005,8 +981,7 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x,
SP(this_mv.as_mv.col), SP(7), z, src_stride, &sse);
}
- up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
- xd->allow_high_precision_mv);
+ up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
if (up < bestmse) {
*bestmv = this_mv;
@@ -1019,8 +994,8 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x,
thismse = vfp->svf(y, y_stride,
SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
z, src_stride, &sse);
- down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
- xd->allow_high_precision_mv);
+ down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
+ error_per_bit);
if (down < bestmse) {
*bestmv = this_mv;
@@ -1107,8 +1082,8 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x,
break;
}
- diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
- xd->allow_high_precision_mv);
+ diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
+ error_per_bit);
if (diag < bestmse) {
*bestmv = this_mv;
@@ -1153,15 +1128,14 @@ int vp9_find_best_half_pixel_step(MACROBLOCK *x,
// calculate central point error
bestmse = vfp->vf(y, y_stride, z, src_stride, sse1);
*distortion = bestmse;
- bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit,
- xd->allow_high_precision_mv);
+ bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
// go left then right and check error
this_mv.as_mv.row = startmv.as_mv.row;
this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, src_stride, &sse);
- left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
- xd->allow_high_precision_mv);
+ left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
+ error_per_bit);
if (left < bestmse) {
*bestmv = this_mv;
@@ -1173,7 +1147,7 @@ int vp9_find_best_half_pixel_step(MACROBLOCK *x,
this_mv.as_mv.col += 8;
thismse = vfp->svf_halfpix_h(y, y_stride, z, src_stride, &sse);
right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
- error_per_bit, xd->allow_high_precision_mv);
+ error_per_bit);
if (right < bestmse) {
*bestmv = this_mv;
@@ -1186,8 +1160,7 @@ int vp9_find_best_half_pixel_step(MACROBLOCK *x,
this_mv.as_mv.col = startmv.as_mv.col;
this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, src_stride, &sse);
- up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
- xd->allow_high_precision_mv);
+ up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
if (up < bestmse) {
*bestmv = this_mv;
@@ -1198,8 +1171,8 @@ int vp9_find_best_half_pixel_step(MACROBLOCK *x,
this_mv.as_mv.row += 8;
thismse = vfp->svf_halfpix_v(y, y_stride, z, src_stride, &sse);
- down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
- xd->allow_high_precision_mv);
+ down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
+ error_per_bit);
if (down < bestmse) {
*bestmv = this_mv;
@@ -1238,8 +1211,8 @@ int vp9_find_best_half_pixel_step(MACROBLOCK *x,
break;
}
- diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
- xd->allow_high_precision_mv);
+ diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
+ error_per_bit);
if (diag < bestmse) {
*bestmv = this_mv;
@@ -1326,7 +1299,8 @@ int vp9_hex_search
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
// adjust ref_mv to make sure it is within MV range
- clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
+ clamp_mv(&ref_mv->as_mv,
+ x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
br = ref_mv->as_mv.row;
bc = ref_mv->as_mv.col;
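(The clamp_mv() call sites here switch from passing the whole int_mv to passing &ref_mv->as_mv. In my reading of the tree, int_mv is a union that overlays a packed 32-bit as_int word on the two-component MV struct, so .as_mv just selects the struct view; the same projection appears later in this diff for vp9_build_inter_predictor(). A self-contained sketch of the new-style signature, under that assumption:

    #include <stdint.h>

    typedef struct mv {
      int16_t row;
      int16_t col;
    } MV;

    /* The union lets callers copy and compare motion vectors as a single
     * 32-bit word while still exposing the row/col components. */
    typedef union int_mv {
      uint32_t as_int;
      MV as_mv;
    } int_mv;

    static int16_t clamp16(int v, int lo, int hi) {
      return (int16_t)(v < lo ? lo : (v > hi ? hi : v));
    }

    /* New-style signature: operates directly on the MV view. */
    static void clamp_mv(MV *mv, int min_col, int max_col,
                         int min_row, int max_row) {
      mv->col = clamp16(mv->col, min_col, max_col);
      mv->row = clamp16(mv->row, min_row, max_row);
    }

    int main(void) {
      int_mv ref_mv;
      ref_mv.as_mv.row = 500;
      ref_mv.as_mv.col = -500;
      clamp_mv(&ref_mv.as_mv, -64, 64, -64, 64);  /* as at the call sites */
      return ref_mv.as_mv.row == 64 && ref_mv.as_mv.col == -64 ? 0 : 1;
    }
)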
@@ -1482,7 +1456,8 @@ int vp9_diamond_search_sad_c(MACROBLOCK *x,
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
- clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
+ clamp_mv(&ref_mv->as_mv,
+ x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
ref_row = ref_mv->as_mv.row;
ref_col = ref_mv->as_mv.col;
*num00 = 0;
@@ -1580,11 +1555,9 @@ int vp9_diamond_search_sad_c(MACROBLOCK *x,
if (bestsad == INT_MAX)
return INT_MAX;
- return
- fn_ptr->vf(what, what_stride, best_address, in_what_stride,
- (unsigned int *)(&thissad)) +
- mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
- xd->allow_high_precision_mv);
+ return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
+ (unsigned int *)(&thissad)) + mv_err_cost(&this_mv, center_mv, mvjcost,
+ mvcost, x->errorperbit);
}
int vp9_diamond_search_sadx4(MACROBLOCK *x,
@@ -1624,7 +1597,8 @@ int vp9_diamond_search_sadx4(MACROBLOCK *x,
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
- clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
+ clamp_mv(&ref_mv->as_mv,
+ x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
ref_row = ref_mv->as_mv.row;
ref_col = ref_mv->as_mv.col;
*num00 = 0;
@@ -1754,11 +1728,9 @@ int vp9_diamond_search_sadx4(MACROBLOCK *x,
if (bestsad == INT_MAX)
return INT_MAX;
- return
- fn_ptr->vf(what, what_stride, best_address, in_what_stride,
- (unsigned int *)(&thissad)) +
- mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
- xd->allow_high_precision_mv);
+ return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
+ (unsigned int *)(&thissad)) + mv_err_cost(&this_mv,
+ center_mv, mvjcost, mvcost, x->errorperbit);
}
/* do_refine: If last step (1-away) of n-step search doesn't pick the center
@@ -1914,8 +1886,7 @@ int vp9_full_search_sad_c(MACROBLOCK *x, int_mv *ref_mv,
return
fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
(unsigned int *)(&thissad)) +
- mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
- xd->allow_high_precision_mv);
+ mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit);
else
return INT_MAX;
}
@@ -2042,8 +2013,7 @@ int vp9_full_search_sadx3(MACROBLOCK *x, int_mv *ref_mv,
return
fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
(unsigned int *)(&thissad)) +
- mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
- xd->allow_high_precision_mv);
+ mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit);
else
return INT_MAX;
}
@@ -2197,8 +2167,7 @@ int vp9_full_search_sadx8(MACROBLOCK *x, int_mv *ref_mv,
return
fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
(unsigned int *)(&thissad)) +
- mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
- xd->allow_high_precision_mv);
+ mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit);
else
return INT_MAX;
}
@@ -2274,8 +2243,7 @@ int vp9_refining_search_sad_c(MACROBLOCK *x,
return
fn_ptr->vf(what, what_stride, best_address, in_what_stride,
(unsigned int *)(&thissad)) +
- mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
- xd->allow_high_precision_mv);
+ mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit);
else
return INT_MAX;
}
@@ -2381,8 +2349,7 @@ int vp9_refining_search_sadx4(MACROBLOCK *x,
return
fn_ptr->vf(what, what_stride, best_address, in_what_stride,
(unsigned int *)(&thissad)) +
- mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
- xd->allow_high_precision_mv);
+ mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit);
else
return INT_MAX;
}
@@ -2472,12 +2439,10 @@ int vp9_refining_search_8p_c(MACROBLOCK *x,
if (bestsad < INT_MAX) {
// FIXME(rbultje, yunqing): add full-pixel averaging variance functions
// so we don't have to use the subpixel with xoff=0,yoff=0 here.
- int besterr = fn_ptr->svaf(best_address, in_what_stride, 0, 0,
+ return fn_ptr->svaf(best_address, in_what_stride, 0, 0,
what, what_stride, (unsigned int *)(&thissad),
second_pred) +
- mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
- xd->allow_high_precision_mv);
- return besterr;
+ mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit);
} else {
return INT_MAX;
}
diff --git a/libvpx/vp9/encoder/vp9_mcomp.h b/libvpx/vp9/encoder/vp9_mcomp.h
index c13ea7597..097d33c65 100644
--- a/libvpx/vp9/encoder/vp9_mcomp.h
+++ b/libvpx/vp9/encoder/vp9_mcomp.h
@@ -25,7 +25,7 @@
void vp9_clamp_mv_min_max(MACROBLOCK *x, int_mv *ref_mv);
int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost,
- int *mvcost[2], int weight, int ishp);
+ int *mvcost[2], int weight);
void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride);
void vp9_init3smotion_compensation(MACROBLOCK *x, int stride);
diff --git a/libvpx/vp9/encoder/vp9_onyx_if.c b/libvpx/vp9/encoder/vp9_onyx_if.c
index e5f1a5c2c..db039959f 100644
--- a/libvpx/vp9/encoder/vp9_onyx_if.c
+++ b/libvpx/vp9/encoder/vp9_onyx_if.c
@@ -243,16 +243,17 @@ void vp9_initialize_enc() {
static void setup_features(VP9_COMP *cpi) {
MACROBLOCKD *xd = &cpi->mb.e_mbd;
- struct loopfilter *lf = &xd->lf;
+ struct loopfilter *const lf = &xd->lf;
+ struct segmentation *const seg = &xd->seg;
// Set up default state for MB feature flags
- xd->seg.enabled = 0;
+ seg->enabled = 0;
- xd->seg.update_map = 0;
- xd->seg.update_data = 0;
- vpx_memset(xd->seg.tree_probs, 255, sizeof(xd->seg.tree_probs));
+ seg->update_map = 0;
+ seg->update_data = 0;
+ vpx_memset(seg->tree_probs, 255, sizeof(seg->tree_probs));
- vp9_clearall_segfeatures(&xd->seg);
+ vp9_clearall_segfeatures(seg);
lf->mode_ref_delta_enabled = 0;
lf->mode_ref_delta_update = 0;
@@ -324,6 +325,7 @@ static int compute_qdelta(VP9_COMP *cpi, double qstart, double qtarget) {
static void configure_static_seg_features(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
MACROBLOCKD *xd = &cpi->mb.e_mbd;
+ struct segmentation *seg = &xd->seg;
int high_q = (int)(cpi->avg_q > 48.0);
int qi_delta;
@@ -332,26 +334,26 @@ static void configure_static_seg_features(VP9_COMP *cpi) {
if (cm->frame_type == KEY_FRAME) {
// Clear down the global segmentation map
vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
- xd->seg.update_map = 0;
- xd->seg.update_data = 0;
+ seg->update_map = 0;
+ seg->update_data = 0;
cpi->static_mb_pct = 0;
// Disable segmentation
vp9_disable_segmentation((VP9_PTR)cpi);
// Clear down the segment features.
- vp9_clearall_segfeatures(&xd->seg);
+ vp9_clearall_segfeatures(seg);
} else if (cpi->refresh_alt_ref_frame) {
// If this is an alt ref frame
// Clear down the global segmentation map
vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
- xd->seg.update_map = 0;
- xd->seg.update_data = 0;
+ seg->update_map = 0;
+ seg->update_data = 0;
cpi->static_mb_pct = 0;
// Disable segmentation and individual segment features by default
vp9_disable_segmentation((VP9_PTR)cpi);
- vp9_clearall_segfeatures(&xd->seg);
+ vp9_clearall_segfeatures(seg);
// Scan frames from current to arf frame.
// This function re-enables segmentation if appropriate.
@@ -359,45 +361,45 @@ static void configure_static_seg_features(VP9_COMP *cpi) {
// If segmentation was enabled set those features needed for the
// arf itself.
- if (xd->seg.enabled) {
- xd->seg.update_map = 1;
- xd->seg.update_data = 1;
+ if (seg->enabled) {
+ seg->update_map = 1;
+ seg->update_data = 1;
qi_delta = compute_qdelta(cpi, cpi->avg_q, (cpi->avg_q * 0.875));
- vp9_set_segdata(&xd->seg, 1, SEG_LVL_ALT_Q, (qi_delta - 2));
- vp9_set_segdata(&xd->seg, 1, SEG_LVL_ALT_LF, -2);
+ vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, (qi_delta - 2));
+ vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2);
- vp9_enable_segfeature(&xd->seg, 1, SEG_LVL_ALT_Q);
- vp9_enable_segfeature(&xd->seg, 1, SEG_LVL_ALT_LF);
+ vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);
+ vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_LF);
// Where relevant assume segment data is delta data
- xd->seg.abs_delta = SEGMENT_DELTADATA;
+ seg->abs_delta = SEGMENT_DELTADATA;
}
- } else if (xd->seg.enabled) {
+ } else if (seg->enabled) {
// All other frames if segmentation has been enabled
// First normal frame in a valid gf or alt ref group
if (cpi->frames_since_golden == 0) {
// Set up segment features for normal frames in an arf group
if (cpi->source_alt_ref_active) {
- xd->seg.update_map = 0;
- xd->seg.update_data = 1;
- xd->seg.abs_delta = SEGMENT_DELTADATA;
+ seg->update_map = 0;
+ seg->update_data = 1;
+ seg->abs_delta = SEGMENT_DELTADATA;
qi_delta = compute_qdelta(cpi, cpi->avg_q,
(cpi->avg_q * 1.125));
- vp9_set_segdata(&xd->seg, 1, SEG_LVL_ALT_Q, (qi_delta + 2));
- vp9_enable_segfeature(&xd->seg, 1, SEG_LVL_ALT_Q);
+ vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, (qi_delta + 2));
+ vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);
- vp9_set_segdata(&xd->seg, 1, SEG_LVL_ALT_LF, -2);
- vp9_enable_segfeature(&xd->seg, 1, SEG_LVL_ALT_LF);
+ vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2);
+ vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_LF);
// Segment coding disabled for compred testing
if (high_q || (cpi->static_mb_pct == 100)) {
- vp9_set_segdata(&xd->seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME);
- vp9_enable_segfeature(&xd->seg, 1, SEG_LVL_REF_FRAME);
- vp9_enable_segfeature(&xd->seg, 1, SEG_LVL_SKIP);
+ vp9_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME);
+ vp9_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME);
+ vp9_enable_segfeature(seg, 1, SEG_LVL_SKIP);
}
} else {
// Disable segmentation and clear down features if alt ref
@@ -407,10 +409,10 @@ static void configure_static_seg_features(VP9_COMP *cpi) {
vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
- xd->seg.update_map = 0;
- xd->seg.update_data = 0;
+ seg->update_map = 0;
+ seg->update_data = 0;
- vp9_clearall_segfeatures(&xd->seg);
+ vp9_clearall_segfeatures(seg);
}
} else if (cpi->is_src_frame_alt_ref) {
// Special case where we are coding over the top of a previous
@@ -418,28 +420,28 @@ static void configure_static_seg_features(VP9_COMP *cpi) {
// Segment coding disabled for compred testing
// Enable ref frame features for segment 0 as well
- vp9_enable_segfeature(&xd->seg, 0, SEG_LVL_REF_FRAME);
- vp9_enable_segfeature(&xd->seg, 1, SEG_LVL_REF_FRAME);
+ vp9_enable_segfeature(seg, 0, SEG_LVL_REF_FRAME);
+ vp9_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME);
// All mbs should use ALTREF_FRAME
- vp9_clear_segdata(&xd->seg, 0, SEG_LVL_REF_FRAME);
- vp9_set_segdata(&xd->seg, 0, SEG_LVL_REF_FRAME, ALTREF_FRAME);
- vp9_clear_segdata(&xd->seg, 1, SEG_LVL_REF_FRAME);
- vp9_set_segdata(&xd->seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME);
+ vp9_clear_segdata(seg, 0, SEG_LVL_REF_FRAME);
+ vp9_set_segdata(seg, 0, SEG_LVL_REF_FRAME, ALTREF_FRAME);
+ vp9_clear_segdata(seg, 1, SEG_LVL_REF_FRAME);
+ vp9_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME);
// Skip all MBs if high Q (0,0 mv and skip coeffs)
if (high_q) {
- vp9_enable_segfeature(&xd->seg, 0, SEG_LVL_SKIP);
- vp9_enable_segfeature(&xd->seg, 1, SEG_LVL_SKIP);
+ vp9_enable_segfeature(seg, 0, SEG_LVL_SKIP);
+ vp9_enable_segfeature(seg, 1, SEG_LVL_SKIP);
}
// Enable data update
- xd->seg.update_data = 1;
+ seg->update_data = 1;
} else {
// All other frames.
// No updates.. leave things as they are.
- xd->seg.update_map = 0;
- xd->seg.update_data = 0;
+ seg->update_map = 0;
+ seg->update_data = 0;
}
}
}
@@ -718,7 +720,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->reduce_first_step_size = 0;
sf->auto_mv_step_size = 0;
sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
- sf->comp_inter_joint_search_thresh = BLOCK_SIZE_AB4X4;
+ sf->comp_inter_joint_search_thresh = BLOCK_4X4;
sf->adaptive_rd_thresh = 0;
sf->use_lastframe_partitioning = 0;
sf->tx_size_search_method = USE_FULL_RD;
@@ -731,10 +733,13 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->use_one_partition_size_always = 0;
sf->less_rectangular_check = 0;
sf->use_square_partition_only = 0;
- sf->use_partitions_less_than = 0;
- sf->less_than_block_size = BLOCK_SIZE_MB16X16;
- sf->use_partitions_greater_than = 0;
- sf->greater_than_block_size = BLOCK_SIZE_SB8X8;
+ sf->auto_min_max_partition_size = 0;
+ sf->auto_min_max_partition_interval = 0;
+ sf->auto_min_max_partition_count = 0;
+ // sf->use_max_partition_size = 0;
+ sf->max_partition_size = BLOCK_64X64;
+ // sf->use_min_partition_size = 0;
+ sf->min_partition_size = BLOCK_4X4;
sf->adjust_partitioning_from_last_frame = 0;
sf->last_partitioning_redo_frequency = 4;
sf->disable_splitmv = 0;
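(The defaults hunk above replaces the one-sided use_partitions_less_than / use_partitions_greater_than switch pairs with an explicit [min_partition_size, max_partition_size] range; BLOCK_4X4..BLOCK_64X64 leaves the search unconstrained. A sketch of how such a range would prune a partition search; the enum is a simplified stand-in that omits libvpx's rectangular sizes:

    #include <assert.h>

    /* Names mirror the BLOCK_* constants used above; this is not the
     * full libvpx BLOCK_SIZE_TYPE enum. */
    typedef enum {
      BLOCK_4X4, BLOCK_8X8, BLOCK_16X16, BLOCK_32X32, BLOCK_64X64
    } BLOCK_SIZE_TYPE;

    struct partition_bounds {
      BLOCK_SIZE_TYPE min_partition_size;
      BLOCK_SIZE_TYPE max_partition_size;
    };

    /* One range check replaces the old flag-plus-threshold pairs. */
    static int partition_allowed(const struct partition_bounds *b,
                                 BLOCK_SIZE_TYPE bs) {
      return bs >= b->min_partition_size && bs <= b->max_partition_size;
    }

    int main(void) {
      const struct partition_bounds dflt = { BLOCK_4X4, BLOCK_64X64 };
      const struct partition_bounds capped = { BLOCK_8X8, BLOCK_16X16 };
      assert(partition_allowed(&dflt, BLOCK_32X32));
      assert(!partition_allowed(&capped, BLOCK_32X32));
      return 0;
    }
)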
@@ -745,8 +750,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->use_uv_intra_rd_estimate = 0;
sf->using_small_partition_info = 0;
// Skip any mode not chosen at size < X for all sizes > X
- // Hence BLOCK_SIZE_SB64X64 (skip is off)
- sf->unused_mode_skip_lvl = BLOCK_SIZE_SB64X64;
+ // Hence BLOCK_64X64 (skip is off)
+ sf->unused_mode_skip_lvl = BLOCK_64X64;
#if CONFIG_MULTIPLE_ARF
// Switch segmentation off.
@@ -769,8 +774,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
#endif
sf->use_avoid_tested_higherror = 1;
sf->adaptive_rd_thresh = 1;
- sf->last_chroma_intra_mode = TM_PRED;
-
if (speed == 1) {
sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
sf->less_rectangular_check = 1;
@@ -784,14 +787,20 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
cpi->common.show_frame == 0);
sf->disable_splitmv =
(MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0;
- sf->unused_mode_skip_lvl = BLOCK_SIZE_SB32X32;
+ sf->unused_mode_skip_lvl = BLOCK_32X32;
sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH |
FLAG_SKIP_INTRA_BESTINTER |
- FLAG_SKIP_COMP_BESTINTRA;
- sf->last_chroma_intra_mode = H_PRED;
+ FLAG_SKIP_COMP_BESTINTRA |
+ FLAG_SKIP_INTRA_LOWVAR;
+ sf->use_uv_intra_rd_estimate = 1;
sf->use_rd_breakout = 1;
sf->skip_encode_sb = 1;
sf->auto_mv_step_size = 1;
+
+ sf->auto_min_max_partition_size = 1;
+ // sf->use_max_partition_size = 1;
+ // sf->use_min_partition_size = 1;
+ sf->auto_min_max_partition_interval = 1;
}
if (speed == 2) {
sf->adjust_thresholds_by_speed = 1;
@@ -801,7 +810,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->use_lastframe_partitioning = 1;
sf->adjust_partitioning_from_last_frame = 1;
sf->last_partitioning_redo_frequency = 3;
- sf->unused_mode_skip_lvl = BLOCK_SIZE_SB32X32;
+ sf->unused_mode_skip_lvl = BLOCK_32X32;
sf->tx_size_search_method = ((cpi->common.frame_type == KEY_FRAME ||
cpi->common.intra_only ||
cpi->common.show_frame == 0) ?
@@ -810,11 +819,13 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH |
FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA |
- FLAG_SKIP_COMP_REFMISMATCH;
+ FLAG_SKIP_COMP_REFMISMATCH |
+ FLAG_SKIP_INTRA_LOWVAR |
+ FLAG_EARLY_TERMINATE;
sf->last_chroma_intra_mode = DC_PRED;
+ sf->use_uv_intra_rd_estimate = 1;
sf->use_rd_breakout = 1;
sf->skip_encode_sb = 1;
- sf->use_uv_intra_rd_estimate = 1;
sf->using_small_partition_info = 1;
sf->disable_splitmv =
(MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0;
@@ -831,7 +842,9 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH |
FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA |
- FLAG_SKIP_COMP_REFMISMATCH;
+ FLAG_SKIP_COMP_REFMISMATCH |
+ FLAG_SKIP_INTRA_LOWVAR |
+ FLAG_EARLY_TERMINATE;
sf->use_rd_breakout = 1;
sf->skip_encode_sb = 1;
sf->disable_splitmv = 1;
@@ -840,7 +853,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
if (speed == 4) {
sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
sf->use_one_partition_size_always = 1;
- sf->always_this_block_size = BLOCK_SIZE_MB16X16;
+ sf->always_this_block_size = BLOCK_16X16;
sf->tx_size_search_method = ((cpi->common.frame_type == KEY_FRAME ||
cpi->common.intra_only ||
cpi->common.show_frame == 0) ?
@@ -849,7 +862,9 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH |
FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA |
- FLAG_SKIP_COMP_REFMISMATCH;
+ FLAG_SKIP_COMP_REFMISMATCH |
+ FLAG_SKIP_INTRA_LOWVAR |
+ FLAG_EARLY_TERMINATE;
sf->use_rd_breakout = 1;
sf->optimize_coefficients = 0;
sf->auto_mv_step_size = 1;
@@ -861,15 +876,15 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
/*
if (speed == 2) {
sf->first_step = 0;
- sf->comp_inter_joint_search_thresh = BLOCK_SIZE_SB8X8;
- sf->use_partitions_less_than = 1;
- sf->less_than_block_size = BLOCK_SIZE_MB16X16;
+ sf->comp_inter_joint_search_thresh = BLOCK_8X8;
+ sf->use_max_partition_size = 1;
+ sf->max_partition_size = BLOCK_16X16;
}
if (speed == 3) {
sf->first_step = 0;
- sf->comp_inter_joint_search_thresh = BLOCK_SIZE_SB8X8;
- sf->use_partitions_greater_than = 1;
- sf->greater_than_block_size = BLOCK_SIZE_SB8X8;
+ sf->comp_inter_joint_search_thresh = BLOCK_8X8;
+ sf->use_min_partition_size = 1;
+ sf->min_partition_size = BLOCK_8X8;
}
*/
@@ -1383,7 +1398,7 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
cm = &cpi->common;
- vpx_memset(cpi, 0, sizeof(VP9_COMP));
+ vp9_zero(*cpi);
if (setjmp(cm->error.jmp)) {
VP9_PTR ptr = ctx.ptr;
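(vpx_memset(cpi, 0, sizeof(VP9_COMP)) becomes vp9_zero(*cpi); as I recall vp9_common.h, vp9_zero is a one-line macro that derives the size from its argument, which removes the risk of the sizeof type drifting from the pointer type. A sketch under that assumption:

    #include <string.h>

    /* Assumed shape of the macro (check vp9_common.h in this tree):
     * zero an lvalue using its own size. */
    #define vp9_zero(dest) memset(&(dest), 0, sizeof(dest))

    struct big_state { int counters[4][16]; double rates[8]; };

    int main(void) {
      struct big_state s;
      int threshes[4][7];
      vp9_zero(s);         /* whole struct, no type name repeated */
      vp9_zero(threshes);  /* whole 2-D array, as with rd_tx_select_threshes
                            * later in this file */
      return s.counters[0][0] + threshes[3][6];
    }
)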
@@ -1833,7 +1848,10 @@ void vp9_remove_compressor(VP9_PTR *ptr) {
{
printf("\n_pick_loop_filter_level:%d\n", cpi->time_pick_lpf / 1000);
printf("\n_frames recive_data encod_mb_row compress_frame Total\n");
- printf("%6d %10ld %10ld %10ld %10ld\n", cpi->common.current_video_frame, cpi->time_receive_data / 1000, cpi->time_encode_mb_row / 1000, cpi->time_compress_data / 1000, (cpi->time_receive_data + cpi->time_compress_data) / 1000);
+ printf("%6d %10ld %10ld %10ld %10ld\n", cpi->common.current_video_frame,
+ cpi->time_receive_data / 1000, cpi->time_encode_sb_row / 1000,
+ cpi->time_compress_data / 1000,
+ (cpi->time_receive_data + cpi->time_compress_data) / 1000);
}
#endif
@@ -2406,8 +2424,9 @@ static void update_reference_frames(VP9_COMP * const cpi) {
static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
MACROBLOCKD *xd = &cpi->mb.e_mbd;
+ struct loopfilter *lf = &xd->lf;
if (xd->lossless) {
- xd->lf.filter_level = 0;
+ lf->filter_level = 0;
} else {
struct vpx_usec_timer timer;
@@ -2421,9 +2440,9 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer);
}
- if (xd->lf.filter_level > 0) {
- vp9_set_alt_lf_level(cpi, xd->lf.filter_level);
- vp9_loop_filter_frame(cm, xd, xd->lf.filter_level, 0);
+ if (lf->filter_level > 0) {
+ vp9_set_alt_lf_level(cpi, lf->filter_level);
+ vp9_loop_filter_frame(cm, xd, lf->filter_level, 0);
}
vp9_extend_frame_inner_borders(cm->frame_to_show,
@@ -2513,6 +2532,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
SPEED_FEATURES *sf = &cpi->sf;
unsigned int max_mv_def = MIN(cpi->common.width, cpi->common.height);
+ struct segmentation *seg = &xd->seg;
#if RESET_FOREACH_FILTER
int q_low0;
int q_high0;
@@ -2612,9 +2632,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
setup_features(cpi);
// If segmentation is enabled force a map update for key frames
- if (xd->seg.enabled) {
- xd->seg.update_map = 1;
- xd->seg.update_data = 1;
+ if (seg->enabled) {
+ seg->update_map = 1;
+ seg->update_data = 1;
}
// The alternate reference frame cannot be active for a key frame
@@ -2818,7 +2838,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
}
#endif
loop_count = 0;
- vpx_memset(cpi->rd_tx_select_threshes, 0, sizeof(cpi->rd_tx_select_threshes));
+ vp9_zero(cpi->rd_tx_select_threshes);
if (cm->frame_type != KEY_FRAME) {
/* TODO: Decide this more intelligently */
@@ -3173,7 +3193,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
if (!cpi->common.error_resilient_mode &&
!cpi->common.frame_parallel_decoding_mode) {
vp9_adapt_mode_probs(&cpi->common);
- vp9_adapt_mode_context(&cpi->common);
vp9_adapt_mv_probs(&cpi->common, cpi->mb.e_mbd.allow_high_precision_mv);
}
}
@@ -3994,7 +4013,7 @@ int vp9_set_roimap(VP9_PTR comp, unsigned char *map, unsigned int rows,
unsigned int threshold[MAX_SEGMENTS]) {
VP9_COMP *cpi = (VP9_COMP *) comp;
signed char feature_data[SEG_LVL_MAX][MAX_SEGMENTS];
- MACROBLOCKD *xd = &cpi->mb.e_mbd;
+ struct segmentation *seg = &cpi->mb.e_mbd.seg;
int i;
if (cpi->common.mb_rows != rows || cpi->common.mb_cols != cols)
@@ -4021,14 +4040,14 @@ int vp9_set_roimap(VP9_PTR comp, unsigned char *map, unsigned int rows,
// Enable the loop and quant changes in the feature mask
for (i = 0; i < MAX_SEGMENTS; i++) {
if (delta_q[i])
- vp9_enable_segfeature(&xd->seg, i, SEG_LVL_ALT_Q);
+ vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q);
else
- vp9_disable_segfeature(&xd->seg, i, SEG_LVL_ALT_Q);
+ vp9_disable_segfeature(seg, i, SEG_LVL_ALT_Q);
if (delta_lf[i])
- vp9_enable_segfeature(&xd->seg, i, SEG_LVL_ALT_LF);
+ vp9_enable_segfeature(seg, i, SEG_LVL_ALT_LF);
else
- vp9_disable_segfeature(&xd->seg, i, SEG_LVL_ALT_LF);
+ vp9_disable_segfeature(seg, i, SEG_LVL_ALT_LF);
}
// Initialise the feature data structure
diff --git a/libvpx/vp9/encoder/vp9_onyx_int.h b/libvpx/vp9/encoder/vp9_onyx_int.h
index 0798927bd..c258829c2 100644
--- a/libvpx/vp9/encoder/vp9_onyx_int.h
+++ b/libvpx/vp9/encoder/vp9_onyx_int.h
@@ -77,7 +77,7 @@ typedef struct {
// 0 = ZERO_MV, MV
signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS];
- vp9_coeff_probs_model coef_probs[TX_SIZE_MAX_SB][BLOCK_TYPES];
+ vp9_coeff_probs_model coef_probs[TX_SIZES][BLOCK_TYPES];
vp9_prob y_mode_prob[4][VP9_INTRA_MODES - 1];
vp9_prob uv_mode_prob[VP9_INTRA_MODES][VP9_INTRA_MODES - 1];
@@ -145,6 +145,8 @@ typedef struct {
// const MODE_DEFINITION vp9_mode_order[MAX_MODES] used in the rd code.
typedef enum {
THR_NEARESTMV,
+ THR_DC,
+
THR_NEARESTA,
THR_NEARESTG,
THR_NEWMV,
@@ -152,8 +154,6 @@ typedef enum {
THR_NEARMV,
THR_COMP_NEARESTGA,
- THR_DC,
-
THR_NEWG,
THR_NEWA,
THR_NEARA,
@@ -224,6 +224,10 @@ typedef enum {
// skips oblique intra modes at angles 27, 63, 117, 153 if the best
// intra so far is not one of the neighboring directions
FLAG_SKIP_INTRA_DIRMISMATCH = 16,
+
+ // skips intra modes other than DC_PRED if the source variance
+ // is small
+ FLAG_SKIP_INTRA_LOWVAR = 32,
} MODE_SEARCH_SKIP_LOGIC;
typedef struct {
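(FLAG_SKIP_INTRA_LOWVAR slots into the power-of-two MODE_SEARCH_SKIP_LOGIC values, so several heuristics can be OR-ed into one mode_search_skip_flags word, as the vp9_onyx_if.c hunks above do, and then tested independently. A minimal sketch reusing only the two values visible in this hunk:

    enum {
      FLAG_SKIP_INTRA_DIRMISMATCH = 16,
      FLAG_SKIP_INTRA_LOWVAR = 32
    };

    /* Each heuristic tests its own bit; setting one never disturbs
     * another. */
    static int skip_intra_for_low_variance(int mode_search_skip_flags) {
      return (mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) != 0;
    }

    int main(void) {
      const int flags = FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_LOWVAR;
      return skip_intra_for_low_variance(flags) ? 0 : 1;
    }
)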
@@ -258,10 +262,13 @@ typedef struct {
int unused_mode_skip_lvl;
int reference_masking;
BLOCK_SIZE_TYPE always_this_block_size;
- int use_partitions_greater_than;
- BLOCK_SIZE_TYPE greater_than_block_size;
- int use_partitions_less_than;
- BLOCK_SIZE_TYPE less_than_block_size;
+ int auto_min_max_partition_size;
+ int auto_min_max_partition_interval;
+ int auto_min_max_partition_count;
+ BLOCK_SIZE_TYPE min_partition_size;
+ BLOCK_SIZE_TYPE max_partition_size;
+ // int use_min_partition_size; // not used in code
+ // int use_max_partition_size;
int adjust_partitioning_from_last_frame;
int last_partitioning_redo_frequency;
int disable_splitmv;
@@ -370,9 +377,9 @@ typedef struct VP9_COMP {
unsigned int single_ref_count[REF_CONTEXTS][2][2];
unsigned int comp_ref_count[REF_CONTEXTS][2];
- int64_t rd_tx_select_diff[NB_TXFM_MODES];
+ int64_t rd_tx_select_diff[TX_MODES];
// FIXME(rbultje) can this overflow?
- int rd_tx_select_threshes[4][NB_TXFM_MODES];
+ int rd_tx_select_threshes[4][TX_MODES];
int64_t rd_filter_diff[VP9_SWITCHABLE_FILTERS + 1];
int64_t rd_filter_threshes[4][VP9_SWITCHABLE_FILTERS + 1];
@@ -457,9 +464,9 @@ typedef struct VP9_COMP {
nmv_context_counts NMVcount;
- vp9_coeff_count coef_counts[TX_SIZE_MAX_SB][BLOCK_TYPES];
- vp9_coeff_probs_model frame_coef_probs[TX_SIZE_MAX_SB][BLOCK_TYPES];
- vp9_coeff_stats frame_branch_ct[TX_SIZE_MAX_SB][BLOCK_TYPES];
+ vp9_coeff_count coef_counts[TX_SIZES][BLOCK_TYPES];
+ vp9_coeff_probs_model frame_coef_probs[TX_SIZES][BLOCK_TYPES];
+ vp9_coeff_stats frame_branch_ct[TX_SIZES][BLOCK_TYPES];
int gfu_boost;
int last_boost;
@@ -527,7 +534,7 @@ typedef struct VP9_COMP {
uint64_t time_receive_data;
uint64_t time_compress_data;
uint64_t time_pick_lpf;
- uint64_t time_encode_mb_row;
+ uint64_t time_encode_sb_row;
struct twopass_rc {
unsigned int section_intra_rating;
@@ -619,7 +626,7 @@ typedef struct VP9_COMP {
unsigned int switchable_interp_count[VP9_SWITCHABLE_FILTERS + 1]
[VP9_SWITCHABLE_FILTERS];
- unsigned int txfm_stepdown_count[TX_SIZE_MAX_SB];
+ unsigned int txfm_stepdown_count[TX_SIZES];
int initial_width;
int initial_height;
diff --git a/libvpx/vp9/encoder/vp9_rdopt.c b/libvpx/vp9/encoder/vp9_rdopt.c
index 843cf3f03..2d932500e 100644
--- a/libvpx/vp9/encoder/vp9_rdopt.c
+++ b/libvpx/vp9/encoder/vp9_rdopt.c
@@ -54,6 +54,8 @@ DECLARE_ALIGNED(16, extern const uint8_t,
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
{NEARESTMV, LAST_FRAME, NONE},
+ {DC_PRED, INTRA_FRAME, NONE},
+
{NEARESTMV, ALTREF_FRAME, NONE},
{NEARESTMV, GOLDEN_FRAME, NONE},
{NEWMV, LAST_FRAME, NONE},
@@ -61,8 +63,6 @@ const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
{NEARMV, LAST_FRAME, NONE},
{NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
- {DC_PRED, INTRA_FRAME, NONE},
-
{NEWMV, GOLDEN_FRAME, NONE},
{NEWMV, ALTREF_FRAME, NONE},
{NEARMV, ALTREF_FRAME, NONE},
@@ -109,7 +109,7 @@ static int rd_thresh_block_size_factor[BLOCK_SIZE_TYPES] =
#define MAX_RD_THRESH_FREQ_FACT 32
#define MAX_RD_THRESH_FREQ_INC 1
-static void fill_token_costs(vp9_coeff_count (*c)[BLOCK_TYPES][2],
+static void fill_token_costs(vp9_coeff_cost *c,
vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
int i, j, k, l;
TX_SIZE t;
@@ -120,12 +120,12 @@ static void fill_token_costs(vp9_coeff_count (*c)[BLOCK_TYPES][2],
for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
vp9_prob probs[ENTROPY_NODES];
vp9_model_to_full_probs(p[t][i][j][k][l], probs);
- vp9_cost_tokens((int *)c[t][i][j][0][k][l], probs,
+ vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
vp9_coef_tree);
- vp9_cost_tokens_skip((int *)c[t][i][j][1][k][l], probs,
+ vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
vp9_coef_tree);
- assert(c[t][i][j][0][k][l][DCT_EOB_TOKEN] ==
- c[t][i][j][1][k][l][DCT_EOB_TOKEN]);
+ assert(c[t][i][j][k][0][l][DCT_EOB_TOKEN] ==
+ c[t][i][j][k][1][l][DCT_EOB_TOKEN]);
}
}
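(This hunk swaps the skip/no-skip pair from c[t][i][j][0/1][k][l] to c[t][i][j][k][0/1][l]: with the band index k outermost, both tables for one band sit adjacent in memory, so cost_coeffs() in the next hunks can step through bands with a single pointer increment instead of re-indexing a five-deep array. A reduced-dimension sketch of that access pattern; the sizes are invented, libvpx's real vp9_coeff_cost is larger:

    enum { BANDS = 6, SKIP_STATES = 2, CONTEXTS = 6, TOKENS = 12 };

    /* Per-band block: [skip][context][token], matching the new inner
     * ordering. */
    typedef unsigned int band_costs[SKIP_STATES][CONTEXTS][TOKENS];

    static unsigned int walk_bands(const band_costs *token_costs) {
      unsigned int total = 0;
      int band;
      for (band = 0; band < BANDS; ++band) {
        total += (*token_costs)[0][0][0];  /* one lookup per band */
        ++token_costs;                     /* next band: one pointer bump */
      }
      return total;
    }

    int main(void) {
      static band_costs table[BANDS];      /* zero-initialized */
      return (int)walk_bands(table);
    }
)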
@@ -453,7 +453,7 @@ static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
int *out_rate_sum, int64_t *out_dist_sum,
int *out_skip) {
int t = 4, j, k;
- BLOCK_SIZE_TYPE bs = BLOCK_SIZE_AB4X4;
+ BLOCK_SIZE_TYPE bs = BLOCK_4X4;
struct macroblock_plane *const p = &x->plane[0];
struct macroblockd_plane *const pd = &xd->plane[0];
const int width = plane_block_width(bsize, pd);
@@ -513,14 +513,19 @@ int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
return error;
}
-static const int16_t band_counts[TX_SIZE_MAX_SB][8] = {
- { 1, 2, 3, 4, 3, 16 - 13 },
- { 1, 2, 3, 4, 11, 64 - 21 },
- { 1, 2, 3, 4, 11, 256 - 21 },
- { 1, 2, 3, 4, 11, 1024 - 21 },
+/* The trailing '0' is a terminator which is used inside cost_coeffs() to
+ * decide whether to include the cost of a trailing EOB node or not (i.e. we
+ * can skip this if the last coefficient in this transform block, e.g. the
+ * 16th coefficient in a 4x4 block or the 64th coefficient in an 8x8 block,
+ * is non-zero). */
+static const int16_t band_counts[TX_SIZES][8] = {
+ { 1, 2, 3, 4, 3, 16 - 13, 0 },
+ { 1, 2, 3, 4, 11, 64 - 21, 0 },
+ { 1, 2, 3, 4, 11, 256 - 21, 0 },
+ { 1, 2, 3, 4, 11, 1024 - 21, 0 },
};
-static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
+static INLINE int cost_coeffs(MACROBLOCK *mb,
int plane, int block, PLANE_TYPE type,
ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
TX_SIZE tx_size,
@@ -528,11 +533,11 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
MACROBLOCKD *const xd = &mb->e_mbd;
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
int pt, c, cost;
- const int16_t *band_count = band_counts[tx_size];
+ const int16_t *band_count = &band_counts[tx_size][1];
const int eob = xd->plane[plane].eobs[block];
const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16);
const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
- unsigned int (*token_costs)[COEF_BANDS][PREV_COEF_CONTEXTS]
+ unsigned int (*token_costs)[2][PREV_COEF_CONTEXTS]
[MAX_ENTROPY_TOKENS] = mb->token_costs[tx_size][type][ref];
ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L;
uint8_t token_cache[1024];
@@ -552,13 +557,14 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
cost = token_costs[0][0][pt][DCT_EOB_TOKEN];
c = 0;
} else {
- int v, prev_t, band = 1, band_left = band_count[1];
+ int v, prev_t, band_left = *band_count++;
// dc token
v = qcoeff_ptr[0];
prev_t = vp9_dct_value_tokens_ptr[v].token;
- cost = token_costs[0][0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
+ cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
token_cache[0] = vp9_pt_energy_class[prev_t];
+ ++token_costs;
// ac tokens
for (c = 1; c < eob; c++) {
@@ -568,18 +574,19 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
v = qcoeff_ptr[rc];
t = vp9_dct_value_tokens_ptr[v].token;
pt = get_coef_context(nb, token_cache, c);
- cost += token_costs[!prev_t][band][pt][t] + vp9_dct_value_cost_ptr[v];
+ cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
token_cache[rc] = vp9_pt_energy_class[t];
prev_t = t;
if (!--band_left) {
- band_left = band_count[++band];
+ band_left = *band_count++;
+ ++token_costs;
}
}
// eob token
- if (band < 6) {
+ if (band_left) {
pt = get_coef_context(nb, token_cache, c);
- cost += token_costs[0][band][pt][DCT_EOB_TOKEN];
+ cost += (*token_costs)[0][pt][DCT_EOB_TOKEN];
}
}
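(The walk above shows how the terminator pays off: band_count starts at entry [1], band_left is reloaded from the table each time a band empties, and when the very last coefficient of the block is consumed the reload pulls the trailing 0, so the old "if (band < 6)" end-of-block test collapses to "if (band_left)". A self-contained simulation of just the band bookkeeping, using the 4x4 row of the table above:

    #include <stdio.h>
    #include <stdint.h>

    /* Same values as band_counts[TX_4X4] in the hunk above. */
    static const int16_t band_counts_4x4[8] = { 1, 2, 3, 4, 3, 16 - 13, 0 };

    /* Returns what band_left holds after scanning `eob` coefficients
     * (DC handled before the loop, as in cost_coeffs()); 0 means the
     * block was full, so no EOB token cost is added. Illustrative only. */
    static int band_left_after_scan(int eob) {
      const int16_t *band_count = &band_counts_4x4[1];
      int band_left = *band_count++;   /* first AC band */
      int c;
      for (c = 1; c < eob; c++) {
        if (!--band_left)
          band_left = *band_count++;   /* terminator 0 ends the walk */
      }
      return band_left;
    }

    int main(void) {
      printf("eob=5  -> band_left=%d (EOB token costed)\n",
             band_left_after_scan(5));
      printf("eob=16 -> band_left=%d (EOB token skipped)\n",
             band_left_after_scan(16));
      return 0;
    }
)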
@@ -639,7 +646,7 @@ static void rate_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
txfrm_block_to_raster_xy(xd, bsize, plane, block, args->tx_size * 2, &x_idx,
&y_idx);
- args->rate += cost_coeffs(args->cm, args->x, plane, block,
+ args->rate += cost_coeffs(args->x, plane, block,
xd->plane[plane].plane_type, args->t_above + x_idx,
args->t_left + y_idx, args->tx_size,
args->scan, args->nb);
@@ -831,7 +838,7 @@ static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
int64_t ref_best_rd,
BLOCK_SIZE_TYPE bs) {
const TX_SIZE max_txfm_size = TX_32X32
- - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16);
+ - (bs < BLOCK_32X32) - (bs < BLOCK_16X16);
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
@@ -859,25 +866,25 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
int (*r)[2], int *rate,
int64_t *d, int64_t *distortion,
int *s, int *skip,
- int64_t txfm_cache[NB_TXFM_MODES],
+ int64_t tx_cache[TX_MODES],
BLOCK_SIZE_TYPE bs) {
- const TX_SIZE max_txfm_size = TX_32X32
- - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16);
+ const TX_SIZE max_tx_size = TX_32X32
+ - (bs < BLOCK_32X32) - (bs < BLOCK_16X16);
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
- int64_t rd[TX_SIZE_MAX_SB][2];
+ int64_t rd[TX_SIZES][2];
int n, m;
int s0, s1;
const vp9_prob *tx_probs = get_tx_probs2(xd, &cm->fc.tx_probs);
- for (n = TX_4X4; n <= max_txfm_size; n++) {
+ for (n = TX_4X4; n <= max_tx_size; n++) {
r[n][1] = r[n][0];
if (r[n][0] == INT_MAX)
continue;
- for (m = 0; m <= n - (n == max_txfm_size); m++) {
+ for (m = 0; m <= n - (n == max_tx_size); m++) {
if (m == n)
r[n][1] += vp9_cost_zero(tx_probs[m]);
else
@@ -889,7 +896,7 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
s0 = vp9_cost_bit(skip_prob, 0);
s1 = vp9_cost_bit(skip_prob, 1);
- for (n = TX_4X4; n <= max_txfm_size; n++) {
+ for (n = TX_4X4; n <= max_tx_size; n++) {
if (d[n] == INT64_MAX) {
rd[n][0] = rd[n][1] = INT64_MAX;
continue;
@@ -902,13 +909,13 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
}
}
- if (max_txfm_size == TX_32X32 &&
+ if (max_tx_size == TX_32X32 &&
(cm->tx_mode == ALLOW_32X32 ||
(cm->tx_mode == TX_MODE_SELECT &&
rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
rd[TX_32X32][1] < rd[TX_4X4][1]))) {
mbmi->txfm_size = TX_32X32;
- } else if (max_txfm_size >= TX_16X16 &&
+ } else if (max_tx_size >= TX_16X16 &&
(cm->tx_mode == ALLOW_16X16 ||
cm->tx_mode == ALLOW_32X32 ||
(cm->tx_mode == TX_MODE_SELECT &&
@@ -928,34 +935,34 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
*rate = r[mbmi->txfm_size][cm->tx_mode == TX_MODE_SELECT];
*skip = s[mbmi->txfm_size];
- txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
- txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
- txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0];
- txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0];
- if (max_txfm_size == TX_32X32 &&
+ tx_cache[ONLY_4X4] = rd[TX_4X4][0];
+ tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
+ tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
+ tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
+ if (max_tx_size == TX_32X32 &&
rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
rd[TX_32X32][1] < rd[TX_4X4][1])
- txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
- else if (max_txfm_size >= TX_16X16 &&
+ tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
+ else if (max_tx_size >= TX_16X16 &&
rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
- txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
+ tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
else
- txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
+ tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
rd[TX_4X4][1] : rd[TX_8X8][1];
- if (max_txfm_size == TX_32X32 &&
+ if (max_tx_size == TX_32X32 &&
rd[TX_32X32][1] < rd[TX_16X16][1] &&
rd[TX_32X32][1] < rd[TX_8X8][1] &&
rd[TX_32X32][1] < rd[TX_4X4][1]) {
cpi->txfm_stepdown_count[0]++;
- } else if (max_txfm_size >= TX_16X16 &&
+ } else if (max_tx_size >= TX_16X16 &&
rd[TX_16X16][1] < rd[TX_8X8][1] &&
rd[TX_16X16][1] < rd[TX_4X4][1]) {
- cpi->txfm_stepdown_count[max_txfm_size - TX_16X16]++;
+ cpi->txfm_stepdown_count[max_tx_size - TX_16X16]++;
} else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
- cpi->txfm_stepdown_count[max_txfm_size - TX_8X8]++;
+ cpi->txfm_stepdown_count[max_tx_size - TX_8X8]++;
} else {
- cpi->txfm_stepdown_count[max_txfm_size - TX_4X4]++;
+ cpi->txfm_stepdown_count[max_tx_size - TX_4X4]++;
}
}
@@ -967,16 +974,16 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE_TYPE bs,
int *model_used) {
const TX_SIZE max_txfm_size = TX_32X32
- - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16);
+ - (bs < BLOCK_32X32) - (bs < BLOCK_16X16);
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
- int64_t rd[TX_SIZE_MAX_SB][2];
+ int64_t rd[TX_SIZES][2];
int n, m;
int s0, s1;
- double scale_rd[TX_SIZE_MAX_SB] = {1.73, 1.44, 1.20, 1.00};
- // double scale_r[TX_SIZE_MAX_SB] = {2.82, 2.00, 1.41, 1.00};
+ double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
+ // double scale_r[TX_SIZES] = {2.82, 2.00, 1.41, 1.00};
const vp9_prob *tx_probs = get_tx_probs2(xd, &cm->fc.tx_probs);
@@ -1065,11 +1072,11 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
static void super_block_yrd(VP9_COMP *cpi,
MACROBLOCK *x, int *rate, int64_t *distortion,
int *skip, int64_t *psse, BLOCK_SIZE_TYPE bs,
- int64_t txfm_cache[NB_TXFM_MODES],
+ int64_t txfm_cache[TX_MODES],
int64_t ref_best_rd) {
VP9_COMMON *const cm = &cpi->common;
- int r[TX_SIZE_MAX_SB][2], s[TX_SIZE_MAX_SB];
- int64_t d[TX_SIZE_MAX_SB], sse[TX_SIZE_MAX_SB];
+ int r[TX_SIZES][2], s[TX_SIZES];
+ int64_t d[TX_SIZES], sse[TX_SIZES];
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
@@ -1080,7 +1087,7 @@ static void super_block_yrd(VP9_COMP *cpi,
if (cpi->sf.tx_size_search_method == USE_LARGESTALL ||
(cpi->sf.tx_size_search_method != USE_FULL_RD &&
mbmi->ref_frame[0] == INTRA_FRAME)) {
- vpx_memset(txfm_cache, 0, NB_TXFM_MODES * sizeof(int64_t));
+ vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
ref_best_rd, bs);
if (psse)
@@ -1090,49 +1097,47 @@ static void super_block_yrd(VP9_COMP *cpi,
if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER &&
mbmi->ref_frame[0] > INTRA_FRAME) {
- int model_used[TX_SIZE_MAX_SB] = {1, 1, 1, 1};
- if (bs >= BLOCK_SIZE_SB32X32) {
- if (model_used[TX_32X32]) {
+ int model_used[TX_SIZES] = {1, 1, 1, 1};
+ if (bs >= BLOCK_32X32) {
+ if (model_used[TX_32X32])
model_rd_for_sb_y_tx(cpi, bs, TX_32X32, x, xd,
&r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]);
- } else {
+ else
super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32],
&s[TX_32X32], &sse[TX_32X32], INT64_MAX,
bs, TX_32X32);
- }
}
- if (bs >= BLOCK_SIZE_MB16X16) {
- if (model_used[TX_16X16]) {
+ if (bs >= BLOCK_16X16) {
+ if (model_used[TX_16X16])
model_rd_for_sb_y_tx(cpi, bs, TX_16X16, x, xd,
&r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16]);
- } else {
+ else
super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16],
&s[TX_16X16], &sse[TX_16X16], INT64_MAX,
bs, TX_16X16);
- }
}
- if (model_used[TX_8X8]) {
+ if (model_used[TX_8X8])
model_rd_for_sb_y_tx(cpi, bs, TX_8X8, x, xd,
&r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8]);
- } else {
+ else
super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8],
&sse[TX_8X8], INT64_MAX, bs, TX_8X8);
- }
- if (model_used[TX_4X4]) {
+
+ if (model_used[TX_4X4])
model_rd_for_sb_y_tx(cpi, bs, TX_4X4, x, xd,
&r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4]);
- } else {
+ else
super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4],
&sse[TX_4X4], INT64_MAX, bs, TX_4X4);
- }
+
choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s,
skip, sse, ref_best_rd, bs, model_used);
} else {
- if (bs >= BLOCK_SIZE_SB32X32)
+ if (bs >= BLOCK_32X32)
super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32],
&s[TX_32X32], &sse[TX_32X32], ref_best_rd,
bs, TX_32X32);
- if (bs >= BLOCK_SIZE_MB16X16)
+ if (bs >= BLOCK_16X16)
super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16],
&s[TX_16X16], &sse[TX_16X16], ref_best_rd,
bs, TX_16X16);
@@ -1174,28 +1179,30 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
int *bestrate, int *bestratey,
int64_t *bestdistortion,
- BLOCK_SIZE_TYPE bsize) {
+ BLOCK_SIZE_TYPE bsize,
+ int64_t rd_thresh) {
MB_PREDICTION_MODE mode;
MACROBLOCKD *xd = &x->e_mbd;
- int64_t best_rd = INT64_MAX;
+ int64_t best_rd = rd_thresh;
int rate = 0;
int64_t distortion;
- VP9_COMMON *const cm = &cpi->common;
struct macroblock_plane *p = &x->plane[0];
struct macroblockd_plane *pd = &xd->plane[0];
const int src_stride = p->src.stride;
const int dst_stride = pd->dst.stride;
- uint8_t *src, *dst;
+ uint8_t *src_init = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, ib,
+ p->src.buf, src_stride);
+ uint8_t *dst_init = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, ib,
+ pd->dst.buf, dst_stride);
int16_t *src_diff, *coeff;
ENTROPY_CONTEXT ta[2], tempa[2];
ENTROPY_CONTEXT tl[2], templ[2];
TX_TYPE tx_type = DCT_DCT;
- TX_TYPE best_tx_type = DCT_DCT;
- int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
- int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
+ const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
+ const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
int idx, idy, block;
- DECLARE_ALIGNED(16, int16_t, best_dqcoeff[4][16]);
+ uint8_t best_dst[8 * 8];
assert(ib < 4);
@@ -1223,17 +1230,15 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
int64_t ssz;
const int16_t *scan;
+ uint8_t *src = src_init + idx * 4 + idy * 4 * src_stride;
+ uint8_t *dst = dst_init + idx * 4 + idy * 4 * dst_stride;
block = ib + idy * 2 + idx;
xd->mode_info_context->bmi[block].as_mode = mode;
- src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
- p->src.buf, src_stride);
src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, block,
p->src_diff);
coeff = BLOCK_OFFSET(x->plane[0].coeff, block, 16);
- dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
- pd->dst.buf, dst_stride);
- vp9_predict_intra_block(xd, block, b_width_log2(BLOCK_SIZE_SB8X8),
+ vp9_predict_intra_block(xd, block, 1,
TX_4X4, mode,
x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride,
@@ -1252,12 +1257,14 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
}
scan = get_scan_4x4(get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block));
- ratey += cost_coeffs(cm, x, 0, block, PLANE_TYPE_Y_WITH_DC,
+ ratey += cost_coeffs(x, 0, block, PLANE_TYPE_Y_WITH_DC,
tempa + idx, templ + idy, TX_4X4, scan,
vp9_get_coef_neighbors_handle(scan));
distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff,
block, 16),
16, &ssz) >> 2;
+ if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
+ goto next;
if (tx_type != DCT_DCT)
vp9_short_iht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block, 16),
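(The new "goto next" above abandons an intra mode as soon as its partial rate/distortion total can no longer beat best_rd. The exact RDCOST macro is not shown in this diff; the definition below is my recollection of vp9_onyx_int.h and should be verified against the tree before relying on it:

    #include <stdint.h>
    #include <stdio.h>

    /* Assumed definition: rate weighted by rdmult (rounded, >> 8),
     * distortion scaled by the rddiv shift. */
    #define RDCOST(RM, DM, R, D) \
      (((128 + (int64_t)(R) * (RM)) >> 8) + ((int64_t)(D) << (DM)))

    int main(void) {
      const int rdmult = 300, rddiv = 0;
      const int64_t best_rd = RDCOST(rdmult, rddiv, 1200, 900);
      const int rate_so_far = 1500;      /* partial ratey */
      const int64_t dist_so_far = 800;   /* partial distortion */
      /* The hunk's early-out: a partial sum already at or above best_rd
       * cannot improve, so the rest of the mode evaluation is skipped. */
      if (RDCOST(rdmult, rddiv, rate_so_far, dist_so_far) >= best_rd)
        printf("goto next; /* abandon this intra mode early */\n");
      return 0;
    }
)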
@@ -1277,61 +1284,40 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
*bestdistortion = distortion;
best_rd = this_rd;
*best_mode = mode;
- best_tx_type = tx_type;
vpx_memcpy(a, tempa, sizeof(tempa));
vpx_memcpy(l, templ, sizeof(templ));
- for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
- for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
- block = ib + idy * 2 + idx;
- vpx_memcpy(best_dqcoeff[idy * 2 + idx],
- BLOCK_OFFSET(pd->dqcoeff, block, 16),
- sizeof(best_dqcoeff[0]));
- }
- }
+ for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
+ vpx_memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
+ num_4x4_blocks_wide * 4);
}
+ next:
+ {}
}
- if (x->skip_encode)
+ if (best_rd >= rd_thresh || x->skip_encode)
return best_rd;
- for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
- for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
- block = ib + idy * 2 + idx;
- xd->mode_info_context->bmi[block].as_mode = *best_mode;
- src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
- p->src.buf, src_stride);
- dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
- pd->dst.buf, dst_stride);
-
- vp9_predict_intra_block(xd, block, b_width_log2(BLOCK_SIZE_SB8X8), TX_4X4,
- *best_mode,
- x->skip_encode ? src : dst,
- x->skip_encode ? src_stride : dst_stride,
- dst, dst_stride);
- // inverse transform
- if (best_tx_type != DCT_DCT)
- vp9_short_iht4x4_add(best_dqcoeff[idy * 2 + idx], dst,
- dst_stride, best_tx_type);
- else
- xd->inv_txm4x4_add(best_dqcoeff[idy * 2 + idx], dst,
- dst_stride);
- }
- }
+ for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
+ vpx_memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
+ num_4x4_blocks_wide * 4);
return best_rd;
}
-static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
- int *Rate, int *rate_y,
- int64_t *Distortion, int64_t best_rd) {
+static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP * const cpi,
+ MACROBLOCK * const mb,
+ int * const rate,
+ int * const rate_y,
+ int64_t * const distortion,
+ int64_t best_rd) {
int i, j;
MACROBLOCKD *const xd = &mb->e_mbd;
BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
- int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
- int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
+ const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
+ const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
int idx, idy;
int cost = 0;
- int64_t distortion = 0;
+ int64_t total_distortion = 0;
int tot_rate_y = 0;
int64_t total_rd = 0;
ENTROPY_CONTEXT t_above[4], t_left[4];
@@ -1343,12 +1329,13 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
bmode_costs = mb->mbmode_cost;
+ // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
const int mis = xd->mode_info_stride;
MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry);
- int64_t UNINITIALIZED_IS_SAFE(d);
+ int64_t UNINITIALIZED_IS_SAFE(d), this_rd;
i = idy * 2 + idx;
if (cpi->common.frame_type == KEY_FRAME) {
@@ -1359,11 +1346,16 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
bmode_costs = mb->y_mode_costs[A][L];
}
- total_rd += rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
- t_above + idx, t_left + idy,
- &r, &ry, &d, bsize);
+ this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
+ t_above + idx, t_left + idy,
+ &r, &ry, &d, bsize,
+ best_rd - total_rd);
+ if (this_rd >= best_rd - total_rd)
+ return INT64_MAX;
+
+ total_rd += this_rd;
cost += r;
- distortion += d;
+ total_distortion += d;
tot_rate_y += ry;
mic->bmi[i].as_mode = best_mode;
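(rd_pick_intra_sub_8x8_y_mode now threads a shrinking budget into each sub-block search: the i-th call may spend at most best_rd - total_rd, and a miss makes the whole 8x8 mode return INT64_MAX immediately. The pattern, abstracted; the callback type and toy searcher are hypothetical:

    #include <stdint.h>

    /* Hypothetical per-sub-block searcher: returns the block's RD cost,
     * or anything >= rd_thresh when no mode fits the remaining budget. */
    typedef int64_t (*subblock_search_fn)(int block, int64_t rd_thresh);

    static int64_t search_all_subblocks(subblock_search_fn search,
                                        int n_blocks, int64_t best_rd) {
      int64_t total_rd = 0;
      int i;
      for (i = 0; i < n_blocks; ++i) {
        const int64_t this_rd = search(i, best_rd - total_rd);
        if (this_rd >= best_rd - total_rd)
          return INT64_MAX;  /* cannot beat the best full-block mode */
        total_rd += this_rd;
      }
      return total_rd;
    }

    static int64_t fixed_cost_search(int block, int64_t rd_thresh) {
      const int64_t cost = 100;          /* toy stand-in */
      (void)block;
      return cost < rd_thresh ? cost : INT64_MAX;
    }

    int main(void) {
      /* Four sub-blocks at cost 100 each fit a budget of 500. */
      return search_all_subblocks(fixed_cost_search, 4, 500) == 400 ? 0 : 1;
    }
)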
@@ -1377,19 +1369,19 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
}
}
- *Rate = cost;
+ *rate = cost;
*rate_y = tot_rate_y;
- *Distortion = distortion;
+ *distortion = total_distortion;
xd->mode_info_context->mbmi.mode = mic->bmi[3].as_mode;
- return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
+ return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
}
static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
int *rate, int *rate_tokenonly,
int64_t *distortion, int *skippable,
BLOCK_SIZE_TYPE bsize,
- int64_t txfm_cache[NB_TXFM_MODES],
+ int64_t tx_cache[TX_MODES],
int64_t best_rd) {
MB_PREDICTION_MODE mode;
MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
@@ -1400,14 +1392,13 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
int i;
int *bmode_costs = x->mbmode_cost;
- if (cpi->sf.tx_size_search_method == USE_FULL_RD) {
- for (i = 0; i < NB_TXFM_MODES; i++)
- txfm_cache[i] = INT64_MAX;
- }
+ if (cpi->sf.tx_size_search_method == USE_FULL_RD)
+ for (i = 0; i < TX_MODES; i++)
+ tx_cache[i] = INT64_MAX;
- /* Y Search for 32x32 intra prediction mode */
+ /* Y Search for intra prediction mode */
for (mode = DC_PRED; mode <= TM_PRED; mode++) {
- int64_t local_txfm_cache[NB_TXFM_MODES];
+ int64_t local_tx_cache[TX_MODES];
MODE_INFO *const mic = xd->mode_info_context;
const int mis = xd->mode_info_stride;
@@ -1421,7 +1412,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
x->e_mbd.mode_info_context->mbmi.mode = mode;
super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
- bsize, local_txfm_cache, best_rd);
+ bsize, local_tx_cache, best_rd);
if (this_rate_tokenonly == INT_MAX)
continue;
@@ -1440,11 +1431,11 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
if (cpi->sf.tx_size_search_method == USE_FULL_RD && this_rd < INT64_MAX) {
- for (i = 0; i < NB_TXFM_MODES; i++) {
- int64_t adj_rd = this_rd + local_txfm_cache[i] -
- local_txfm_cache[cpi->common.tx_mode];
- if (adj_rd < txfm_cache[i]) {
- txfm_cache[i] = adj_rd;
+ for (i = 0; i < TX_MODES; i++) {
+ const int64_t adj_rd = this_rd + local_tx_cache[i] -
+ local_tx_cache[cpi->common.tx_mode];
+ if (adj_rd < tx_cache[i]) {
+ tx_cache[i] = adj_rd;
}
}
}
@@ -1537,8 +1528,6 @@ static int64_t rd_sbuv_dcpred(VP9_COMP *cpi, MACROBLOCK *x,
x->intra_uv_mode_cost[cpi->common.frame_type][DC_PRED];
this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
- x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
-
return this_rd;
}
@@ -1609,8 +1598,8 @@ static int labels2mode(MACROBLOCK *x, int i,
MB_MODE_INFO * mbmi = &mic->mbmi;
int cost = 0, thismvcost = 0;
int idx, idy;
- int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
- int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
+ const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
+ const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
/* We have to be careful retrieving previously-encoded motion vectors.
Ones from this macroblock have to be pulled from the BLOCKD array
@@ -1623,12 +1612,11 @@ static int labels2mode(MACROBLOCK *x, int i,
case NEWMV:
this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
thismvcost = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost,
- 102, xd->allow_high_precision_mv);
+ 102);
if (mbmi->ref_frame[1] > 0) {
this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv,
- mvjcost, mvcost, 102,
- xd->allow_high_precision_mv);
+ mvjcost, mvcost, 102);
}
break;
case NEARESTMV:
@@ -1678,11 +1666,12 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
ENTROPY_CONTEXT *ta,
ENTROPY_CONTEXT *tl) {
int k;
- VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
- BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
- const int width = plane_block_width(bsize, &xd->plane[0]);
- const int height = plane_block_height(bsize, &xd->plane[0]);
+ struct macroblockd_plane *const pd = &xd->plane[0];
+ MODE_INFO *const mi = xd->mode_info_context;
+ const BLOCK_SIZE_TYPE bsize = mi->mbmi.sb_type;
+ const int width = plane_block_width(bsize, pd);
+ const int height = plane_block_height(bsize, pd);
int idx, idy;
const int src_stride = x->plane[0].src.stride;
uint8_t* const src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
@@ -1692,39 +1681,33 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
x->plane[0].src_diff);
int16_t* coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, i);
uint8_t* const pre = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
- xd->plane[0].pre[0].buf,
- xd->plane[0].pre[0].stride);
+ pd->pre[0].buf,
+ pd->pre[0].stride);
uint8_t* const dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
- xd->plane[0].dst.buf,
- xd->plane[0].dst.stride);
+ pd->dst.buf,
+ pd->dst.stride);
int64_t thisdistortion = 0, thissse = 0;
int thisrate = 0;
- vp9_build_inter_predictor(pre,
- xd->plane[0].pre[0].stride,
- dst,
- xd->plane[0].dst.stride,
- &xd->mode_info_context->bmi[i].as_mv[0],
+ vp9_build_inter_predictor(pre, pd->pre[0].stride,
+ dst, pd->dst.stride,
+ &mi->bmi[i].as_mv[0].as_mv,
&xd->scale_factor[0],
- width, height, 0, &xd->subpix,
- MV_PRECISION_Q3);
+ width, height, 0, &xd->subpix, MV_PRECISION_Q3);
- if (xd->mode_info_context->mbmi.ref_frame[1] > 0) {
+ if (mi->mbmi.ref_frame[1] > 0) {
uint8_t* const second_pre =
raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
- xd->plane[0].pre[1].buf,
- xd->plane[0].pre[1].stride);
- vp9_build_inter_predictor(second_pre, xd->plane[0].pre[1].stride,
- dst, xd->plane[0].dst.stride,
- &xd->mode_info_context->bmi[i].as_mv[1],
+ pd->pre[1].buf, pd->pre[1].stride);
+ vp9_build_inter_predictor(second_pre, pd->pre[1].stride,
+ dst, pd->dst.stride,
+ &mi->bmi[i].as_mv[1].as_mv,
&xd->scale_factor[1],
- width, height, 1,
- &xd->subpix, MV_PRECISION_Q3);
+ width, height, 1, &xd->subpix, MV_PRECISION_Q3);
}
- vp9_subtract_block(height, width, src_diff, 8,
- src, src_stride,
- dst, xd->plane[0].dst.stride);
+ vp9_subtract_block(height, width, src_diff, 8, src, src_stride,
+ dst, pd->dst.stride);
k = i;
for (idy = 0; idy < height / 4; ++idy) {
@@ -1737,11 +1720,10 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, k);
x->fwd_txm4x4(src_diff, coeff, 16);
x->quantize_b_4x4(x, k, DCT_DCT, 16);
- thisdistortion += vp9_block_error(coeff,
- BLOCK_OFFSET(xd->plane[0].dqcoeff,
- k, 16), 16, &ssz);
+ thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k, 16),
+ 16, &ssz);
thissse += ssz;
- thisrate += cost_coeffs(cm, x, 0, k, PLANE_TYPE_Y_WITH_DC,
+ thisrate += cost_coeffs(x, 0, k, PLANE_TYPE_Y_WITH_DC,
ta + (k & 1),
tl + (k >> 1), TX_4X4,
vp9_default_scan_4x4,
@@ -1836,8 +1818,8 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
int label_mv_thresh;
int segmentyrate = 0;
BLOCK_SIZE_TYPE bsize = mbmi->sb_type;
- int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
- int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
+ const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
+ const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
vp9_variance_fn_ptr_t *v_fn_ptr;
ENTROPY_CONTEXT t_above[2], t_left[2];
BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
@@ -1871,12 +1853,12 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd,
&frame_mv[NEARESTMV][mbmi->ref_frame[0]],
&frame_mv[NEARMV][mbmi->ref_frame[0]],
- i, 0);
+ i, 0, mi_row, mi_col);
if (mbmi->ref_frame[1] > 0)
vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd,
&frame_mv[NEARESTMV][mbmi->ref_frame[1]],
&frame_mv[NEARMV][mbmi->ref_frame[1]],
- i, 1);
+ i, 1, mi_row, mi_col);
// search for the best motion vector on this segment
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
@@ -1984,7 +1966,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
// Should we do a full search (best quality only)
if (cpi->compressor_speed == 0) {
/* Check if mvp_full is within the range. */
- clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
+ clamp_mv(&mvp_full.as_mv, x->mv_col_min, x->mv_col_max,
x->mv_row_min, x->mv_row_max);
thissme = cpi->full_search_sad(x, &mvp_full,
@@ -2204,7 +2186,7 @@ static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
MB_MODE_INFO *mbmi = &mi->mbmi;
int mode_idx;
- vpx_memset(bsi, 0, sizeof(*bsi));
+ vp9_zero(*bsi);
bsi->segment_rd = best_rd;
bsi->ref_mv = best_ref_mv;
@@ -2358,7 +2340,7 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
int_mv *ref_mv,
int_mv *second_ref_mv,
int64_t comp_pred_diff[NB_PREDICTION_TYPES],
- int64_t txfm_size_diff[NB_TXFM_MODES],
+ int64_t tx_size_diff[TX_MODES],
int64_t best_filter_diff[VP9_SWITCHABLE_FILTERS + 1]) {
MACROBLOCKD *const xd = &x->e_mbd;
@@ -2380,7 +2362,7 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
// FIXME(rbultje) does this memcpy the whole array? I believe sizeof()
// doesn't actually work this way
- memcpy(ctx->txfm_rd_diff, txfm_size_diff, sizeof(ctx->txfm_rd_diff));
+ memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
memcpy(ctx->best_filter_diff, best_filter_diff,
sizeof(*best_filter_diff) * (VP9_SWITCHABLE_FILTERS + 1));
}
@@ -2444,7 +2426,7 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
xd->prev_mode_info_context,
frame_type,
mbmi->ref_mvs[frame_type],
- cpi->common.ref_frame_sign_bias);
+ cpi->common.ref_frame_sign_bias, mi_row, mi_col);
// Candidate refinement carried out at encoder and decoder
vp9_find_best_ref_mvs(xd,
@@ -2469,7 +2451,7 @@ static YV12_BUFFER_CONFIG *get_scaled_ref_frame(VP9_COMP *cpi, int ref_frame) {
return scaled_ref_frame;
}
-static INLINE int get_switchable_rate(VP9_COMMON *cm, MACROBLOCK *x) {
+static INLINE int get_switchable_rate(MACROBLOCK *x) {
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
@@ -2575,7 +2557,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
}
*rate_mv = vp9_mv_bit_cost(tmp_mv, &ref_mv,
x->nmvjointcost, x->mvcost,
- 96, xd->allow_high_precision_mv);
+ 96);
if (scaled_ref_frame) {
int i;
for (i = 0; i < MAX_MB_PLANE; i++)
@@ -2663,7 +2645,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
vp9_build_inter_predictor(ref_yv12[!id].buf,
ref_yv12[!id].stride,
second_pred, pw,
- &frame_mv[refs[!id]],
+ &frame_mv[refs[!id]].as_mv,
&xd->scale_factor[!id],
pw, ph, 0,
&xd->subpix, MV_PRECISION_Q3);
@@ -2730,12 +2712,10 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
}
*rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]],
&mbmi->ref_mvs[refs[0]][0],
- x->nmvjointcost, x->mvcost, 96,
- x->e_mbd.allow_high_precision_mv);
+ x->nmvjointcost, x->mvcost, 96);
*rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]],
&mbmi->ref_mvs[refs[1]][0],
- x->nmvjointcost, x->mvcost, 96,
- x->e_mbd.allow_high_precision_mv);
+ x->nmvjointcost, x->mvcost, 96);
vpx_free(second_pred);
}
@@ -2775,46 +2755,36 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int orig_dst_stride[MAX_MB_PLANE];
int rs = 0;
- switch (this_mode) {
+ if (this_mode == NEWMV) {
int rate_mv;
- case NEWMV:
- if (is_comp_pred) {
- // Initialize mv using single prediction mode result.
- frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
- frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
+ if (is_comp_pred) {
+ // Initialize mv using single prediction mode result.
+ frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
+ frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
- if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
- joint_motion_search(cpi, x, bsize, frame_mv,
- mi_row, mi_col, single_newmv, &rate_mv);
- } else {
- rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]],
- &mbmi->ref_mvs[refs[0]][0],
- x->nmvjointcost, x->mvcost, 96,
- x->e_mbd.allow_high_precision_mv);
- rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]],
- &mbmi->ref_mvs[refs[1]][0],
- x->nmvjointcost, x->mvcost, 96,
- x->e_mbd.allow_high_precision_mv);
- }
- if (frame_mv[refs[0]].as_int == INVALID_MV ||
- frame_mv[refs[1]].as_int == INVALID_MV)
- return INT64_MAX;
- *rate2 += rate_mv;
+ if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
+ joint_motion_search(cpi, x, bsize, frame_mv,
+ mi_row, mi_col, single_newmv, &rate_mv);
} else {
- int_mv tmp_mv;
- single_motion_search(cpi, x, bsize, mi_row, mi_col,
- &tmp_mv, &rate_mv);
- *rate2 += rate_mv;
- frame_mv[refs[0]].as_int =
- xd->mode_info_context->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
- single_newmv[refs[0]].as_int = tmp_mv.as_int;
+ rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]],
+ &mbmi->ref_mvs[refs[0]][0],
+ x->nmvjointcost, x->mvcost, 96);
+ rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]],
+ &mbmi->ref_mvs[refs[1]][0],
+ x->nmvjointcost, x->mvcost, 96);
}
- break;
- case NEARMV:
- case NEARESTMV:
- case ZEROMV:
- default:
- break;
+ if (frame_mv[refs[0]].as_int == INVALID_MV ||
+ frame_mv[refs[1]].as_int == INVALID_MV)
+ return INT64_MAX;
+ *rate2 += rate_mv;
+ } else {
+ int_mv tmp_mv;
+ single_motion_search(cpi, x, bsize, mi_row, mi_col, &tmp_mv, &rate_mv);
+ *rate2 += rate_mv;
+ frame_mv[refs[0]].as_int =
+ xd->mode_info_context->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
+ single_newmv[refs[0]].as_int = tmp_mv.as_int;
+ }
}
// if we're near/nearest and mv == 0,0, compare to zeromv
@@ -2856,10 +2826,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
for (i = 0; i < num_refs; ++i) {
cur_mv[i] = frame_mv[refs[i]];
// Clip "next_nearest" so that it does not extend to far out of image
- if (this_mode == NEWMV)
- assert(!clamp_mv2(&cur_mv[i], xd));
- else
- clamp_mv2(&cur_mv[i], xd);
+ if (this_mode != NEWMV)
+ clamp_mv2(&cur_mv[i].as_mv, xd);
if (mv_check_bounds(x, &cur_mv[i]))
return INT64_MAX;
@@ -2918,7 +2886,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
const int is_intpel_interp = intpel_mv;
mbmi->interp_filter = filter;
vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
- rs = get_switchable_rate(cm, x);
+ rs = get_switchable_rate(x);
rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
if (interpolating_intpel_seen && is_intpel_interp) {
@@ -2995,11 +2963,11 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
xd->plane[i].dst.stride = orig_dst_stride[i];
}
}
- // Set the appripriate filter
+ // Set the appropriate filter
mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ?
cm->mcomp_filter_type : *best_filter;
vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
- rs = (cm->mcomp_filter_type == SWITCHABLE ? get_switchable_rate(cm, x) : 0);
+ rs = cm->mcomp_filter_type == SWITCHABLE ? get_switchable_rate(x) : 0;
if (pred_exists) {
if (best_needs_copy) {
@@ -3033,55 +3001,82 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
if (cpi->common.mcomp_filter_type == SWITCHABLE)
- *rate2 += get_switchable_rate(cm, x);
-
- if (cpi->active_map_enabled && x->active_ptr[0] == 0)
- x->skip = 1;
- else if (x->encode_breakout) {
- const BLOCK_SIZE_TYPE y_size = get_plane_block_size(bsize, &xd->plane[0]);
- const BLOCK_SIZE_TYPE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
-
- unsigned int var, sse;
- int threshold = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1] >> 4);
-
-
- if (threshold < x->encode_breakout)
- threshold = x->encode_breakout;
-
- var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
- xd->plane[0].dst.buf, xd->plane[0].dst.stride,
- &sse);
-
- if ((int)sse < threshold) {
- unsigned int q2dc = xd->plane[0].dequant[0];
- // If there is no codeable 2nd order dc
- // or a very small uniform pixel change change
- if ((sse - var < q2dc * q2dc >> 4) ||
- (sse / 2 > var && sse - var < 64)) {
- // Check u and v to make sure skip is ok
- int sse2;
- unsigned int sse2u, sse2v;
- var = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
- x->plane[1].src.stride,
- xd->plane[1].dst.buf,
- xd->plane[1].dst.stride, &sse2u);
- var = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
- x->plane[2].src.stride,
- xd->plane[2].dst.buf,
- xd->plane[2].dst.stride, &sse2v);
- sse2 = sse2u + sse2v;
-
- if (sse2 * 2 < threshold) {
- x->skip = 1;
- *distortion = sse + sse2;
- *rate2 = 500;
-
- // for best yrd calculation
- *rate_uv = 0;
- *distortion_uv = sse2;
-
- *disable_skip = 1;
- this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
+ *rate2 += get_switchable_rate(x);
+
+ if (!is_comp_pred) {
+ if (cpi->active_map_enabled && x->active_ptr[0] == 0)
+ x->skip = 1;
+ else if (x->encode_breakout) {
+ const BLOCK_SIZE_TYPE y_size = get_plane_block_size(bsize, &xd->plane[0]);
+ const BLOCK_SIZE_TYPE uv_size = get_plane_block_size(bsize,
+ &xd->plane[1]);
+ unsigned int var, sse;
+ // Skipping threshold for ac.
+ unsigned int thresh_ac;
+ // The encode_breakout input
+ unsigned int encode_breakout = x->encode_breakout << 4;
+
+ // Calculate threshold according to dequant value.
+ thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
+
+ // Cap the threshold to avoid a big PSNR loss in the low-bitrate case.
+ if (thresh_ac > 36000)
+ thresh_ac = 36000;
+
+ // Use encode_breakout input if it is bigger than internal threshold.
+ if (thresh_ac < encode_breakout)
+ thresh_ac = encode_breakout;
+
+ var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
+ xd->plane[0].dst.buf,
+ xd->plane[0].dst.stride, &sse);
+
+ // Adjust threshold according to partition size.
+ thresh_ac >>= 8 - (b_width_log2_lookup[bsize] +
+ b_height_log2_lookup[bsize]);
+
+ // Y skipping condition check
+ if (sse < thresh_ac || sse == 0) {
+ // Skipping threshold for dc
+ unsigned int thresh_dc;
+
+ thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
+
+ // dc skipping check
+ if ((sse - var) < thresh_dc || sse == var) {
+ unsigned int sse_u, sse_v;
+ unsigned int var_u, var_v;
+
+ var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
+ x->plane[1].src.stride,
+ xd->plane[1].dst.buf,
+ xd->plane[1].dst.stride, &sse_u);
+
+ // U skipping condition check
+ if ((sse_u * 4 < thresh_ac || sse_u == 0) &&
+ (sse_u - var_u < thresh_dc || sse_u == var_u)) {
+ var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
+ x->plane[2].src.stride,
+ xd->plane[2].dst.buf,
+ xd->plane[2].dst.stride, &sse_v);
+
+ // V skipping condition check
+ if ((sse_v * 4 < thresh_ac || sse_v == 0) &&
+ (sse_v - var_v < thresh_dc || sse_v == var_v)) {
+ x->skip = 1;
+
+ *rate2 = 500;
+ *rate_uv = 0;
+
+ // Scaling factor for SSE from spatial domain to frequency domain
+ // is 16. Adjust distortion accordingly.
+ *distortion_uv = (sse_u + sse_v) << 4;
+ *distortion = (sse << 4) + *distortion_uv;
+
+ *disable_skip = 1;
+ this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
+ }
+ }
}
}
}
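
The rewritten breakout proceeds in three stages: derive an AC threshold from the quantizer step (capped at 36000 to limit PSNR loss at low bitrates), scale it by the partition size, and only then test the DC energy (sse - var) against a separate threshold; U and V must pass the same tests at quarter strength. On success the distortion is reported as sse << 4, moving spatial-domain SSE onto the transform-domain scale. A condensed, illustrative sketch of the luma test (the helper and its signature are hypothetical):

    /* Returns nonzero if the luma plane qualifies for the encode breakout. */
    static int luma_breakout_sketch(unsigned int sse, unsigned int var,
                                    const int dq[2], int bwl, int bhl,
                                    unsigned int user_breakout) {
      unsigned int thresh_ac = (unsigned int)(dq[1] * dq[1]) / 9;
      if (thresh_ac > 36000) thresh_ac = 36000;        /* low-bitrate cap */
      if (thresh_ac < (user_breakout << 4)) thresh_ac = user_breakout << 4;
      thresh_ac >>= 8 - (bwl + bhl);                   /* partition scaling */
      if (sse >= thresh_ac && sse != 0)
        return 0;                                      /* too much AC energy */
      {
        const unsigned int thresh_dc = (unsigned int)(dq[0] * dq[0]) >> 6;
        return (sse - var) < thresh_dc || sse == var;  /* DC small enough */
      }
    }
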
@@ -3133,15 +3128,13 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
MACROBLOCKD *const xd = &x->e_mbd;
int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
int y_skip = 0, uv_skip;
- int64_t dist_y = 0, dist_uv = 0, txfm_cache[NB_TXFM_MODES];
-
+ int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 };
x->skip_encode = 0;
- vpx_memset(&txfm_cache, 0, sizeof(txfm_cache));
ctx->skip = 0;
xd->mode_info_context->mbmi.ref_frame[0] = INTRA_FRAME;
if (bsize >= BLOCK_SIZE_SB8X8) {
if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
- &dist_y, &y_skip, bsize, txfm_cache,
+ &dist_y, &y_skip, bsize, tx_cache,
best_rd) >= best_rd) {
*returnrate = INT_MAX;
return;
@@ -3150,8 +3143,8 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
&dist_uv, &uv_skip, bsize);
} else {
y_skip = 0;
- if (rd_pick_intra4x4mby_modes(cpi, x, &rate_y, &rate_y_tokenonly,
- &dist_y, best_rd) >= best_rd) {
+ if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
+ &dist_y, best_rd) >= best_rd) {
*returnrate = INT_MAX;
return;
}
@@ -3163,17 +3156,15 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
*returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 1);
*returndist = dist_y + (dist_uv >> 2);
- memset(ctx->txfm_rd_diff, 0, sizeof(ctx->txfm_rd_diff));
+ vp9_zero(ctx->tx_rd_diff);
} else {
int i;
*returnrate = rate_y + rate_uv +
vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 0);
*returndist = dist_y + (dist_uv >> 2);
- if (cpi->sf.tx_size_search_method == USE_FULL_RD) {
- for (i = 0; i < NB_TXFM_MODES; i++) {
- ctx->txfm_rd_diff[i] = txfm_cache[i] - txfm_cache[cm->tx_mode];
- }
- }
+ if (cpi->sf.tx_size_search_method == USE_FULL_RD)
+ for (i = 0; i < TX_MODES; i++)
+ ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode];
}
ctx->mic = *xd->mode_info_context;
@@ -3189,9 +3180,10 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
VP9_COMMON *cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
+ const struct segmentation *seg = &xd->seg;
const BLOCK_SIZE_TYPE block_size = get_plane_block_size(bsize, &xd->plane[0]);
MB_PREDICTION_MODE this_mode;
- MV_REFERENCE_FRAME ref_frame;
+ MV_REFERENCE_FRAME ref_frame, second_ref_frame;
unsigned char segment_id = xd->mode_info_context->mbmi.segment_id;
int comp_pred, i;
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
@@ -3205,8 +3197,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
cpi->alt_fb_idx};
int64_t best_rd = best_rd_so_far;
int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise
- int64_t best_txfm_rd[NB_TXFM_MODES];
- int64_t best_txfm_diff[NB_TXFM_MODES];
+ int64_t best_tx_rd[TX_MODES];
+ int64_t best_tx_diff[TX_MODES];
int64_t best_pred_diff[NB_PREDICTION_TYPES];
int64_t best_pred_rd[NB_PREDICTION_TYPES];
int64_t best_filter_rd[VP9_SWITCHABLE_FILTERS + 1];
@@ -3222,10 +3214,10 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// MB_PREDICTION_MODE best_inter_mode = ZEROMV;
MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
INTERPOLATIONFILTERTYPE tmp_best_filter = SWITCHABLE;
- int rate_uv_intra[TX_SIZE_MAX_SB], rate_uv_tokenonly[TX_SIZE_MAX_SB];
- int64_t dist_uv[TX_SIZE_MAX_SB];
- int skip_uv[TX_SIZE_MAX_SB];
- MB_PREDICTION_MODE mode_uv[TX_SIZE_MAX_SB];
+ int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
+ int64_t dist_uv[TX_SIZES];
+ int skip_uv[TX_SIZES];
+ MB_PREDICTION_MODE mode_uv[TX_SIZES];
struct scale_factors scale_factor[4];
unsigned int ref_frame_mask = 0;
unsigned int mode_mask = 0;
@@ -3254,7 +3246,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
ctx->frames_with_high_error = 0;
ctx->modes_with_high_error = 0;
- xd->mode_info_context->mbmi.segment_id = segment_id;
estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
&comp_mode_p);
vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
@@ -3262,16 +3253,17 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
for (i = 0; i < NB_PREDICTION_TYPES; ++i)
best_pred_rd[i] = INT64_MAX;
- for (i = 0; i < NB_TXFM_MODES; i++)
- best_txfm_rd[i] = INT64_MAX;
+ for (i = 0; i < TX_MODES; i++)
+ best_tx_rd[i] = INT64_MAX;
for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++)
best_filter_rd[i] = INT64_MAX;
- for (i = 0; i < TX_SIZE_MAX_SB; i++)
+ for (i = 0; i < TX_SIZES; i++)
rate_uv_intra[i] = INT_MAX;
*returnrate = INT_MAX;
- // Create a mask set to 1 for each frame used by a smaller resolution.
+ // Create a mask set to 1 for each reference frame used by a smaller
+ // resolution.
if (cpi->sf.use_avoid_tested_higherror) {
switch (block_size) {
case BLOCK_64X64:
@@ -3321,24 +3313,26 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int rate2 = 0, rate_y = 0, rate_uv = 0;
int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
int skippable;
- int64_t txfm_cache[NB_TXFM_MODES];
+ int64_t tx_cache[TX_MODES];
int i;
int this_skip2 = 0;
int64_t total_sse = INT_MAX;
int early_term = 0;
- for (i = 0; i < NB_TXFM_MODES; ++i)
- txfm_cache[i] = INT64_MAX;
+ for (i = 0; i < TX_MODES; ++i)
+ tx_cache[i] = INT64_MAX;
+ x->skip = 0;
this_mode = vp9_mode_order[mode_index].mode;
ref_frame = vp9_mode_order[mode_index].ref_frame;
+ second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
- // Slip modes that have been masked off but always consider first mode.
- if ( mode_index && (bsize > cpi->sf.unused_mode_skip_lvl) &&
+ // Skip modes that have been masked off but always consider first mode.
+ if (mode_index && (bsize > cpi->sf.unused_mode_skip_lvl) &&
(cpi->unused_mode_skip_mask & (1 << mode_index)) )
continue;
- // Skip if the current refernce frame has been masked off
+ // Skip if the current reference frame has been masked off
if (cpi->sf.reference_masking && !cpi->set_ref_frame_mask &&
(cpi->ref_frame_mask & (1 << ref_frame)))
continue;
@@ -3351,12 +3345,10 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// Do not allow compound prediction if the segment level reference
// frame feature is in use as in this case there can only be one reference.
- if ((vp9_mode_order[mode_index].second_ref_frame > INTRA_FRAME) &&
- vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_REF_FRAME))
+ if ((second_ref_frame > INTRA_FRAME) &&
+ vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
continue;
- x->skip = 0;
-
// Skip some checking based on small partitions' result.
if (x->fast_ms > 1 && !ref_frame)
continue;
@@ -3370,51 +3362,49 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
if (!(mode_mask & (1 << this_mode))) {
continue;
}
- if (vp9_mode_order[mode_index].second_ref_frame != NONE
- && !(ref_frame_mask
- & (1 << vp9_mode_order[mode_index].second_ref_frame))) {
+ if (second_ref_frame != NONE
+ && !(ref_frame_mask & (1 << second_ref_frame))) {
continue;
}
}
mbmi->ref_frame[0] = ref_frame;
- mbmi->ref_frame[1] = vp9_mode_order[mode_index].second_ref_frame;
+ mbmi->ref_frame[1] = second_ref_frame;
if (!(ref_frame == INTRA_FRAME
|| (cpi->ref_frame_flags & flag_list[ref_frame]))) {
continue;
}
- if (!(mbmi->ref_frame[1] == NONE
- || (cpi->ref_frame_flags & flag_list[mbmi->ref_frame[1]]))) {
+ if (!(second_ref_frame == NONE
+ || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) {
continue;
}
- comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
+ comp_pred = second_ref_frame > INTRA_FRAME;
if (comp_pred) {
if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA)
if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME)
continue;
if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH)
- if (vp9_mode_order[mode_index].ref_frame != best_inter_ref_frame &&
- vp9_mode_order[mode_index].second_ref_frame != best_inter_ref_frame)
+ if (ref_frame != best_inter_ref_frame &&
+ second_ref_frame != best_inter_ref_frame)
continue;
}
// TODO(jingning, jkoleszar): scaling reference frame not supported for
// SPLITMV.
- if (mbmi->ref_frame[0] > 0 &&
- (scale_factor[mbmi->ref_frame[0]].x_scale_fp != VP9_REF_NO_SCALE ||
- scale_factor[mbmi->ref_frame[0]].y_scale_fp != VP9_REF_NO_SCALE) &&
+ if (ref_frame > 0 &&
+ (scale_factor[ref_frame].x_scale_fp != VP9_REF_NO_SCALE ||
+ scale_factor[ref_frame].y_scale_fp != VP9_REF_NO_SCALE) &&
this_mode == SPLITMV)
continue;
- if (mbmi->ref_frame[1] > 0 &&
- (scale_factor[mbmi->ref_frame[1]].x_scale_fp != VP9_REF_NO_SCALE ||
- scale_factor[mbmi->ref_frame[1]].y_scale_fp != VP9_REF_NO_SCALE) &&
+ if (second_ref_frame > 0 &&
+ (scale_factor[second_ref_frame].x_scale_fp != VP9_REF_NO_SCALE ||
+ scale_factor[second_ref_frame].y_scale_fp != VP9_REF_NO_SCALE) &&
this_mode == SPLITMV)
continue;
- set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1],
- scale_factor);
+ set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor);
mbmi->mode = this_mode;
mbmi->uv_mode = DC_PRED;
@@ -3431,46 +3421,43 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
continue;
if (comp_pred) {
- if (!(cpi->ref_frame_flags & flag_list[mbmi->ref_frame[1]]))
+ if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
continue;
- set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1],
- scale_factor);
+ set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor);
mode_excluded = mode_excluded
? mode_excluded
: cm->comp_pred_mode == SINGLE_PREDICTION_ONLY;
} else {
- // mbmi->ref_frame[1] = vp9_mode_order[mode_index].ref_frame[1];
- if (ref_frame != INTRA_FRAME) {
- if (mbmi->ref_frame[1] != INTRA_FRAME)
- mode_excluded =
- mode_excluded ?
- mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY;
+ if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) {
+ mode_excluded =
+ mode_excluded ?
+ mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY;
}
}
- // Select predictors
+ // Select prediction reference frames.
for (i = 0; i < MAX_MB_PLANE; i++) {
xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
if (comp_pred)
- xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
+ xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
}
// If the segment reference frame feature is enabled....
// then do nothing if the current ref frame is not allowed..
- if (vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_REF_FRAME) &&
- vp9_get_segdata(&xd->seg, segment_id, SEG_LVL_REF_FRAME) !=
+ if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
+ vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) !=
(int)ref_frame) {
continue;
// If the segment skip feature is enabled....
// then do nothing if the current mode is not allowed..
- } else if (vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_SKIP) &&
+ } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) &&
(this_mode != ZEROMV && ref_frame != INTRA_FRAME)) {
continue;
// Disable this drop out case if the ref frame
// segment level feature is enabled for this segment. This is to
// prevent the possibility that we end up unable to pick any mode.
- } else if (!vp9_segfeature_active(&xd->seg, segment_id,
+ } else if (!vp9_segfeature_active(seg, segment_id,
SEG_LVL_REF_FRAME)) {
// Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
// unless ARNR filtering is enabled in which case we want
@@ -3506,9 +3493,11 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
continue;
*/
+ // I4X4_PRED is only considered for block sizes less than 8x8.
mbmi->txfm_size = TX_4X4;
- rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y,
- &distortion_y, INT64_MAX);
+ if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
+ &distortion_y, best_rd) >= best_rd)
+ continue;
rate2 += rate;
rate2 += intra_cost_penalty;
distortion2 += distortion_y;
@@ -3524,11 +3513,21 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
distortion2 += dist_uv[TX_4X4];
distortion_uv = dist_uv[TX_4X4];
mbmi->uv_mode = mode_uv[TX_4X4];
- txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
- for (i = 0; i < NB_TXFM_MODES; ++i)
- txfm_cache[i] = txfm_cache[ONLY_4X4];
+ tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ for (i = 0; i < TX_MODES; ++i)
+ tx_cache[i] = tx_cache[ONLY_4X4];
} else if (ref_frame == INTRA_FRAME) {
TX_SIZE uv_tx;
+ // Disable intra modes other than DC_PRED for blocks with low variance
+ // Threshold for intra skipping based on source variance
+ // TODO(debargha): Specialize the threshold for super block sizes
+ static const int skip_intra_var_thresh[BLOCK_SIZE_TYPES] = {
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ };
+ if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
+ this_mode != DC_PRED &&
+ x->source_variance < skip_intra_var_thresh[mbmi->sb_type])
+ continue;
// Only search the oblique modes if the best so far is
// one of the neighboring directional modes
if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
@@ -3541,7 +3540,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
continue;
}
super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
- bsize, txfm_cache, best_rd);
+ bsize, tx_cache, best_rd);
if (rate_y == INT_MAX)
continue;
@@ -3564,7 +3563,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
rate2 += intra_cost_penalty;
distortion2 = distortion_y + distortion_uv;
} else if (this_mode == SPLITMV) {
- const int is_comp_pred = mbmi->ref_frame[1] > 0;
+ const int is_comp_pred = second_ref_frame > 0;
int rate;
int64_t distortion;
int64_t this_rd_thresh;
@@ -3574,7 +3573,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int tmp_best_skippable = 0;
int switchable_filter_index;
int_mv *second_ref = is_comp_pred ?
- &mbmi->ref_mvs[mbmi->ref_frame[1]][0] : NULL;
+ &mbmi->ref_mvs[second_ref_frame][0] : NULL;
union b_mode_info tmp_best_bmodes[16];
MB_MODE_INFO tmp_best_mbmode;
PARTITION_INFO tmp_best_partition;
@@ -3586,16 +3585,15 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME)
continue;
if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH)
- if (vp9_mode_order[mode_index].ref_frame != best_inter_ref_frame &&
- vp9_mode_order[mode_index].second_ref_frame !=
- best_inter_ref_frame)
+ if (ref_frame != best_inter_ref_frame &&
+ second_ref_frame != best_inter_ref_frame)
continue;
}
- this_rd_thresh = (mbmi->ref_frame[0] == LAST_FRAME) ?
+ this_rd_thresh = (ref_frame == LAST_FRAME) ?
cpi->rd_threshes[bsize][THR_NEWMV] :
cpi->rd_threshes[bsize][THR_NEWA];
- this_rd_thresh = (mbmi->ref_frame[0] == GOLDEN_FRAME) ?
+ this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
cpi->rd_threshes[bsize][THR_NEWG] : this_rd_thresh;
xd->mode_info_context->mbmi.txfm_size = TX_4X4;
@@ -3610,7 +3608,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
- &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
+ &mbmi->ref_mvs[ref_frame][0],
second_ref,
best_yrd,
&rate, &rate_y, &distortion,
@@ -3622,7 +3620,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
if (tmp_rd == INT64_MAX)
continue;
cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
- rs = get_switchable_rate(cm, x);
+ rs = get_switchable_rate(x);
rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] =
MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS], tmp_rd + rs_rd);
@@ -3672,7 +3670,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// Handles the special case when a filter that is not in the
// switchable list (bilinear, 6-tap) is indicated at the frame level
tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
- &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
+ &mbmi->ref_mvs[ref_frame][0],
second_ref,
best_yrd,
&rate, &rate_y, &distortion,
@@ -3684,7 +3682,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
continue;
} else {
if (cpi->common.mcomp_filter_type == SWITCHABLE) {
- int rs = get_switchable_rate(cm, x);
+ int rs = get_switchable_rate(x);
tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0);
}
tmp_rd = tmp_best_rdu;
@@ -3703,7 +3701,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
distortion2 += distortion;
if (cpi->common.mcomp_filter_type == SWITCHABLE)
- rate2 += get_switchable_rate(cm, x);
+ rate2 += get_switchable_rate(x);
if (!mode_excluded) {
if (is_comp_pred)
@@ -3728,15 +3726,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
skippable = skippable && uv_skippable;
total_sse += uv_sse;
- txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
- for (i = 0; i < NB_TXFM_MODES; ++i)
- txfm_cache[i] = txfm_cache[ONLY_4X4];
+ tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ for (i = 0; i < TX_MODES; ++i)
+ tx_cache[i] = tx_cache[ONLY_4X4];
}
} else {
- compmode_cost = vp9_cost_bit(comp_mode_p,
- mbmi->ref_frame[1] > INTRA_FRAME);
+ compmode_cost = vp9_cost_bit(comp_mode_p, second_ref_frame > INTRA_FRAME);
this_rd = handle_inter_mode(cpi, x, bsize,
- txfm_cache,
+ tx_cache,
&rate2, &distortion2, &skippable,
&rate_y, &distortion_y,
&rate_uv, &distortion_uv,
@@ -3754,10 +3751,10 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// Estimate the reference frame signaling cost and add it
// to the rolling cost variable.
- if (mbmi->ref_frame[1] > INTRA_FRAME) {
- rate2 += ref_costs_comp[mbmi->ref_frame[0]];
+ if (second_ref_frame > INTRA_FRAME) {
+ rate2 += ref_costs_comp[ref_frame];
} else {
- rate2 += ref_costs_single[mbmi->ref_frame[0]];
+ rate2 += ref_costs_single[ref_frame];
}
if (!disable_skip) {
@@ -3766,7 +3763,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// necessary adjustment for rate. Ignore if skip is coded at
// segment level as the cost won't have been added in.
// Is Mb level skip allowed (i.e. not coded at segment level).
- const int mb_skip_allowed = !vp9_segfeature_active(&xd->seg, segment_id,
+ const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
SEG_LVL_SKIP);
if (skippable && bsize >= BLOCK_SIZE_SB8X8) {
@@ -3787,8 +3784,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
rate2 += prob_skip_cost;
}
}
- } else if (mb_skip_allowed && ref_frame != INTRA_FRAME &&
- !xd->lossless) {
+ } else if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
// Add in the cost of the no skip flag.
@@ -3835,7 +3831,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// best_inter_mode = xd->mode_info_context->mbmi.mode;
}
- if (!disable_skip && mbmi->ref_frame[0] == INTRA_FRAME) {
+ if (!disable_skip && ref_frame == INTRA_FRAME) {
for (i = 0; i < NB_PREDICTION_TYPES; ++i)
best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++)
@@ -3848,9 +3844,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
|| distortion2 < mode_distortions[this_mode]) {
mode_distortions[this_mode] = distortion2;
}
- if (frame_distortions[mbmi->ref_frame[0]] == -1
- || distortion2 < frame_distortions[mbmi->ref_frame[0]]) {
- frame_distortions[mbmi->ref_frame[0]] = distortion2;
+ if (frame_distortions[ref_frame] == -1
+ || distortion2 < frame_distortions[ref_frame]) {
+ frame_distortions[ref_frame] = distortion2;
}
}
@@ -3858,8 +3854,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
if (this_rd < best_rd || x->skip) {
if (!mode_excluded) {
// Note index of best mode so far
- const int qstep = xd->plane[0].dequant[1];
-
best_mode_index = mode_index;
if (ref_frame == INTRA_FRAME) {
@@ -3882,9 +3876,19 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// TODO(debargha): enhance this test with a better distortion prediction
// based on qp, activity mask and history
- if (cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE)
- if (ref_frame > INTRA_FRAME && distortion2 * 4 < qstep * qstep)
+ if (cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) {
+ const int qstep = xd->plane[0].dequant[1];
+ // TODO(debargha): Enhance this by specializing for each mode_index
+ int scale = 4;
+ if (x->source_variance < UINT_MAX) {
+ const int var_adjust = (x->source_variance < 16);
+ scale -= var_adjust;
+ }
+ if (ref_frame > INTRA_FRAME &&
+ distortion2 * scale < qstep * qstep) {
early_term = 1;
+ }
+ }
}
#if 0
// Testing this mode gave rise to an improvement in best error score.
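
Hoisting qstep into the FLAG_EARLY_TERMINATE branch and adding the variance-driven scale means flat blocks (source variance below 16) leave the mode loop sooner. Schematically (a sketch; the real code additionally checks that the variance was actually computed before adjusting the scale):

    #include <stdint.h>

    static int early_term_sketch(int64_t distortion, int qstep,
                                 unsigned int source_variance, int is_inter) {
      int scale = 4;
      if (source_variance < 16)
        scale -= 1;  /* flat source: relax the test, terminate sooner */
      return is_inter && distortion * scale < (int64_t)qstep * qstep;
    }
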
@@ -3912,7 +3916,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
}
/* keep record of best compound/single-only prediction */
- if (!disable_skip && mbmi->ref_frame[0] != INTRA_FRAME) {
+ if (!disable_skip && ref_frame != INTRA_FRAME) {
int single_rd, hybrid_rd, single_rate, hybrid_rate;
if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
@@ -3926,10 +3930,10 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
- if (mbmi->ref_frame[1] <= INTRA_FRAME &&
+ if (second_ref_frame <= INTRA_FRAME &&
single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) {
best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd;
- } else if (mbmi->ref_frame[1] > INTRA_FRAME &&
+ } else if (second_ref_frame > INTRA_FRAME &&
single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) {
best_pred_rd[COMP_PREDICTION_ONLY] = single_rd;
}
@@ -3938,7 +3942,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
}
/* keep record of best filter type */
- if (!mode_excluded && !disable_skip && mbmi->ref_frame[0] != INTRA_FRAME &&
+ if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
cm->mcomp_filter_type != BILINEAR) {
int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ?
VP9_SWITCHABLE_FILTERS :
@@ -3958,34 +3962,35 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
}
/* keep record of best txfm size */
- if (bsize < BLOCK_SIZE_SB32X32) {
- if (bsize < BLOCK_SIZE_MB16X16) {
+ if (bsize < BLOCK_32X32) {
+ if (bsize < BLOCK_16X16) {
if (this_mode == SPLITMV || this_mode == I4X4_PRED)
- txfm_cache[ALLOW_8X8] = txfm_cache[ONLY_4X4];
- txfm_cache[ALLOW_16X16] = txfm_cache[ALLOW_8X8];
+ tx_cache[ALLOW_8X8] = tx_cache[ONLY_4X4];
+ tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
}
- txfm_cache[ALLOW_32X32] = txfm_cache[ALLOW_16X16];
+ tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
}
if (!mode_excluded && this_rd != INT64_MAX) {
- for (i = 0; i < NB_TXFM_MODES; i++) {
+ for (i = 0; i < TX_MODES; i++) {
int64_t adj_rd = INT64_MAX;
if (this_mode != I4X4_PRED) {
- adj_rd = this_rd + txfm_cache[i] - txfm_cache[cm->tx_mode];
+ adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
} else {
adj_rd = this_rd;
}
- if (adj_rd < best_txfm_rd[i])
- best_txfm_rd[i] = adj_rd;
+ if (adj_rd < best_tx_rd[i])
+ best_tx_rd[i] = adj_rd;
}
}
if (early_term)
break;
- if (x->skip && !mode_excluded)
+ if (x->skip && !comp_pred)
break;
}
+
if (best_rd >= best_rd_so_far)
return INT64_MAX;
@@ -4044,7 +4049,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
(cm->mcomp_filter_type == best_mbmode.interp_filter) ||
(best_mbmode.ref_frame[0] == INTRA_FRAME));
- // Updating rd_thresh_freq_fact[] here means that the differnt
+ // Updating rd_thresh_freq_fact[] here means that the different
// partition/block sizes are handled independently based on the best
// choice for the current partition. It may well be better to keep a scaled
// best rd so far value and update rd_thresh_freq_fact based on the mode/size
@@ -4126,14 +4131,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
}
if (!x->skip) {
- for (i = 0; i < NB_TXFM_MODES; i++) {
- if (best_txfm_rd[i] == INT64_MAX)
- best_txfm_diff[i] = 0;
+ for (i = 0; i < TX_MODES; i++) {
+ if (best_tx_rd[i] == INT64_MAX)
+ best_tx_diff[i] = 0;
else
- best_txfm_diff[i] = best_rd - best_txfm_rd[i];
+ best_tx_diff[i] = best_rd - best_tx_rd[i];
}
} else {
- vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
+ vpx_memset(best_tx_diff, 0, sizeof(best_tx_diff));
}
set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1],
@@ -4143,7 +4148,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
&mbmi->ref_mvs[mbmi->ref_frame[0]][0],
&mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
mbmi->ref_frame[1]][0],
- best_pred_diff, best_txfm_diff, best_filter_diff);
+ best_pred_diff, best_tx_diff, best_filter_diff);
return best_rd;
}
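
For orientation, the tx_cache/best_tx_rd plumbing that replaces the old txfm_* names works like this: each candidate's RD cost is adjusted by its cached cost under every transform mode relative to the frame's current tx_mode, per-mode minima are tracked across candidates, and the margins against the overall winner are exported (zero for modes never reached). A self-contained recap of the pattern (wrapper name illustrative):

    #include <stdint.h>

    static void update_tx_diffs(const int64_t tx_cache[], int64_t this_rd,
                                int cur_tx_mode, int n_modes,
                                int64_t best_tx_rd[], int64_t best_rd,
                                int64_t best_tx_diff[]) {
      int i;
      for (i = 0; i < n_modes; i++) {
        const int64_t adj_rd = this_rd + tx_cache[i] - tx_cache[cur_tx_mode];
        if (adj_rd < best_tx_rd[i])
          best_tx_rd[i] = adj_rd;              /* best cost under mode i */
      }
      for (i = 0; i < n_modes; i++)            /* margins for the context */
        best_tx_diff[i] = best_tx_rd[i] == INT64_MAX ? 0
                                                     : best_rd - best_tx_rd[i];
    }
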
diff --git a/libvpx/vp9/encoder/vp9_segmentation.c b/libvpx/vp9/encoder/vp9_segmentation.c
index ef84cc5c0..9564edc84 100644
--- a/libvpx/vp9/encoder/vp9_segmentation.c
+++ b/libvpx/vp9/encoder/vp9_segmentation.c
@@ -57,8 +57,7 @@ void vp9_set_segment_data(VP9_PTR ptr,
}
// Based on set of segment counts calculate a probability tree
-static void calc_segtree_probs(MACROBLOCKD *xd, int *segcounts,
- vp9_prob *segment_tree_probs) {
+static void calc_segtree_probs(int *segcounts, vp9_prob *segment_tree_probs) {
// Work out probabilities of each segment
const int c01 = segcounts[0] + segcounts[1];
const int c23 = segcounts[2] + segcounts[3];
@@ -75,7 +74,7 @@ static void calc_segtree_probs(MACROBLOCKD *xd, int *segcounts,
}
// Based on set of segment counts and probabilities calculate a cost estimate
-static int cost_segmap(MACROBLOCKD *xd, int *segcounts, vp9_prob *probs) {
+static int cost_segmap(int *segcounts, vp9_prob *probs) {
const int c01 = segcounts[0] + segcounts[1];
const int c23 = segcounts[2] + segcounts[3];
const int c45 = segcounts[4] + segcounts[5];
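
calc_segtree_probs() folds the eight segment counts into the seven internal nodes of a binary tree, each node holding the probability of its left branch given the counts beneath it (e.g. the root weighs c0123 against c4567). The count-to-probability step is essentially this (the real helper is libvpx's get_binary_prob(); the clamping details here are assumptions):

    static unsigned char binary_prob_sketch(int n0, int n1) {
      const int total = n0 + n1;
      int p;
      if (total == 0)
        return 128;                        /* no data: assume even odds */
      p = (n0 * 256 + total / 2) / total;  /* rounded P(branch 0) */
      if (p < 1) p = 1;                    /* keep strictly inside (0, 256) */
      if (p > 255) p = 255;
      return (unsigned char)p;
    }
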
@@ -189,13 +188,13 @@ static void count_segs_sb(VP9_COMP *cpi, MODE_INFO *mi,
int n;
assert(bwl < bsl && bhl < bsl);
- if (bsize == BLOCK_SIZE_SB64X64) {
- subsize = BLOCK_SIZE_SB32X32;
- } else if (bsize == BLOCK_SIZE_SB32X32) {
- subsize = BLOCK_SIZE_MB16X16;
+ if (bsize == BLOCK_64X64) {
+ subsize = BLOCK_32X32;
+ } else if (bsize == BLOCK_32X32) {
+ subsize = BLOCK_16X16;
} else {
- assert(bsize == BLOCK_SIZE_MB16X16);
- subsize = BLOCK_SIZE_SB8X8;
+ assert(bsize == BLOCK_16X16);
+ subsize = BLOCK_8X8;
}
for (n = 0; n < 4; n++) {
@@ -211,7 +210,7 @@ static void count_segs_sb(VP9_COMP *cpi, MODE_INFO *mi,
void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
- MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+ struct segmentation *seg = &cpi->mb.e_mbd.seg;
int no_pred_cost;
int t_pred_cost = INT_MAX;
@@ -231,8 +230,8 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
// Set default state for the segment tree probabilities and the
// temporal coding probabilities
- vpx_memset(xd->seg.tree_probs, 255, sizeof(xd->seg.tree_probs));
- vpx_memset(xd->seg.pred_probs, 255, sizeof(xd->seg.pred_probs));
+ vpx_memset(seg->tree_probs, 255, sizeof(seg->tree_probs));
+ vpx_memset(seg->pred_probs, 255, sizeof(seg->pred_probs));
vpx_memset(no_pred_segcounts, 0, sizeof(no_pred_segcounts));
vpx_memset(t_unpred_seg_counts, 0, sizeof(t_unpred_seg_counts));
@@ -249,21 +248,21 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
for (mi_col = cm->cur_tile_mi_col_start; mi_col < cm->cur_tile_mi_col_end;
mi_col += 8, mi += 8)
count_segs_sb(cpi, mi, no_pred_segcounts, temporal_predictor_count,
- t_unpred_seg_counts, mi_row, mi_col, BLOCK_SIZE_SB64X64);
+ t_unpred_seg_counts, mi_row, mi_col, BLOCK_64X64);
}
}
// Work out probability tree for coding segments without prediction
// and the cost.
- calc_segtree_probs(xd, no_pred_segcounts, no_pred_tree);
- no_pred_cost = cost_segmap(xd, no_pred_segcounts, no_pred_tree);
+ calc_segtree_probs(no_pred_segcounts, no_pred_tree);
+ no_pred_cost = cost_segmap(no_pred_segcounts, no_pred_tree);
// Key frames cannot use temporal prediction
if (cm->frame_type != KEY_FRAME) {
// Work out probability tree for coding those segments not
// predicted using the temporal method and the cost.
- calc_segtree_probs(xd, t_unpred_seg_counts, t_pred_tree);
- t_pred_cost = cost_segmap(xd, t_unpred_seg_counts, t_pred_tree);
+ calc_segtree_probs(t_unpred_seg_counts, t_pred_tree);
+ t_pred_cost = cost_segmap(t_unpred_seg_counts, t_pred_tree);
// Add in the cost of the signalling for each prediction context
for (i = 0; i < PREDICTION_PROBS; i++) {
@@ -280,11 +279,11 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
// Now choose which coding method to use.
if (t_pred_cost < no_pred_cost) {
- xd->seg.temporal_update = 1;
- vpx_memcpy(xd->seg.tree_probs, t_pred_tree, sizeof(t_pred_tree));
- vpx_memcpy(xd->seg.pred_probs, t_nopred_prob, sizeof(t_nopred_prob));
+ seg->temporal_update = 1;
+ vpx_memcpy(seg->tree_probs, t_pred_tree, sizeof(t_pred_tree));
+ vpx_memcpy(seg->pred_probs, t_nopred_prob, sizeof(t_nopred_prob));
} else {
- xd->seg.temporal_update = 0;
- vpx_memcpy(xd->seg.tree_probs, no_pred_tree, sizeof(no_pred_tree));
+ seg->temporal_update = 0;
+ vpx_memcpy(seg->tree_probs, no_pred_tree, sizeof(no_pred_tree));
}
}
diff --git a/libvpx/vp9/encoder/vp9_temporal_filter.c b/libvpx/vp9/encoder/vp9_temporal_filter.c
index 821b7c6ca..a692c010e 100644
--- a/libvpx/vp9/encoder/vp9_temporal_filter.c
+++ b/libvpx/vp9/encoder/vp9_temporal_filter.c
@@ -40,10 +40,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
int mv_col,
uint8_t *pred) {
const int which_mv = 0;
- int_mv mv;
-
- mv.as_mv.row = mv_row;
- mv.as_mv.col = mv_col;
+ MV mv = { mv_row, mv_col };
vp9_build_inter_predictor(y_mb_ptr, stride,
&pred[0], 16,
diff --git a/libvpx/vp9/encoder/vp9_tokenize.c b/libvpx/vp9/encoder/vp9_tokenize.c
index 4b9c6c8b4..caa89b218 100644
--- a/libvpx/vp9/encoder/vp9_tokenize.c
+++ b/libvpx/vp9/encoder/vp9_tokenize.c
@@ -25,8 +25,8 @@
compressions, then generating vp9_context.c = initial stats. */
#ifdef ENTROPY_STATS
-vp9_coeff_accum context_counters[TX_SIZE_MAX_SB][BLOCK_TYPES];
-extern vp9_coeff_stats tree_update_hist[TX_SIZE_MAX_SB][BLOCK_TYPES];
+vp9_coeff_accum context_counters[TX_SIZES][BLOCK_TYPES];
+extern vp9_coeff_stats tree_update_hist[TX_SIZES][BLOCK_TYPES];
#endif /* ENTROPY_STATS */
DECLARE_ALIGNED(16, extern const uint8_t,
@@ -40,7 +40,7 @@ const int *vp9_dct_value_cost_ptr;
static void fill_value_tokens() {
TOKENVALUE *const t = dct_value_tokens + DCT_MAX_VALUE;
- vp9_extra_bit *const e = vp9_extra_bits;
+ const vp9_extra_bit *const e = vp9_extra_bits;
int i = -DCT_MAX_VALUE;
int sign = 1;
@@ -69,7 +69,7 @@ static void fill_value_tokens() {
// initialize the cost for extra bits for all possible coefficient values.
{
int cost = 0;
- vp9_extra_bit *p = vp9_extra_bits + t[i].token;
+ const vp9_extra_bit *p = vp9_extra_bits + t[i].token;
if (p->base_val) {
const int extra = t[i].extra;
@@ -95,18 +95,40 @@ struct tokenize_b_args {
MACROBLOCKD *xd;
TOKENEXTRA **tp;
TX_SIZE tx_size;
- int dry_run;
};
+static void set_entropy_context_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
+ int ss_txfrm_size, void *arg) {
+ struct tokenize_b_args* const args = arg;
+ TX_SIZE tx_size = ss_txfrm_size >> 1;
+ MACROBLOCKD *xd = args->xd;
+ const int bwl = b_width_log2(bsize);
+ const int off = block >> (2 * tx_size);
+ const int mod = bwl - tx_size - xd->plane[plane].subsampling_x;
+ const int aoff = (off & ((1 << mod) - 1)) << tx_size;
+ const int loff = (off >> mod) << tx_size;
+ ENTROPY_CONTEXT *A = xd->plane[plane].above_context + aoff;
+ ENTROPY_CONTEXT *L = xd->plane[plane].left_context + loff;
+ const int eob = xd->plane[plane].eobs[block];
+ const int tx_size_in_blocks = 1 << tx_size;
+
+ if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) {
+ set_contexts_on_border(xd, bsize, plane, tx_size_in_blocks, eob, aoff, loff,
+ A, L);
+ } else {
+ vpx_memset(A, eob > 0, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks);
+ vpx_memset(L, eob > 0, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks);
+ }
+}
+
static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
int ss_txfrm_size, void *arg) {
struct tokenize_b_args* const args = arg;
VP9_COMP *cpi = args->cpi;
MACROBLOCKD *xd = args->xd;
TOKENEXTRA **tp = args->tp;
- TX_SIZE tx_size = ss_txfrm_size / 2;
- int dry_run = args->dry_run;
-
+ const TX_SIZE tx_size = ss_txfrm_size >> 1;
+ const int tx_size_in_blocks = 1 << tx_size;
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
int pt; /* near block/prev token context index */
int c = 0, rc = 0;
@@ -114,9 +136,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
const int eob = xd->plane[plane].eobs[block];
const PLANE_TYPE type = xd->plane[plane].plane_type;
const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16);
- const BLOCK_SIZE_TYPE sb_type = (mbmi->sb_type < BLOCK_SIZE_SB8X8) ?
- BLOCK_SIZE_SB8X8 : mbmi->sb_type;
- const int bwl = b_width_log2(sb_type);
+ const int bwl = b_width_log2(bsize);
const int off = block >> (2 * tx_size);
const int mod = bwl - tx_size - xd->plane[plane].subsampling_x;
const int aoff = (off & ((1 << mod) - 1)) << tx_size;
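
set_entropy_context_b() and tokenize_b() now share this index arithmetic: the raster block index is reduced to transform-block units, then split into a column (aoff) and row (loff) offset measured in 4x4 blocks. Isolated for clarity (the wrapper function itself is illustrative; the expressions match the patch):

    static void block_to_ctx_offsets(int block, int tx_size, int bwl,
                                     int subsampling_x,
                                     int *aoff, int *loff) {
      const int off = block >> (2 * tx_size);        /* tx-block raster index */
      const int mod = bwl - tx_size - subsampling_x; /* log2(tx blocks/row) */
      *aoff = (off & ((1 << mod) - 1)) << tx_size;   /* column in 4x4 units */
      *loff = (off >> mod) << tx_size;               /* row in 4x4 units */
    }
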
@@ -128,7 +148,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
const int16_t *scan, *nb;
vp9_coeff_count *counts;
vp9_coeff_probs_model *coef_probs;
- const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
+ const int ref = is_inter_block(mbmi);
ENTROPY_CONTEXT above_ec, left_ec;
uint8_t token_cache[1024];
const uint8_t *band_translate;
@@ -146,22 +166,22 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
band_translate = vp9_coefband_trans_4x4;
break;
case TX_8X8:
- above_ec = (A[0] + A[1]) != 0;
- left_ec = (L[0] + L[1]) != 0;
+ above_ec = !!*(uint16_t *)A;
+ left_ec = !!*(uint16_t *)L;
seg_eob = 64;
scan = get_scan_8x8(get_tx_type_8x8(type, xd));
band_translate = vp9_coefband_trans_8x8plus;
break;
case TX_16X16:
- above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
- left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
+ above_ec = !!*(uint32_t *)A;
+ left_ec = !!*(uint32_t *)L;
seg_eob = 256;
scan = get_scan_16x16(get_tx_type_16x16(type, xd));
band_translate = vp9_coefband_trans_8x8plus;
break;
case TX_32X32:
- above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
- left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
+ above_ec = !!*(uint64_t *)A;
+ left_ec = !!*(uint64_t *)L;
seg_eob = 1024;
scan = vp9_default_scan_32x32;
band_translate = vp9_coefband_trans_8x8plus;
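
The new above_ec/left_ec expressions test a whole run of one-byte contexts with a single wide load instead of summing them byte by byte. Spelled out as a sketch (this assumes the context buffers tolerate the alignment and aliasing involved, as the patch itself evidently relies on):

    /* Any nonzero ENTROPY_CONTEXT byte in the run covered by tx_size? */
    static int any_nonzero_sketch(const unsigned char *ctx, int tx_size) {
      switch (tx_size) {
        case 1: return !!*(const unsigned short *)ctx;     /* TX_8X8: 2 bytes */
        case 2: return !!*(const unsigned int *)ctx;       /* TX_16X16: 4 */
        case 3: return !!*(const unsigned long long *)ctx; /* TX_32X32: 8 */
        default: return ctx[0] != 0;                       /* TX_4X4 */
      }
    }
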
@@ -198,22 +218,21 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
assert(vp9_coef_encodings[t->token].len - t->skip_eob_node > 0);
- if (!dry_run) {
- ++counts[type][ref][band][pt][token];
- if (!t->skip_eob_node)
- ++cpi->common.counts.eob_branch[tx_size][type][ref][band][pt];
- }
- token_cache[scan[c]] = vp9_pt_energy_class[token];
+ ++counts[type][ref][band][pt][token];
+ if (!t->skip_eob_node)
+ ++cpi->common.counts.eob_branch[tx_size][type][ref][band][pt];
+
+ token_cache[rc] = vp9_pt_energy_class[token];
++t;
} while (c < eob && ++c < seg_eob);
*tp = t;
if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) {
- set_contexts_on_border(xd, bsize, plane, tx_size, c, aoff, loff, A, L);
+ set_contexts_on_border(xd, bsize, plane, tx_size_in_blocks, c, aoff, loff,
+ A, L);
} else {
- for (pt = 0; pt < (1 << tx_size); pt++) {
- A[pt] = L[pt] = c > 0;
- }
+ vpx_memset(A, c > 0, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks);
+ vpx_memset(L, c > 0, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks);
}
}
@@ -257,8 +276,7 @@ void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run,
const int mb_skip_context = vp9_get_pred_context_mbskip(xd);
const int skip_inc = !vp9_segfeature_active(&xd->seg, mbmi->segment_id,
SEG_LVL_SKIP);
- const TX_SIZE txfm_size = mbmi->txfm_size;
- struct tokenize_b_args arg = { cpi, xd, t, txfm_size, dry_run };
+ struct tokenize_b_args arg = {cpi, xd, t, mbmi->txfm_size};
mbmi->mb_skip_coeff = vp9_sb_is_skippable(xd, bsize);
if (mbmi->mb_skip_coeff) {
@@ -270,13 +288,13 @@ void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run,
return;
}
- if (!dry_run)
+ if (!dry_run) {
cm->counts.mbskip[mb_skip_context][0] += skip_inc;
-
- foreach_transformed_block(xd, bsize, tokenize_b, &arg);
-
- if (dry_run)
+ foreach_transformed_block(xd, bsize, tokenize_b, &arg);
+ } else {
+ foreach_transformed_block(xd, bsize, set_entropy_context_b, &arg);
*t = t_backup;
+ }
}
#ifdef ENTROPY_STATS
diff --git a/libvpx/vp9/encoder/vp9_tokenize.h b/libvpx/vp9/encoder/vp9_tokenize.h
index bc7d9352e..968bec75e 100644
--- a/libvpx/vp9/encoder/vp9_tokenize.h
+++ b/libvpx/vp9/encoder/vp9_tokenize.h
@@ -43,7 +43,7 @@ void vp9_tokenize_sb(struct VP9_COMP *cpi, TOKENEXTRA **t, int dry_run,
void init_context_counters();
void print_context_counters();
-extern vp9_coeff_accum context_counters[TX_SIZE_MAX_SB][BLOCK_TYPES];
+extern vp9_coeff_accum context_counters[TX_SIZES][BLOCK_TYPES];
#endif
extern const int *vp9_dct_value_cost_ptr;
diff --git a/libvpx/vp9/vp9_common.mk b/libvpx/vp9/vp9_common.mk
index 5a0c1c958..b2b2a80a7 100644
--- a/libvpx/vp9/vp9_common.mk
+++ b/libvpx/vp9/vp9_common.mk
@@ -74,7 +74,6 @@ VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_loopfilter_intrin_ss
VP9_COMMON_SRCS-$(CONFIG_POSTPROC) += common/vp9_postproc.h
VP9_COMMON_SRCS-$(CONFIG_POSTPROC) += common/vp9_postproc.c
VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_loopfilter_mmx.asm
-VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_copy_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_intrapred_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_intrapred_ssse3.asm
VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_ssse3.asm
@@ -83,6 +82,10 @@ VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_postproc_mmx.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_postproc_sse2.asm
endif
+ifeq ($(USE_X86INC),yes)
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_copy_sse2.asm
+endif
+
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve_neon.c
@@ -91,5 +94,6 @@ VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve8_avg_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_dc_only_idct_add_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct8x8_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_mb_lpf_neon$(ASM)
$(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.sh))
diff --git a/libvpx/vp9/vp9cx.mk b/libvpx/vp9/vp9cx.mk
index dee83c9e4..288c0d829 100644
--- a/libvpx/vp9/vp9cx.mk
+++ b/libvpx/vp9/vp9cx.mk
@@ -83,11 +83,15 @@ VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_impl_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance.asm
-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subtract_sse2.asm
-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance_impl_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE3) += encoder/x86/vp9_sad_sse3.asm
+
+ifeq ($(USE_X86INC),yes)
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subtract_sse2.asm
+endif
+
ifeq ($(ARCH_X86_64),yes)
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3.asm
endif
diff --git a/libvpx/vp9/vp9dx.mk b/libvpx/vp9/vp9dx.mk
index 6cad29329..be3afe835 100644
--- a/libvpx/vp9/vp9dx.mk
+++ b/libvpx/vp9/vp9dx.mk
@@ -28,6 +28,8 @@ VP9_DX_SRCS-yes += decoder/vp9_decodemv.h
VP9_DX_SRCS-yes += decoder/vp9_detokenize.h
VP9_DX_SRCS-yes += decoder/vp9_onyxd.h
VP9_DX_SRCS-yes += decoder/vp9_onyxd_int.h
+VP9_DX_SRCS-yes += decoder/vp9_thread.c
+VP9_DX_SRCS-yes += decoder/vp9_thread.h
VP9_DX_SRCS-yes += decoder/vp9_treereader.h
VP9_DX_SRCS-yes += decoder/vp9_onyxd_if.c
VP9_DX_SRCS-yes += decoder/vp9_idct_blk.c
diff --git a/libvpx/vpx_scale/generic/yv12config.c b/libvpx/vpx_scale/generic/yv12config.c
index b18155be6..259204065 100644
--- a/libvpx/vpx_scale/generic/yv12config.c
+++ b/libvpx/vpx_scale/generic/yv12config.c
@@ -60,7 +60,7 @@ int vp8_yv12_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf,
/* Only support allocating buffers that have a border that's a multiple
* of 32. The border restriction is required to get 16-byte alignment of
- * the start of the chroma rows without intoducing an arbitrary gap
+ * the start of the chroma rows without introducing an arbitrary gap
* between planes, which would break the semantics of things like
* vpx_img_set_rect(). */
if (border & 0x1f)
@@ -158,7 +158,7 @@ int vp9_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf,
/* Only support allocating buffers that have a border that's a multiple
* of 32. The border restriction is required to get 16-byte alignment of
- * the start of the chroma rows without intoducing an arbitrary gap
+ * the start of the chroma rows without introducing an arbitrary gap
* between planes, which would break the semantics of things like
* vpx_img_set_rect(). */
if (border & 0x1f)
diff --git a/mips-dspr2/libvpx_srcs.txt b/mips-dspr2/libvpx_srcs.txt
index d75620883..299d615be 100644
--- a/mips-dspr2/libvpx_srcs.txt
+++ b/mips-dspr2/libvpx_srcs.txt
@@ -203,6 +203,8 @@ vp9/decoder/vp9_onyxd.h
vp9/decoder/vp9_onyxd_if.c
vp9/decoder/vp9_onyxd_int.h
vp9/decoder/vp9_read_bit_buffer.h
+vp9/decoder/vp9_thread.c
+vp9/decoder/vp9_thread.h
vp9/decoder/vp9_treereader.h
vp9/vp9_common.mk
vp9/vp9_dx_iface.c
diff --git a/mips-dspr2/vp9_rtcd.h b/mips-dspr2/vp9_rtcd.h
index 0752f4590..d6dc6bfb6 100644
--- a/mips-dspr2/vp9_rtcd.h
+++ b/mips-dspr2/vp9_rtcd.h
@@ -14,9 +14,7 @@
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_enums.h"
-struct loop_filter_info;
struct macroblockd;
-struct loop_filter_info;
/* Encoder forward decls */
struct macroblock;
@@ -260,14 +258,17 @@ void vp9_short_idct4x4_1_add_c(int16_t *input, uint8_t *dest, int dest_stride);
void vp9_short_idct4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct4x4_add vp9_short_idct4x4_add_c
+void vp9_short_idct8x8_1_add_c(int16_t *input, uint8_t *dest, int dest_stride);
+#define vp9_short_idct8x8_1_add vp9_short_idct8x8_1_add_c
+
void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct8x8_add vp9_short_idct8x8_add_c
void vp9_short_idct10_8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct10_8x8_add vp9_short_idct10_8x8_add_c
-void vp9_short_idct1_8x8_c(int16_t *input, int16_t *output);
-#define vp9_short_idct1_8x8 vp9_short_idct1_8x8_c
+void vp9_short_idct16x16_1_add_c(int16_t *input, uint8_t *dest, int dest_stride);
+#define vp9_short_idct16x16_1_add vp9_short_idct16x16_1_add_c
void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct16x16_add vp9_short_idct16x16_add_c
@@ -275,18 +276,12 @@ void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride);
void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct10_16x16_add vp9_short_idct10_16x16_add_c
-void vp9_short_idct1_16x16_c(int16_t *input, int16_t *output);
-#define vp9_short_idct1_16x16 vp9_short_idct1_16x16_c
-
void vp9_short_idct32x32_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct32x32_add vp9_short_idct32x32_add_c
void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output);
#define vp9_short_idct1_32x32 vp9_short_idct1_32x32_c
-void vp9_short_idct10_32x32_add_c(int16_t *input, uint8_t *dest, int dest_stride);
-#define vp9_short_idct10_32x32_add vp9_short_idct10_32x32_add_c
-
void vp9_short_iht4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride, int tx_type);
#define vp9_short_iht4x4_add vp9_short_iht4x4_add_c
diff --git a/mips-dspr2/vpx_config.h b/mips-dspr2/vpx_config.h
index 13a092db0..e85b676d4 100644
--- a/mips-dspr2/vpx_config.h
+++ b/mips-dspr2/vpx_config.h
@@ -39,6 +39,7 @@
#define CONFIG_INSTALL_BINS 1
#define CONFIG_INSTALL_LIBS 1
#define CONFIG_INSTALL_SRCS 0
+#define CONFIG_USE_X86INC 1
#define CONFIG_DEBUG 0
#define CONFIG_GPROF 0
#define CONFIG_GCOV 0
diff --git a/mips/libvpx_srcs.txt b/mips/libvpx_srcs.txt
index 402ac2420..055f5fb5d 100644
--- a/mips/libvpx_srcs.txt
+++ b/mips/libvpx_srcs.txt
@@ -197,6 +197,8 @@ vp9/decoder/vp9_onyxd.h
vp9/decoder/vp9_onyxd_if.c
vp9/decoder/vp9_onyxd_int.h
vp9/decoder/vp9_read_bit_buffer.h
+vp9/decoder/vp9_thread.c
+vp9/decoder/vp9_thread.h
vp9/decoder/vp9_treereader.h
vp9/vp9_common.mk
vp9/vp9_dx_iface.c
diff --git a/mips/vp9_rtcd.h b/mips/vp9_rtcd.h
index 0752f4590..d6dc6bfb6 100644
--- a/mips/vp9_rtcd.h
+++ b/mips/vp9_rtcd.h
@@ -14,9 +14,7 @@
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_enums.h"
-struct loop_filter_info;
struct macroblockd;
-struct loop_filter_info;
/* Encoder forward decls */
struct macroblock;
@@ -260,14 +258,17 @@ void vp9_short_idct4x4_1_add_c(int16_t *input, uint8_t *dest, int dest_stride);
void vp9_short_idct4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct4x4_add vp9_short_idct4x4_add_c
+void vp9_short_idct8x8_1_add_c(int16_t *input, uint8_t *dest, int dest_stride);
+#define vp9_short_idct8x8_1_add vp9_short_idct8x8_1_add_c
+
void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct8x8_add vp9_short_idct8x8_add_c
void vp9_short_idct10_8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct10_8x8_add vp9_short_idct10_8x8_add_c
-void vp9_short_idct1_8x8_c(int16_t *input, int16_t *output);
-#define vp9_short_idct1_8x8 vp9_short_idct1_8x8_c
+void vp9_short_idct16x16_1_add_c(int16_t *input, uint8_t *dest, int dest_stride);
+#define vp9_short_idct16x16_1_add vp9_short_idct16x16_1_add_c
void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct16x16_add vp9_short_idct16x16_add_c
@@ -275,18 +276,12 @@ void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride);
void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct10_16x16_add vp9_short_idct10_16x16_add_c
-void vp9_short_idct1_16x16_c(int16_t *input, int16_t *output);
-#define vp9_short_idct1_16x16 vp9_short_idct1_16x16_c
-
void vp9_short_idct32x32_add_c(int16_t *input, uint8_t *dest, int dest_stride);
#define vp9_short_idct32x32_add vp9_short_idct32x32_add_c
void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output);
#define vp9_short_idct1_32x32 vp9_short_idct1_32x32_c
-void vp9_short_idct10_32x32_add_c(int16_t *input, uint8_t *dest, int dest_stride);
-#define vp9_short_idct10_32x32_add vp9_short_idct10_32x32_add_c
-
void vp9_short_iht4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride, int tx_type);
#define vp9_short_iht4x4_add vp9_short_iht4x4_add_c
diff --git a/mips/vpx_config.h b/mips/vpx_config.h
index 51ea388f1..7db47f873 100644
--- a/mips/vpx_config.h
+++ b/mips/vpx_config.h
@@ -39,6 +39,7 @@
#define CONFIG_INSTALL_BINS 1
#define CONFIG_INSTALL_LIBS 1
#define CONFIG_INSTALL_SRCS 0
+#define CONFIG_USE_X86INC 1
#define CONFIG_DEBUG 0
#define CONFIG_GPROF 0
#define CONFIG_GCOV 0