88 files changed, 3734 insertions, 2249 deletions
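The headline addition in this changeset is a worker-thread abstraction for the VP9 decoder (vp9/decoder/vp9_thread.c and vp9_thread.h), exercised by the new test/vp9_thread_test.cc further down. The following is a minimal usage sketch based only on the calls that test makes; the hook and helper names here are illustrative, not part of the library.

    #include "vp9/decoder/vp9_thread.h"

    /* Runs on the worker thread; a non-zero return value signals success. */
    static int double_hook(void *data1, void *data2) {
      const int *const in = (const int *)data1;
      int *const out = (int *)data2;
      *out = *in * 2;                    /* work done off the calling thread */
      return 1;
    }

    static int run_one_job(void) {
      VP9Worker worker;
      int in = 21, out = 0;
      int ok;

      vp9_worker_init(&worker);          /* initialize the worker object     */
      if (!vp9_worker_reset(&worker))    /* create (or re-arm) the thread    */
        return 0;

      worker.hook = double_hook;         /* function to run asynchronously   */
      worker.data1 = &in;                /* first argument passed to hook    */
      worker.data2 = &out;               /* second argument passed to hook   */

      vp9_worker_launch(&worker);        /* start the hook on the thread     */
      /* ...the calling thread is free to do other work here...              */

      ok = vp9_worker_sync(&worker);     /* block until the hook returns;    */
                                         /* 0 and worker.had_error on failure */
      vp9_worker_end(&worker);           /* join and release the thread      */
      return ok && out == 42;
    }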
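Also new is an ARM NEON implementation of the 16-wide VP9 loop filter (vp9/common/arm/neon/vp9_mb_lpf_neon.asm), hooked up as vp9_mb_lpf_horizontal_edge_w_neon and vp9_mb_lpf_vertical_edge_w_neon in the generated vp9_rtcd.h headers. The scalar sketch below restates the per-column masks that the vp9_wide_mbfilter_neon helper computes, as described by its own comments; it is illustrative only, and the library's C reference implementation remains authoritative.

    #include <stdint.h>
    #include <stdlib.h>

    /* One column of pixels across the edge: p3..p0 | q0..q3. Returns 1 when
     * the edge is quiet enough that the filter should be applied at all. */
    static int filter_mask_sketch(uint8_t limit, uint8_t blimit,
                                  uint8_t p3, uint8_t p2, uint8_t p1, uint8_t p0,
                                  uint8_t q0, uint8_t q1, uint8_t q2, uint8_t q3) {
      int m = abs(p3 - p2);                    /* largest step between neighbours */
      if (abs(p2 - p1) > m) m = abs(p2 - p1);
      if (abs(p1 - p0) > m) m = abs(p1 - p0);
      if (abs(q1 - q0) > m) m = abs(q1 - q0);
      if (abs(q2 - q1) > m) m = abs(q2 - q1);
      if (abs(q3 - q2) > m) m = abs(q3 - q2);
      /* edge activity: |p0 - q0| * 2 + |p1 - q1| / 2 compared against blimit */
      return m <= limit && 2 * abs(p0 - q0) + abs(p1 - q1) / 2 <= blimit;
    }

    /* "flat" selects the stronger filter: p3..q3 must all lie within 1 of
     * p0/q0. flat2 applies the same test to p7..p4 and q4..q7 to choose the
     * widest (16-tap) filter. */
    static int flat_mask_sketch(uint8_t p3, uint8_t p2, uint8_t p1, uint8_t p0,
                                uint8_t q0, uint8_t q1, uint8_t q2, uint8_t q3) {
      int m = abs(p1 - p0);
      if (abs(q1 - q0) > m) m = abs(q1 - q0);
      if (abs(p2 - p0) > m) m = abs(p2 - p0);
      if (abs(q2 - q0) > m) m = abs(q2 - q0);
      if (abs(p3 - p0) > m) m = abs(p3 - p0);
      if (abs(q3 - q0) > m) m = abs(q3 - q0);
      return m <= 1;
    }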
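Finally, in vp9/common/vp9_entropy.c (and, where this section cuts off, vp9_entropymode.c) the hand-rolled probability adaptation is collapsed into a single merge_probs() call. The patch removes the inline computation, so the helper presumably behaves roughly like the sketch below; the actual definition lives elsewhere in the tree and may differ in detail.

    /* Hedged reconstruction of merge_probs(), inferred from the inline code
     * this patch deletes (count saturation followed by a weighted blend); not
     * copied from the library. weighted_prob() and MIN() are existing libvpx
     * helpers. */
    static vp9_prob merge_probs_sketch(vp9_prob pre_prob, vp9_prob prob,
                                       const unsigned int ct[2],
                                       unsigned int count_sat,
                                       unsigned int update_factor) {
      const unsigned int count = MIN(ct[0] + ct[1], count_sat);
      const unsigned int factor = update_factor * count / count_sat;
      /* blend: (pre_prob * (256 - factor) + prob * factor + 128) >> 8 */
      return weighted_prob(pre_prob, prob, factor);
    }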
diff --git a/armv7a-neon/libvpx_srcs.txt b/armv7a-neon/libvpx_srcs.txt index 7f331c0cf..25ca5e0f8 100644 --- a/armv7a-neon/libvpx_srcs.txt +++ b/armv7a-neon/libvpx_srcs.txt @@ -208,6 +208,7 @@ vp9/common/arm/neon/vp9_convolve8_neon.asm.s vp9/common/arm/neon/vp9_convolve_neon.c vp9/common/arm/neon/vp9_dc_only_idct_add_neon.asm.s vp9/common/arm/neon/vp9_loopfilter_neon.asm.s +vp9/common/arm/neon/vp9_mb_lpf_neon.asm.s vp9/common/arm/neon/vp9_short_idct8x8_add_neon.asm.s vp9/common/generic/vp9_systemdependent.c vp9/common/vp9_alloccommon.c @@ -282,6 +283,8 @@ vp9/decoder/vp9_onyxd.h vp9/decoder/vp9_onyxd_if.c vp9/decoder/vp9_onyxd_int.h vp9/decoder/vp9_read_bit_buffer.h +vp9/decoder/vp9_thread.c +vp9/decoder/vp9_thread.h vp9/decoder/vp9_treereader.h vp9/vp9_common.mk vp9/vp9_dx_iface.c diff --git a/armv7a-neon/vp9_rtcd.h b/armv7a-neon/vp9_rtcd.h index 6e6ff717b..4ebb49773 100644 --- a/armv7a-neon/vp9_rtcd.h +++ b/armv7a-neon/vp9_rtcd.h @@ -14,9 +14,7 @@ #include "vpx/vpx_integer.h" #include "vp9/common/vp9_enums.h" -struct loop_filter_info; struct macroblockd; -struct loop_filter_info; /* Encoder forward decls */ struct macroblock; @@ -207,7 +205,8 @@ void vp9_add_constant_residual_32x32_neon(const int16_t diff, uint8_t *dest, int #define vp9_add_constant_residual_32x32 vp9_add_constant_residual_32x32_neon void vp9_mb_lpf_vertical_edge_w_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -#define vp9_mb_lpf_vertical_edge_w vp9_mb_lpf_vertical_edge_w_c +void vp9_mb_lpf_vertical_edge_w_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define vp9_mb_lpf_vertical_edge_w vp9_mb_lpf_vertical_edge_w_neon void vp9_mbloop_filter_vertical_edge_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); void vp9_mbloop_filter_vertical_edge_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); @@ -218,7 +217,8 @@ void vp9_loop_filter_vertical_edge_neon(uint8_t *s, int pitch, const uint8_t *bl #define vp9_loop_filter_vertical_edge vp9_loop_filter_vertical_edge_neon void vp9_mb_lpf_horizontal_edge_w_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); -#define vp9_mb_lpf_horizontal_edge_w vp9_mb_lpf_horizontal_edge_w_c +void vp9_mb_lpf_horizontal_edge_w_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); +#define vp9_mb_lpf_horizontal_edge_w vp9_mb_lpf_horizontal_edge_w_neon void vp9_mbloop_filter_horizontal_edge_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); void vp9_mbloop_filter_horizontal_edge_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); @@ -273,6 +273,9 @@ void vp9_short_idct4x4_1_add_c(int16_t *input, uint8_t *dest, int dest_stride); void vp9_short_idct4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride); #define vp9_short_idct4x4_add vp9_short_idct4x4_add_c +void vp9_short_idct8x8_1_add_c(int16_t *input, uint8_t *dest, int dest_stride); +#define vp9_short_idct8x8_1_add vp9_short_idct8x8_1_add_c + void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride); void vp9_short_idct8x8_add_neon(int16_t *input, uint8_t *dest, int dest_stride); #define vp9_short_idct8x8_add vp9_short_idct8x8_add_neon @@ -280,8 +283,8 @@ void vp9_short_idct8x8_add_neon(int16_t *input, uint8_t 
*dest, int dest_stride); void vp9_short_idct10_8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride); #define vp9_short_idct10_8x8_add vp9_short_idct10_8x8_add_c -void vp9_short_idct1_8x8_c(int16_t *input, int16_t *output); -#define vp9_short_idct1_8x8 vp9_short_idct1_8x8_c +void vp9_short_idct16x16_1_add_c(int16_t *input, uint8_t *dest, int dest_stride); +#define vp9_short_idct16x16_1_add vp9_short_idct16x16_1_add_c void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride); #define vp9_short_idct16x16_add vp9_short_idct16x16_add_c @@ -289,18 +292,12 @@ void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride); void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride); #define vp9_short_idct10_16x16_add vp9_short_idct10_16x16_add_c -void vp9_short_idct1_16x16_c(int16_t *input, int16_t *output); -#define vp9_short_idct1_16x16 vp9_short_idct1_16x16_c - void vp9_short_idct32x32_add_c(int16_t *input, uint8_t *dest, int dest_stride); #define vp9_short_idct32x32_add vp9_short_idct32x32_add_c void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output); #define vp9_short_idct1_32x32 vp9_short_idct1_32x32_c -void vp9_short_idct10_32x32_add_c(int16_t *input, uint8_t *dest, int dest_stride); -#define vp9_short_idct10_32x32_add vp9_short_idct10_32x32_add_c - void vp9_short_iht4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride, int tx_type); #define vp9_short_iht4x4_add vp9_short_iht4x4_add_c diff --git a/armv7a-neon/vpx_config.h b/armv7a-neon/vpx_config.h index 6f45f7ec7..d132e4d60 100644 --- a/armv7a-neon/vpx_config.h +++ b/armv7a-neon/vpx_config.h @@ -39,6 +39,7 @@ #define CONFIG_INSTALL_BINS 1 #define CONFIG_INSTALL_LIBS 1 #define CONFIG_INSTALL_SRCS 0 +#define CONFIG_USE_X86INC 1 #define CONFIG_DEBUG 0 #define CONFIG_GPROF 0 #define CONFIG_GCOV 0 diff --git a/armv7a/libvpx_srcs.txt b/armv7a/libvpx_srcs.txt index a929dc3ca..2ddb1bdd0 100644 --- a/armv7a/libvpx_srcs.txt +++ b/armv7a/libvpx_srcs.txt @@ -237,6 +237,8 @@ vp9/decoder/vp9_onyxd.h vp9/decoder/vp9_onyxd_if.c vp9/decoder/vp9_onyxd_int.h vp9/decoder/vp9_read_bit_buffer.h +vp9/decoder/vp9_thread.c +vp9/decoder/vp9_thread.h vp9/decoder/vp9_treereader.h vp9/vp9_common.mk vp9/vp9_dx_iface.c diff --git a/armv7a/vp9_rtcd.h b/armv7a/vp9_rtcd.h index d6b244db4..1ce24c553 100644 --- a/armv7a/vp9_rtcd.h +++ b/armv7a/vp9_rtcd.h @@ -14,9 +14,7 @@ #include "vpx/vpx_integer.h" #include "vp9/common/vp9_enums.h" -struct loop_filter_info; struct macroblockd; -struct loop_filter_info; /* Encoder forward decls */ struct macroblock; @@ -260,14 +258,17 @@ void vp9_short_idct4x4_1_add_c(int16_t *input, uint8_t *dest, int dest_stride); void vp9_short_idct4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride); #define vp9_short_idct4x4_add vp9_short_idct4x4_add_c +void vp9_short_idct8x8_1_add_c(int16_t *input, uint8_t *dest, int dest_stride); +#define vp9_short_idct8x8_1_add vp9_short_idct8x8_1_add_c + void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride); #define vp9_short_idct8x8_add vp9_short_idct8x8_add_c void vp9_short_idct10_8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride); #define vp9_short_idct10_8x8_add vp9_short_idct10_8x8_add_c -void vp9_short_idct1_8x8_c(int16_t *input, int16_t *output); -#define vp9_short_idct1_8x8 vp9_short_idct1_8x8_c +void vp9_short_idct16x16_1_add_c(int16_t *input, uint8_t *dest, int dest_stride); +#define vp9_short_idct16x16_1_add vp9_short_idct16x16_1_add_c void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *dest, 
int dest_stride); #define vp9_short_idct16x16_add vp9_short_idct16x16_add_c @@ -275,18 +276,12 @@ void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride); void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride); #define vp9_short_idct10_16x16_add vp9_short_idct10_16x16_add_c -void vp9_short_idct1_16x16_c(int16_t *input, int16_t *output); -#define vp9_short_idct1_16x16 vp9_short_idct1_16x16_c - void vp9_short_idct32x32_add_c(int16_t *input, uint8_t *dest, int dest_stride); #define vp9_short_idct32x32_add vp9_short_idct32x32_add_c void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output); #define vp9_short_idct1_32x32 vp9_short_idct1_32x32_c -void vp9_short_idct10_32x32_add_c(int16_t *input, uint8_t *dest, int dest_stride); -#define vp9_short_idct10_32x32_add vp9_short_idct10_32x32_add_c - void vp9_short_iht4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride, int tx_type); #define vp9_short_iht4x4_add vp9_short_iht4x4_add_c diff --git a/armv7a/vpx_config.h b/armv7a/vpx_config.h index be08d2a25..a330023f9 100644 --- a/armv7a/vpx_config.h +++ b/armv7a/vpx_config.h @@ -39,6 +39,7 @@ #define CONFIG_INSTALL_BINS 1 #define CONFIG_INSTALL_LIBS 1 #define CONFIG_INSTALL_SRCS 0 +#define CONFIG_USE_X86INC 1 #define CONFIG_DEBUG 0 #define CONFIG_GPROF 0 #define CONFIG_GCOV 0 diff --git a/generic/libvpx_srcs.txt b/generic/libvpx_srcs.txt index 402ac2420..055f5fb5d 100644 --- a/generic/libvpx_srcs.txt +++ b/generic/libvpx_srcs.txt @@ -197,6 +197,8 @@ vp9/decoder/vp9_onyxd.h vp9/decoder/vp9_onyxd_if.c vp9/decoder/vp9_onyxd_int.h vp9/decoder/vp9_read_bit_buffer.h +vp9/decoder/vp9_thread.c +vp9/decoder/vp9_thread.h vp9/decoder/vp9_treereader.h vp9/vp9_common.mk vp9/vp9_dx_iface.c diff --git a/generic/vp9_rtcd.h b/generic/vp9_rtcd.h index c0824cb16..2562e82c5 100644 --- a/generic/vp9_rtcd.h +++ b/generic/vp9_rtcd.h @@ -14,9 +14,7 @@ #include "vpx/vpx_integer.h" #include "vp9/common/vp9_enums.h" -struct loop_filter_info; struct macroblockd; -struct loop_filter_info; /* Encoder forward decls */ struct macroblock; @@ -260,14 +258,17 @@ void vp9_short_idct4x4_1_add_c(int16_t *input, uint8_t *dest, int dest_stride); void vp9_short_idct4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride); #define vp9_short_idct4x4_add vp9_short_idct4x4_add_c +void vp9_short_idct8x8_1_add_c(int16_t *input, uint8_t *dest, int dest_stride); +#define vp9_short_idct8x8_1_add vp9_short_idct8x8_1_add_c + void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride); #define vp9_short_idct8x8_add vp9_short_idct8x8_add_c void vp9_short_idct10_8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride); #define vp9_short_idct10_8x8_add vp9_short_idct10_8x8_add_c -void vp9_short_idct1_8x8_c(int16_t *input, int16_t *output); -#define vp9_short_idct1_8x8 vp9_short_idct1_8x8_c +void vp9_short_idct16x16_1_add_c(int16_t *input, uint8_t *dest, int dest_stride); +#define vp9_short_idct16x16_1_add vp9_short_idct16x16_1_add_c void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride); #define vp9_short_idct16x16_add vp9_short_idct16x16_add_c @@ -275,18 +276,12 @@ void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride); void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride); #define vp9_short_idct10_16x16_add vp9_short_idct10_16x16_add_c -void vp9_short_idct1_16x16_c(int16_t *input, int16_t *output); -#define vp9_short_idct1_16x16 vp9_short_idct1_16x16_c - void vp9_short_idct32x32_add_c(int16_t *input, 
uint8_t *dest, int dest_stride); #define vp9_short_idct32x32_add vp9_short_idct32x32_add_c void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output); #define vp9_short_idct1_32x32 vp9_short_idct1_32x32_c -void vp9_short_idct10_32x32_add_c(int16_t *input, uint8_t *dest, int dest_stride); -#define vp9_short_idct10_32x32_add vp9_short_idct10_32x32_add_c - void vp9_short_iht4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride, int tx_type); #define vp9_short_iht4x4_add vp9_short_iht4x4_add_c diff --git a/generic/vpx_config.h b/generic/vpx_config.h index 37dcff976..4d6172b8d 100644 --- a/generic/vpx_config.h +++ b/generic/vpx_config.h @@ -39,6 +39,7 @@ #define CONFIG_INSTALL_BINS 1 #define CONFIG_INSTALL_LIBS 1 #define CONFIG_INSTALL_SRCS 0 +#define CONFIG_USE_X86INC 1 #define CONFIG_DEBUG 0 #define CONFIG_GPROF 0 #define CONFIG_GCOV 0 diff --git a/libvpx/README b/libvpx/README index 92cc0742c..d7cb11afb 100644 --- a/libvpx/README +++ b/libvpx/README @@ -1,7 +1,7 @@ vpx Multi-Format Codec SDK -README - 21 June 2012 +README - 1 August 2013 -Welcome to the WebM VP8 Codec SDK! +Welcome to the WebM VP8/VP9 Codec SDK! COMPILING THE APPLICATIONS/LIBRARIES: The build system used is similar to autotools. Building generally consists of @@ -53,33 +53,63 @@ COMPILING THE APPLICATIONS/LIBRARIES: armv5te-android-gcc armv5te-linux-rvct armv5te-linux-gcc + armv5te-none-rvct armv6-darwin-gcc armv6-linux-rvct armv6-linux-gcc + armv6-none-rvct armv7-android-gcc + armv7-darwin-gcc armv7-linux-rvct armv7-linux-gcc + armv7-none-rvct + armv7-win32-vs11 mips32-linux-gcc ppc32-darwin8-gcc ppc32-darwin9-gcc + ppc32-linux-gcc ppc64-darwin8-gcc ppc64-darwin9-gcc ppc64-linux-gcc + sparc-solaris-gcc + x86-android-gcc x86-darwin8-gcc x86-darwin8-icc x86-darwin9-gcc x86-darwin9-icc + x86-darwin10-gcc + x86-darwin11-gcc + x86-darwin12-gcc + x86-darwin13-gcc x86-linux-gcc x86-linux-icc + x86-os2-gcc x86-solaris-gcc + x86-win32-gcc x86-win32-vs7 x86-win32-vs8 + x86-win32-vs9 + x86-win32-vs10 + x86-win32-vs11 x86_64-darwin9-gcc + x86_64-darwin10-gcc + x86_64-darwin11-gcc + x86_64-darwin12-gcc + x86_64-darwin13-gcc x86_64-linux-gcc + x86_64-linux-icc x86_64-solaris-gcc + x86_64-win64-gcc x86_64-win64-vs8 + x86_64-win64-vs9 + x86_64-win64-vs10 + x86_64-win64-vs11 universal-darwin8-gcc universal-darwin9-gcc + universal-darwin10-gcc + universal-darwin11-gcc + universal-darwin12-gcc + universal-darwin13-gcc generic-gnu The generic-gnu target, in conjunction with the CROSS environment variable, diff --git a/libvpx/build/make/configure.sh b/libvpx/build/make/configure.sh index 30a61067f..e2566b0a7 100755 --- a/libvpx/build/make/configure.sh +++ b/libvpx/build/make/configure.sh @@ -1189,6 +1189,12 @@ EOF fi fi + # default use_x86inc to yes if pic is no or 64bit or we are not on darwin + echo " checking here for x86inc \"${tgt_isa}\" \"$pic\" " + if [ ${tgt_isa} = x86_64 -o ! "$pic" == "yes" -o ! 
${tgt_os:0:6} = darwin ]; then + soft_enable use_x86inc + fi + # Position Independent Code (PIC) support, for building relocatable # shared objects enabled gcc && enabled pic && check_add_cflags -fPIC diff --git a/libvpx/build/make/gen_msvs_sln.sh b/libvpx/build/make/gen_msvs_sln.sh index f9fc69428..0c269b16b 100755 --- a/libvpx/build/make/gen_msvs_sln.sh +++ b/libvpx/build/make/gen_msvs_sln.sh @@ -72,15 +72,21 @@ parse_project() { eval "${var}_name=$name" eval "${var}_guid=$guid" - # assume that all projects have the same list of possible configurations, - # so overwriting old config_lists is not a problem if [ "$sfx" = "vcproj" ]; then - config_list=`grep -A1 '<Configuration' $file | + cur_config_list=`grep -A1 '<Configuration' $file | grep Name | cut -d\" -f2` else - config_list=`grep -B1 'Label="Configuration"' $file | + cur_config_list=`grep -B1 'Label="Configuration"' $file | grep Condition | cut -d\' -f4` fi + new_config_list=$(for i in $config_list $cur_config_list; do + echo $i + done | sort | uniq) + if [ "$config_list" != "" ] && [ "$config_list" != "$new_config_list" ]; then + mixed_platforms=1 + fi + config_list="$new_config_list" + eval "${var}_config_list=\"$cur_config_list\"" proj_list="${proj_list} ${var}" } @@ -130,6 +136,11 @@ process_global() { indent_push IFS_bak=${IFS} IFS=$'\r'$'\n' + if [ "$mixed_platforms" != "" ]; then + config_list=" +Release|Mixed Platforms +Debug|Mixed Platforms" + fi for config in ${config_list}; do echo "${indent}$config = $config" done @@ -144,10 +155,17 @@ process_global() { indent_push for proj in ${proj_list}; do eval "local proj_guid=\${${proj}_guid}" + eval "local proj_config_list=\${${proj}_config_list}" IFS=$'\r'$'\n' - for config in ${config_list}; do - echo "${indent}${proj_guid}.${config}.ActiveCfg = ${config}" - echo "${indent}${proj_guid}.${config}.Build.0 = ${config}" + for config in ${proj_config_list}; do + if [ "$mixed_platforms" != "" ]; then + local c=${config%%|*} + echo "${indent}${proj_guid}.${c}|Mixed Platforms.ActiveCfg = ${config}" + echo "${indent}${proj_guid}.${c}|Mixed Platforms.Build.0 = ${config}" + else + echo "${indent}${proj_guid}.${config}.ActiveCfg = ${config}" + echo "${indent}${proj_guid}.${config}.Build.0 = ${config}" + fi done IFS=${IFS_bak} diff --git a/libvpx/configure b/libvpx/configure index 3651334e2..24be893f7 100755 --- a/libvpx/configure +++ b/libvpx/configure @@ -257,6 +257,7 @@ CONFIG_LIST=" install_bins install_libs install_srcs + use_x86inc debug gprof gcov diff --git a/libvpx/libs.mk b/libvpx/libs.mk index 4aa7dc48a..233863108 100644 --- a/libvpx/libs.mk +++ b/libvpx/libs.mk @@ -57,6 +57,13 @@ CLEAN-OBJS += $$(BUILD_PFX)$(1).h RTCD += $$(BUILD_PFX)$(1).h endef +# x86inc.asm is not compatible with pic 32bit builds. 
Restrict +# files which use it to 64bit builds or 32bit without pic +USE_X86INC = no +ifeq ($(CONFIG_USE_X86INC),yes) + USE_X86INC = yes +endif + CODEC_SRCS-yes += CHANGELOG CODEC_SRCS-yes += libs.mk diff --git a/libvpx/test/convolve_test.cc b/libvpx/test/convolve_test.cc index 3b72129cc..b1510c648 100644 --- a/libvpx/test/convolve_test.cc +++ b/libvpx/test/convolve_test.cc @@ -527,9 +527,9 @@ INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values( #if HAVE_SSSE3 const ConvolveFunctions convolve8_ssse3( - vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_c, - vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_c, - vp9_convolve8_ssse3, vp9_convolve8_avg_c); + vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_ssse3, + vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_ssse3, + vp9_convolve8_ssse3, vp9_convolve8_avg_ssse3); INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values( make_tuple(4, 4, &convolve8_ssse3), diff --git a/libvpx/test/test.mk b/libvpx/test/test.mk index 619533a38..25e05b9fc 100644 --- a/libvpx/test/test.mk +++ b/libvpx/test/test.mk @@ -89,6 +89,7 @@ LIBVPX_TEST_SRCS-yes += tile_independence_test.cc endif LIBVPX_TEST_SRCS-$(CONFIG_VP9) += convolve_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_thread_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc diff --git a/libvpx/test/vp9_subtract_test.cc b/libvpx/test/vp9_subtract_test.cc index 3e5fe8d6a..24767957f 100644 --- a/libvpx/test/vp9_subtract_test.cc +++ b/libvpx/test/vp9_subtract_test.cc @@ -39,7 +39,7 @@ TEST_P(VP9SubtractBlockTest, SimpleSubtract) { ACMRandom rnd(ACMRandom::DeterministicSeed()); // FIXME(rbultje) split in its own file - for (BLOCK_SIZE_TYPE bsize = BLOCK_SIZE_AB4X4; bsize < BLOCK_SIZE_TYPES; + for (BLOCK_SIZE_TYPE bsize = BLOCK_4X4; bsize < BLOCK_SIZE_TYPES; bsize = static_cast<BLOCK_SIZE_TYPE>(static_cast<int>(bsize) + 1)) { const int block_width = 4 << b_width_log2(bsize); const int block_height = 4 << b_height_log2(bsize); @@ -93,9 +93,8 @@ TEST_P(VP9SubtractBlockTest, SimpleSubtract) { INSTANTIATE_TEST_CASE_P(C, VP9SubtractBlockTest, ::testing::Values(vp9_subtract_block_c)); -#if HAVE_SSE2 +#if HAVE_SSE2 && CONFIG_USE_X86INC INSTANTIATE_TEST_CASE_P(SSE2, VP9SubtractBlockTest, ::testing::Values(vp9_subtract_block_sse2)); #endif - } // namespace vp9 diff --git a/libvpx/test/vp9_thread_test.cc b/libvpx/test/vp9_thread_test.cc new file mode 100644 index 000000000..41d22dd3a --- /dev/null +++ b/libvpx/test/vp9_thread_test.cc @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "vp9/decoder/vp9_thread.h" + +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/decode_test_driver.h" +#include "test/md5_helper.h" +#include "test/webm_video_source.h" + +namespace { + +class VP9WorkerThreadTest : public ::testing::Test { + protected: + virtual ~VP9WorkerThreadTest() {} + virtual void SetUp() { + vp9_worker_init(&worker_); + } + + virtual void TearDown() { + vp9_worker_end(&worker_); + } + + VP9Worker worker_; +}; + +int ThreadHook(void* data, void* return_value) { + int* const hook_data = reinterpret_cast<int*>(data); + *hook_data = 5; + return *reinterpret_cast<int*>(return_value); +} + +TEST_F(VP9WorkerThreadTest, HookSuccess) { + EXPECT_TRUE(vp9_worker_sync(&worker_)); // should be a no-op. + + for (int i = 0; i < 2; ++i) { + EXPECT_TRUE(vp9_worker_reset(&worker_)); + + int hook_data = 0; + int return_value = 1; // return successfully from the hook + worker_.hook = ThreadHook; + worker_.data1 = &hook_data; + worker_.data2 = &return_value; + + vp9_worker_launch(&worker_); + EXPECT_TRUE(vp9_worker_sync(&worker_)); + EXPECT_FALSE(worker_.had_error); + EXPECT_EQ(5, hook_data); + + EXPECT_TRUE(vp9_worker_sync(&worker_)); // should be a no-op. + } +} + +TEST_F(VP9WorkerThreadTest, HookFailure) { + EXPECT_TRUE(vp9_worker_reset(&worker_)); + + int hook_data = 0; + int return_value = 0; // return failure from the hook + worker_.hook = ThreadHook; + worker_.data1 = &hook_data; + worker_.data2 = &return_value; + + vp9_worker_launch(&worker_); + EXPECT_FALSE(vp9_worker_sync(&worker_)); + EXPECT_TRUE(worker_.had_error); + + // Ensure _reset() clears the error and _launch() can be called again. + return_value = 1; + EXPECT_TRUE(vp9_worker_reset(&worker_)); + EXPECT_FALSE(worker_.had_error); + vp9_worker_launch(&worker_); + EXPECT_TRUE(vp9_worker_sync(&worker_)); + EXPECT_FALSE(worker_.had_error); +} + +TEST(VP9DecodeMTTest, MTDecode) { + libvpx_test::WebMVideoSource video("vp90-2-03-size-226x226.webm"); + video.Init(); + + vpx_codec_dec_cfg_t cfg = {0}; + cfg.threads = 2; + libvpx_test::VP9Decoder decoder(cfg, 0); + + libvpx_test::MD5 md5; + for (video.Begin(); video.cxdata(); video.Next()) { + const vpx_codec_err_t res = + decoder.DecodeFrame(video.cxdata(), video.frame_size()); + ASSERT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError(); + + libvpx_test::DxDataIterator dec_iter = decoder.GetDxData(); + const vpx_image_t *img = NULL; + + // Get decompressed data + while ((img = dec_iter.Next())) { + md5.Add(img); + } + } + EXPECT_STREQ("b35a1b707b28e82be025d960aba039bc", md5.Get()); +} + +} // namespace diff --git a/libvpx/vp9/common/arm/neon/vp9_convolve8_avg_neon.asm b/libvpx/vp9/common/arm/neon/vp9_convolve8_avg_neon.asm index 15039e267..110a56cdd 100644 --- a/libvpx/vp9/common/arm/neon/vp9_convolve8_avg_neon.asm +++ b/libvpx/vp9/common/arm/neon/vp9_convolve8_avg_neon.asm @@ -52,15 +52,15 @@ ; sp[]int h |vp9_convolve8_avg_horiz_neon| PROC + ldr r12, [sp, #4] ; x_step_q4 + cmp r12, #16 + bne vp9_convolve8_avg_horiz_c + push {r4-r10, lr} sub r0, r0, #3 ; adjust for taps - ldr r4, [sp, #36] ; x_step_q4 ldr r5, [sp, #32] ; filter_x - cmp r4, #16 - bne call_horiz_c_convolve ; x_step_q4 != 16 - ldr r6, [sp, #48] ; w ldr r7, [sp, #52] ; h @@ -82,22 +82,22 @@ mov r10, r6 ; w loop counter loop_horiz - vld4.u8 {d24[0], d25[0], d26[0], d27[0]}, [r0]! - vld4.u8 {d24[4], d25[4], d26[4], d27[4]}, [r0]! + vld1.8 {d24}, [r0]! vld3.u8 {d28[0], d29[0], d30[0]}, [r0], r9 - vld4.u8 {d24[1], d25[1], d26[1], d27[1]}, [r0]! 
- vld4.u8 {d24[5], d25[5], d26[5], d27[5]}, [r0]! + vld1.8 {d25}, [r0]! vld3.u8 {d28[1], d29[1], d30[1]}, [r0], r9 - vld4.u8 {d24[2], d25[2], d26[2], d27[2]}, [r0]! - vld4.u8 {d24[6], d25[6], d26[6], d27[6]}, [r0]! + vld1.8 {d26}, [r0]! vld3.u8 {d28[2], d29[2], d30[2]}, [r0], r9 - vld4.u8 {d24[3], d25[3], d26[3], d27[3]}, [r0]! - vld4.u8 {d24[7], d25[7], d26[7], d27[7]}, [r0]! + vld1.8 {d27}, [r0]! vld3.u8 {d28[3], d29[3], d30[3]}, [r0], r8 + vtrn.16 q12, q13 + vtrn.8 d24, d25 + vtrn.8 d26, d27 + ; extract to s16 vmovl.u8 q8, d24 vmovl.u8 q9, d25 @@ -128,8 +128,8 @@ loop_horiz vqrshrun.s32 d5, q15, #7 ; saturate - vqshrn.u16 d2, q1, #0 - vqshrn.u16 d3, q2, #0 + vqmovn.u16 d2, q1 + vqmovn.u16 d3, q2 ; transpose vtrn.16 d2, d3 @@ -137,10 +137,7 @@ loop_horiz vtrn.8 d2, d3 ; average the new value and the dst value - vaddl.u8 q8, d2, d6 - vaddl.u8 q9, d3, d7 - vqrshrn.u16 d2, q8, #1 - vqrshrn.u16 d3, q9, #1 + vrhadd.u8 q1, q1, q3 vst1.u32 {d2[0]}, [r2], r3 vst1.u32 {d3[0]}, [r2], r3 @@ -159,26 +156,20 @@ loop_horiz pop {r4-r10, pc} -call_horiz_c_convolve - pop {r4-r10, lr} - add r0, r0, #3 ; un-adjust for taps - b vp9_convolve8_avg_horiz_c - - ENDP |vp9_convolve8_avg_vert_neon| PROC + ldr r12, [sp, #12] + cmp r12, #16 + bne vp9_convolve8_avg_vert_c + push {r4-r10, lr} ; adjust for taps sub r0, r0, r1 sub r0, r0, r1, lsl #1 - ldr r6, [sp, #44] ; y_step_q4 ldr r7, [sp, #40] ; filter_y - cmp r6, #16 - bne call_vert_c_convolve ; y_step_q4 != 16 - ldr r8, [sp, #48] ; w ldr r9, [sp, #52] ; h @@ -240,14 +231,11 @@ loop_vert vqrshrun.s32 d5, q15, #7 ; saturate - vqshrn.u16 d2, q1, #0 - vqshrn.u16 d3, q2, #0 + vqmovn.u16 d2, q1 + vqmovn.u16 d3, q2 ; average the new value and the dst value - vaddl.u8 q8, d2, d6 - vaddl.u8 q9, d3, d7 - vqrshrn.u16 d2, q8, #1 - vqrshrn.u16 d3, q9, #1 + vrhadd.u8 q1, q1, q3 vst1.u32 {d2[0]}, [r2], r3 vst1.u32 {d2[1]}, [r2], r3 @@ -266,12 +254,5 @@ loop_vert pop {r4-r10, pc} -call_vert_c_convolve - pop {r4-r10, lr} - ; un-adjust for taps - add r0, r0, r1 - add r0, r0, r1, lsl #1 - b vp9_convolve8_avg_vert_c - ENDP END diff --git a/libvpx/vp9/common/arm/neon/vp9_convolve8_neon.asm b/libvpx/vp9/common/arm/neon/vp9_convolve8_neon.asm index 842c73c90..845e4a866 100644 --- a/libvpx/vp9/common/arm/neon/vp9_convolve8_neon.asm +++ b/libvpx/vp9/common/arm/neon/vp9_convolve8_neon.asm @@ -52,15 +52,15 @@ ; sp[]int h |vp9_convolve8_horiz_neon| PROC + ldr r12, [sp, #4] ; x_step_q4 + cmp r12, #16 + bne vp9_convolve8_horiz_c + push {r4-r10, lr} sub r0, r0, #3 ; adjust for taps - ldr r4, [sp, #36] ; x_step_q4 ldr r5, [sp, #32] ; filter_x - cmp r4, #16 - bne call_horiz_c_convolve ; x_step_q4 != 16 - ldr r6, [sp, #48] ; w ldr r7, [sp, #52] ; h @@ -82,22 +82,22 @@ mov r10, r6 ; w loop counter loop_horiz - vld4.u8 {d24[0], d25[0], d26[0], d27[0]}, [r0]! - vld4.u8 {d24[4], d25[4], d26[4], d27[4]}, [r0]! + vld1.8 {d24}, [r0]! vld3.u8 {d28[0], d29[0], d30[0]}, [r0], r9 - vld4.u8 {d24[1], d25[1], d26[1], d27[1]}, [r0]! - vld4.u8 {d24[5], d25[5], d26[5], d27[5]}, [r0]! + vld1.8 {d25}, [r0]! vld3.u8 {d28[1], d29[1], d30[1]}, [r0], r9 - vld4.u8 {d24[2], d25[2], d26[2], d27[2]}, [r0]! - vld4.u8 {d24[6], d25[6], d26[6], d27[6]}, [r0]! + vld1.8 {d26}, [r0]! vld3.u8 {d28[2], d29[2], d30[2]}, [r0], r9 - vld4.u8 {d24[3], d25[3], d26[3], d27[3]}, [r0]! - vld4.u8 {d24[7], d25[7], d26[7], d27[7]}, [r0]! + vld1.8 {d27}, [r0]! 
vld3.u8 {d28[3], d29[3], d30[3]}, [r0], r8 + vtrn.16 q12, q13 + vtrn.8 d24, d25 + vtrn.8 d26, d27 + ; extract to s16 vmovl.u8 q8, d24 vmovl.u8 q9, d25 @@ -120,8 +120,8 @@ loop_horiz vqrshrun.s32 d5, q15, #7 ; saturate - vqshrn.u16 d2, q1, #0 - vqshrn.u16 d3, q2, #0 + vqmovn.u16 d2, q1 + vqmovn.u16 d3, q2 ; transpose vtrn.16 d2, d3 @@ -145,26 +145,20 @@ loop_horiz pop {r4-r10, pc} -call_horiz_c_convolve - pop {r4-r10, lr} - add r0, r0, #3 ; un-adjust for taps - b vp9_convolve8_horiz_c - - ENDP |vp9_convolve8_vert_neon| PROC + ldr r12, [sp, #12] + cmp r12, #16 + bne vp9_convolve8_vert_c + push {r4-r10, lr} ; adjust for taps sub r0, r0, r1 sub r0, r0, r1, lsl #1 - ldr r6, [sp, #44] ; y_step_q4 ldr r7, [sp, #40] ; filter_y - cmp r6, #16 - bne call_vert_c_convolve ; y_step_q4 != 16 - ldr r8, [sp, #48] ; w ldr r9, [sp, #52] ; h @@ -219,8 +213,8 @@ loop_vert vqrshrun.s32 d5, q15, #7 ; saturate - vqshrn.u16 d2, q1, #0 - vqshrn.u16 d3, q2, #0 + vqmovn.u16 d2, q1 + vqmovn.u16 d3, q2 vst1.u32 {d2[0]}, [r2], r3 vst1.u32 {d2[1]}, [r2], r3 @@ -239,12 +233,5 @@ loop_vert pop {r4-r10, pc} -call_vert_c_convolve - pop {r4-r10, lr} - ; un-adjust for taps - add r0, r0, r1 - add r0, r0, r1, lsl #1 - b vp9_convolve8_vert_c - ENDP END diff --git a/libvpx/vp9/common/arm/neon/vp9_mb_lpf_neon.asm b/libvpx/vp9/common/arm/neon/vp9_mb_lpf_neon.asm new file mode 100644 index 000000000..edf5786e3 --- /dev/null +++ b/libvpx/vp9/common/arm/neon/vp9_mb_lpf_neon.asm @@ -0,0 +1,618 @@ +; +; Copyright (c) 2013 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + EXPORT |vp9_mb_lpf_horizontal_edge_w_neon| + EXPORT |vp9_mb_lpf_vertical_edge_w_neon| + ARM + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +; void vp9_mb_lpf_horizontal_edge_w_neon(uint8_t *s, int p, +; const uint8_t *blimit, +; const uint8_t *limit, +; const uint8_t *thresh +; int count) +; r0 uint8_t *s, +; r1 int p, /* pitch */ +; r2 const uint8_t *blimit, +; r3 const uint8_t *limit, +; sp const uint8_t *thresh, +|vp9_mb_lpf_horizontal_edge_w_neon| PROC + push {r4-r8, lr} + vpush {d8-d15} + ldr r4, [sp, #88] ; load thresh + ldr r12, [sp, #92] ; load count + +h_count + vld1.8 {d16[]}, [r2] ; load *blimit + vld1.8 {d17[]}, [r3] ; load *limit + vld1.8 {d18[]}, [r4] ; load *thresh + + sub r8, r0, r1, lsl #3 ; move src pointer down by 8 lines + + vld1.u8 {d0}, [r8@64], r1 ; p7 + vld1.u8 {d1}, [r8@64], r1 ; p6 + vld1.u8 {d2}, [r8@64], r1 ; p5 + vld1.u8 {d3}, [r8@64], r1 ; p4 + vld1.u8 {d4}, [r8@64], r1 ; p3 + vld1.u8 {d5}, [r8@64], r1 ; p2 + vld1.u8 {d6}, [r8@64], r1 ; p1 + vld1.u8 {d7}, [r8@64], r1 ; p0 + vld1.u8 {d8}, [r8@64], r1 ; q0 + vld1.u8 {d9}, [r8@64], r1 ; q1 + vld1.u8 {d10}, [r8@64], r1 ; q2 + vld1.u8 {d11}, [r8@64], r1 ; q3 + vld1.u8 {d12}, [r8@64], r1 ; q4 + vld1.u8 {d13}, [r8@64], r1 ; q5 + vld1.u8 {d14}, [r8@64], r1 ; q6 + vld1.u8 {d15}, [r8@64], r1 ; q7 + + bl vp9_wide_mbfilter_neon + + tst r7, #1 + beq h_mbfilter + + ; flat && mask were not set for any of the channels. Just store the values + ; from filter. 
+ sub r8, r0, r1, lsl #1 + + vst1.u8 {d25}, [r8@64], r1 ; store op1 + vst1.u8 {d24}, [r8@64], r1 ; store op0 + vst1.u8 {d23}, [r8@64], r1 ; store oq0 + vst1.u8 {d26}, [r8@64], r1 ; store oq1 + + b h_next + +h_mbfilter + tst r7, #2 + beq h_wide_mbfilter + + ; flat2 was not set for any of the channels. Just store the values from + ; mbfilter. + sub r8, r0, r1, lsl #1 + sub r8, r8, r1 + + vst1.u8 {d18}, [r8@64], r1 ; store op2 + vst1.u8 {d19}, [r8@64], r1 ; store op1 + vst1.u8 {d20}, [r8@64], r1 ; store op0 + vst1.u8 {d21}, [r8@64], r1 ; store oq0 + vst1.u8 {d22}, [r8@64], r1 ; store oq1 + vst1.u8 {d23}, [r8@64], r1 ; store oq2 + + b h_next + +h_wide_mbfilter + sub r8, r0, r1, lsl #3 + add r8, r8, r1 + + vst1.u8 {d16}, [r8@64], r1 ; store op6 + vst1.u8 {d24}, [r8@64], r1 ; store op5 + vst1.u8 {d25}, [r8@64], r1 ; store op4 + vst1.u8 {d26}, [r8@64], r1 ; store op3 + vst1.u8 {d27}, [r8@64], r1 ; store op2 + vst1.u8 {d18}, [r8@64], r1 ; store op1 + vst1.u8 {d19}, [r8@64], r1 ; store op0 + vst1.u8 {d20}, [r8@64], r1 ; store oq0 + vst1.u8 {d21}, [r8@64], r1 ; store oq1 + vst1.u8 {d22}, [r8@64], r1 ; store oq2 + vst1.u8 {d23}, [r8@64], r1 ; store oq3 + vst1.u8 {d1}, [r8@64], r1 ; store oq4 + vst1.u8 {d2}, [r8@64], r1 ; store oq5 + vst1.u8 {d3}, [r8@64], r1 ; store oq6 + +h_next + add r0, r0, #8 + subs r12, r12, #1 + bne h_count + + vpop {d8-d15} + pop {r4-r8, pc} + + ENDP ; |vp9_mb_lpf_horizontal_edge_w_neon| + +; void vp9_mb_lpf_vertical_edge_w_neon(uint8_t *s, int p, +; const uint8_t *blimit, +; const uint8_t *limit, +; const uint8_t *thresh) +; r0 uint8_t *s, +; r1 int p, /* pitch */ +; r2 const uint8_t *blimit, +; r3 const uint8_t *limit, +; sp const uint8_t *thresh, +|vp9_mb_lpf_vertical_edge_w_neon| PROC + push {r4-r8, lr} + vpush {d8-d15} + ldr r4, [sp, #88] ; load thresh + + vld1.8 {d16[]}, [r2] ; load *blimit + vld1.8 {d17[]}, [r3] ; load *limit + vld1.8 {d18[]}, [r4] ; load *thresh + + sub r8, r0, #8 + + vld1.8 {d0}, [r8@64], r1 + vld1.8 {d8}, [r0@64], r1 + vld1.8 {d1}, [r8@64], r1 + vld1.8 {d9}, [r0@64], r1 + vld1.8 {d2}, [r8@64], r1 + vld1.8 {d10}, [r0@64], r1 + vld1.8 {d3}, [r8@64], r1 + vld1.8 {d11}, [r0@64], r1 + vld1.8 {d4}, [r8@64], r1 + vld1.8 {d12}, [r0@64], r1 + vld1.8 {d5}, [r8@64], r1 + vld1.8 {d13}, [r0@64], r1 + vld1.8 {d6}, [r8@64], r1 + vld1.8 {d14}, [r0@64], r1 + vld1.8 {d7}, [r8@64], r1 + vld1.8 {d15}, [r0@64], r1 + + sub r0, r0, r1, lsl #3 + + vtrn.32 q0, q2 + vtrn.32 q1, q3 + vtrn.32 q4, q6 + vtrn.32 q5, q7 + + vtrn.16 q0, q1 + vtrn.16 q2, q3 + vtrn.16 q4, q5 + vtrn.16 q6, q7 + + vtrn.8 d0, d1 + vtrn.8 d2, d3 + vtrn.8 d4, d5 + vtrn.8 d6, d7 + + vtrn.8 d8, d9 + vtrn.8 d10, d11 + vtrn.8 d12, d13 + vtrn.8 d14, d15 + + bl vp9_wide_mbfilter_neon + + tst r7, #1 + beq v_mbfilter + + ; flat && mask were not set for any of the channels. Just store the values + ; from filter. + sub r8, r0, #2 + + vswp d23, d25 + + vst4.8 {d23[0], d24[0], d25[0], d26[0]}, [r8], r1 + vst4.8 {d23[1], d24[1], d25[1], d26[1]}, [r8], r1 + vst4.8 {d23[2], d24[2], d25[2], d26[2]}, [r8], r1 + vst4.8 {d23[3], d24[3], d25[3], d26[3]}, [r8], r1 + vst4.8 {d23[4], d24[4], d25[4], d26[4]}, [r8], r1 + vst4.8 {d23[5], d24[5], d25[5], d26[5]}, [r8], r1 + vst4.8 {d23[6], d24[6], d25[6], d26[6]}, [r8], r1 + vst4.8 {d23[7], d24[7], d25[7], d26[7]}, [r8], r1 + + b v_end + +v_mbfilter + tst r7, #2 + beq v_wide_mbfilter + + ; flat2 was not set for any of the channels. Just store the values from + ; mbfilter. 
+ sub r8, r0, #3 + + vst3.8 {d18[0], d19[0], d20[0]}, [r8], r1 + vst3.8 {d21[0], d22[0], d23[0]}, [r0], r1 + vst3.8 {d18[1], d19[1], d20[1]}, [r8], r1 + vst3.8 {d21[1], d22[1], d23[1]}, [r0], r1 + vst3.8 {d18[2], d19[2], d20[2]}, [r8], r1 + vst3.8 {d21[2], d22[2], d23[2]}, [r0], r1 + vst3.8 {d18[3], d19[3], d20[3]}, [r8], r1 + vst3.8 {d21[3], d22[3], d23[3]}, [r0], r1 + vst3.8 {d18[4], d19[4], d20[4]}, [r8], r1 + vst3.8 {d21[4], d22[4], d23[4]}, [r0], r1 + vst3.8 {d18[5], d19[5], d20[5]}, [r8], r1 + vst3.8 {d21[5], d22[5], d23[5]}, [r0], r1 + vst3.8 {d18[6], d19[6], d20[6]}, [r8], r1 + vst3.8 {d21[6], d22[6], d23[6]}, [r0], r1 + vst3.8 {d18[7], d19[7], d20[7]}, [r8], r1 + vst3.8 {d21[7], d22[7], d23[7]}, [r0], r1 + + b v_end + +v_wide_mbfilter + sub r8, r0, #8 + + vtrn.32 d0, d26 + vtrn.32 d16, d27 + vtrn.32 d24, d18 + vtrn.32 d25, d19 + + vtrn.16 d0, d24 + vtrn.16 d16, d25 + vtrn.16 d26, d18 + vtrn.16 d27, d19 + + vtrn.8 d0, d16 + vtrn.8 d24, d25 + vtrn.8 d26, d27 + vtrn.8 d18, d19 + + vtrn.32 d20, d1 + vtrn.32 d21, d2 + vtrn.32 d22, d3 + vtrn.32 d23, d15 + + vtrn.16 d20, d22 + vtrn.16 d21, d23 + vtrn.16 d1, d3 + vtrn.16 d2, d15 + + vtrn.8 d20, d21 + vtrn.8 d22, d23 + vtrn.8 d1, d2 + vtrn.8 d3, d15 + + vst1.8 {d0}, [r8@64], r1 + vst1.8 {d20}, [r0@64], r1 + vst1.8 {d16}, [r8@64], r1 + vst1.8 {d21}, [r0@64], r1 + vst1.8 {d24}, [r8@64], r1 + vst1.8 {d22}, [r0@64], r1 + vst1.8 {d25}, [r8@64], r1 + vst1.8 {d23}, [r0@64], r1 + vst1.8 {d26}, [r8@64], r1 + vst1.8 {d1}, [r0@64], r1 + vst1.8 {d27}, [r8@64], r1 + vst1.8 {d2}, [r0@64], r1 + vst1.8 {d18}, [r8@64], r1 + vst1.8 {d3}, [r0@64], r1 + vst1.8 {d19}, [r8@64], r1 + vst1.8 {d15}, [r0@64], r1 + +v_end + vpop {d8-d15} + pop {r4-r8, pc} + + ENDP ; |vp9_mb_lpf_vertical_edge_w_neon| + +; void vp9_wide_mbfilter_neon(); +; This is a helper function for the loopfilters. The invidual functions do the +; necessary load, transpose (if necessary) and store. 
+; +; r0-r3 PRESERVE +; d16 blimit +; d17 limit +; d18 thresh +; d0 p7 +; d1 p6 +; d2 p5 +; d3 p4 +; d4 p3 +; d5 p2 +; d6 p1 +; d7 p0 +; d8 q0 +; d9 q1 +; d10 q2 +; d11 q3 +; d12 q4 +; d13 q5 +; d14 q6 +; d15 q7 +|vp9_wide_mbfilter_neon| PROC + mov r7, #0 + + ; filter_mask + vabd.u8 d19, d4, d5 ; abs(p3 - p2) + vabd.u8 d20, d5, d6 ; abs(p2 - p1) + vabd.u8 d21, d6, d7 ; abs(p1 - p0) + vabd.u8 d22, d9, d8 ; abs(q1 - q0) + vabd.u8 d23, d10, d9 ; abs(q2 - q1) + vabd.u8 d24, d11, d10 ; abs(q3 - q2) + + ; only compare the largest value to limit + vmax.u8 d19, d19, d20 ; max(abs(p3 - p2), abs(p2 - p1)) + vmax.u8 d20, d21, d22 ; max(abs(p1 - p0), abs(q1 - q0)) + vmax.u8 d23, d23, d24 ; max(abs(q2 - q1), abs(q3 - q2)) + vmax.u8 d19, d19, d20 + + vabd.u8 d24, d7, d8 ; abs(p0 - q0) + + vmax.u8 d19, d19, d23 + + vabd.u8 d23, d6, d9 ; a = abs(p1 - q1) + vqadd.u8 d24, d24, d24 ; b = abs(p0 - q0) * 2 + + ; abs () > limit + vcge.u8 d19, d17, d19 + + ; flatmask4 + vabd.u8 d25, d7, d5 ; abs(p0 - p2) + vabd.u8 d26, d8, d10 ; abs(q0 - q2) + vabd.u8 d27, d4, d7 ; abs(p3 - p0) + vabd.u8 d28, d11, d8 ; abs(q3 - q0) + + ; only compare the largest value to thresh + vmax.u8 d25, d25, d26 ; max(abs(p0 - p2), abs(q0 - q2)) + vmax.u8 d26, d27, d28 ; max(abs(p3 - p0), abs(q3 - q0)) + vmax.u8 d25, d25, d26 + vmax.u8 d20, d20, d25 + + vshr.u8 d23, d23, #1 ; a = a / 2 + vqadd.u8 d24, d24, d23 ; a = b + a + + vmov.u8 d30, #1 + vcge.u8 d24, d16, d24 ; (a > blimit * 2 + limit) * -1 + + vcge.u8 d20, d30, d20 ; flat + + vand d19, d19, d24 ; mask + + ; hevmask + vcgt.u8 d21, d21, d18 ; (abs(p1 - p0) > thresh)*-1 + vcgt.u8 d22, d22, d18 ; (abs(q1 - q0) > thresh)*-1 + vorr d21, d21, d22 ; hev + + vand d16, d20, d19 ; flat && mask + vmov r5, r6, d16 + orrs r5, r5, r6 ; Check for 0 + orreq r7, r7, #1 ; Only do filter branch + + ; flatmask5(1, p7, p6, p5, p4, p0, q0, q4, q5, q6, q7) + vabd.u8 d22, d3, d7 ; abs(p4 - p0) + vabd.u8 d23, d12, d8 ; abs(q4 - q0) + vabd.u8 d24, d7, d2 ; abs(p0 - p5) + vabd.u8 d25, d8, d13 ; abs(q0 - q5) + vabd.u8 d26, d1, d7 ; abs(p6 - p0) + vabd.u8 d27, d14, d8 ; abs(q6 - q0) + vabd.u8 d28, d0, d7 ; abs(p7 - p0) + vabd.u8 d29, d15, d8 ; abs(q7 - q0) + + ; only compare the largest value to thresh + vmax.u8 d22, d22, d23 ; max(abs(p4 - p0), abs(q4 - q0)) + vmax.u8 d23, d24, d25 ; max(abs(p0 - p5), abs(q0 - q5)) + vmax.u8 d24, d26, d27 ; max(abs(p6 - p0), abs(q6 - q0)) + vmax.u8 d25, d28, d29 ; max(abs(p7 - p0), abs(q7 - q0)) + + vmax.u8 d26, d22, d23 + vmax.u8 d27, d24, d25 + vmax.u8 d23, d26, d27 + + vcge.u8 d18, d30, d23 ; flat2 + + vmov.u8 d22, #0x80 + + vand d17, d18, d16 ; flat2 && flat && mask + vmov r5, r6, d17 + orrs r5, r5, r6 ; Check for 0 + orreq r7, r7, #2 ; Only do mbfilter branch + + ; mbfilter() function + + ; filter() function + ; convert to signed + veor d23, d8, d22 ; qs0 + veor d24, d7, d22 ; ps0 + veor d25, d6, d22 ; ps1 + veor d26, d9, d22 ; qs1 + + vmov.u8 d27, #3 + + vsub.s8 d28, d23, d24 ; ( qs0 - ps0) + + vqsub.s8 d29, d25, d26 ; filter = clamp(ps1-qs1) + + vmull.s8 q15, d28, d27 ; 3 * ( qs0 - ps0) + + vand d29, d29, d21 ; filter &= hev + + vaddw.s8 q15, q15, d29 ; filter + 3 * (qs0 - ps0) + + vmov.u8 d29, #4 + + ; filter = clamp(filter + 3 * ( qs0 - ps0)) + vqmovn.s16 d28, q15 + + vand d28, d28, d19 ; filter &= mask + + vqadd.s8 d30, d28, d27 ; filter2 = clamp(filter+3) + vqadd.s8 d29, d28, d29 ; filter1 = clamp(filter+4) + vshr.s8 d30, d30, #3 ; filter2 >>= 3 + vshr.s8 d29, d29, #3 ; filter1 >>= 3 + + + vqadd.s8 d24, d24, d30 ; op0 = clamp(ps0 + filter2) + vqsub.s8 d23, d23, d29 ; 
oq0 = clamp(qs0 - filter1) + + ; outer tap adjustments: ++filter1 >> 1 + vrshr.s8 d29, d29, #1 + vbic d29, d29, d21 ; filter &= ~hev + + vqadd.s8 d25, d25, d29 ; op1 = clamp(ps1 + filter) + vqsub.s8 d26, d26, d29 ; oq1 = clamp(qs1 - filter) + + veor d24, d24, d22 ; *f_op0 = u^0x80 + veor d23, d23, d22 ; *f_oq0 = u^0x80 + veor d25, d25, d22 ; *f_op1 = u^0x80 + veor d26, d26, d22 ; *f_oq1 = u^0x80 + + tst r7, #1 + bxne lr + + ; mbfilter flat && mask branch + ; TODO(fgalligan): Can I decrease the cycles shifting to consective d's + ; and using vibt on the q's? + vmov.u8 d29, #2 + vaddl.u8 q15, d7, d8 ; op2 = p0 + q0 + vmlal.u8 q15, d4, d27 ; op2 = p0 + q0 + p3 * 3 + vmlal.u8 q15, d5, d29 ; op2 = p0 + q0 + p3 * 3 + p2 * 2 + vaddw.u8 q15, d6 ; op2=p1 + p0 + q0 + p3 * 3 + p2 *2 + vqrshrn.u16 d18, q15, #3 ; r_op2 + + vsubw.u8 q15, d4 ; op1 = op2 - p3 + vsubw.u8 q15, d5 ; op1 -= p2 + vaddw.u8 q15, d6 ; op1 += p1 + vaddw.u8 q15, d9 ; op1 += q1 + vqrshrn.u16 d19, q15, #3 ; r_op1 + + vsubw.u8 q15, d4 ; op0 = op1 - p3 + vsubw.u8 q15, d6 ; op0 -= p1 + vaddw.u8 q15, d7 ; op0 += p0 + vaddw.u8 q15, d10 ; op0 += q2 + vqrshrn.u16 d20, q15, #3 ; r_op0 + + vsubw.u8 q15, d4 ; oq0 = op0 - p3 + vsubw.u8 q15, d7 ; oq0 -= p0 + vaddw.u8 q15, d8 ; oq0 += q0 + vaddw.u8 q15, d11 ; oq0 += q3 + vqrshrn.u16 d21, q15, #3 ; r_oq0 + + vsubw.u8 q15, d5 ; oq1 = oq0 - p2 + vsubw.u8 q15, d8 ; oq1 -= q0 + vaddw.u8 q15, d9 ; oq1 += q1 + vaddw.u8 q15, d11 ; oq1 += q3 + vqrshrn.u16 d22, q15, #3 ; r_oq1 + + vsubw.u8 q15, d6 ; oq2 = oq0 - p1 + vsubw.u8 q15, d9 ; oq2 -= q1 + vaddw.u8 q15, d10 ; oq2 += q2 + vaddw.u8 q15, d11 ; oq2 += q3 + vqrshrn.u16 d27, q15, #3 ; r_oq2 + + ; Filter does not set op2 or oq2, so use p2 and q2. + vbif d18, d5, d16 ; t_op2 |= p2 & ~(flat & mask) + vbif d19, d25, d16 ; t_op1 |= f_op1 & ~(flat & mask) + vbif d20, d24, d16 ; t_op0 |= f_op0 & ~(flat & mask) + vbif d21, d23, d16 ; t_oq0 |= f_oq0 & ~(flat & mask) + vbif d22, d26, d16 ; t_oq1 |= f_oq1 & ~(flat & mask) + + vbit d23, d27, d16 ; t_oq2 |= r_oq2 & (flat & mask) + vbif d23, d10, d16 ; t_oq2 |= q2 & ~(flat & mask) + + tst r7, #2 + bxne lr + + ; wide_mbfilter flat2 && flat && mask branch + vmov.u8 d16, #7 + vaddl.u8 q15, d7, d8 ; op6 = p0 + q0 + vmlal.u8 q15, d0, d16 ; op6 += p7 * 3 + vmlal.u8 q15, d1, d29 ; op6 += p6 * 2 + vaddw.u8 q15, d2 ; op6 += p5 + vaddw.u8 q15, d3 ; op6 += p4 + vaddw.u8 q15, d4 ; op6 += p3 + vaddw.u8 q15, d5 ; op6 += p2 + vaddw.u8 q15, d6 ; op6 += p1 + vqrshrn.u16 d16, q15, #4 ; w_op6 + + vsubw.u8 q15, d0 ; op5 = op6 - p7 + vsubw.u8 q15, d1 ; op5 -= p6 + vaddw.u8 q15, d2 ; op5 += p5 + vaddw.u8 q15, d9 ; op5 += q1 + vqrshrn.u16 d24, q15, #4 ; w_op5 + + vsubw.u8 q15, d0 ; op4 = op5 - p7 + vsubw.u8 q15, d2 ; op4 -= p5 + vaddw.u8 q15, d3 ; op4 += p4 + vaddw.u8 q15, d10 ; op4 += q2 + vqrshrn.u16 d25, q15, #4 ; w_op4 + + vsubw.u8 q15, d0 ; op3 = op4 - p7 + vsubw.u8 q15, d3 ; op3 -= p4 + vaddw.u8 q15, d4 ; op3 += p3 + vaddw.u8 q15, d11 ; op3 += q3 + vqrshrn.u16 d26, q15, #4 ; w_op3 + + vsubw.u8 q15, d0 ; op2 = op3 - p7 + vsubw.u8 q15, d4 ; op2 -= p3 + vaddw.u8 q15, d5 ; op2 += p2 + vaddw.u8 q15, d12 ; op2 += q4 + vqrshrn.u16 d27, q15, #4 ; w_op2 + + vbif d27, d18, d17 ; op2 |= t_op2 & ~(f2 & f & m) + + vsubw.u8 q15, d0 ; op1 = op2 - p7 + vsubw.u8 q15, d5 ; op1 -= p2 + vaddw.u8 q15, d6 ; op1 += p1 + vaddw.u8 q15, d13 ; op1 += q5 + vqrshrn.u16 d18, q15, #4 ; w_op1 + + vbif d18, d19, d17 ; op1 |= t_op1 & ~(f2 & f & m) + + vsubw.u8 q15, d0 ; op0 = op1 - p7 + vsubw.u8 q15, d6 ; op0 -= p1 + vaddw.u8 q15, d7 ; op0 += p0 + vaddw.u8 q15, d14 ; 
op0 += q6 + vqrshrn.u16 d19, q15, #4 ; w_op0 + + vbif d19, d20, d17 ; op0 |= t_op0 & ~(f2 & f & m) + + vsubw.u8 q15, d0 ; oq0 = op0 - p7 + vsubw.u8 q15, d7 ; oq0 -= p0 + vaddw.u8 q15, d8 ; oq0 += q0 + vaddw.u8 q15, d15 ; oq0 += q7 + vqrshrn.u16 d20, q15, #4 ; w_oq0 + + vbif d20, d21, d17 ; oq0 |= t_oq0 & ~(f2 & f & m) + + vsubw.u8 q15, d1 ; oq1 = oq0 - p6 + vsubw.u8 q15, d8 ; oq1 -= q0 + vaddw.u8 q15, d9 ; oq1 += q1 + vaddw.u8 q15, d15 ; oq1 += q7 + vqrshrn.u16 d21, q15, #4 ; w_oq1 + + vbif d21, d22, d17 ; oq1 |= t_oq1 & ~(f2 & f & m) + + vsubw.u8 q15, d2 ; oq2 = oq1 - p5 + vsubw.u8 q15, d9 ; oq2 -= q1 + vaddw.u8 q15, d10 ; oq2 += q2 + vaddw.u8 q15, d15 ; oq2 += q7 + vqrshrn.u16 d22, q15, #4 ; w_oq2 + + vbif d22, d23, d17 ; oq2 |= t_oq2 & ~(f2 & f & m) + + vsubw.u8 q15, d3 ; oq3 = oq2 - p4 + vsubw.u8 q15, d10 ; oq3 -= q2 + vaddw.u8 q15, d11 ; oq3 += q3 + vaddw.u8 q15, d15 ; oq3 += q7 + vqrshrn.u16 d23, q15, #4 ; w_oq3 + + vbif d16, d1, d17 ; op6 |= p6 & ~(f2 & f & m) + + vsubw.u8 q15, d4 ; oq4 = oq3 - p3 + vsubw.u8 q15, d11 ; oq4 -= q3 + vaddw.u8 q15, d12 ; oq4 += q4 + vaddw.u8 q15, d15 ; oq4 += q7 + vqrshrn.u16 d1, q15, #4 ; w_oq4 + + vbif d24, d2, d17 ; op5 |= p5 & ~(f2 & f & m) + + vsubw.u8 q15, d5 ; oq5 = oq4 - p2 + vsubw.u8 q15, d12 ; oq5 -= q4 + vaddw.u8 q15, d13 ; oq5 += q5 + vaddw.u8 q15, d15 ; oq5 += q7 + vqrshrn.u16 d2, q15, #4 ; w_oq5 + + vbif d25, d3, d17 ; op4 |= p4 & ~(f2 & f & m) + + vsubw.u8 q15, d6 ; oq6 = oq5 - p1 + vsubw.u8 q15, d13 ; oq6 -= q5 + vaddw.u8 q15, d14 ; oq6 += q6 + vaddw.u8 q15, d15 ; oq6 += q7 + vqrshrn.u16 d3, q15, #4 ; w_oq6 + + vbif d26, d4, d17 ; op3 |= p3 & ~(f2 & f & m) + vbif d23, d11, d17 ; oq3 |= q3 & ~(f2 & f & m) + vbif d1, d12, d17 ; oq4 |= q4 & ~(f2 & f & m) + vbif d2, d13, d17 ; oq5 |= q5 & ~(f2 & f & m) + vbif d3, d14, d17 ; oq6 |= q6 & ~(f2 & f & m) + + bx lr + ENDP ; |vp9_wide_mbfilter_neon| + + END diff --git a/libvpx/vp9/common/arm/neon/vp9_short_idct8x8_add_neon.asm b/libvpx/vp9/common/arm/neon/vp9_short_idct8x8_add_neon.asm index 8e4aadac2..f82966577 100644 --- a/libvpx/vp9/common/arm/neon/vp9_short_idct8x8_add_neon.asm +++ b/libvpx/vp9/common/arm/neon/vp9_short_idct8x8_add_neon.asm @@ -22,8 +22,8 @@ MACRO IDCT8x8_1D ; stage 1 - vdup.16 d0, r3; ; duplicate cospi_28_64 - vdup.16 d1, r4; ; duplicate cospi_4_64 + vdup.16 d0, r3 ; duplicate cospi_28_64 + vdup.16 d1, r4 ; duplicate cospi_4_64 ; input[1] * cospi_28_64 vmull.s16 q2, d18, d0 @@ -57,8 +57,8 @@ vqrshrn.s32 d14, q2, #14 ; >> 14 vqrshrn.s32 d15, q3, #14 ; >> 14 - vdup.16 d0, r5; ; duplicate cospi_12_64 - vdup.16 d1, r6; ; duplicate cospi_20_64 + vdup.16 d0, r5 ; duplicate cospi_12_64 + vdup.16 d1, r6 ; duplicate cospi_20_64 ; input[5] * cospi_12_64 vmull.s16 q2, d26, d0 @@ -93,7 +93,7 @@ vqrshrn.s32 d13, q1, #14 ; >> 14 ; stage 2 & stage 3 - even half - vdup.16 d0, r7; ; duplicate cospi_16_64 + vdup.16 d0, r7 ; duplicate cospi_16_64 ; input[0] * cospi_16_64 vmull.s16 q2, d16, d0 @@ -128,8 +128,8 @@ vqrshrn.s32 d23, q3, #14 ; >> 14 ; input[1] * cospi_24_64 - input[3] * cospi_8_64 - vdup.16 d0, r8; ; duplicate cospi_24_64 - vdup.16 d1, r9; ; duplicate cospi_8_64 + vdup.16 d0, r8 ; duplicate cospi_24_64 + vdup.16 d1, r9 ; duplicate cospi_8_64 ; input[1] * cospi_24_64 vmull.s16 q2, d20, d0 @@ -176,7 +176,7 @@ vadd.s16 q7, q7, q6 ; step2[7] = step1[6] + step1[7] ; stage 3 -odd half - vdup.16 d16, r7; ; duplicate cospi_16_64 + vdup.16 d16, r7 ; duplicate cospi_16_64 ; step2[6] * cospi_16_64 vmull.s16 q9, d28, d16 @@ -211,14 +211,14 @@ vqrshrn.s32 d13, q10, #14 ; >> 14 ; stage 4 - 
vadd.s16 q8, q0, q7; ; output[0] = step1[0] + step1[7]; - vadd.s16 q9, q1, q6; ; output[1] = step1[1] + step1[6]; - vadd.s16 q10, q2, q5; ; output[2] = step1[2] + step1[5]; - vadd.s16 q11, q3, q4; ; output[3] = step1[3] + step1[4]; - vsub.s16 q12, q3, q4; ; output[4] = step1[3] - step1[4]; - vsub.s16 q13, q2, q5; ; output[5] = step1[2] - step1[5]; - vsub.s16 q14, q1, q6; ; output[6] = step1[1] - step1[6]; - vsub.s16 q15, q0, q7; ; output[7] = step1[0] - step1[7]; + vadd.s16 q8, q0, q7 ; output[0] = step1[0] + step1[7]; + vadd.s16 q9, q1, q6 ; output[1] = step1[1] + step1[6]; + vadd.s16 q10, q2, q5 ; output[2] = step1[2] + step1[5]; + vadd.s16 q11, q3, q4 ; output[3] = step1[3] + step1[4]; + vsub.s16 q12, q3, q4 ; output[4] = step1[3] - step1[4]; + vsub.s16 q13, q2, q5 ; output[5] = step1[2] - step1[5]; + vsub.s16 q14, q1, q6 ; output[6] = step1[1] - step1[6]; + vsub.s16 q15, q0, q7 ; output[7] = step1[0] - step1[7]; MEND ; Transpose a 8x8 16bit data matrix. Datas are loaded in q8-q15. @@ -310,14 +310,14 @@ mov r0, r1 ; load destination data - vld1.u8 {d0}, [r1], r2 - vld1.u8 {d1}, [r1], r2 - vld1.s16 {d2}, [r1], r2 - vld1.s16 {d3}, [r1], r2 - vld1.s16 {d4}, [r1], r2 - vld1.s16 {d5}, [r1], r2 - vld1.s16 {d6}, [r1], r2 - vld1.s16 {d7}, [r1] + vld1.64 {d0}, [r1], r2 + vld1.64 {d1}, [r1], r2 + vld1.64 {d2}, [r1], r2 + vld1.64 {d3}, [r1], r2 + vld1.64 {d4}, [r1], r2 + vld1.64 {d5}, [r1], r2 + vld1.64 {d6}, [r1], r2 + vld1.64 {d7}, [r1] ; ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * dest_stride + i] vaddw.u8 q8, q8, d0 diff --git a/libvpx/vp9/common/vp9_blockd.h b/libvpx/vp9/common/vp9_blockd.h index 129711412..f68c5c6ea 100644 --- a/libvpx/vp9/common/vp9_blockd.h +++ b/libvpx/vp9/common/vp9_blockd.h @@ -26,9 +26,6 @@ #include "vp9/common/vp9_treecoder.h" #define BLOCK_SIZE_GROUPS 4 - -#define PREDICTION_PROBS 3 - #define MBSKIP_CONTEXTS 3 /* Segment Feature Masks */ @@ -164,6 +161,11 @@ typedef struct { union b_mode_info bmi[4]; } MODE_INFO; +static int is_inter_block(const MB_MODE_INFO *mbmi) { + return mbmi->ref_frame[0] > INTRA_FRAME; +} + + enum mv_precision { MV_PRECISION_Q3, MV_PRECISION_Q4 @@ -286,22 +288,22 @@ typedef struct macroblockd { static INLINE unsigned char *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE_TYPE subsize) { switch (subsize) { - case BLOCK_SIZE_SB64X64: - case BLOCK_SIZE_SB64X32: - case BLOCK_SIZE_SB32X64: - case BLOCK_SIZE_SB32X32: + case BLOCK_64X64: + case BLOCK_64X32: + case BLOCK_32X64: + case BLOCK_32X32: return &xd->sb_index; - case BLOCK_SIZE_SB32X16: - case BLOCK_SIZE_SB16X32: - case BLOCK_SIZE_MB16X16: + case BLOCK_32X16: + case BLOCK_16X32: + case BLOCK_16X16: return &xd->mb_index; - case BLOCK_SIZE_SB16X8: - case BLOCK_SIZE_SB8X16: - case BLOCK_SIZE_SB8X8: + case BLOCK_16X8: + case BLOCK_8X16: + case BLOCK_8X8: return &xd->b_index; - case BLOCK_SIZE_SB8X4: - case BLOCK_SIZE_SB4X8: - case BLOCK_SIZE_AB4X4: + case BLOCK_8X4: + case BLOCK_4X8: + case BLOCK_4X4: return &xd->ab_index; default: assert(0); @@ -315,7 +317,7 @@ static INLINE void update_partition_context(MACROBLOCKD *xd, const int bsl = b_width_log2(sb_size), bs = (1 << bsl) / 2; const int bwl = b_width_log2(sb_type); const int bhl = b_height_log2(sb_type); - const int boffset = b_width_log2(BLOCK_SIZE_SB64X64) - bsl; + const int boffset = b_width_log2(BLOCK_64X64) - bsl; const char pcval0 = ~(0xe << boffset); const char pcval1 = ~(0xf << boffset); const char pcvalue[2] = {pcval0, pcval1}; @@ -333,7 +335,7 @@ static INLINE int partition_plane_context(MACROBLOCKD *xd, BLOCK_SIZE_TYPE sb_type) { int bsl 
= mi_width_log2(sb_type), bs = 1 << bsl; int above = 0, left = 0, i; - int boffset = mi_width_log2(BLOCK_SIZE_SB64X64) - bsl; + int boffset = mi_width_log2(BLOCK_64X64) - bsl; assert(mi_width_log2(sb_type) == mi_height_log2(sb_type)); assert(bsl >= 0); @@ -366,10 +368,10 @@ static INLINE TX_TYPE get_tx_type_4x4(PLANE_TYPE plane_type, if (plane_type != PLANE_TYPE_Y_WITH_DC || xd->lossless || - mbmi->ref_frame[0] != INTRA_FRAME) + is_inter_block(mbmi)) return DCT_DCT; - return mode2txfm_map[mbmi->sb_type < BLOCK_SIZE_SB8X8 ? + return mode2txfm_map[mbmi->sb_type < BLOCK_8X8 ? mi->bmi[ib].as_mode : mbmi->mode]; } @@ -496,16 +498,16 @@ static INLINE void foreach_transformed_block_in_plane( // it to 4x4 block sizes. if (xd->mb_to_right_edge < 0) max_blocks_wide += - + (xd->mb_to_right_edge >> (5 + xd->plane[plane].subsampling_x)); + (xd->mb_to_right_edge >> (5 + xd->plane[plane].subsampling_x)); if (xd->mb_to_bottom_edge < 0) max_blocks_high += - + (xd->mb_to_bottom_edge >> (5 + xd->plane[plane].subsampling_y)); + (xd->mb_to_bottom_edge >> (5 + xd->plane[plane].subsampling_y)); i = 0; // Unlike the normal case - in here we have to keep track of the // row and column of the blocks we use so that we know if we are in - // the unrestricted motion border.. + // the unrestricted motion border. for (r = 0; r < (1 << sh); r += (1 << tx_size)) { for (c = 0; c < (1 << sw); c += (1 << tx_size)) { if (r < max_blocks_high && c < max_blocks_wide) @@ -563,8 +565,8 @@ static INLINE void foreach_predicted_block_in_plane( // size of the predictor to use. int pred_w, pred_h; - if (xd->mode_info_context->mbmi.sb_type < BLOCK_SIZE_SB8X8) { - assert(bsize == BLOCK_SIZE_SB8X8); + if (xd->mode_info_context->mbmi.sb_type < BLOCK_8X8) { + assert(bsize == BLOCK_8X8); pred_w = 0; pred_h = 0; } else { @@ -689,46 +691,39 @@ static void extend_for_intra(MACROBLOCKD* const xd, int plane, int block, } } static void set_contexts_on_border(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize, - int plane, int ss_tx_size, int eob, int aoff, - int loff, ENTROPY_CONTEXT *A, - ENTROPY_CONTEXT *L) { - const int bw = b_width_log2(bsize), bh = b_height_log2(bsize); - const int sw = bw - xd->plane[plane].subsampling_x; - const int sh = bh - xd->plane[plane].subsampling_y; - int mi_blocks_wide = 1 << sw; - int mi_blocks_high = 1 << sh; - int tx_size_in_blocks = (1 << ss_tx_size); + int plane, int tx_size_in_blocks, + int eob, int aoff, int loff, + ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L) { + struct macroblockd_plane *pd = &xd->plane[plane]; int above_contexts = tx_size_in_blocks; int left_contexts = tx_size_in_blocks; + int mi_blocks_wide = 1 << plane_block_width_log2by4(bsize, pd); + int mi_blocks_high = 1 << plane_block_height_log2by4(bsize, pd); int pt; // xd->mb_to_right_edge is in units of pixels * 8. This converts // it to 4x4 block sizes. - if (xd->mb_to_right_edge < 0) { - mi_blocks_wide += (xd->mb_to_right_edge - >> (5 + xd->plane[plane].subsampling_x)); - } + if (xd->mb_to_right_edge < 0) + mi_blocks_wide += (xd->mb_to_right_edge >> (5 + pd->subsampling_x)); // this code attempts to avoid copying into contexts that are outside // our border. Any blocks that do are set to 0... 
if (above_contexts + aoff > mi_blocks_wide) above_contexts = mi_blocks_wide - aoff; - if (xd->mb_to_bottom_edge < 0) { - mi_blocks_high += (xd->mb_to_bottom_edge - >> (5 + xd->plane[plane].subsampling_y)); - } - if (left_contexts + loff > mi_blocks_high) { + if (xd->mb_to_bottom_edge < 0) + mi_blocks_high += (xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); + + if (left_contexts + loff > mi_blocks_high) left_contexts = mi_blocks_high - loff; - } for (pt = 0; pt < above_contexts; pt++) A[pt] = eob > 0; - for (pt = above_contexts; pt < (1 << ss_tx_size); pt++) + for (pt = above_contexts; pt < tx_size_in_blocks; pt++) A[pt] = 0; for (pt = 0; pt < left_contexts; pt++) L[pt] = eob > 0; - for (pt = left_contexts; pt < (1 << ss_tx_size); pt++) + for (pt = left_contexts; pt < tx_size_in_blocks; pt++) L[pt] = 0; } diff --git a/libvpx/vp9/common/vp9_common_data.c b/libvpx/vp9/common/vp9_common_data.c index dee44ec63..fdf37e46a 100644 --- a/libvpx/vp9/common/vp9_common_data.c +++ b/libvpx/vp9/common/vp9_common_data.c @@ -31,6 +31,14 @@ const int mi_height_log2_lookup[BLOCK_SIZE_TYPES] = const int num_8x8_blocks_high_lookup[BLOCK_SIZE_TYPES] = {1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8}; +// MIN(3, MIN(b_width_log2(bsize), b_height_log2(bsize))) +const int size_group_lookup[BLOCK_SIZE_TYPES] = + {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3}; + +const int num_pels_log2_lookup[BLOCK_SIZE_TYPES] = + {4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12}; + + const PARTITION_TYPE partition_lookup[][BLOCK_SIZE_TYPES] = { { // 4X4 // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 @@ -40,25 +48,25 @@ const PARTITION_TYPE partition_lookup[][BLOCK_SIZE_TYPES] = { PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID }, { // 8X8 - // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 + // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID }, { // 16X16 - // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 + // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID }, { // 32X32 - // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 + // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID }, { // 64X64 - // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 + // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, @@ -68,29 +76,29 @@ const PARTITION_TYPE partition_lookup[][BLOCK_SIZE_TYPES] = { const BLOCK_SIZE_TYPE subsize_lookup[PARTITION_TYPES][BLOCK_SIZE_TYPES] = { { // PARTITION_NONE - BLOCK_SIZE_AB4X4, BLOCK_SIZE_SB4X8, BLOCK_SIZE_SB8X4, - BLOCK_SIZE_SB8X8, 
BLOCK_SIZE_SB8X16, BLOCK_SIZE_SB16X8, - BLOCK_SIZE_MB16X16, BLOCK_SIZE_SB16X32, BLOCK_SIZE_SB32X16, - BLOCK_SIZE_SB32X32, BLOCK_SIZE_SB32X64, BLOCK_SIZE_SB64X32, - BLOCK_SIZE_SB64X64, + BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, + BLOCK_8X8, BLOCK_8X16, BLOCK_16X8, + BLOCK_16X16, BLOCK_16X32, BLOCK_32X16, + BLOCK_32X32, BLOCK_32X64, BLOCK_64X32, + BLOCK_64X64, }, { // PARTITION_HORZ BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, - BLOCK_SIZE_SB8X4, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, - BLOCK_SIZE_SB16X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, - BLOCK_SIZE_SB32X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, - BLOCK_SIZE_SB64X32, + BLOCK_8X4, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_16X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_32X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_64X32, }, { // PARTITION_VERT BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, - BLOCK_SIZE_SB4X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, - BLOCK_SIZE_SB8X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, - BLOCK_SIZE_SB16X32, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, - BLOCK_SIZE_SB32X64, + BLOCK_4X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_8X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_16X32, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_32X64, }, { // PARTITION_SPLIT BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, - BLOCK_SIZE_AB4X4, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, - BLOCK_SIZE_SB8X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, - BLOCK_SIZE_MB16X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, - BLOCK_SIZE_SB32X32, + BLOCK_4X4, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_8X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_16X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_32X32, } }; @@ -108,14 +116,9 @@ const TX_SIZE max_uv_txsize_lookup[BLOCK_SIZE_TYPES] = { }; const BLOCK_SIZE_TYPE bsize_from_dim_lookup[5][5] = { - {BLOCK_SIZE_AB4X4, BLOCK_SIZE_SB4X8, BLOCK_SIZE_SB4X8, - BLOCK_SIZE_SB4X8, BLOCK_SIZE_SB4X8}, - {BLOCK_SIZE_SB8X4, BLOCK_SIZE_SB8X8, BLOCK_SIZE_SB8X16, - BLOCK_SIZE_SB8X16, BLOCK_SIZE_SB8X16}, - {BLOCK_SIZE_SB16X8, BLOCK_SIZE_SB16X8, BLOCK_SIZE_MB16X16, - BLOCK_SIZE_SB16X32, BLOCK_SIZE_SB16X32}, - {BLOCK_SIZE_SB32X16, BLOCK_SIZE_SB32X16, BLOCK_SIZE_SB32X16, - BLOCK_SIZE_SB32X32, BLOCK_SIZE_SB32X64}, - {BLOCK_SIZE_SB64X32, BLOCK_SIZE_SB64X32, BLOCK_SIZE_SB64X32, - BLOCK_SIZE_SB64X32, BLOCK_SIZE_SB64X64} + { BLOCK_4X4, BLOCK_4X8, BLOCK_4X8, BLOCK_4X8, BLOCK_4X8 }, + { BLOCK_8X4, BLOCK_8X8, BLOCK_8X16, BLOCK_8X16, BLOCK_8X16 }, + { BLOCK_16X8, BLOCK_16X8, BLOCK_16X16, BLOCK_16X32, BLOCK_16X32 }, + { BLOCK_32X16, BLOCK_32X16, BLOCK_32X16, BLOCK_32X32, BLOCK_32X64 }, + { BLOCK_64X32, BLOCK_64X32, BLOCK_64X32, BLOCK_64X32, BLOCK_64X64 } }; diff --git a/libvpx/vp9/common/vp9_common_data.h b/libvpx/vp9/common/vp9_common_data.h index 8b0f8a500..bc8c01a77 100644 --- a/libvpx/vp9/common/vp9_common_data.h +++ b/libvpx/vp9/common/vp9_common_data.h @@ -21,10 +21,9 @@ extern const int num_8x8_blocks_wide_lookup[BLOCK_SIZE_TYPES]; extern const int num_8x8_blocks_high_lookup[BLOCK_SIZE_TYPES]; extern const int num_4x4_blocks_high_lookup[BLOCK_SIZE_TYPES]; extern const int num_4x4_blocks_wide_lookup[BLOCK_SIZE_TYPES]; -extern const PARTITION_TYPE - partition_lookup[][BLOCK_SIZE_TYPES]; - - +extern const int size_group_lookup[BLOCK_SIZE_TYPES]; +extern const int num_pels_log2_lookup[BLOCK_SIZE_TYPES]; +extern const PARTITION_TYPE partition_lookup[][BLOCK_SIZE_TYPES]; extern const BLOCK_SIZE_TYPE subsize_lookup[PARTITION_TYPES][BLOCK_SIZE_TYPES]; extern const TX_SIZE max_txsize_lookup[BLOCK_SIZE_TYPES]; extern const TX_SIZE 
max_uv_txsize_lookup[BLOCK_SIZE_TYPES]; diff --git a/libvpx/vp9/common/vp9_entropy.c b/libvpx/vp9/common/vp9_entropy.c index 0ad0dbccd..df3a9fed5 100644 --- a/libvpx/vp9/common/vp9_entropy.c +++ b/libvpx/vp9/common/vp9_entropy.c @@ -73,7 +73,7 @@ DECLARE_ALIGNED(16, const int16_t, vp9_row_scan_4x4[16]) = { 13, 11, 14, 15, }; -DECLARE_ALIGNED(64, const int16_t, vp9_default_scan_8x8[64]) = { +DECLARE_ALIGNED(16, const int16_t, vp9_default_scan_8x8[64]) = { 0, 8, 1, 16, 9, 2, 17, 24, 10, 3, 18, 25, 32, 11, 4, 26, 33, 19, 40, 12, 34, 27, 5, 41, @@ -419,7 +419,7 @@ static void init_bit_trees() { init_bit_tree(cat6, 14); } -vp9_extra_bit vp9_extra_bits[12] = { +const vp9_extra_bit vp9_extra_bits[12] = { { 0, 0, 0, 0}, { 0, 0, 0, 1}, { 0, 0, 0, 2}, @@ -437,14 +437,10 @@ vp9_extra_bit vp9_extra_bits[12] = { #include "vp9/common/vp9_default_coef_probs.h" void vp9_default_coef_probs(VP9_COMMON *pc) { - vpx_memcpy(pc->fc.coef_probs[TX_4X4], default_coef_probs_4x4, - sizeof(pc->fc.coef_probs[TX_4X4])); - vpx_memcpy(pc->fc.coef_probs[TX_8X8], default_coef_probs_8x8, - sizeof(pc->fc.coef_probs[TX_8X8])); - vpx_memcpy(pc->fc.coef_probs[TX_16X16], default_coef_probs_16x16, - sizeof(pc->fc.coef_probs[TX_16X16])); - vpx_memcpy(pc->fc.coef_probs[TX_32X32], default_coef_probs_32x32, - sizeof(pc->fc.coef_probs[TX_32X32])); + vp9_copy(pc->fc.coef_probs[TX_4X4], default_coef_probs_4x4); + vp9_copy(pc->fc.coef_probs[TX_8X8], default_coef_probs_8x8); + vp9_copy(pc->fc.coef_probs[TX_16X16], default_coef_probs_16x16); + vp9_copy(pc->fc.coef_probs[TX_32X32], default_coef_probs_32x32); } // Neighborhood 5-tuples for various scans and blocksizes, @@ -613,17 +609,17 @@ void vp9_coef_tree_initialize() { #define COEF_COUNT_SAT_AFTER_KEY 24 #define COEF_MAX_UPDATE_FACTOR_AFTER_KEY 128 -static void adapt_coef_probs(VP9_COMMON *cm, TX_SIZE txfm_size, - int count_sat, int update_factor) { +static void adapt_coef_probs(VP9_COMMON *cm, TX_SIZE tx_size, + unsigned int count_sat, + unsigned int update_factor) { FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx]; - vp9_coeff_probs_model *dst_coef_probs = cm->fc.coef_probs[txfm_size]; - vp9_coeff_probs_model *pre_coef_probs = pre_fc->coef_probs[txfm_size]; - vp9_coeff_count_model *coef_counts = cm->counts.coef[txfm_size]; + vp9_coeff_probs_model *dst_coef_probs = cm->fc.coef_probs[tx_size]; + vp9_coeff_probs_model *pre_coef_probs = pre_fc->coef_probs[tx_size]; + vp9_coeff_count_model *coef_counts = cm->counts.coef[tx_size]; unsigned int (*eob_branch_count)[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS] = - cm->counts.eob_branch[txfm_size]; - int t, i, j, k, l, count; - int factor; + cm->counts.eob_branch[tx_size]; + int t, i, j, k, l; unsigned int branch_ct[UNCONSTRAINED_NODES][2]; vp9_prob coef_probs[UNCONSTRAINED_NODES]; int entropy_nodes_adapt = UNCONSTRAINED_NODES; @@ -634,29 +630,23 @@ static void adapt_coef_probs(VP9_COMMON *cm, TX_SIZE txfm_size, for (l = 0; l < PREV_COEF_CONTEXTS; ++l) { if (l >= 3 && k == 0) continue; - vp9_tree_probs_from_distribution( - vp9_coefmodel_tree, - coef_probs, branch_ct, - coef_counts[i][j][k][l], 0); + vp9_tree_probs_from_distribution(vp9_coefmodel_tree, coef_probs, + branch_ct, coef_counts[i][j][k][l], + 0); branch_ct[0][1] = eob_branch_count[i][j][k][l] - branch_ct[0][0]; coef_probs[0] = get_binary_prob(branch_ct[0][0], branch_ct[0][1]); - for (t = 0; t < entropy_nodes_adapt; ++t) { - count = branch_ct[t][0] + branch_ct[t][1]; - count = count > count_sat ? 
count_sat : count; - factor = (update_factor * count / count_sat); - dst_coef_probs[i][j][k][l][t] = - weighted_prob(pre_coef_probs[i][j][k][l][t], - coef_probs[t], factor); - } + for (t = 0; t < entropy_nodes_adapt; ++t) + dst_coef_probs[i][j][k][l][t] = merge_probs( + pre_coef_probs[i][j][k][l][t], coef_probs[t], + branch_ct[t], count_sat, update_factor); } } void vp9_adapt_coef_probs(VP9_COMMON *cm) { TX_SIZE t; - int count_sat; - int update_factor; /* denominator 256 */ + unsigned int count_sat, update_factor; - if ((cm->frame_type == KEY_FRAME) || cm->intra_only) { + if (cm->frame_type == KEY_FRAME || cm->intra_only) { update_factor = COEF_MAX_UPDATE_FACTOR_KEY; count_sat = COEF_COUNT_SAT_KEY; } else if (cm->last_frame_type == KEY_FRAME) { diff --git a/libvpx/vp9/common/vp9_entropy.h b/libvpx/vp9/common/vp9_entropy.h index 4ea727ff4..861c0786c 100644 --- a/libvpx/vp9/common/vp9_entropy.h +++ b/libvpx/vp9/common/vp9_entropy.h @@ -50,7 +50,7 @@ typedef struct { int base_val; } vp9_extra_bit; -extern vp9_extra_bit vp9_extra_bits[12]; /* indexed by token value */ +extern const vp9_extra_bit vp9_extra_bits[12]; /* indexed by token value */ #define MAX_PROB 255 #define DCT_MAX_VALUE 16384 @@ -80,7 +80,6 @@ extern vp9_extra_bit vp9_extra_bits[12]; /* indexed by token value */ coefficient band (and since zigzag positions 0, 1, and 2 are in distinct bands). */ -/*# define DC_TOKEN_CONTEXTS 3*/ /* 00, 0!0, !0!0 */ #define PREV_COEF_CONTEXTS 6 // #define ENTROPY_STATS @@ -102,7 +101,7 @@ extern DECLARE_ALIGNED(16, const int16_t, vp9_default_scan_4x4[16]); extern DECLARE_ALIGNED(16, const int16_t, vp9_col_scan_4x4[16]); extern DECLARE_ALIGNED(16, const int16_t, vp9_row_scan_4x4[16]); -extern DECLARE_ALIGNED(64, const int16_t, vp9_default_scan_8x8[64]); +extern DECLARE_ALIGNED(16, const int16_t, vp9_default_scan_8x8[64]); extern DECLARE_ALIGNED(16, const int16_t, vp9_col_scan_8x8[64]); extern DECLARE_ALIGNED(16, const int16_t, vp9_row_scan_8x8[64]); @@ -119,7 +118,7 @@ extern DECLARE_ALIGNED(16, int16_t, vp9_default_iscan_4x4[16]); extern DECLARE_ALIGNED(16, int16_t, vp9_col_iscan_4x4[16]); extern DECLARE_ALIGNED(16, int16_t, vp9_row_iscan_4x4[16]); -extern DECLARE_ALIGNED(64, int16_t, vp9_default_iscan_8x8[64]); +extern DECLARE_ALIGNED(16, int16_t, vp9_default_iscan_8x8[64]); extern DECLARE_ALIGNED(16, int16_t, vp9_col_iscan_8x8[64]); extern DECLARE_ALIGNED(16, int16_t, vp9_row_iscan_8x8[64]); diff --git a/libvpx/vp9/common/vp9_entropymode.c b/libvpx/vp9/common/vp9_entropymode.c index ca188e438..768e5f523 100644 --- a/libvpx/vp9/common/vp9_entropymode.c +++ b/libvpx/vp9/common/vp9_entropymode.c @@ -356,53 +356,15 @@ void vp9_entropy_mode_init() { vp9_inter_mode_tree, NEARESTMV); } -void vp9_accum_mv_refs(VP9_COMMON *pc, - MB_PREDICTION_MODE m, - const int context) { - unsigned int (*inter_mode_counts)[VP9_INTER_MODES - 1][2] = - pc->counts.inter_mode; - - if (m == ZEROMV) { - ++inter_mode_counts[context][0][0]; - } else { - ++inter_mode_counts[context][0][1]; - if (m == NEARESTMV) { - ++inter_mode_counts[context][1][0]; - } else { - ++inter_mode_counts[context][1][1]; - if (m == NEARMV) { - ++inter_mode_counts[context][2][0]; - } else { - ++inter_mode_counts[context][2][1]; - } - } - } -} - #define COUNT_SAT 20 #define MAX_UPDATE_FACTOR 128 -static int update_ct(vp9_prob pre_prob, vp9_prob prob, - unsigned int ct[2]) { - const int count = MIN(ct[0] + ct[1], COUNT_SAT); - const int factor = MAX_UPDATE_FACTOR * count / COUNT_SAT; - return weighted_prob(pre_prob, prob, factor); +static int 
update_ct(vp9_prob pre_prob, vp9_prob prob, unsigned int ct[2]) { + return merge_probs(pre_prob, prob, ct, COUNT_SAT, MAX_UPDATE_FACTOR); } static int update_ct2(vp9_prob pre_prob, unsigned int ct[2]) { - return update_ct(pre_prob, get_binary_prob(ct[0], ct[1]), ct); -} - -void vp9_adapt_mode_context(VP9_COMMON *pc) { - int i, j; - FRAME_CONTEXT *const fc = &pc->fc; - FRAME_CONTEXT *const pre_fc = &pc->frame_contexts[pc->frame_context_idx]; - FRAME_COUNTS *const counts = &pc->counts; - - for (j = 0; j < INTER_MODE_CONTEXTS; j++) - for (i = 0; i < VP9_INTER_MODES - 1; i++) - fc->inter_mode_probs[j][i] = update_ct2(pre_fc->inter_mode_probs[j][i], - counts->inter_mode[j][i]); + return merge_probs2(pre_prob, ct, COUNT_SAT, MAX_UPDATE_FACTOR); } static void update_mode_probs(int n_modes, @@ -440,6 +402,11 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) { fc->single_ref_prob[i][j] = update_ct2(pre_fc->single_ref_prob[i][j], counts->single_ref[i][j]); + for (i = 0; i < INTER_MODE_CONTEXTS; i++) + update_mode_probs(VP9_INTER_MODES, vp9_inter_mode_tree, + counts->inter_mode[i], pre_fc->inter_mode_probs[i], + fc->inter_mode_probs[i], NEARESTMV); + for (i = 0; i < BLOCK_SIZE_GROUPS; i++) update_mode_probs(VP9_INTRA_MODES, vp9_intra_mode_tree, counts->y_mode[i], pre_fc->y_mode_prob[i], @@ -466,25 +433,25 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) { if (cm->tx_mode == TX_MODE_SELECT) { int j; - unsigned int branch_ct_8x8p[TX_SIZE_MAX_SB - 3][2]; - unsigned int branch_ct_16x16p[TX_SIZE_MAX_SB - 2][2]; - unsigned int branch_ct_32x32p[TX_SIZE_MAX_SB - 1][2]; + unsigned int branch_ct_8x8p[TX_SIZES - 3][2]; + unsigned int branch_ct_16x16p[TX_SIZES - 2][2]; + unsigned int branch_ct_32x32p[TX_SIZES - 1][2]; for (i = 0; i < TX_SIZE_CONTEXTS; ++i) { tx_counts_to_branch_counts_8x8(counts->tx.p8x8[i], branch_ct_8x8p); - for (j = 0; j < TX_SIZE_MAX_SB - 3; ++j) + for (j = 0; j < TX_SIZES - 3; ++j) fc->tx_probs.p8x8[i][j] = update_ct2(pre_fc->tx_probs.p8x8[i][j], branch_ct_8x8p[j]); tx_counts_to_branch_counts_16x16(counts->tx.p16x16[i], branch_ct_16x16p); - for (j = 0; j < TX_SIZE_MAX_SB - 2; ++j) + for (j = 0; j < TX_SIZES - 2; ++j) fc->tx_probs.p16x16[i][j] = update_ct2(pre_fc->tx_probs.p16x16[i][j], branch_ct_16x16p[j]); tx_counts_to_branch_counts_32x32(counts->tx.p32x32[i], branch_ct_32x32p); - for (j = 0; j < TX_SIZE_MAX_SB - 1; ++j) + for (j = 0; j < TX_SIZES - 1; ++j) fc->tx_probs.p32x32[i][j] = update_ct2(pre_fc->tx_probs.p32x32[i][j], branch_ct_32x32p[j]); } @@ -495,22 +462,24 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) { counts->mbskip[i]); } -static void set_default_lf_deltas(MACROBLOCKD *xd) { - xd->lf.mode_ref_delta_enabled = 1; - xd->lf.mode_ref_delta_update = 1; +static void set_default_lf_deltas(struct loopfilter *lf) { + lf->mode_ref_delta_enabled = 1; + lf->mode_ref_delta_update = 1; - xd->lf.ref_deltas[INTRA_FRAME] = 1; - xd->lf.ref_deltas[LAST_FRAME] = 0; - xd->lf.ref_deltas[GOLDEN_FRAME] = -1; - xd->lf.ref_deltas[ALTREF_FRAME] = -1; + lf->ref_deltas[INTRA_FRAME] = 1; + lf->ref_deltas[LAST_FRAME] = 0; + lf->ref_deltas[GOLDEN_FRAME] = -1; + lf->ref_deltas[ALTREF_FRAME] = -1; - xd->lf.mode_deltas[0] = 0; - xd->lf.mode_deltas[1] = 0; + lf->mode_deltas[0] = 0; + lf->mode_deltas[1] = 0; } void vp9_setup_past_independence(VP9_COMMON *cm, MACROBLOCKD *xd) { // Reset the segment feature data to the default stats: // Features disabled, 0, with delta coding (Default state). 
+ struct loopfilter *const lf = &xd->lf; + int i; vp9_clearall_segfeatures(&xd->seg); xd->seg.abs_delta = SEGMENT_DELTADATA; @@ -518,12 +487,12 @@ void vp9_setup_past_independence(VP9_COMMON *cm, MACROBLOCKD *xd) { vpx_memset(cm->last_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols)); // Reset the mode ref deltas for loop filter - vp9_zero(xd->lf.last_ref_deltas); - vp9_zero(xd->lf.last_mode_deltas); - set_default_lf_deltas(xd); + vp9_zero(lf->last_ref_deltas); + vp9_zero(lf->last_mode_deltas); + set_default_lf_deltas(lf); // To force update of the sharpness - xd->lf.last_sharpness_level = -1; + lf->last_sharpness_level = -1; vp9_default_coef_probs(cm); vp9_init_mbmode_probs(cm); diff --git a/libvpx/vp9/common/vp9_entropymode.h b/libvpx/vp9/common/vp9_entropymode.h index 8c14e7e17..17a7c2634 100644 --- a/libvpx/vp9/common/vp9_entropymode.h +++ b/libvpx/vp9/common/vp9_entropymode.h @@ -24,15 +24,15 @@ struct VP9Common; struct tx_probs { - vp9_prob p32x32[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB - 1]; - vp9_prob p16x16[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB - 2]; - vp9_prob p8x8[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB - 3]; + vp9_prob p32x32[TX_SIZE_CONTEXTS][TX_SIZES - 1]; + vp9_prob p16x16[TX_SIZE_CONTEXTS][TX_SIZES - 2]; + vp9_prob p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 3]; }; struct tx_counts { - unsigned int p32x32[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB]; - unsigned int p16x16[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB - 1]; - unsigned int p8x8[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB - 2]; + unsigned int p32x32[TX_SIZE_CONTEXTS][TX_SIZES]; + unsigned int p16x16[TX_SIZE_CONTEXTS][TX_SIZES - 1]; + unsigned int p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 2]; }; extern const vp9_prob vp9_kf_uv_mode_prob[VP9_INTRA_MODES][VP9_INTRA_MODES - 1]; @@ -61,18 +61,12 @@ extern struct vp9_token vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS]; void vp9_entropy_mode_init(); -int vp9_mv_cont(const int_mv *l, const int_mv *a); - void vp9_setup_past_independence(struct VP9Common *cm, MACROBLOCKD *xd); void vp9_init_mbmode_probs(struct VP9Common *x); -void vp9_adapt_mode_context(struct VP9Common *pc); - void vp9_adapt_mode_probs(struct VP9Common *); -void vp9_accum_mv_refs(struct VP9Common *pc, MB_PREDICTION_MODE m, int context); - void tx_counts_to_branch_counts_32x32(unsigned int *tx_count_32x32p, unsigned int (*ct_32x32p)[2]); void tx_counts_to_branch_counts_16x16(unsigned int *tx_count_16x16p, diff --git a/libvpx/vp9/common/vp9_entropymv.c b/libvpx/vp9/common/vp9_entropymv.c index 343b6241d..6cfc34697 100644 --- a/libvpx/vp9/common/vp9_entropymv.c +++ b/libvpx/vp9/common/vp9_entropymv.c @@ -16,7 +16,7 @@ #define MV_MAX_UPDATE_FACTOR 128 /* Integer pel reference mv threshold for use of high-precision 1/8 mv */ -#define COMPANDED_MVREF_THRESH 8 +#define COMPANDED_MVREF_THRESH 8 const vp9_tree_index vp9_mv_joint_tree[2 * MV_JOINTS - 2] = { -MV_JOINT_ZERO, 2, @@ -107,12 +107,6 @@ int vp9_get_mv_mag(MV_CLASS_TYPE c, int offset) { return mv_class_base(c) + offset; } -static void inc_mv_component_count(int v, nmv_component_counts *comp_counts, - int incr) { - assert (v != 0); - comp_counts->mvcount[MV_MAX + v] += incr; -} - static void inc_mv_component(int v, nmv_component_counts *comp_counts, int incr, int usehp) { int s, z, c, o, d, e, f; @@ -164,25 +158,19 @@ static void counts_to_context(nmv_component_counts *mvcomp, int usehp) { } } -void vp9_inc_mv(const MV *mv, nmv_context_counts *mvctx) { +void vp9_inc_mv(const MV *mv, nmv_context_counts *counts) { const MV_JOINT_TYPE j = vp9_get_mv_joint(mv); - mvctx->joints[j]++; + ++counts->joints[j]; + if 
(mv_joint_vertical(j)) - inc_mv_component_count(mv->row, &mvctx->comps[0], 1); + ++counts->comps[0].mvcount[MV_MAX + mv->row]; if (mv_joint_horizontal(j)) - inc_mv_component_count(mv->col, &mvctx->comps[1], 1); + ++counts->comps[1].mvcount[MV_MAX + mv->col]; } -static void adapt_prob(vp9_prob *dest, vp9_prob prep, unsigned int ct[2]) { - const int count = MIN(ct[0] + ct[1], MV_COUNT_SAT); - if (count) { - const vp9_prob newp = get_binary_prob(ct[0], ct[1]); - const int factor = MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT; - *dest = weighted_prob(prep, newp, factor); - } else { - *dest = prep; - } +static vp9_prob adapt_prob(vp9_prob prep, const unsigned int ct[2]) { + return merge_probs2(prep, ct, MV_COUNT_SAT, MV_MAX_UPDATE_FACTOR); } void vp9_counts_process(nmv_context_counts *nmv_count, int usehp) { @@ -195,31 +183,22 @@ static unsigned int adapt_probs(unsigned int i, vp9_prob this_probs[], const vp9_prob last_probs[], const unsigned int num_events[]) { - vp9_prob this_prob; - const uint32_t left = tree[i] <= 0 + + const unsigned int left = tree[i] <= 0 ? num_events[-tree[i]] : adapt_probs(tree[i], tree, this_probs, last_probs, num_events); - const uint32_t right = tree[i + 1] <= 0 + const unsigned int right = tree[i + 1] <= 0 ? num_events[-tree[i + 1]] : adapt_probs(tree[i + 1], tree, this_probs, last_probs, num_events); - - uint32_t weight = left + right; - if (weight) { - this_prob = get_binary_prob(left, right); - weight = weight > MV_COUNT_SAT ? MV_COUNT_SAT : weight; - this_prob = weighted_prob(last_probs[i >> 1], this_prob, - MV_MAX_UPDATE_FACTOR * weight / MV_COUNT_SAT); - } else { - this_prob = last_probs[i >> 1]; - } - this_probs[i >> 1] = this_prob; + const unsigned int ct[2] = { left, right }; + this_probs[i >> 1] = adapt_prob(last_probs[i >> 1], ct); return left + right; } -void vp9_adapt_mv_probs(VP9_COMMON *cm, int usehp) { +void vp9_adapt_mv_probs(VP9_COMMON *cm, int allow_hp) { int i, j; FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx]; @@ -228,36 +207,32 @@ void vp9_adapt_mv_probs(VP9_COMMON *cm, int usehp) { nmv_context *pre_ctx = &pre_fc->nmvc; nmv_context_counts *cts = &cm->counts.mv; - vp9_counts_process(cts, usehp); + vp9_counts_process(cts, allow_hp); adapt_probs(0, vp9_mv_joint_tree, ctx->joints, pre_ctx->joints, cts->joints); for (i = 0; i < 2; ++i) { - adapt_prob(&ctx->comps[i].sign, pre_ctx->comps[i].sign, cts->comps[i].sign); + ctx->comps[i].sign = adapt_prob(pre_ctx->comps[i].sign, cts->comps[i].sign); adapt_probs(0, vp9_mv_class_tree, ctx->comps[i].classes, pre_ctx->comps[i].classes, cts->comps[i].classes); adapt_probs(0, vp9_mv_class0_tree, ctx->comps[i].class0, pre_ctx->comps[i].class0, cts->comps[i].class0); for (j = 0; j < MV_OFFSET_BITS; ++j) - adapt_prob(&ctx->comps[i].bits[j], pre_ctx->comps[i].bits[j], - cts->comps[i].bits[j]); - } + ctx->comps[i].bits[j] = adapt_prob(pre_ctx->comps[i].bits[j], + cts->comps[i].bits[j]); - for (i = 0; i < 2; ++i) { for (j = 0; j < CLASS0_SIZE; ++j) adapt_probs(0, vp9_mv_fp_tree, ctx->comps[i].class0_fp[j], pre_ctx->comps[i].class0_fp[j], cts->comps[i].class0_fp[j]); adapt_probs(0, vp9_mv_fp_tree, ctx->comps[i].fp, pre_ctx->comps[i].fp, cts->comps[i].fp); - } - if (usehp) { - for (i = 0; i < 2; ++i) { - adapt_prob(&ctx->comps[i].class0_hp, pre_ctx->comps[i].class0_hp, - cts->comps[i].class0_hp); - adapt_prob(&ctx->comps[i].hp, pre_ctx->comps[i].hp, cts->comps[i].hp); + if (allow_hp) { + ctx->comps[i].class0_hp = adapt_prob(pre_ctx->comps[i].class0_hp, + cts->comps[i].class0_hp); + ctx->comps[i].hp = 
adapt_prob(pre_ctx->comps[i].hp, cts->comps[i].hp); } } } diff --git a/libvpx/vp9/common/vp9_enums.h b/libvpx/vp9/common/vp9_enums.h index 86f0d0bfd..3208b7270 100644 --- a/libvpx/vp9/common/vp9_enums.h +++ b/libvpx/vp9/common/vp9_enums.h @@ -54,7 +54,7 @@ typedef enum { TX_8X8 = 1, // 8x8 dct transform TX_16X16 = 2, // 16x16 dct transform TX_32X32 = 3, // 32x32 dct transform - TX_SIZE_MAX_SB, // Number of transforms available to SBs + TX_SIZES } TX_SIZE; typedef enum { @@ -63,7 +63,7 @@ typedef enum { ALLOW_16X16 = 2, ALLOW_32X32 = 3, TX_MODE_SELECT = 4, - NB_TXFM_MODES = 5, + TX_MODES = 5, } TX_MODE; typedef enum { diff --git a/libvpx/vp9/common/vp9_extend.c b/libvpx/vp9/common/vp9_extend.c index 95ec59061..d8496c4f2 100644 --- a/libvpx/vp9/common/vp9_extend.c +++ b/libvpx/vp9/common/vp9_extend.c @@ -8,9 +8,11 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "vp9/common/vp9_extend.h" #include "vpx_mem/vpx_mem.h" +#include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_extend.h" + static void copy_and_extend_plane(const uint8_t *src, int src_pitch, uint8_t *dst, int dst_pitch, int w, int h, @@ -107,14 +109,14 @@ void vp9_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src, const int src_y_offset = srcy * src->y_stride + srcx; const int dst_y_offset = srcy * dst->y_stride + srcx; - const int et_uv = (et_y + 1) >> 1; - const int el_uv = (el_y + 1) >> 1; - const int eb_uv = (eb_y + 1) >> 1; - const int er_uv = (er_y + 1) >> 1; + const int et_uv = ROUND_POWER_OF_TWO(et_y, 1); + const int el_uv = ROUND_POWER_OF_TWO(el_y, 1); + const int eb_uv = ROUND_POWER_OF_TWO(eb_y, 1); + const int er_uv = ROUND_POWER_OF_TWO(er_y, 1); const int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1); const int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1); - const int srch_uv = (srch + 1) >> 1; - const int srcw_uv = (srcw + 1) >> 1; + const int srch_uv = ROUND_POWER_OF_TWO(srch, 1); + const int srcw_uv = ROUND_POWER_OF_TWO(srcw, 1); copy_and_extend_plane(src->y_buffer + src_y_offset, src->y_stride, dst->y_buffer + dst_y_offset, dst->y_stride, diff --git a/libvpx/vp9/common/vp9_findnearmv.c b/libvpx/vp9/common/vp9_findnearmv.c index 643b229a6..3af8b8d21 100644 --- a/libvpx/vp9/common/vp9_findnearmv.c +++ b/libvpx/vp9/common/vp9_findnearmv.c @@ -14,8 +14,9 @@ #include "vp9/common/vp9_mvref_common.h" #include "vp9/common/vp9_sadmxn.h" -static void lower_mv_precision(int_mv *mv, int usehp) { - if (!usehp || !vp9_use_mv_hp(&mv->as_mv)) { +static void lower_mv_precision(int_mv *mv, int allow_hp) { + const int use_hp = allow_hp && vp9_use_mv_hp(&mv->as_mv); + if (!use_hp) { if (mv->as_mv.row & 1) mv->as_mv.row += (mv->as_mv.row > 0 ? 
-1 : 1); if (mv->as_mv.col & 1) @@ -32,7 +33,7 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, // Make sure all the candidates are properly clamped etc for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) { lower_mv_precision(&mvlist[i], xd->allow_high_precision_mv); - clamp_mv2(&mvlist[i], xd); + clamp_mv2(&mvlist[i].as_mv, xd); } *nearest = mvlist[0]; *near = mvlist[1]; @@ -41,7 +42,8 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, int_mv *dst_nearest, int_mv *dst_near, - int block_idx, int ref_idx) { + int block_idx, int ref_idx, + int mi_row, int mi_col) { int_mv dst_list[MAX_MV_REF_CANDIDATES]; int_mv mv_list[MAX_MV_REF_CANDIDATES]; MODE_INFO *mi = xd->mode_info_context; @@ -53,7 +55,8 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, vp9_find_mv_refs_idx(cm, xd, xd->mode_info_context, xd->prev_mode_info_context, mbmi->ref_frame[ref_idx], - mv_list, cm->ref_frame_sign_bias, block_idx); + mv_list, cm->ref_frame_sign_bias, block_idx, + mi_row, mi_col); dst_list[1].as_int = 0; if (block_idx == 0) { diff --git a/libvpx/vp9/common/vp9_findnearmv.h b/libvpx/vp9/common/vp9_findnearmv.h index b0fa505b5..e5221ed67 100644 --- a/libvpx/vp9/common/vp9_findnearmv.h +++ b/libvpx/vp9/common/vp9_findnearmv.h @@ -29,31 +29,19 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int_mv *near); // TODO(jingning): this mv clamping function should be block size dependent. -static void clamp_mv(int_mv *mv, - int mb_to_left_edge, - int mb_to_right_edge, - int mb_to_top_edge, - int mb_to_bottom_edge) { - mv->as_mv.col = clamp(mv->as_mv.col, mb_to_left_edge, mb_to_right_edge); - mv->as_mv.row = clamp(mv->as_mv.row, mb_to_top_edge, mb_to_bottom_edge); -} - -static int clamp_mv2(int_mv *mv, const MACROBLOCKD *xd) { - int_mv tmp_mv; - tmp_mv.as_int = mv->as_int; - clamp_mv(mv, - xd->mb_to_left_edge - LEFT_TOP_MARGIN, - xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN, - xd->mb_to_top_edge - LEFT_TOP_MARGIN, - xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN); - return tmp_mv.as_int != mv->as_int; +static void clamp_mv2(MV *mv, const MACROBLOCKD *xd) { + clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN, + xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN, + xd->mb_to_top_edge - LEFT_TOP_MARGIN, + xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN); } void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *pc, MACROBLOCKD *xd, int_mv *dst_nearest, int_mv *dst_near, - int block_idx, int ref_idx); + int block_idx, int ref_idx, + int mi_row, int mi_col); static MB_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) { // FIXME(rbultje, jingning): temporary hack because jenkins doesn't @@ -62,7 +50,7 @@ static MB_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) { /* On L edge, get from MB to left of us */ --cur_mb; - if (cur_mb->mbmi.ref_frame[0] != INTRA_FRAME) { + if (is_inter_block(&cur_mb->mbmi)) { return DC_PRED; } else if (cur_mb->mbmi.sb_type < BLOCK_SIZE_SB8X8) { return ((cur_mb->bmi + 1 + b)->as_mode); @@ -80,7 +68,7 @@ static MB_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, /* On top edge, get from MB above us */ cur_mb -= mi_stride; - if (cur_mb->mbmi.ref_frame[0] != INTRA_FRAME) { + if (is_inter_block(&cur_mb->mbmi)) { return DC_PRED; } else if (cur_mb->mbmi.sb_type < BLOCK_SIZE_SB8X8) { return ((cur_mb->bmi + 2 + b)->as_mode); diff --git a/libvpx/vp9/common/vp9_idct.c b/libvpx/vp9/common/vp9_idct.c index a95560a55..a2245259e 100644 --- a/libvpx/vp9/common/vp9_idct.c +++ b/libvpx/vp9/common/vp9_idct.c @@ -225,6 +225,19 @@ void 
vp9_short_idct8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride) { } } +void vp9_short_idct8x8_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) { + int i, j; + int a1; + int16_t out = dct_const_round_shift(input[0] * cospi_16_64); + out = dct_const_round_shift(out * cospi_16_64); + a1 = ROUND_POWER_OF_TWO(out, 5); + for (j = 0; j < 8; ++j) { + for (i = 0; i < 8; ++i) + dest[i] = clip_pixel(dest[i] + a1); + dest += dest_stride; + } +} + static void iadst4_1d(int16_t *input, int16_t *output) { int s0, s1, s2, s3, s4, s5, s6, s7; @@ -433,12 +446,6 @@ void vp9_short_idct10_8x8_add_c(int16_t *input, uint8_t *dest, } } -void vp9_short_idct1_8x8_c(int16_t *input, int16_t *output) { - int16_t out = dct_const_round_shift(input[0] * cospi_16_64); - out = dct_const_round_shift(out * cospi_16_64); - output[0] = ROUND_POWER_OF_TWO(out, 5); -} - static void idct16_1d(int16_t *input, int16_t *output) { int16_t step1[16], step2[16]; int temp1, temp2; @@ -857,10 +864,18 @@ void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest, } } -void vp9_short_idct1_16x16_c(int16_t *input, int16_t *output) { +void vp9_short_idct16x16_1_add_c(int16_t *input, uint8_t *dest, + int dest_stride) { + int i, j; + int a1; int16_t out = dct_const_round_shift(input[0] * cospi_16_64); out = dct_const_round_shift(out * cospi_16_64); - output[0] = ROUND_POWER_OF_TWO(out, 6); + a1 = ROUND_POWER_OF_TWO(out, 6); + for (j = 0; j < 16; ++j) { + for (i = 0; i < 16; ++i) + dest[i] = clip_pixel(dest[i] + a1); + dest += dest_stride; + } } static void idct32_1d(int16_t *input, int16_t *output) { @@ -1259,29 +1274,3 @@ void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output) { out = dct_const_round_shift(out * cospi_16_64); output[0] = ROUND_POWER_OF_TWO(out, 6); } - -void vp9_short_idct10_32x32_add_c(int16_t *input, uint8_t *dest, - int dest_stride) { - int16_t out[32 * 32] = { 0 }; - int16_t *outptr = out; - int i, j; - int16_t temp_in[32], temp_out[32]; - - // First transform rows. Since all non-zero dct coefficients are in - // upper-left 4x4 area, we only need to calculate first 4 rows here. 
- for (i = 0; i < 4; ++i) { - idct32_1d(input, outptr); - input += 32; - outptr += 32; - } - - // Columns - for (i = 0; i < 32; ++i) { - for (j = 0; j < 32; ++j) - temp_in[j] = out[j * 32 + i]; - idct32_1d(temp_in, temp_out); - for (j = 0; j < 32; ++j) - dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) - + dest[j * dest_stride + i]); - } -} diff --git a/libvpx/vp9/common/vp9_loopfilter.c b/libvpx/vp9/common/vp9_loopfilter.c index 5498b1717..66df62753 100644 --- a/libvpx/vp9/common/vp9_loopfilter.c +++ b/libvpx/vp9/common/vp9_loopfilter.c @@ -16,6 +16,12 @@ #include "vp9/common/vp9_seg_common.h" +struct loop_filter_info { + const uint8_t *mblim; + const uint8_t *lim; + const uint8_t *hev_thr; +}; + static void lf_init_lut(loop_filter_info_n *lfi) { lfi->mode_lf_lut[DC_PRED] = 0; lfi->mode_lf_lut[D45_PRED] = 0; @@ -73,13 +79,14 @@ void vp9_loop_filter_init(VP9_COMMON *cm, struct loopfilter *lf) { void vp9_loop_filter_frame_init(VP9_COMMON *const cm, MACROBLOCKD *const xd, int default_filt_lvl) { - int seg; + int seg_id; // n_shift is the a multiplier for lf_deltas // the multiplier is 1 for when filter_lvl is between 0 and 31; // 2 when filter_lvl is between 32 and 63 const int n_shift = default_filt_lvl >> 5; loop_filter_info_n *const lfi = &cm->lf_info; - struct loopfilter *lf = &xd->lf; + struct loopfilter *const lf = &xd->lf; + struct segmentation *const seg = &xd->seg; // update limits if sharpness has changed if (lf->last_sharpness_level != lf->sharpness_level) { @@ -87,13 +94,13 @@ void vp9_loop_filter_frame_init(VP9_COMMON *const cm, MACROBLOCKD *const xd, lf->last_sharpness_level = lf->sharpness_level; } - for (seg = 0; seg < MAX_SEGMENTS; seg++) { + for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) { int lvl_seg = default_filt_lvl, ref, mode, intra_lvl; // Set the baseline filter values for each segment - if (vp9_segfeature_active(&xd->seg, seg, SEG_LVL_ALT_LF)) { - const int data = vp9_get_segdata(&xd->seg, seg, SEG_LVL_ALT_LF); - lvl_seg = xd->seg.abs_delta == SEGMENT_ABSDATA + if (vp9_segfeature_active(&xd->seg, seg_id, SEG_LVL_ALT_LF)) { + const int data = vp9_get_segdata(seg, seg_id, SEG_LVL_ALT_LF); + lvl_seg = seg->abs_delta == SEGMENT_ABSDATA ? 
data : clamp(default_filt_lvl + data, 0, MAX_LOOP_FILTER); } @@ -101,18 +108,18 @@ void vp9_loop_filter_frame_init(VP9_COMMON *const cm, MACROBLOCKD *const xd, if (!lf->mode_ref_delta_enabled) { // we could get rid of this if we assume that deltas are set to // zero when not in use; encoder always uses deltas - vpx_memset(lfi->lvl[seg][0], lvl_seg, 4 * 4); + vpx_memset(lfi->lvl[seg_id][0], lvl_seg, 4 * 4); continue; } intra_lvl = lvl_seg + (lf->ref_deltas[INTRA_FRAME] << n_shift); - lfi->lvl[seg][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER); + lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER); for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) { const int inter_lvl = lvl_seg + (lf->ref_deltas[ref] << n_shift) + (lf->mode_deltas[mode] << n_shift); - lfi->lvl[seg][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER); + lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER); } } } @@ -256,7 +263,7 @@ static void filter_block_plane(VP9_COMMON *const cm, // Determine the vertical edges that need filtering for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) { const int skip_this = mi[c].mbmi.mb_skip_coeff - && mi[c].mbmi.ref_frame[0] != INTRA_FRAME; + && is_inter_block(&mi[c].mbmi); // left edge of current unit is block/partition edge -> no skip const int block_edge_left = b_width_log2(mi[c].mbmi.sb_type) ? !(c & ((1 << (b_width_log2(mi[c].mbmi.sb_type)-1)) - 1)) : 1; @@ -376,3 +383,11 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd, vp9_loop_filter_rows(cm->frame_to_show, cm, xd, 0, cm->mi_rows, y_only); } + +int vp9_loop_filter_worker(void *arg1, void *arg2) { + LFWorkerData *const lf_data = (LFWorkerData*)arg1; + (void)arg2; + vp9_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, &lf_data->xd, + lf_data->start, lf_data->stop, lf_data->y_only); + return 1; +} diff --git a/libvpx/vp9/common/vp9_loopfilter.h b/libvpx/vp9/common/vp9_loopfilter.h index e59cc6485..5fc909495 100644 --- a/libvpx/vp9/common/vp9_loopfilter.h +++ b/libvpx/vp9/common/vp9_loopfilter.h @@ -35,13 +35,6 @@ typedef struct { uint8_t mode_lf_lut[MB_MODE_COUNT]; } loop_filter_info_n; -struct loop_filter_info { - const uint8_t *mblim; - const uint8_t *lim; - const uint8_t *hev_thr; -}; - - /* assorted loopfilter functions which get used elsewhere */ struct VP9Common; struct macroblockd; @@ -64,4 +57,18 @@ void vp9_loop_filter_frame(struct VP9Common *cm, void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer, struct VP9Common *cm, struct macroblockd *xd, int start, int stop, int y_only); + +typedef struct LoopFilterWorkerData { + const YV12_BUFFER_CONFIG *frame_buffer; + struct VP9Common *cm; + struct macroblockd xd; // TODO(jzern): most of this is unnecessary to the + // loopfilter. the planes are necessary as their state + // is changed during decode. + int start; + int stop; + int y_only; +} LFWorkerData; + +// Operates on the rows described by LFWorkerData passed as 'arg1'. 
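The vp9_loop_filter_worker() hook added above simply unpacks an LFWorkerData and forwards it to vp9_loop_filter_rows(). A minimal single-threaded usage sketch follows (illustrative only, not part of this change; it assumes a VP9_COMMON *cm and MACROBLOCKD *xd already configured for the frame being decoded):

  LFWorkerData lf_data;
  lf_data.frame_buffer = cm->frame_to_show;
  lf_data.cm = cm;
  lf_data.xd = *xd;            /* worker keeps its own copy of the plane state */
  lf_data.start = 0;
  lf_data.stop = cm->mi_rows;  /* filter every row in this example */
  lf_data.y_only = 0;
  vp9_loop_filter_worker(&lf_data, NULL);  /* second argument is unused */

A threaded decoder could instead split [0, mi_rows) into start/stop ranges and hand one LFWorkerData to each worker.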
+int vp9_loop_filter_worker(void *arg1, void *arg2); #endif // VP9_COMMON_VP9_LOOPFILTER_H_ diff --git a/libvpx/vp9/common/vp9_mv.h b/libvpx/vp9/common/vp9_mv.h index a095258be..31a79b984 100644 --- a/libvpx/vp9/common/vp9_mv.h +++ b/libvpx/vp9/common/vp9_mv.h @@ -13,6 +13,8 @@ #include "vpx/vpx_integer.h" +#include "vp9/common/vp9_common.h" + typedef struct { int16_t row; int16_t col; @@ -28,4 +30,10 @@ typedef struct { int32_t col; } MV32; +static void clamp_mv(MV *mv, int min_col, int max_col, + int min_row, int max_row) { + mv->col = clamp(mv->col, min_col, max_col); + mv->row = clamp(mv->row, min_row, max_row); +} + #endif // VP9_COMMON_VP9_MV_H_ diff --git a/libvpx/vp9/common/vp9_mvref_common.c b/libvpx/vp9/common/vp9_mvref_common.c index ae009b0ff..3b72f41c2 100644 --- a/libvpx/vp9/common/vp9_mvref_common.c +++ b/libvpx/vp9/common/vp9_mvref_common.c @@ -11,6 +11,65 @@ #include "vp9/common/vp9_mvref_common.h" #define MVREF_NEIGHBOURS 8 + +typedef enum { + BOTH_ZERO = 0, + ZERO_PLUS_PREDICTED = 1, + BOTH_PREDICTED = 2, + NEW_PLUS_NON_INTRA = 3, + BOTH_NEW = 4, + INTRA_PLUS_NON_INTRA = 5, + BOTH_INTRA = 6, + INVALID_CASE = 9 +} motion_vector_context; + +// This is used to figure out a context for the ref blocks. The code flattens +// an array that would have 3 possible counts (0, 1 & 2) for 3 choices by +// adding 9 for each intra block, 3 for each zero mv and 1 for each new +// motion vector. This single number is then converted into a context +// with a single lookup ( counter_to_context ). +static const int mode_2_counter[MB_MODE_COUNT] = { + 9, // DC_PRED + 9, // V_PRED + 9, // H_PRED + 9, // D45_PRED + 9, // D135_PRED + 9, // D117_PRED + 9, // D153_PRED + 9, // D27_PRED + 9, // D63_PRED + 9, // TM_PRED + 0, // NEARESTMV + 0, // NEARMV + 3, // ZEROMV + 1, // NEWMV +}; + +// There are 3^3 different combinations of 3 counts that can be either 0,1 or +// 2. However the actual count can never be greater than 2 so the highest +// counter we need is 18. 9 is an invalid counter that's never used. +static const int counter_to_context[19] = { + BOTH_PREDICTED, // 0 + NEW_PLUS_NON_INTRA, // 1 + BOTH_NEW, // 2 + ZERO_PLUS_PREDICTED, // 3 + NEW_PLUS_NON_INTRA, // 4 + INVALID_CASE, // 5 + BOTH_ZERO, // 6 + INVALID_CASE, // 7 + INVALID_CASE, // 8 + INTRA_PLUS_NON_INTRA, // 9 + INTRA_PLUS_NON_INTRA, // 10 + INVALID_CASE, // 11 + INTRA_PLUS_NON_INTRA, // 12 + INVALID_CASE, // 13 + INVALID_CASE, // 14 + INVALID_CASE, // 15 + INVALID_CASE, // 16 + INVALID_CASE, // 17 + BOTH_INTRA // 18 +}; + static const int mv_ref_blocks[BLOCK_SIZE_TYPES][MVREF_NEIGHBOURS][2] = { // SB4X4 {{0, -1}, {-1, 0}, {-1, -1}, {0, -2}, {-2, 0}, {-1, -2}, {-2, -1}, {-2, -2}}, @@ -39,263 +98,212 @@ static const int mv_ref_blocks[BLOCK_SIZE_TYPES][MVREF_NEIGHBOURS][2] = { // SB64X64 {{3, -1}, {-1, 3}, {4, -1}, {-1, 4}, {-1, -1}, {0, -1}, {-1, 0}, {6, -1}} }; + +static const int idx_n_column_to_subblock[4][2] = { + {1, 2}, + {1, 3}, + {3, 2}, + {3, 3} +}; + // clamp_mv_ref #define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units static void clamp_mv_ref(const MACROBLOCKD *xd, int_mv *mv) { - mv->as_mv.col = clamp(mv->as_mv.col, xd->mb_to_left_edge - MV_BORDER, - xd->mb_to_right_edge + MV_BORDER); - mv->as_mv.row = clamp(mv->as_mv.row, xd->mb_to_top_edge - MV_BORDER, - xd->mb_to_bottom_edge + MV_BORDER); -} - -// Gets a candidate reference motion vector from the given mode info -// structure if one exists that matches the given reference frame. 
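To make the new mode_2_counter / counter_to_context scheme above concrete, here is a small worked example (illustrative, not part of the change): with two visible neighbours, one intra DC_PRED block and one NEWMV block,

  int context_counter = 0;
  context_counter += mode_2_counter[DC_PRED];  /* +9 for the intra neighbour  */
  context_counter += mode_2_counter[NEWMV];    /* +1 for the new-mv neighbour */
  /* context_counter == 10, and counter_to_context[10] == INTRA_PLUS_NON_INTRA,
   * which is the value stored in mbmi->mb_mode_context[ref_frame]. */

so the old intra_count/zero_count/newmv_count bookkeeping collapses into a single accumulated counter and one table lookup.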
-static int get_matching_candidate(const MODE_INFO *candidate_mi, - MV_REFERENCE_FRAME ref_frame, - int_mv *c_mv, int block_idx) { - if (ref_frame == candidate_mi->mbmi.ref_frame[0]) { - if (block_idx >= 0 && candidate_mi->mbmi.sb_type < BLOCK_SIZE_SB8X8) - c_mv->as_int = candidate_mi->bmi[block_idx].as_mv[0].as_int; - else - c_mv->as_int = candidate_mi->mbmi.mv[0].as_int; - } else if (ref_frame == candidate_mi->mbmi.ref_frame[1]) { - if (block_idx >= 0 && candidate_mi->mbmi.sb_type < BLOCK_SIZE_SB8X8) - c_mv->as_int = candidate_mi->bmi[block_idx].as_mv[1].as_int; - else - c_mv->as_int = candidate_mi->mbmi.mv[1].as_int; - } else { - return 0; - } - - return 1; + clamp_mv(&mv->as_mv, xd->mb_to_left_edge - MV_BORDER, + xd->mb_to_right_edge + MV_BORDER, + xd->mb_to_top_edge - MV_BORDER, + xd->mb_to_bottom_edge + MV_BORDER); } -// Gets candidate reference motion vector(s) from the given mode info -// structure if they exists and do NOT match the given reference frame. -static void get_non_matching_candidates(const MODE_INFO *candidate_mi, - MV_REFERENCE_FRAME ref_frame, - MV_REFERENCE_FRAME *c_ref_frame, - int_mv *c_mv, - MV_REFERENCE_FRAME *c2_ref_frame, - int_mv *c2_mv) { - - c_mv->as_int = 0; - c2_mv->as_int = 0; - *c_ref_frame = INTRA_FRAME; - *c2_ref_frame = INTRA_FRAME; - - // If first candidate not valid neither will be. - if (candidate_mi->mbmi.ref_frame[0] > INTRA_FRAME) { - // First candidate - if (candidate_mi->mbmi.ref_frame[0] != ref_frame) { - *c_ref_frame = candidate_mi->mbmi.ref_frame[0]; - c_mv->as_int = candidate_mi->mbmi.mv[0].as_int; - } - - // Second candidate - if ((candidate_mi->mbmi.ref_frame[1] > INTRA_FRAME) && - (candidate_mi->mbmi.ref_frame[1] != ref_frame) && - (candidate_mi->mbmi.mv[1].as_int != candidate_mi->mbmi.mv[0].as_int)) { - *c2_ref_frame = candidate_mi->mbmi.ref_frame[1]; - c2_mv->as_int = candidate_mi->mbmi.mv[1].as_int; - } - } +// This function returns either the appropriate sub block or block's mv +// on whether the block_size < 8x8 and we have check_sub_blocks set. +static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate, + int check_sub_blocks, int which_mv, + int search_col, int block_idx) { + return (check_sub_blocks && candidate->mbmi.sb_type < BLOCK_SIZE_SB8X8 + ? candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]] + .as_mv[which_mv] + : candidate->mbmi.mv[which_mv]); } // Performs mv sign inversion if indicated by the reference frame combination. -static void scale_mv(MACROBLOCKD *xd, MV_REFERENCE_FRAME this_ref_frame, - MV_REFERENCE_FRAME candidate_ref_frame, - int_mv *candidate_mv, int *ref_sign_bias) { +static INLINE int_mv scale_mv(const MODE_INFO *candidate, const int which_mv, + const MV_REFERENCE_FRAME this_ref_frame, + const int *ref_sign_bias) { + int_mv return_mv = candidate->mbmi.mv[which_mv]; // Sign inversion where appropriate. - if (ref_sign_bias[candidate_ref_frame] != ref_sign_bias[this_ref_frame]) { - candidate_mv->as_mv.row = -candidate_mv->as_mv.row; - candidate_mv->as_mv.col = -candidate_mv->as_mv.col; + if (ref_sign_bias[candidate->mbmi.ref_frame[which_mv]] != + ref_sign_bias[this_ref_frame]) { + return_mv.as_mv.row *= -1; + return_mv.as_mv.col *= -1; } + return return_mv; } -// Add a candidate mv. -// Discard if it has already been seen. 
-static void add_candidate_mv(int_mv *mv_list, int *mv_scores, - int *candidate_count, int_mv candidate_mv, - int weight) { - if (*candidate_count == 0) { - mv_list[0].as_int = candidate_mv.as_int; - mv_scores[0] = weight; - *candidate_count += 1; - } else if ((*candidate_count == 1) && - (candidate_mv.as_int != mv_list[0].as_int)) { - mv_list[1].as_int = candidate_mv.as_int; - mv_scores[1] = weight; - *candidate_count += 1; +// This macro is used to add a motion vector mv_ref list if it isn't +// already in the list. If it's the second motion vector it will also +// skip all additional processing and jump to done! +#define ADD_MV_REF_LIST(MV) \ + if (refmv_count) { \ + if ((MV).as_int != mv_ref_list[0].as_int) { \ + mv_ref_list[refmv_count] = (MV); \ + goto Done; \ + } \ + } else { \ + mv_ref_list[refmv_count++] = (MV); \ + } + +// If either reference frame is different, not INTRA, and they +// are different from each other scale and add the mv to our list. +#define IF_DIFF_REF_FRAME_ADD_MV(CANDIDATE) \ + if ((CANDIDATE)->mbmi.ref_frame[0] != ref_frame) { \ + ADD_MV_REF_LIST(scale_mv((CANDIDATE), 0, ref_frame, ref_sign_bias)); \ + } \ + if ((CANDIDATE)->mbmi.ref_frame[1] != ref_frame && \ + (CANDIDATE)->mbmi.ref_frame[1] > INTRA_FRAME && \ + (CANDIDATE)->mbmi.mv[1].as_int != (CANDIDATE)->mbmi.mv[0].as_int) { \ + ADD_MV_REF_LIST(scale_mv((CANDIDATE), 1, ref_frame, ref_sign_bias)); \ } + +// Checks that the given mi_row, mi_col and search point +// are inside the borders of the tile. +static INLINE int is_inside(const int mi_col, const int mi_row, + const int cur_tile_mi_col_start, + const int cur_tile_mi_col_end, const int mi_rows, + const int (*mv_ref_search)[2], int idx) { + int mi_search_col; + const int mi_search_row = mi_row + mv_ref_search[idx][1];; + + // Check that the candidate is within the border. We only need to check + // the left side because all the positive right side ones are for blocks that + // are large enough to support the + value they have within their border. + if (mi_search_row < 0) + return 0; + + mi_search_col = mi_col + mv_ref_search[idx][0]; + if (mi_search_col < cur_tile_mi_col_start) + return 0; + + return 1; } // This function searches the neighbourhood of a given MB/SB // to try and find candidate reference vectors. -// void vp9_find_mv_refs_idx(VP9_COMMON *cm, MACROBLOCKD *xd, MODE_INFO *here, - MODE_INFO *lf_here, MV_REFERENCE_FRAME ref_frame, - int_mv *mv_ref_list, int *ref_sign_bias, - int block_idx) { - int i; - MODE_INFO *candidate_mi; - MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; - int_mv c_refmv; - int_mv c2_refmv; - MV_REFERENCE_FRAME c_ref_frame; - MV_REFERENCE_FRAME c2_ref_frame; - int candidate_scores[MAX_MV_REF_CANDIDATES] = { 0 }; + const MODE_INFO *lf_here, + const MV_REFERENCE_FRAME ref_frame, + int_mv *mv_ref_list, const int *ref_sign_bias, + const int block_idx, + const int mi_row, const int mi_col) { + int idx; + MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; int refmv_count = 0; const int (*mv_ref_search)[2] = mv_ref_blocks[mbmi->sb_type]; - const int mi_col = get_mi_col(xd); - const int mi_row = get_mi_row(xd); - int intra_count = 0; - int zero_count = 0; - int newmv_count = 0; - int x_idx = 0, y_idx = 0; - - // Blank the reference vector lists and other local structures. 
- vpx_memset(mv_ref_list, 0, sizeof(int_mv) * MAX_MV_REF_CANDIDATES); - - if (mbmi->sb_type < BLOCK_SIZE_SB8X8) { - x_idx = block_idx & 1; - y_idx = block_idx >> 1; - } - - // We first scan for candidate vectors that match the current reference frame - // Look at nearest neigbours - for (i = 0; i < 2; ++i) { - const int mi_search_col = mi_col + mv_ref_search[i][0]; - const int mi_search_row = mi_row + mv_ref_search[i][1]; - if ((mi_search_col >= cm->cur_tile_mi_col_start) && - (mi_search_col < cm->cur_tile_mi_col_end) && - (mi_search_row >= 0) && (mi_search_row < cm->mi_rows)) { - int b; - - candidate_mi = here + mv_ref_search[i][0] + - (mv_ref_search[i][1] * xd->mode_info_stride); - - if (block_idx >= 0) { - if (mv_ref_search[i][0]) - b = 1 + y_idx * 2; - else - b = 2 + x_idx; - } else { - b = -1; - } - if (get_matching_candidate(candidate_mi, ref_frame, &c_refmv, b)) { - add_candidate_mv(mv_ref_list, candidate_scores, - &refmv_count, c_refmv, 16); + const MODE_INFO *candidate; + const int check_sub_blocks = block_idx >= 0; + int different_ref_found = 0; + int context_counter = 0; + + // Blank the reference vector list + vpx_memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES); + + // The nearest 2 blocks are treated differently + // if the size < 8x8 we get the mv from the bmi substructure, + // and we also need to keep a mode count. + for (idx = 0; idx < 2; ++idx) { + if (!is_inside(mi_col, mi_row, cm->cur_tile_mi_col_start, + cm->cur_tile_mi_col_end, cm->mi_rows, mv_ref_search, idx)) + continue; + + candidate = here + mv_ref_search[idx][0] + + mv_ref_search[idx][1] * xd->mode_info_stride; + + // Keep counts for entropy encoding. + context_counter += mode_2_counter[candidate->mbmi.mode]; + + // Check if the candidate comes from the same reference frame. + if (candidate->mbmi.ref_frame[0] == ref_frame) { + ADD_MV_REF_LIST(get_sub_block_mv(candidate, check_sub_blocks, 0, + mv_ref_search[idx][0], block_idx)); + different_ref_found = candidate->mbmi.ref_frame[1] != ref_frame; + } else { + different_ref_found = 1; + if (candidate->mbmi.ref_frame[1] == ref_frame) { + // Add second motion vector if it has the same ref_frame. + ADD_MV_REF_LIST(get_sub_block_mv(candidate, check_sub_blocks, 1, + mv_ref_search[idx][0], block_idx)); } - - // Count number of neihgbours coded intra and zeromv - intra_count += (candidate_mi->mbmi.mode < NEARESTMV); - zero_count += (candidate_mi->mbmi.mode == ZEROMV); - newmv_count += (candidate_mi->mbmi.mode >= NEWMV); } } - // More distant neigbours - for (i = 2; (i < MVREF_NEIGHBOURS) && - (refmv_count < MAX_MV_REF_CANDIDATES); ++i) { - const int mi_search_col = mi_col + mv_ref_search[i][0]; - const int mi_search_row = mi_row + mv_ref_search[i][1]; - if ((mi_search_col >= cm->cur_tile_mi_col_start) && - (mi_search_col < cm->cur_tile_mi_col_end) && - (mi_search_row >= 0) && (mi_search_row < cm->mi_rows)) { - candidate_mi = here + mv_ref_search[i][0] + - (mv_ref_search[i][1] * xd->mode_info_stride); - - if (get_matching_candidate(candidate_mi, ref_frame, &c_refmv, -1)) { - add_candidate_mv(mv_ref_list, candidate_scores, - &refmv_count, c_refmv, 16); + // Check the rest of the neighbors in much the same way + // as before except we don't need to keep track of sub blocks or + // mode counts. 
+ for (; idx < MVREF_NEIGHBOURS; ++idx) { + if (!is_inside(mi_col, mi_row, cm->cur_tile_mi_col_start, + cm->cur_tile_mi_col_end, cm->mi_rows, mv_ref_search, idx)) + continue; + + candidate = here + mv_ref_search[idx][0] + + mv_ref_search[idx][1] * xd->mode_info_stride; + + if (candidate->mbmi.ref_frame[0] == ref_frame) { + ADD_MV_REF_LIST(candidate->mbmi.mv[0]); + different_ref_found = candidate->mbmi.ref_frame[1] != ref_frame; + } else { + different_ref_found = 1; + if (candidate->mbmi.ref_frame[1] == ref_frame) { + ADD_MV_REF_LIST(candidate->mbmi.mv[1]); } } } - // Look in the last frame if it exists - if (lf_here && (refmv_count < MAX_MV_REF_CANDIDATES)) { - candidate_mi = lf_here; - if (get_matching_candidate(candidate_mi, ref_frame, &c_refmv, -1)) { - add_candidate_mv(mv_ref_list, candidate_scores, - &refmv_count, c_refmv, 16); + // Check the last frame's mode and mv info. + if (lf_here != NULL) { + if (lf_here->mbmi.ref_frame[0] == ref_frame) { + ADD_MV_REF_LIST(lf_here->mbmi.mv[0]); + } else if (lf_here->mbmi.ref_frame[1] == ref_frame) { + ADD_MV_REF_LIST(lf_here->mbmi.mv[1]); } } - // If we have not found enough candidates consider ones where the - // reference frame does not match. Break out when we have - // MAX_MV_REF_CANDIDATES candidates. - // Look first at spatial neighbours - for (i = 0; (i < MVREF_NEIGHBOURS) && - (refmv_count < MAX_MV_REF_CANDIDATES); ++i) { - const int mi_search_col = mi_col + mv_ref_search[i][0]; - const int mi_search_row = mi_row + mv_ref_search[i][1]; - if ((mi_search_col >= cm->cur_tile_mi_col_start) && - (mi_search_col < cm->cur_tile_mi_col_end) && - (mi_search_row >= 0) && (mi_search_row < cm->mi_rows)) { - candidate_mi = here + mv_ref_search[i][0] + - (mv_ref_search[i][1] * xd->mode_info_stride); - - get_non_matching_candidates(candidate_mi, ref_frame, - &c_ref_frame, &c_refmv, - &c2_ref_frame, &c2_refmv); - - if (c_ref_frame != INTRA_FRAME) { - scale_mv(xd, ref_frame, c_ref_frame, &c_refmv, ref_sign_bias); - add_candidate_mv(mv_ref_list, candidate_scores, - &refmv_count, c_refmv, 1); - } + // Since we couldn't find 2 mvs from the same reference frame + // go back through the neighbors and find motion vectors from + // different reference frames. + if (different_ref_found) { + for (idx = 0; idx < MVREF_NEIGHBOURS; ++idx) { + if (!is_inside(mi_col, mi_row, cm->cur_tile_mi_col_start, + cm->cur_tile_mi_col_end, cm->mi_rows, mv_ref_search, idx)) + continue; - if (c2_ref_frame != INTRA_FRAME) { - scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias); - add_candidate_mv(mv_ref_list, candidate_scores, - &refmv_count, c2_refmv, 1); - } - } - } + candidate = here + mv_ref_search[idx][0] + + mv_ref_search[idx][1] * xd->mode_info_stride; - // Look at the last frame if it exists - if (lf_here && (refmv_count < MAX_MV_REF_CANDIDATES)) { - candidate_mi = lf_here; - get_non_matching_candidates(candidate_mi, ref_frame, - &c_ref_frame, &c_refmv, - &c2_ref_frame, &c2_refmv); - - if (c_ref_frame != INTRA_FRAME) { - scale_mv(xd, ref_frame, c_ref_frame, &c_refmv, ref_sign_bias); - add_candidate_mv(mv_ref_list, candidate_scores, - &refmv_count, c_refmv, 1); - } + // If the candidate is INTRA we don't want to consider its mv. 
+ if (candidate->mbmi.ref_frame[0] == INTRA_FRAME) + continue; - if (c2_ref_frame != INTRA_FRAME) { - scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias); - add_candidate_mv(mv_ref_list, candidate_scores, - &refmv_count, c2_refmv, 1); + IF_DIFF_REF_FRAME_ADD_MV(candidate); } } - if (!intra_count) { - if (!newmv_count) { - // 0 = both zero mv - // 1 = one zero mv + one a predicted mv - // 2 = two predicted mvs - mbmi->mb_mode_context[ref_frame] = 2 - zero_count; - } else { - // 3 = one predicted/zero and one new mv - // 4 = two new mvs - mbmi->mb_mode_context[ref_frame] = 2 + newmv_count; - } - } else { - // 5 = one intra neighbour + x - // 6 = two intra neighbours - mbmi->mb_mode_context[ref_frame] = 4 + intra_count; + // Since we still don't have a candidate we'll try the last frame. + if (lf_here != NULL && lf_here->mbmi.ref_frame[0] != INTRA_FRAME) { + IF_DIFF_REF_FRAME_ADD_MV(lf_here); } + Done: + + mbmi->mb_mode_context[ref_frame] = counter_to_context[context_counter]; + // Clamp vectors - for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) { - clamp_mv_ref(xd, &mv_ref_list[i]); + for (idx = 0; idx < MAX_MV_REF_CANDIDATES; ++idx) { + clamp_mv_ref(xd, &mv_ref_list[idx]); } } + +#undef ADD_MV_REF_LIST +#undef IF_DIFF_REF_FRAME_ADD_MV diff --git a/libvpx/vp9/common/vp9_mvref_common.h b/libvpx/vp9/common/vp9_mvref_common.h index 7290f10ab..c5f89eb57 100644 --- a/libvpx/vp9/common/vp9_mvref_common.h +++ b/libvpx/vp9/common/vp9_mvref_common.h @@ -17,11 +17,13 @@ void vp9_find_mv_refs_idx(VP9_COMMON *cm, MACROBLOCKD *xd, MODE_INFO *here, - MODE_INFO *lf_here, - MV_REFERENCE_FRAME ref_frame, + const MODE_INFO *lf_here, + const MV_REFERENCE_FRAME ref_frame, int_mv *mv_ref_list, - int *ref_sign_bias, - int block_idx); + const int *ref_sign_bias, + const int block_idx, + const int mi_row, + const int mi_col); static INLINE void vp9_find_mv_refs(VP9_COMMON *cm, MACROBLOCKD *xd, @@ -29,9 +31,10 @@ static INLINE void vp9_find_mv_refs(VP9_COMMON *cm, MODE_INFO *lf_here, MV_REFERENCE_FRAME ref_frame, int_mv *mv_ref_list, - int *ref_sign_bias) { + int *ref_sign_bias, + int mi_row, int mi_col) { vp9_find_mv_refs_idx(cm, xd, here, lf_here, ref_frame, - mv_ref_list, ref_sign_bias, -1); + mv_ref_list, ref_sign_bias, -1, mi_row, mi_col); } #endif // VP9_COMMON_VP9_MVREF_COMMON_H_ diff --git a/libvpx/vp9/common/vp9_onyxc_int.h b/libvpx/vp9/common/vp9_onyxc_int.h index f31f24b26..152a93293 100644 --- a/libvpx/vp9/common/vp9_onyxc_int.h +++ b/libvpx/vp9/common/vp9_onyxc_int.h @@ -42,7 +42,7 @@ typedef struct frame_contexts { vp9_prob uv_mode_prob[VP9_INTRA_MODES][VP9_INTRA_MODES - 1]; vp9_prob partition_prob[NUM_FRAME_TYPES][NUM_PARTITION_CONTEXTS] [PARTITION_TYPES - 1]; - vp9_coeff_probs_model coef_probs[TX_SIZE_MAX_SB][BLOCK_TYPES]; + vp9_coeff_probs_model coef_probs[TX_SIZES][BLOCK_TYPES]; vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1] [VP9_SWITCHABLE_FILTERS - 1]; vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1]; @@ -59,12 +59,12 @@ typedef struct { unsigned int y_mode[BLOCK_SIZE_GROUPS][VP9_INTRA_MODES]; unsigned int uv_mode[VP9_INTRA_MODES][VP9_INTRA_MODES]; unsigned int partition[NUM_PARTITION_CONTEXTS][PARTITION_TYPES]; - vp9_coeff_count_model coef[TX_SIZE_MAX_SB][BLOCK_TYPES]; - unsigned int eob_branch[TX_SIZE_MAX_SB][BLOCK_TYPES][REF_TYPES] + vp9_coeff_count_model coef[TX_SIZES][BLOCK_TYPES]; + unsigned int eob_branch[TX_SIZES][BLOCK_TYPES][REF_TYPES] [COEF_BANDS][PREV_COEF_CONTEXTS]; unsigned int switchable_interp[VP9_SWITCHABLE_FILTERS + 1] 
[VP9_SWITCHABLE_FILTERS]; - unsigned int inter_mode[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1][2]; + unsigned int inter_mode[INTER_MODE_CONTEXTS][VP9_INTER_MODES]; unsigned int intra_inter[INTRA_INTER_CONTEXTS][2]; unsigned int comp_inter[COMP_INTER_CONTEXTS][2]; unsigned int single_ref[REF_CONTEXTS][2][2]; @@ -240,8 +240,7 @@ static INLINE void set_partition_seg_context(VP9_COMMON *cm, MACROBLOCKD *xd, xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK); } -static int check_bsize_coverage(VP9_COMMON *cm, MACROBLOCKD *xd, - int mi_row, int mi_col, +static int check_bsize_coverage(VP9_COMMON *cm, int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize) { int bsl = mi_width_log2(bsize), bs = 1 << bsl; int ms = bs / 2; @@ -278,14 +277,6 @@ static void set_mi_row_col(VP9_COMMON *cm, MACROBLOCKD *xd, xd->right_available = (mi_col + bw < cm->cur_tile_mi_col_end); } -static int get_mi_row(const MACROBLOCKD *xd) { - return ((-xd->mb_to_top_edge) >> (3 + LOG2_MI_SIZE)); -} - -static int get_mi_col(const MACROBLOCKD *xd) { - return ((-xd->mb_to_left_edge) >> (3 + LOG2_MI_SIZE)); -} - static int get_token_alloc(int mb_rows, int mb_cols) { return mb_rows * mb_cols * (48 * 16 + 4); } diff --git a/libvpx/vp9/common/vp9_pred_common.c b/libvpx/vp9/common/vp9_pred_common.c index e8bcdea82..795962a71 100644 --- a/libvpx/vp9/common/vp9_pred_common.c +++ b/libvpx/vp9/common/vp9_pred_common.c @@ -55,34 +55,28 @@ unsigned char vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd) { } // Returns a context number for the given MB prediction signal unsigned char vp9_get_pred_context_intra_inter(const MACROBLOCKD *xd) { - int pred_context; const MODE_INFO *const mi = xd->mode_info_context; const MB_MODE_INFO *const above_mbmi = &mi[-xd->mode_info_stride].mbmi; const MB_MODE_INFO *const left_mbmi = &mi[-1].mbmi; const int left_in_image = xd->left_available && left_mbmi->mb_in_image; const int above_in_image = xd->up_available && above_mbmi->mb_in_image; - // Note: - // The mode info data structure has a one element border above and to the - // left of the entries correpsonding to real macroblocks. - // The prediction flags in these dummy entries are initialised to 0. - if (above_in_image && left_in_image) { // both edges available - if (left_mbmi->ref_frame[0] == INTRA_FRAME && - above_mbmi->ref_frame[0] == INTRA_FRAME) { // intra/intra (3) - pred_context = 3; - } else { // intra/inter (1) or inter/inter (0) - pred_context = left_mbmi->ref_frame[0] == INTRA_FRAME || - above_mbmi->ref_frame[0] == INTRA_FRAME; - } - } else if (above_in_image || left_in_image) { // one edge available - const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi; + const int left_intra = !is_inter_block(left_mbmi); + const int above_intra = !is_inter_block(above_mbmi); - // inter: 0, intra: 2 - pred_context = 2 * (edge_mbmi->ref_frame[0] == INTRA_FRAME); - } else { - pred_context = 0; - } - assert(pred_context >= 0 && pred_context < INTRA_INTER_CONTEXTS); - return pred_context; + // The mode info data structure has a one element border above and to the + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. + // 0 - inter/inter, inter/--, --/inter, --/-- + // 1 - intra/inter, inter/intra + // 2 - intra/--, --/intra + // 3 - intra/intra + if (above_in_image && left_in_image) // both edges available + return left_intra && above_intra ? 
3 + : left_intra || above_intra; + else if (above_in_image || left_in_image) // one edge available + return 2 * (above_in_image ? above_intra : left_intra); + else + return 0; } // Returns a context number for the given MB prediction signal unsigned char vp9_get_pred_context_comp_inter_inter(const VP9_COMMON *cm, diff --git a/libvpx/vp9/common/vp9_pred_common.h b/libvpx/vp9/common/vp9_pred_common.h index e4b6575e3..238290b41 100644 --- a/libvpx/vp9/common/vp9_pred_common.h +++ b/libvpx/vp9/common/vp9_pred_common.h @@ -110,9 +110,9 @@ unsigned char vp9_get_pred_context_tx_size(const MACROBLOCKD *xd); static const vp9_prob *get_tx_probs(BLOCK_SIZE_TYPE bsize, uint8_t context, const struct tx_probs *tx_probs) { - if (bsize < BLOCK_SIZE_MB16X16) + if (bsize < BLOCK_16X16) return tx_probs->p8x8[context]; - else if (bsize < BLOCK_SIZE_SB32X32) + else if (bsize < BLOCK_32X32) return tx_probs->p16x16[context]; else return tx_probs->p32x32[context]; @@ -127,9 +127,9 @@ static const vp9_prob *get_tx_probs2(const MACROBLOCKD *xd, static void update_tx_counts(BLOCK_SIZE_TYPE bsize, uint8_t context, TX_SIZE tx_size, struct tx_counts *tx_counts) { - if (bsize >= BLOCK_SIZE_SB32X32) + if (bsize >= BLOCK_32X32) tx_counts->p32x32[context][tx_size]++; - else if (bsize >= BLOCK_SIZE_MB16X16) + else if (bsize >= BLOCK_16X16) tx_counts->p16x16[context][tx_size]++; else tx_counts->p8x8[context][tx_size]++; diff --git a/libvpx/vp9/common/vp9_reconinter.c b/libvpx/vp9/common/vp9_reconinter.c index 63e5646ad..0b65e0610 100644 --- a/libvpx/vp9/common/vp9_reconinter.c +++ b/libvpx/vp9/common/vp9_reconinter.c @@ -197,14 +197,14 @@ void vp9_setup_interp_filters(MACROBLOCKD *xd, void vp9_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, - const int_mv *src_mv, + const MV *src_mv, const struct scale_factors *scale, int w, int h, int weight, const struct subpix_fn_table *subpix, enum mv_precision precision) { const MV32 mv = precision == MV_PRECISION_Q4 - ? scale->scale_mv_q4(&src_mv->as_mv, scale) - : scale->scale_mv_q3_to_q4(&src_mv->as_mv, scale); + ? scale->scale_mv_q4(src_mv, scale) + : scale->scale_mv_q3_to_q4(src_mv, scale); const int subpel_x = mv.col & 15; const int subpel_y = mv.row & 15; @@ -220,45 +220,44 @@ static INLINE int round_mv_comp_q4(int value) { return (value < 0 ? 
value - 2 : value + 2) / 4; } -static int mi_mv_pred_row_q4(MACROBLOCKD *mb, int idx) { - const int temp = mb->mode_info_context->bmi[0].as_mv[idx].as_mv.row + - mb->mode_info_context->bmi[1].as_mv[idx].as_mv.row + - mb->mode_info_context->bmi[2].as_mv[idx].as_mv.row + - mb->mode_info_context->bmi[3].as_mv[idx].as_mv.row; - return round_mv_comp_q4(temp); +static MV mi_mv_pred_q4(const MODE_INFO *mi, int idx) { + MV res = { round_mv_comp_q4(mi->bmi[0].as_mv[idx].as_mv.row + + mi->bmi[1].as_mv[idx].as_mv.row + + mi->bmi[2].as_mv[idx].as_mv.row + + mi->bmi[3].as_mv[idx].as_mv.row), + round_mv_comp_q4(mi->bmi[0].as_mv[idx].as_mv.col + + mi->bmi[1].as_mv[idx].as_mv.col + + mi->bmi[2].as_mv[idx].as_mv.col + + mi->bmi[3].as_mv[idx].as_mv.col) }; + return res; } -static int mi_mv_pred_col_q4(MACROBLOCKD *mb, int idx) { - const int temp = mb->mode_info_context->bmi[0].as_mv[idx].as_mv.col + - mb->mode_info_context->bmi[1].as_mv[idx].as_mv.col + - mb->mode_info_context->bmi[2].as_mv[idx].as_mv.col + - mb->mode_info_context->bmi[3].as_mv[idx].as_mv.col; - return round_mv_comp_q4(temp); -} + // TODO(jkoleszar): yet another mv clamping function :-( MV clamp_mv_to_umv_border_sb(const MV *src_mv, int bwl, int bhl, int ss_x, int ss_y, int mb_to_left_edge, int mb_to_top_edge, int mb_to_right_edge, int mb_to_bottom_edge) { - /* If the MV points so far into the UMV border that no visible pixels - * are used for reconstruction, the subpel part of the MV can be - * discarded and the MV limited to 16 pixels with equivalent results. - */ + // If the MV points so far into the UMV border that no visible pixels + // are used for reconstruction, the subpel part of the MV can be + // discarded and the MV limited to 16 pixels with equivalent results. const int spel_left = (VP9_INTERP_EXTEND + (4 << bwl)) << 4; const int spel_right = spel_left - (1 << 4); const int spel_top = (VP9_INTERP_EXTEND + (4 << bhl)) << 4; const int spel_bottom = spel_top - (1 << 4); - MV clamped_mv; - + MV clamped_mv = { + src_mv->row << (1 - ss_y), + src_mv->col << (1 - ss_x) + }; assert(ss_x <= 1); assert(ss_y <= 1); - clamped_mv.col = clamp(src_mv->col << (1 - ss_x), - (mb_to_left_edge << (1 - ss_x)) - spel_left, - (mb_to_right_edge << (1 - ss_x)) + spel_right); - clamped_mv.row = clamp(src_mv->row << (1 - ss_y), - (mb_to_top_edge << (1 - ss_y)) - spel_top, - (mb_to_bottom_edge << (1 - ss_y)) + spel_bottom); + + clamp_mv(&clamped_mv, (mb_to_left_edge << (1 - ss_x)) - spel_left, + (mb_to_right_edge << (1 - ss_x)) + spel_right, + (mb_to_top_edge << (1 - ss_y)) - spel_top, + (mb_to_bottom_edge << (1 - ss_y)) + spel_bottom); + return clamped_mv; } @@ -280,15 +279,14 @@ static void build_inter_predictors(int plane, int block, const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x; const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y; const int x = 4 * (block & ((1 << bwl) - 1)), y = 4 * (block >> bwl); - const int use_second_ref = xd->mode_info_context->mbmi.ref_frame[1] > 0; + const MODE_INFO *const mi = xd->mode_info_context; + const int use_second_ref = mi->mbmi.ref_frame[1] > 0; int which_mv; assert(x < (4 << bwl)); assert(y < (4 << bhl)); - assert(xd->mode_info_context->mbmi.sb_type < BLOCK_SIZE_SB8X8 || - 4 << pred_w == (4 << bwl)); - assert(xd->mode_info_context->mbmi.sb_type < BLOCK_SIZE_SB8X8 || - 4 << pred_h == (4 << bhl)); + assert(mi->mbmi.sb_type < BLOCK_SIZE_SB8X8 || 4 << pred_w == (4 << bwl)); + assert(mi->mbmi.sb_type < BLOCK_SIZE_SB8X8 || 4 << pred_h == (4 << bhl)); for (which_mv = 0; which_mv < 1 
+ use_second_ref; ++which_mv) { // source @@ -301,44 +299,30 @@ static void build_inter_predictors(int plane, int block, // dest uint8_t *const dst = arg->dst[plane] + arg->dst_stride[plane] * y + x; - // motion vector - const MV *mv; - MV split_chroma_mv; - int_mv clamped_mv; - - if (xd->mode_info_context->mbmi.sb_type < BLOCK_SIZE_SB8X8) { - if (plane == 0) { - mv = &xd->mode_info_context->bmi[block].as_mv[which_mv].as_mv; - } else { - // TODO(jkoleszar): All chroma MVs in SPLITMV mode are taken as the - // same MV (the average of the 4 luma MVs) but we could do something - // smarter for non-4:2:0. Just punt for now, pending the changes to get - // rid of SPLITMV mode entirely. - split_chroma_mv.row = mi_mv_pred_row_q4(xd, which_mv); - split_chroma_mv.col = mi_mv_pred_col_q4(xd, which_mv); - mv = &split_chroma_mv; - } - } else { - mv = &xd->mode_info_context->mbmi.mv[which_mv].as_mv; - } - - /* TODO(jkoleszar): This clamping is done in the incorrect place for the - * scaling case. It needs to be done on the scaled MV, not the pre-scaling - * MV. Note however that it performs the subsampling aware scaling so - * that the result is always q4. - */ - clamped_mv.as_mv = clamp_mv_to_umv_border_sb(mv, bwl, bhl, - xd->plane[plane].subsampling_x, - xd->plane[plane].subsampling_y, - xd->mb_to_left_edge, - xd->mb_to_top_edge, - xd->mb_to_right_edge, - xd->mb_to_bottom_edge); + // TODO(jkoleszar): All chroma MVs in SPLITMV mode are taken as the + // same MV (the average of the 4 luma MVs) but we could do something + // smarter for non-4:2:0. Just punt for now, pending the changes to get + // rid of SPLITMV mode entirely. + const MV mv = mi->mbmi.sb_type < BLOCK_SIZE_SB8X8 + ? (plane == 0 ? mi->bmi[block].as_mv[which_mv].as_mv + : mi_mv_pred_q4(mi, which_mv)) + : mi->mbmi.mv[which_mv].as_mv; + + // TODO(jkoleszar): This clamping is done in the incorrect place for the + // scaling case. It needs to be done on the scaled MV, not the pre-scaling + // MV. Note however that it performs the subsampling aware scaling so + // that the result is always q4. 
+ const MV res_mv = clamp_mv_to_umv_border_sb(&mv, bwl, bhl, + xd->plane[plane].subsampling_x, + xd->plane[plane].subsampling_y, + xd->mb_to_left_edge, + xd->mb_to_top_edge, + xd->mb_to_right_edge, + xd->mb_to_bottom_edge); scale->set_scaled_offsets(scale, arg->y + y, arg->x + x); - vp9_build_inter_predictor(pre, pre_stride, dst, arg->dst_stride[plane], - &clamped_mv, &xd->scale_factor[which_mv], + &res_mv, &xd->scale_factor[which_mv], 4 << pred_w, 4 << pred_h, which_mv, &xd->subpix, MV_PRECISION_Q4); } @@ -400,7 +384,7 @@ void vp9_setup_scale_factors(VP9_COMMON *cm, int i) { const int ref = cm->active_ref_idx[i]; struct scale_factors *const sf = &cm->active_ref_scale[i]; if (ref >= NUM_YV12_BUFFERS) { - memset(sf, 0, sizeof(*sf)); + vp9_zero(*sf); } else { YV12_BUFFER_CONFIG *const fb = &cm->yv12_fb[ref]; vp9_setup_scale_factors_for_frame(sf, diff --git a/libvpx/vp9/common/vp9_reconinter.h b/libvpx/vp9/common/vp9_reconinter.h index e37750dea..6ec7323e1 100644 --- a/libvpx/vp9/common/vp9_reconinter.h +++ b/libvpx/vp9/common/vp9_reconinter.h @@ -39,7 +39,7 @@ void vp9_setup_scale_factors_for_frame(struct scale_factors *scale, void vp9_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, - const int_mv *mv_q3, + const MV *mv_q3, const struct scale_factors *scale, int w, int h, int do_avg, const struct subpix_fn_table *subpix, diff --git a/libvpx/vp9/common/vp9_rtcd_defs.sh b/libvpx/vp9/common/vp9_rtcd_defs.sh index c357ef62a..6bb3cb888 100644 --- a/libvpx/vp9/common/vp9_rtcd_defs.sh +++ b/libvpx/vp9/common/vp9_rtcd_defs.sh @@ -7,9 +7,7 @@ cat <<EOF #include "vpx/vpx_integer.h" #include "vp9/common/vp9_enums.h" -struct loop_filter_info; struct macroblockd; -struct loop_filter_info; /* Encoder forward decls */ struct macroblock; @@ -22,7 +20,11 @@ EOF } forward_decls vp9_common_forward_decls -[ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2 +# x86inc.asm doesn't work if pic is enabled on 32 bit platforms so no assembly. +[ "$CONFIG_USE_X86INC" = "yes" ] && mmx_x86inc=mmx && sse2_x86inc=sse2 && ssse3_x86inc=ssse3 + +# this variable is for functions that are 64 bit only. 
+[ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2 && ssse3_x86_64=ssse3 # # Dequant @@ -47,7 +49,7 @@ prototype void vp9_d27_predictor_4x4 "uint8_t *ypred_ptr, ptrdiff_t y_stride, ui specialize vp9_d27_predictor_4x4 prototype void vp9_d45_predictor_4x4 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col" -specialize vp9_d45_predictor_4x4 +specialize vp9_d45_predictor_4x4 ssse3 prototype void vp9_d63_predictor_4x4 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col" specialize vp9_d63_predictor_4x4 @@ -86,7 +88,7 @@ prototype void vp9_d27_predictor_8x8 "uint8_t *ypred_ptr, ptrdiff_t y_stride, ui specialize vp9_d27_predictor_8x8 prototype void vp9_d45_predictor_8x8 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col" -specialize vp9_d45_predictor_8x8 +specialize vp9_d45_predictor_8x8 ssse3 prototype void vp9_d63_predictor_8x8 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col" specialize vp9_d63_predictor_8x8 @@ -125,7 +127,7 @@ prototype void vp9_d27_predictor_16x16 "uint8_t *ypred_ptr, ptrdiff_t y_stride, specialize vp9_d27_predictor_16x16 prototype void vp9_d45_predictor_16x16 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col" -specialize vp9_d45_predictor_16x16 +specialize vp9_d45_predictor_16x16 ssse3 prototype void vp9_d63_predictor_16x16 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col" specialize vp9_d63_predictor_16x16 @@ -164,7 +166,7 @@ prototype void vp9_d27_predictor_32x32 "uint8_t *ypred_ptr, ptrdiff_t y_stride, specialize vp9_d27_predictor_32x32 prototype void vp9_d45_predictor_32x32 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col" -specialize vp9_d45_predictor_32x32 +specialize vp9_d45_predictor_32x32 ssse3 prototype void vp9_d63_predictor_32x32 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col" specialize vp9_d63_predictor_32x32 @@ -214,7 +216,7 @@ fi # Loopfilter # prototype void vp9_mb_lpf_vertical_edge_w "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh" -specialize vp9_mb_lpf_vertical_edge_w sse2 +specialize vp9_mb_lpf_vertical_edge_w sse2 neon prototype void vp9_mbloop_filter_vertical_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" specialize vp9_mbloop_filter_vertical_edge sse2 neon @@ -223,7 +225,7 @@ prototype void vp9_loop_filter_vertical_edge "uint8_t *s, int pitch, const uint8 specialize vp9_loop_filter_vertical_edge mmx neon prototype void vp9_mb_lpf_horizontal_edge_w "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" -specialize vp9_mb_lpf_horizontal_edge_w sse2 +specialize vp9_mb_lpf_horizontal_edge_w sse2 neon prototype void vp9_mbloop_filter_horizontal_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" specialize vp9_mbloop_filter_horizontal_edge sse2 neon @@ -265,10 +267,10 @@ specialize vp9_blend_b # Sub Pixel Filters # prototype void vp9_convolve_copy "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve_copy sse2 +specialize vp9_convolve_copy $sse2_x86inc prototype void vp9_convolve_avg "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t 
dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve_avg sse2 +specialize vp9_convolve_avg $sse2_x86inc prototype void vp9_convolve8 "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" specialize vp9_convolve8 ssse3 neon @@ -297,14 +299,17 @@ specialize vp9_short_idct4x4_1_add sse2 prototype void vp9_short_idct4x4_add "int16_t *input, uint8_t *dest, int dest_stride" specialize vp9_short_idct4x4_add sse2 +prototype void vp9_short_idct8x8_1_add "int16_t *input, uint8_t *dest, int dest_stride" +specialize vp9_short_idct8x8_1_add sse2 + prototype void vp9_short_idct8x8_add "int16_t *input, uint8_t *dest, int dest_stride" specialize vp9_short_idct8x8_add sse2 neon prototype void vp9_short_idct10_8x8_add "int16_t *input, uint8_t *dest, int dest_stride" specialize vp9_short_idct10_8x8_add sse2 -prototype void vp9_short_idct1_8x8 "int16_t *input, int16_t *output" -specialize vp9_short_idct1_8x8 +prototype void vp9_short_idct16x16_1_add "int16_t *input, uint8_t *dest, int dest_stride" +specialize vp9_short_idct16x16_1_add sse2 prototype void vp9_short_idct16x16_add "int16_t *input, uint8_t *dest, int dest_stride" specialize vp9_short_idct16x16_add sse2 @@ -312,18 +317,12 @@ specialize vp9_short_idct16x16_add sse2 prototype void vp9_short_idct10_16x16_add "int16_t *input, uint8_t *dest, int dest_stride" specialize vp9_short_idct10_16x16_add sse2 -prototype void vp9_short_idct1_16x16 "int16_t *input, int16_t *output" -specialize vp9_short_idct1_16x16 - prototype void vp9_short_idct32x32_add "int16_t *input, uint8_t *dest, int dest_stride" specialize vp9_short_idct32x32_add sse2 prototype void vp9_short_idct1_32x32 "int16_t *input, int16_t *output" specialize vp9_short_idct1_32x32 -prototype void vp9_short_idct10_32x32_add "int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_short_idct10_32x32_add - prototype void vp9_short_iht4x4_add "int16_t *input, uint8_t *dest, int dest_stride, int tx_type" specialize vp9_short_iht4x4_add sse2 @@ -702,12 +701,10 @@ specialize vp9_get_mb_ss mmx sse2 # ENCODEMB INVOKE prototype int64_t vp9_block_error "int16_t *coeff, int16_t *dqcoeff, intptr_t block_size, int64_t *ssz" -specialize vp9_block_error sse2 +specialize vp9_block_error $sse2_x86inc prototype void vp9_subtract_block "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride" -specialize vp9_subtract_block sse2 - -[ $arch = "x86_64" ] && ssse3_x86_64=ssse3 +specialize vp9_subtract_block $sse2_x86inc prototype void vp9_quantize_b "int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, int16_t *zbin_ptr, int16_t *round_ptr, int16_t *quant_ptr, int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan" specialize vp9_quantize_b $ssse3_x86_64 @@ -719,13 +716,11 @@ specialize vp9_quantize_b_32x32 $ssse3_x86_64 # Structured Similarity (SSIM) # if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then - [ $arch = "x86_64" ] && sse2_on_x86_64=sse2 - prototype void vp9_ssim_parms_8x8 "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr" - specialize vp9_ssim_parms_8x8 $sse2_on_x86_64 + specialize 
vp9_ssim_parms_8x8 $sse2_x86_64 prototype void vp9_ssim_parms_16x16 "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr" - specialize vp9_ssim_parms_16x16 $sse2_on_x86_64 + specialize vp9_ssim_parms_16x16 $sse2_x86_64 fi # fdct functions diff --git a/libvpx/vp9/common/vp9_treecoder.h b/libvpx/vp9/common/vp9_treecoder.h index ebcd4116f..31182c35c 100644 --- a/libvpx/vp9/common/vp9_treecoder.h +++ b/libvpx/vp9/common/vp9_treecoder.h @@ -79,4 +79,22 @@ static INLINE vp9_prob weighted_prob(int prob1, int prob2, int factor) { return ROUND_POWER_OF_TWO(prob1 * (256 - factor) + prob2 * factor, 8); } +static INLINE vp9_prob merge_probs(vp9_prob pre_prob, vp9_prob prob, + const unsigned int ct[2], + unsigned int count_sat, + unsigned int max_update_factor) { + const unsigned int count = MIN(ct[0] + ct[1], count_sat); + const unsigned int factor = max_update_factor * count / count_sat; + return weighted_prob(pre_prob, prob, factor); +} + +static INLINE vp9_prob merge_probs2(vp9_prob pre_prob, + const unsigned int ct[2], + unsigned int count_sat, + unsigned int max_update_factor) { + return merge_probs(pre_prob, get_binary_prob(ct[0], ct[1]), ct, count_sat, + max_update_factor); +} + + #endif // VP9_COMMON_VP9_TREECODER_H_ diff --git a/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c b/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c index a1e14b482..8f740f412 100644 --- a/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c +++ b/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c @@ -523,9 +523,9 @@ void vp9_short_iht4x4_add_sse2(int16_t *input, uint8_t *dest, int stride, { \ __m128i d0 = _mm_loadl_epi64((__m128i *)(dest)); \ d0 = _mm_unpacklo_epi8(d0, zero); \ - in_x = _mm_add_epi16(in_x, d0); \ - in_x = _mm_packus_epi16(in_x, in_x); \ - _mm_storel_epi64((__m128i *)(dest), in_x); \ + d0 = _mm_add_epi16(in_x, d0); \ + d0 = _mm_packus_epi16(d0, d0); \ + _mm_storel_epi64((__m128i *)(dest), d0); \ dest += stride; \ } @@ -597,6 +597,27 @@ void vp9_short_idct8x8_add_sse2(int16_t *input, uint8_t *dest, int stride) { RECON_AND_STORE(dest, in7); } +void vp9_short_idct8x8_1_add_sse2(int16_t *input, uint8_t *dest, int stride) { + __m128i dc_value; + const __m128i zero = _mm_setzero_si128(); + int a; + + a = dct_const_round_shift(input[0] * cospi_16_64); + a = dct_const_round_shift(a * cospi_16_64); + a = ROUND_POWER_OF_TWO(a, 5); + + dc_value = _mm_set1_epi16(a); + + RECON_AND_STORE(dest, dc_value); + RECON_AND_STORE(dest, dc_value); + RECON_AND_STORE(dest, dc_value); + RECON_AND_STORE(dest, dc_value); + RECON_AND_STORE(dest, dc_value); + RECON_AND_STORE(dest, dc_value); + RECON_AND_STORE(dest, dc_value); + RECON_AND_STORE(dest, dc_value); +} + // perform 8x8 transpose static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) { const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]); @@ -1449,6 +1470,38 @@ void vp9_short_idct16x16_add_sse2(int16_t *input, uint8_t *dest, int stride) { } } +void vp9_short_idct16x16_1_add_sse2(int16_t *input, uint8_t *dest, int stride) { + __m128i dc_value; + const __m128i zero = _mm_setzero_si128(); + int a, i; + + a = dct_const_round_shift(input[0] * cospi_16_64); + a = dct_const_round_shift(a * cospi_16_64); + a = ROUND_POWER_OF_TWO(a, 6); + + dc_value = _mm_set1_epi16(a); + + for (i = 0; i < 2; ++i) { + RECON_AND_STORE(dest, dc_value); + RECON_AND_STORE(dest, dc_value); + RECON_AND_STORE(dest, dc_value); + RECON_AND_STORE(dest, dc_value); + RECON_AND_STORE(dest, dc_value); + 
RECON_AND_STORE(dest, dc_value); + RECON_AND_STORE(dest, dc_value); + RECON_AND_STORE(dest, dc_value); + RECON_AND_STORE(dest, dc_value); + RECON_AND_STORE(dest, dc_value); + RECON_AND_STORE(dest, dc_value); + RECON_AND_STORE(dest, dc_value); + RECON_AND_STORE(dest, dc_value); + RECON_AND_STORE(dest, dc_value); + RECON_AND_STORE(dest, dc_value); + RECON_AND_STORE(dest, dc_value); + dest += 8 - (stride * 16); + } +} + static INLINE void array_transpose_16x16(__m128i *res0, __m128i *res1) { __m128i tbuf[8]; array_transpose_8x8(res0, res0); @@ -2760,6 +2813,12 @@ void vp9_short_idct10_16x16_add_sse2(int16_t *input, uint8_t *dest, } } +#define LOAD_DQCOEFF(reg, input) \ + { \ + reg = _mm_load_si128((__m128i *) input); \ + input += 8; \ + } \ + void vp9_short_idct32x32_add_sse2(int16_t *input, uint8_t *dest, int stride) { const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i final_rounding = _mm_set1_epi16(1<<5); @@ -2827,48 +2886,126 @@ void vp9_short_idct32x32_add_sse2(int16_t *input, uint8_t *dest, int stride) { stp2_23, stp2_24, stp2_25, stp2_26, stp2_27, stp2_28, stp2_29, stp2_30, stp2_31; __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - int i, j; + int i, j, i32; + __m128i zero_idx[16]; + int zero_flag[2]; // We work on a 8x32 block each time, and loop 8 times for 2-D 32x32 idct. for (i = 0; i < 8; i++) { + i32 = (i << 5); if (i < 4) { // First 1-D idct // Load input data. - in0 = _mm_load_si128((__m128i *)input); - in8 = _mm_load_si128((__m128i *)(input + 8 * 1)); - in16 = _mm_load_si128((__m128i *)(input + 8 * 2)); - in24 = _mm_load_si128((__m128i *)(input + 8 * 3)); - in1 = _mm_load_si128((__m128i *)(input + 8 * 4)); - in9 = _mm_load_si128((__m128i *)(input + 8 * 5)); - in17 = _mm_load_si128((__m128i *)(input + 8 * 6)); - in25 = _mm_load_si128((__m128i *)(input + 8 * 7)); - in2 = _mm_load_si128((__m128i *)(input + 8 * 8)); - in10 = _mm_load_si128((__m128i *)(input + 8 * 9)); - in18 = _mm_load_si128((__m128i *)(input + 8 * 10)); - in26 = _mm_load_si128((__m128i *)(input + 8 * 11)); - in3 = _mm_load_si128((__m128i *)(input + 8 * 12)); - in11 = _mm_load_si128((__m128i *)(input + 8 * 13)); - in19 = _mm_load_si128((__m128i *)(input + 8 * 14)); - in27 = _mm_load_si128((__m128i *)(input + 8 * 15)); - - in4 = _mm_load_si128((__m128i *)(input + 8 * 16)); - in12 = _mm_load_si128((__m128i *)(input + 8 * 17)); - in20 = _mm_load_si128((__m128i *)(input + 8 * 18)); - in28 = _mm_load_si128((__m128i *)(input + 8 * 19)); - in5 = _mm_load_si128((__m128i *)(input + 8 * 20)); - in13 = _mm_load_si128((__m128i *)(input + 8 * 21)); - in21 = _mm_load_si128((__m128i *)(input + 8 * 22)); - in29 = _mm_load_si128((__m128i *)(input + 8 * 23)); - in6 = _mm_load_si128((__m128i *)(input + 8 * 24)); - in14 = _mm_load_si128((__m128i *)(input + 8 * 25)); - in22 = _mm_load_si128((__m128i *)(input + 8 * 26)); - in30 = _mm_load_si128((__m128i *)(input + 8 * 27)); - in7 = _mm_load_si128((__m128i *)(input + 8 * 28)); - in15 = _mm_load_si128((__m128i *)(input + 8 * 29)); - in23 = _mm_load_si128((__m128i *)(input + 8 * 30)); - in31 = _mm_load_si128((__m128i *)(input + 8 * 31)); - - input += 256; + LOAD_DQCOEFF(in0, input); + LOAD_DQCOEFF(in8, input); + LOAD_DQCOEFF(in16, input); + LOAD_DQCOEFF(in24, input); + LOAD_DQCOEFF(in1, input); + LOAD_DQCOEFF(in9, input); + LOAD_DQCOEFF(in17, input); + LOAD_DQCOEFF(in25, input); + LOAD_DQCOEFF(in2, input); + LOAD_DQCOEFF(in10, input); + LOAD_DQCOEFF(in18, input); + LOAD_DQCOEFF(in26, input); + LOAD_DQCOEFF(in3, input); + LOAD_DQCOEFF(in11, input); + 
LOAD_DQCOEFF(in19, input); + LOAD_DQCOEFF(in27, input); + + LOAD_DQCOEFF(in4, input); + LOAD_DQCOEFF(in12, input); + LOAD_DQCOEFF(in20, input); + LOAD_DQCOEFF(in28, input); + LOAD_DQCOEFF(in5, input); + LOAD_DQCOEFF(in13, input); + LOAD_DQCOEFF(in21, input); + LOAD_DQCOEFF(in29, input); + LOAD_DQCOEFF(in6, input); + LOAD_DQCOEFF(in14, input); + LOAD_DQCOEFF(in22, input); + LOAD_DQCOEFF(in30, input); + LOAD_DQCOEFF(in7, input); + LOAD_DQCOEFF(in15, input); + LOAD_DQCOEFF(in23, input); + LOAD_DQCOEFF(in31, input); + + // checking if all entries are zero + zero_idx[0] = _mm_or_si128(in0, in1); + zero_idx[1] = _mm_or_si128(in2, in3); + zero_idx[2] = _mm_or_si128(in4, in5); + zero_idx[3] = _mm_or_si128(in6, in7); + zero_idx[4] = _mm_or_si128(in8, in9); + zero_idx[5] = _mm_or_si128(in10, in11); + zero_idx[6] = _mm_or_si128(in12, in13); + zero_idx[7] = _mm_or_si128(in14, in15); + zero_idx[8] = _mm_or_si128(in16, in17); + zero_idx[9] = _mm_or_si128(in18, in19); + zero_idx[10] = _mm_or_si128(in20, in21); + zero_idx[11] = _mm_or_si128(in22, in23); + zero_idx[12] = _mm_or_si128(in24, in25); + zero_idx[13] = _mm_or_si128(in26, in27); + zero_idx[14] = _mm_or_si128(in28, in29); + zero_idx[15] = _mm_or_si128(in30, in31); + + zero_idx[0] = _mm_or_si128(zero_idx[0], zero_idx[1]); + zero_idx[1] = _mm_or_si128(zero_idx[2], zero_idx[3]); + zero_idx[2] = _mm_or_si128(zero_idx[4], zero_idx[5]); + zero_idx[3] = _mm_or_si128(zero_idx[6], zero_idx[7]); + zero_idx[4] = _mm_or_si128(zero_idx[8], zero_idx[9]); + zero_idx[5] = _mm_or_si128(zero_idx[10], zero_idx[11]); + zero_idx[6] = _mm_or_si128(zero_idx[12], zero_idx[13]); + zero_idx[7] = _mm_or_si128(zero_idx[14], zero_idx[15]); + + zero_idx[8] = _mm_or_si128(zero_idx[0], zero_idx[1]); + zero_idx[9] = _mm_or_si128(zero_idx[2], zero_idx[3]); + zero_idx[10] = _mm_or_si128(zero_idx[4], zero_idx[5]); + zero_idx[11] = _mm_or_si128(zero_idx[6], zero_idx[7]); + zero_idx[12] = _mm_or_si128(zero_idx[8], zero_idx[9]); + zero_idx[13] = _mm_or_si128(zero_idx[10], zero_idx[11]); + zero_idx[14] = _mm_or_si128(zero_idx[12], zero_idx[13]); + + zero_idx[0] = _mm_unpackhi_epi64(zero_idx[14], zero_idx[14]); + zero_idx[1] = _mm_or_si128(zero_idx[0], zero_idx[14]); + zero_idx[2] = _mm_srli_epi64(zero_idx[1], 32); + zero_flag[0] = _mm_cvtsi128_si32(zero_idx[1]); + zero_flag[1] = _mm_cvtsi128_si32(zero_idx[2]); + + if (!zero_flag[0] && !zero_flag[1]) { + col[i32 + 0] = _mm_setzero_si128(); + col[i32 + 1] = _mm_setzero_si128(); + col[i32 + 2] = _mm_setzero_si128(); + col[i32 + 3] = _mm_setzero_si128(); + col[i32 + 4] = _mm_setzero_si128(); + col[i32 + 5] = _mm_setzero_si128(); + col[i32 + 6] = _mm_setzero_si128(); + col[i32 + 7] = _mm_setzero_si128(); + col[i32 + 8] = _mm_setzero_si128(); + col[i32 + 9] = _mm_setzero_si128(); + col[i32 + 10] = _mm_setzero_si128(); + col[i32 + 11] = _mm_setzero_si128(); + col[i32 + 12] = _mm_setzero_si128(); + col[i32 + 13] = _mm_setzero_si128(); + col[i32 + 14] = _mm_setzero_si128(); + col[i32 + 15] = _mm_setzero_si128(); + col[i32 + 16] = _mm_setzero_si128(); + col[i32 + 17] = _mm_setzero_si128(); + col[i32 + 18] = _mm_setzero_si128(); + col[i32 + 19] = _mm_setzero_si128(); + col[i32 + 20] = _mm_setzero_si128(); + col[i32 + 21] = _mm_setzero_si128(); + col[i32 + 22] = _mm_setzero_si128(); + col[i32 + 23] = _mm_setzero_si128(); + col[i32 + 24] = _mm_setzero_si128(); + col[i32 + 25] = _mm_setzero_si128(); + col[i32 + 26] = _mm_setzero_si128(); + col[i32 + 27] = _mm_setzero_si128(); + col[i32 + 28] = _mm_setzero_si128(); + col[i32 + 29] = 
_mm_setzero_si128(); + col[i32 + 30] = _mm_setzero_si128(); + col[i32 + 31] = _mm_setzero_si128(); + continue; + } // Transpose 32x8 block to 8x32 block TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, @@ -3239,38 +3376,38 @@ void vp9_short_idct32x32_add_sse2(int16_t *input, uint8_t *dest, int stride) { // final stage if (i < 4) { // 1_D: Store 32 intermediate results for each 8x32 block. - col[i * 32 + 0] = _mm_add_epi16(stp1_0, stp1_31); - col[i * 32 + 1] = _mm_add_epi16(stp1_1, stp1_30); - col[i * 32 + 2] = _mm_add_epi16(stp1_2, stp1_29); - col[i * 32 + 3] = _mm_add_epi16(stp1_3, stp1_28); - col[i * 32 + 4] = _mm_add_epi16(stp1_4, stp1_27); - col[i * 32 + 5] = _mm_add_epi16(stp1_5, stp1_26); - col[i * 32 + 6] = _mm_add_epi16(stp1_6, stp1_25); - col[i * 32 + 7] = _mm_add_epi16(stp1_7, stp1_24); - col[i * 32 + 8] = _mm_add_epi16(stp1_8, stp1_23); - col[i * 32 + 9] = _mm_add_epi16(stp1_9, stp1_22); - col[i * 32 + 10] = _mm_add_epi16(stp1_10, stp1_21); - col[i * 32 + 11] = _mm_add_epi16(stp1_11, stp1_20); - col[i * 32 + 12] = _mm_add_epi16(stp1_12, stp1_19); - col[i * 32 + 13] = _mm_add_epi16(stp1_13, stp1_18); - col[i * 32 + 14] = _mm_add_epi16(stp1_14, stp1_17); - col[i * 32 + 15] = _mm_add_epi16(stp1_15, stp1_16); - col[i * 32 + 16] = _mm_sub_epi16(stp1_15, stp1_16); - col[i * 32 + 17] = _mm_sub_epi16(stp1_14, stp1_17); - col[i * 32 + 18] = _mm_sub_epi16(stp1_13, stp1_18); - col[i * 32 + 19] = _mm_sub_epi16(stp1_12, stp1_19); - col[i * 32 + 20] = _mm_sub_epi16(stp1_11, stp1_20); - col[i * 32 + 21] = _mm_sub_epi16(stp1_10, stp1_21); - col[i * 32 + 22] = _mm_sub_epi16(stp1_9, stp1_22); - col[i * 32 + 23] = _mm_sub_epi16(stp1_8, stp1_23); - col[i * 32 + 24] = _mm_sub_epi16(stp1_7, stp1_24); - col[i * 32 + 25] = _mm_sub_epi16(stp1_6, stp1_25); - col[i * 32 + 26] = _mm_sub_epi16(stp1_5, stp1_26); - col[i * 32 + 27] = _mm_sub_epi16(stp1_4, stp1_27); - col[i * 32 + 28] = _mm_sub_epi16(stp1_3, stp1_28); - col[i * 32 + 29] = _mm_sub_epi16(stp1_2, stp1_29); - col[i * 32 + 30] = _mm_sub_epi16(stp1_1, stp1_30); - col[i * 32 + 31] = _mm_sub_epi16(stp1_0, stp1_31); + col[i32 + 0] = _mm_add_epi16(stp1_0, stp1_31); + col[i32 + 1] = _mm_add_epi16(stp1_1, stp1_30); + col[i32 + 2] = _mm_add_epi16(stp1_2, stp1_29); + col[i32 + 3] = _mm_add_epi16(stp1_3, stp1_28); + col[i32 + 4] = _mm_add_epi16(stp1_4, stp1_27); + col[i32 + 5] = _mm_add_epi16(stp1_5, stp1_26); + col[i32 + 6] = _mm_add_epi16(stp1_6, stp1_25); + col[i32 + 7] = _mm_add_epi16(stp1_7, stp1_24); + col[i32 + 8] = _mm_add_epi16(stp1_8, stp1_23); + col[i32 + 9] = _mm_add_epi16(stp1_9, stp1_22); + col[i32 + 10] = _mm_add_epi16(stp1_10, stp1_21); + col[i32 + 11] = _mm_add_epi16(stp1_11, stp1_20); + col[i32 + 12] = _mm_add_epi16(stp1_12, stp1_19); + col[i32 + 13] = _mm_add_epi16(stp1_13, stp1_18); + col[i32 + 14] = _mm_add_epi16(stp1_14, stp1_17); + col[i32 + 15] = _mm_add_epi16(stp1_15, stp1_16); + col[i32 + 16] = _mm_sub_epi16(stp1_15, stp1_16); + col[i32 + 17] = _mm_sub_epi16(stp1_14, stp1_17); + col[i32 + 18] = _mm_sub_epi16(stp1_13, stp1_18); + col[i32 + 19] = _mm_sub_epi16(stp1_12, stp1_19); + col[i32 + 20] = _mm_sub_epi16(stp1_11, stp1_20); + col[i32 + 21] = _mm_sub_epi16(stp1_10, stp1_21); + col[i32 + 22] = _mm_sub_epi16(stp1_9, stp1_22); + col[i32 + 23] = _mm_sub_epi16(stp1_8, stp1_23); + col[i32 + 24] = _mm_sub_epi16(stp1_7, stp1_24); + col[i32 + 25] = _mm_sub_epi16(stp1_6, stp1_25); + col[i32 + 26] = _mm_sub_epi16(stp1_5, stp1_26); + col[i32 + 27] = _mm_sub_epi16(stp1_4, stp1_27); + col[i32 + 28] = 
_mm_sub_epi16(stp1_3, stp1_28); + col[i32 + 29] = _mm_sub_epi16(stp1_2, stp1_29); + col[i32 + 30] = _mm_sub_epi16(stp1_1, stp1_30); + col[i32 + 31] = _mm_sub_epi16(stp1_0, stp1_31); } else { const __m128i zero = _mm_setzero_si128(); diff --git a/libvpx/vp9/common/x86/vp9_intrapred_ssse3.asm b/libvpx/vp9/common/x86/vp9_intrapred_ssse3.asm index bc8ed5c1f..8ba26f310 100644 --- a/libvpx/vp9/common/x86/vp9_intrapred_ssse3.asm +++ b/libvpx/vp9/common/x86/vp9_intrapred_ssse3.asm @@ -10,6 +10,31 @@ %include "third_party/x86inc/x86inc.asm" +SECTION_RODATA + +pb_1: times 16 db 1 +pw_2: times 8 dw 2 +pb_7m1: times 8 db 7, -1 +pb_15: times 16 db 15 + +sh_b01234577: db 0, 1, 2, 3, 4, 5, 7, 7 +sh_b12345677: db 1, 2, 3, 4, 5, 6, 7, 7 +sh_b23456777: db 2, 3, 4, 5, 6, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0 +sh_b0123456777777777: db 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7 +sh_b1234567777777777: db 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 +sh_b2345677777777777: db 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 +sh_b2w01234577: db 0, -1, 1, -1, 2, -1, 3, -1, 4, -1, 5, -1, 7, -1, 7, -1 +sh_b2w12345677: db 1, -1, 2, -1, 3, -1, 4, -1, 5, -1, 6, -1, 7, -1, 7, -1 +sh_b2w23456777: db 2, -1, 3, -1, 4, -1, 5, -1, 6, -1, 7, -1, 7, -1, 7, -1 +sh_b2w01234567: db 0, -1, 1, -1, 2, -1, 3, -1, 4, -1, 5, -1, 6, -1, 7, -1 +sh_b2w12345678: db 1, -1, 2, -1, 3, -1, 4, -1, 5, -1, 6, -1, 7, -1, 8, -1 +sh_b2w23456789: db 2, -1, 3, -1, 4, -1, 5, -1, 6, -1, 7, -1, 8, -1, 9, -1 +sh_b2w89abcdef: db 8, -1, 9, -1, 10, -1, 11, -1, 12, -1, 13, -1, 14, -1, 15, -1 +sh_b2w9abcdeff: db 9, -1, 10, -1, 11, -1, 12, -1, 13, -1, 14, -1, 15, -1, 15, -1 +sh_b2wabcdefff: db 10, -1, 11, -1, 12, -1, 13, -1, 14, -1, 15, -1, 15, -1, 15, -1 +sh_b123456789abcdeff: db 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15 +sh_b23456789abcdefff: db 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 15 + SECTION .text INIT_MMX ssse3 @@ -85,3 +110,182 @@ cglobal h_predictor_32x32, 2, 4, 3, dst, stride, line, left inc lineq jnz .loop REP_RET + +INIT_MMX ssse3 +cglobal d45_predictor_4x4, 3, 3, 4, dst, stride, above + movq m0, [aboveq] + pshufb m2, m0, [sh_b23456777] + pshufb m1, m0, [sh_b01234577] + pshufb m0, [sh_b12345677] + pavgb m3, m2, m1 + pxor m2, m1 + pand m2, [pb_1] + psubb m3, m2 + pavgb m0, m3 + + ; store 4 lines + movd [dstq ], m0 + psrlq m0, 8 + movd [dstq+strideq], m0 + lea dstq, [dstq+strideq*2] + psrlq m0, 8 + movd [dstq ], m0 + psrlq m0, 8 + movd [dstq+strideq], m0 + RET + +INIT_MMX ssse3 +cglobal d45_predictor_8x8, 3, 3, 4, dst, stride, above + movq m0, [aboveq] + mova m1, [sh_b12345677] + DEFINE_ARGS dst, stride, stride3, line + lea stride3q, [strideq*3] + pshufb m2, m0, [sh_b23456777] + pavgb m3, m2, m0 + pxor m2, m0 + pshufb m0, m1 + pand m2, [pb_1] + psubb m3, m2 + pavgb m0, m3 + + ; store 4 lines + movq [dstq ], m0 + pshufb m0, m1 + movq [dstq+strideq ], m0 + pshufb m0, m1 + movq [dstq+strideq*2], m0 + pshufb m0, m1 + movq [dstq+stride3q ], m0 + pshufb m0, m1 + lea dstq, [dstq+strideq*4] + + ; store next 4 lines + movq [dstq ], m0 + pshufb m0, m1 + movq [dstq+strideq ], m0 + pshufb m0, m1 + movq [dstq+strideq*2], m0 + pshufb m0, m1 + movq [dstq+stride3q ], m0 + RET + +INIT_XMM ssse3 +cglobal d45_predictor_16x16, 3, 5, 4, dst, stride, above, dst8, line + mova m0, [aboveq] + DEFINE_ARGS dst, stride, stride3, dst8, line + lea stride3q, [strideq*3] + lea dst8q, [dstq+strideq*8] + mova m1, [sh_b123456789abcdeff] + pshufb m2, m0, [sh_b23456789abcdefff] + pavgb m3, m2, m0 + pxor m2, m0 + pshufb m0, m1 + pand m2, [pb_1] + psubb m3, m2 + 
pavgb m0, m3 + + ; first 4 lines and first half of 3rd 4 lines + mov lined, 2 +.loop: + mova [dstq ], m0 + movhps [dst8q ], m0 + pshufb m0, m1 + mova [dstq +strideq ], m0 + movhps [dst8q+strideq ], m0 + pshufb m0, m1 + mova [dstq +strideq*2 ], m0 + movhps [dst8q+strideq*2 ], m0 + pshufb m0, m1 + mova [dstq +stride3q ], m0 + movhps [dst8q+stride3q ], m0 + pshufb m0, m1 + lea dstq, [dstq +strideq*4] + lea dst8q, [dst8q+strideq*4] + dec lined + jnz .loop + + ; bottom-right 8x8 block + movhps [dstq +8], m0 + movhps [dstq+strideq +8], m0 + movhps [dstq+strideq*2+8], m0 + movhps [dstq+stride3q +8], m0 + lea dstq, [dstq+strideq*4] + movhps [dstq +8], m0 + movhps [dstq+strideq +8], m0 + movhps [dstq+strideq*2+8], m0 + movhps [dstq+stride3q +8], m0 + RET + +INIT_XMM ssse3 +cglobal d45_predictor_32x32, 3, 5, 7, dst, stride, above, dst16, line + mova m0, [aboveq] + mova m4, [aboveq+16] + DEFINE_ARGS dst, stride, stride3, dst16, line + lea stride3q, [strideq*3] + lea dst16q, [dstq +strideq*8] + lea dst16q, [dst16q+strideq*8] + mova m1, [sh_b123456789abcdeff] + pshufb m2, m4, [sh_b23456789abcdefff] + pavgb m3, m2, m4 + pxor m2, m4 + palignr m5, m4, m0, 1 + palignr m6, m4, m0, 2 + pshufb m4, m1 + pand m2, [pb_1] + psubb m3, m2 + pavgb m4, m3 + pavgb m3, m0, m6 + pxor m0, m6 + pand m0, [pb_1] + psubb m3, m0 + pavgb m5, m3 + + ; write 4x4 lines (and the first half of the second 4x4 lines) + mov lined, 4 +.loop: + mova [dstq ], m5 + mova [dstq +16], m4 + mova [dst16q ], m4 + palignr m3, m4, m5, 1 + pshufb m4, m1 + mova [dstq +strideq ], m3 + mova [dstq +strideq +16], m4 + mova [dst16q+strideq ], m4 + palignr m5, m4, m3, 1 + pshufb m4, m1 + mova [dstq +strideq*2 ], m5 + mova [dstq +strideq*2+16], m4 + mova [dst16q+strideq*2 ], m4 + palignr m3, m4, m5, 1 + pshufb m4, m1 + mova [dstq +stride3q ], m3 + mova [dstq +stride3q +16], m4 + mova [dst16q+stride3q ], m4 + palignr m5, m4, m3, 1 + pshufb m4, m1 + lea dstq, [dstq +strideq*4] + lea dst16q, [dst16q+strideq*4] + dec lined + jnz .loop + + ; write second half of second 4x4 lines + mova [dstq +16], m4 + mova [dstq +strideq +16], m4 + mova [dstq +strideq*2+16], m4 + mova [dstq +stride3q +16], m4 + lea dstq, [dstq +strideq*4] + mova [dstq +16], m4 + mova [dstq +strideq +16], m4 + mova [dstq +strideq*2+16], m4 + mova [dstq +stride3q +16], m4 + lea dstq, [dstq +strideq*4] + mova [dstq +16], m4 + mova [dstq +strideq +16], m4 + mova [dstq +strideq*2+16], m4 + mova [dstq +stride3q +16], m4 + lea dstq, [dstq +strideq*4] + mova [dstq +16], m4 + mova [dstq +strideq +16], m4 + mova [dstq +strideq*2+16], m4 + mova [dstq +stride3q +16], m4 + RET diff --git a/libvpx/vp9/decoder/vp9_decodemv.c b/libvpx/vp9/decoder/vp9_decodemv.c index 6f0044a4a..a3e2ad39d 100644 --- a/libvpx/vp9/decoder/vp9_decodemv.c +++ b/libvpx/vp9/decoder/vp9_decodemv.c @@ -30,8 +30,12 @@ static MB_PREDICTION_MODE read_intra_mode(vp9_reader *r, const vp9_prob *p) { return (MB_PREDICTION_MODE)treed_read(r, vp9_intra_mode_tree, p); } -static MB_PREDICTION_MODE read_inter_mode(vp9_reader *r, const vp9_prob *p) { - return (MB_PREDICTION_MODE)treed_read(r, vp9_inter_mode_tree, p); +static MB_PREDICTION_MODE read_inter_mode(VP9_COMMON *cm, vp9_reader *r, + uint8_t context) { + MB_PREDICTION_MODE mode = treed_read(r, vp9_inter_mode_tree, + cm->fc.inter_mode_probs[context]); + ++cm->counts.inter_mode[context][inter_mode_offset(mode)]; + return mode; } static int read_segment_id(vp9_reader *r, const struct segmentation *seg) { @@ -43,9 +47,9 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, 
const uint8_t context = vp9_get_pred_context_tx_size(xd); const vp9_prob *tx_probs = get_tx_probs(bsize, context, &cm->fc.tx_probs); TX_SIZE tx_size = vp9_read(r, tx_probs[0]); - if (tx_size != TX_4X4 && bsize >= BLOCK_SIZE_MB16X16) { + if (tx_size != TX_4X4 && bsize >= BLOCK_16X16) { tx_size += vp9_read(r, tx_probs[1]); - if (tx_size != TX_8X8 && bsize >= BLOCK_SIZE_SB32X32) + if (tx_size != TX_8X8 && bsize >= BLOCK_32X32) tx_size += vp9_read(r, tx_probs[2]); } @@ -54,18 +58,18 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, } static TX_SIZE read_tx_size(VP9D_COMP *pbi, TX_MODE tx_mode, - BLOCK_SIZE_TYPE bsize, int select_cond, + BLOCK_SIZE_TYPE bsize, int allow_select, vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; - if (tx_mode == TX_MODE_SELECT && bsize >= BLOCK_SIZE_SB8X8 && select_cond) + if (allow_select && tx_mode == TX_MODE_SELECT && bsize >= BLOCK_8X8) return read_selected_tx_size(cm, xd, bsize, r); - else if (tx_mode >= ALLOW_32X32 && bsize >= BLOCK_SIZE_SB32X32) + else if (tx_mode >= ALLOW_32X32 && bsize >= BLOCK_32X32) return TX_32X32; - else if (tx_mode >= ALLOW_16X16 && bsize >= BLOCK_SIZE_MB16X16) + else if (tx_mode >= ALLOW_16X16 && bsize >= BLOCK_16X16) return TX_16X16; - else if (tx_mode >= ALLOW_8X8 && bsize >= BLOCK_SIZE_SB8X8) + else if (tx_mode >= ALLOW_8X8 && bsize >= BLOCK_8X8) return TX_8X8; else return TX_4X4; @@ -146,8 +150,8 @@ static uint8_t read_skip_coeff(VP9D_COMP *pbi, int segment_id, vp9_reader *r) { return skip_coeff; } -static void read_intra_mode_info(VP9D_COMP *pbi, MODE_INFO *m, - int mi_row, int mi_col, vp9_reader *r) { +static void read_intra_frame_mode_info(VP9D_COMP *pbi, MODE_INFO *m, + int mi_row, int mi_col, vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; MB_MODE_INFO *const mbmi = &m->mbmi; @@ -158,6 +162,7 @@ static void read_intra_mode_info(VP9D_COMP *pbi, MODE_INFO *m, mbmi->mb_skip_coeff = read_skip_coeff(pbi, mbmi->segment_id, r); mbmi->txfm_size = read_tx_size(pbi, cm->tx_mode, bsize, 1, r); mbmi->ref_frame[0] = INTRA_FRAME; + mbmi->ref_frame[1] = NONE; if (bsize >= BLOCK_SIZE_SB8X8) { const MB_PREDICTION_MODE A = above_block_mode(m, 0, mis); @@ -166,12 +171,12 @@ static void read_intra_mode_info(VP9D_COMP *pbi, MODE_INFO *m, mbmi->mode = read_intra_mode(r, vp9_kf_y_mode_prob[A][L]); } else { // Only 4x4, 4x8, 8x4 blocks - const int bw = 1 << b_width_log2(bsize); - const int bh = 1 << b_height_log2(bsize); + const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2 + const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; // 1 or 2 int idx, idy; - for (idy = 0; idy < 2; idy += bh) { - for (idx = 0; idx < 2; idx += bw) { + for (idy = 0; idy < 2; idy += num_4x4_h) { + for (idx = 0; idx < 2; idx += num_4x4_w) { const int ib = idy * 2 + idx; const MB_PREDICTION_MODE A = above_block_mode(m, ib, mis); const MB_PREDICTION_MODE L = (xd->left_available || idx) ? 
@@ -179,9 +184,9 @@ static void read_intra_mode_info(VP9D_COMP *pbi, MODE_INFO *m, const MB_PREDICTION_MODE b_mode = read_intra_mode(r, vp9_kf_y_mode_prob[A][L]); m->bmi[ib].as_mode = b_mode; - if (bh == 2) + if (num_4x4_h == 2) m->bmi[ib + 2].as_mode = b_mode; - if (bw == 2) + if (num_4x4_w == 2) m->bmi[ib + 1].as_mode = b_mode; } } @@ -228,16 +233,16 @@ static int read_mv_component(vp9_reader *r, static INLINE void read_mv(vp9_reader *r, MV *mv, const MV *ref, const nmv_context *ctx, - nmv_context_counts *counts, int usehp) { + nmv_context_counts *counts, int allow_hp) { const MV_JOINT_TYPE j = treed_read(r, vp9_mv_joint_tree, ctx->joints); + const int use_hp = allow_hp && vp9_use_mv_hp(ref); MV diff = {0, 0}; - usehp = usehp && vp9_use_mv_hp(ref); if (mv_joint_vertical(j)) - diff.row = read_mv_component(r, &ctx->comps[0], usehp); + diff.row = read_mv_component(r, &ctx->comps[0], use_hp); if (mv_joint_horizontal(j)) - diff.col = read_mv_component(r, &ctx->comps[1], usehp); + diff.col = read_mv_component(r, &ctx->comps[1], use_hp); vp9_inc_mv(&diff, counts); @@ -245,29 +250,30 @@ static INLINE void read_mv(vp9_reader *r, MV *mv, const MV *ref, mv->col = ref->col + diff.col; } -static void update_mv(vp9_reader *r, vp9_prob *p, vp9_prob upd_p) { - if (vp9_read(r, upd_p)) +static void update_mv(vp9_reader *r, vp9_prob *p) { + if (vp9_read(r, VP9_NMV_UPDATE_PROB)) *p = (vp9_read_literal(r, 7) << 1) | 1; } -static void read_mv_probs(vp9_reader *r, nmv_context *mvc, int usehp) { +static void read_mv_probs(vp9_reader *r, nmv_context *mvc, int allow_hp) { int i, j, k; for (j = 0; j < MV_JOINTS - 1; ++j) - update_mv(r, &mvc->joints[j], VP9_NMV_UPDATE_PROB); + update_mv(r, &mvc->joints[j]); for (i = 0; i < 2; ++i) { nmv_component *const comp = &mvc->comps[i]; - update_mv(r, &comp->sign, VP9_NMV_UPDATE_PROB); + update_mv(r, &comp->sign); + for (j = 0; j < MV_CLASSES - 1; ++j) - update_mv(r, &comp->classes[j], VP9_NMV_UPDATE_PROB); + update_mv(r, &comp->classes[j]); for (j = 0; j < CLASS0_SIZE - 1; ++j) - update_mv(r, &comp->class0[j], VP9_NMV_UPDATE_PROB); + update_mv(r, &comp->class0[j]); for (j = 0; j < MV_OFFSET_BITS; ++j) - update_mv(r, &comp->bits[j], VP9_NMV_UPDATE_PROB); + update_mv(r, &comp->bits[j]); } for (i = 0; i < 2; ++i) { @@ -275,23 +281,23 @@ static void read_mv_probs(vp9_reader *r, nmv_context *mvc, int usehp) { for (j = 0; j < CLASS0_SIZE; ++j) for (k = 0; k < 3; ++k) - update_mv(r, &comp->class0_fp[j][k], VP9_NMV_UPDATE_PROB); + update_mv(r, &comp->class0_fp[j][k]); for (j = 0; j < 3; ++j) - update_mv(r, &comp->fp[j], VP9_NMV_UPDATE_PROB); + update_mv(r, &comp->fp[j]); } - if (usehp) { + if (allow_hp) { for (i = 0; i < 2; ++i) { - update_mv(r, &mvc->comps[i].class0_hp, VP9_NMV_UPDATE_PROB); - update_mv(r, &mvc->comps[i].hp, VP9_NMV_UPDATE_PROB); + update_mv(r, &mvc->comps[i].class0_hp); + update_mv(r, &mvc->comps[i].hp); } } } // Read the referncence frame -static void read_ref_frame(VP9D_COMP *pbi, vp9_reader *r, - int segment_id, MV_REFERENCE_FRAME ref_frame[2]) { +static void read_ref_frames(VP9D_COMP *pbi, vp9_reader *r, + int segment_id, MV_REFERENCE_FRAME ref_frame[2]) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; FRAME_CONTEXT *const fc = &cm->fc; @@ -320,18 +326,19 @@ static void read_ref_frame(VP9D_COMP *pbi, vp9_reader *r, ref_frame[fix_ref_idx] = cm->comp_fixed_ref; ref_frame[!fix_ref_idx] = cm->comp_var_ref[b]; } else { - const int ref1_ctx = vp9_get_pred_context_single_ref_p1(xd); - ref_frame[1] = NONE; - if (vp9_read(r, 
fc->single_ref_prob[ref1_ctx][0])) { - const int ref2_ctx = vp9_get_pred_context_single_ref_p2(xd); - const int b = vp9_read(r, fc->single_ref_prob[ref2_ctx][1]); - ref_frame[0] = b ? ALTREF_FRAME : GOLDEN_FRAME; - counts->single_ref[ref1_ctx][0][1]++; - counts->single_ref[ref2_ctx][1][b]++; + const int ctx0 = vp9_get_pred_context_single_ref_p1(xd); + const int bit0 = vp9_read(r, fc->single_ref_prob[ctx0][0]); + ++counts->single_ref[ctx0][0][bit0]; + if (bit0) { + const int ctx1 = vp9_get_pred_context_single_ref_p2(xd); + const int bit1 = vp9_read(r, fc->single_ref_prob[ctx1][1]); + ref_frame[0] = bit1 ? ALTREF_FRAME : GOLDEN_FRAME; + ++counts->single_ref[ctx1][1][bit1]; } else { ref_frame[0] = LAST_FRAME; - counts->single_ref[ref1_ctx][0][0]++; } + + ref_frame[1] = NONE; } } } @@ -359,16 +366,6 @@ static INLINE COMPPREDMODE_TYPE read_comp_pred_mode(vp9_reader *r) { return mode; } -static INLINE void assign_and_clamp_mv(int_mv *dst, const int_mv *src, - int mb_to_left_edge, - int mb_to_right_edge, - int mb_to_top_edge, - int mb_to_bottom_edge) { - dst->as_int = src->as_int; - clamp_mv(dst, mb_to_left_edge, mb_to_right_edge, mb_to_top_edge, - mb_to_bottom_edge); -} - static INLINE INTERPOLATIONFILTERTYPE read_switchable_filter_type( VP9D_COMP *pbi, vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; @@ -380,32 +377,35 @@ static INLINE INTERPOLATIONFILTERTYPE read_switchable_filter_type( return vp9_switchable_interp[index]; } -static void read_intra_block_modes(VP9D_COMP *pbi, MODE_INFO *mi, - vp9_reader *r) { +static void read_intra_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi, + vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; MB_MODE_INFO *const mbmi = &mi->mbmi; const BLOCK_SIZE_TYPE bsize = mi->mbmi.sb_type; - const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); + + mbmi->ref_frame[0] = INTRA_FRAME; + mbmi->ref_frame[1] = NONE; if (bsize >= BLOCK_SIZE_SB8X8) { - const int size_group = MIN(3, MIN(bwl, bhl)); + const int size_group = size_group_lookup[bsize]; mbmi->mode = read_intra_mode(r, cm->fc.y_mode_prob[size_group]); cm->counts.y_mode[size_group][mbmi->mode]++; } else { // Only 4x4, 4x8, 8x4 blocks - const int bw = 1 << bwl, bh = 1 << bhl; + const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2 + const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; // 1 or 2 int idx, idy; - for (idy = 0; idy < 2; idy += bh) { - for (idx = 0; idx < 2; idx += bw) { + for (idy = 0; idy < 2; idy += num_4x4_h) { + for (idx = 0; idx < 2; idx += num_4x4_w) { const int ib = idy * 2 + idx; const int b_mode = read_intra_mode(r, cm->fc.y_mode_prob[0]); mi->bmi[ib].as_mode = b_mode; cm->counts.y_mode[0][b_mode]++; - if (bh == 2) + if (num_4x4_h == 2) mi->bmi[ib + 2].as_mode = b_mode; - if (bw == 2) + if (num_4x4_w == 2) mi->bmi[ib + 1].as_mode = b_mode; } } @@ -416,203 +416,197 @@ static void read_intra_block_modes(VP9D_COMP *pbi, MODE_INFO *mi, cm->counts.uv_mode[mbmi->mode][mbmi->uv_mode]++; } -static MV_REFERENCE_FRAME read_reference_frame(VP9D_COMP *pbi, int segment_id, - vp9_reader *r) { +static int read_is_inter_block(VP9D_COMP *pbi, int segment_id, vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; - MV_REFERENCE_FRAME ref; - if (!vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_REF_FRAME)) { - const int ctx = vp9_get_pred_context_intra_inter(xd); - ref = (MV_REFERENCE_FRAME) - vp9_read(r, vp9_get_pred_prob_intra_inter(cm, xd)); - cm->counts.intra_inter[ctx][ref != INTRA_FRAME]++; + if (vp9_segfeature_active(&xd->seg, 
segment_id, SEG_LVL_REF_FRAME)) { + return vp9_get_segdata(&xd->seg, segment_id, SEG_LVL_REF_FRAME) != + INTRA_FRAME; } else { - ref = (MV_REFERENCE_FRAME) vp9_get_segdata(&xd->seg, segment_id, - SEG_LVL_REF_FRAME) != INTRA_FRAME; + const int ctx = vp9_get_pred_context_intra_inter(xd); + const int is_inter = vp9_read(r, vp9_get_pred_prob_intra_inter(cm, xd)); + ++cm->counts.intra_inter[ctx][is_inter]; + return is_inter; } - return ref; } -static void read_inter_mode_info(VP9D_COMP *pbi, MODE_INFO *mi, - int mi_row, int mi_col, vp9_reader *r) { +static void read_inter_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi, + int mi_row, int mi_col, vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; nmv_context *const nmvc = &cm->fc.nmvc; MB_MODE_INFO *const mbmi = &mi->mbmi; - int_mv *const mv0 = &mbmi->mv[0]; int_mv *const mv1 = &mbmi->mv[1]; - const BLOCK_SIZE_TYPE bsize = mi->mbmi.sb_type; - const int bw = 1 << b_width_log2(bsize); - const int bh = 1 << b_height_log2(bsize); - - int idx, idy; + const BLOCK_SIZE_TYPE bsize = mbmi->sb_type; + const int allow_hp = xd->allow_high_precision_mv; - mbmi->segment_id = read_inter_segment_id(pbi, mi_row, mi_col, r); - mbmi->mb_skip_coeff = read_skip_coeff(pbi, mbmi->segment_id, r); - mbmi->ref_frame[0] = read_reference_frame(pbi, mbmi->segment_id, r); - mbmi->ref_frame[1] = NONE; - mbmi->txfm_size = read_tx_size(pbi, cm->tx_mode, bsize, - (!mbmi->mb_skip_coeff || mbmi->ref_frame[0] == INTRA_FRAME), r); + int_mv nearest, nearby, best_mv; + int_mv nearest_second, nearby_second, best_mv_second; + uint8_t inter_mode_ctx; + MV_REFERENCE_FRAME ref0, ref1; - if (mbmi->ref_frame[0] != INTRA_FRAME) { - int_mv nearest, nearby, best_mv; - int_mv nearest_second, nearby_second, best_mv_second; - vp9_prob *mv_ref_p; - MV_REFERENCE_FRAME ref0, ref1; + read_ref_frames(pbi, r, mbmi->segment_id, mbmi->ref_frame); + ref0 = mbmi->ref_frame[0]; + ref1 = mbmi->ref_frame[1]; - read_ref_frame(pbi, r, mbmi->segment_id, mbmi->ref_frame); - ref0 = mbmi->ref_frame[0]; - ref1 = mbmi->ref_frame[1]; + vp9_find_mv_refs(cm, xd, mi, xd->prev_mode_info_context, + ref0, mbmi->ref_mvs[ref0], cm->ref_frame_sign_bias, + mi_row, mi_col); - vp9_find_mv_refs(cm, xd, mi, xd->prev_mode_info_context, - ref0, mbmi->ref_mvs[ref0], cm->ref_frame_sign_bias); + inter_mode_ctx = mbmi->mb_mode_context[ref0]; - mv_ref_p = cm->fc.inter_mode_probs[mbmi->mb_mode_context[ref0]]; + if (vp9_segfeature_active(&xd->seg, mbmi->segment_id, SEG_LVL_SKIP)) + mbmi->mode = ZEROMV; + else if (bsize >= BLOCK_SIZE_SB8X8) + mbmi->mode = read_inter_mode(cm, r, inter_mode_ctx); - if (vp9_segfeature_active(&xd->seg, mbmi->segment_id, SEG_LVL_SKIP)) { - mbmi->mode = ZEROMV; - } else if (bsize >= BLOCK_SIZE_SB8X8) { - mbmi->mode = read_inter_mode(r, mv_ref_p); - vp9_accum_mv_refs(cm, mbmi->mode, mbmi->mb_mode_context[ref0]); - } - mbmi->uv_mode = DC_PRED; + mbmi->uv_mode = DC_PRED; - // nearest, nearby - if (bsize < BLOCK_SIZE_SB8X8 || mbmi->mode != ZEROMV) { - vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref0], &nearest, &nearby); - best_mv.as_int = mbmi->ref_mvs[ref0][0].as_int; - } + // nearest, nearby + if (bsize < BLOCK_SIZE_SB8X8 || mbmi->mode != ZEROMV) { + vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref0], &nearest, &nearby); + best_mv.as_int = mbmi->ref_mvs[ref0][0].as_int; + } - mbmi->interp_filter = cm->mcomp_filter_type == SWITCHABLE - ? read_switchable_filter_type(pbi, r) - : cm->mcomp_filter_type; + mbmi->interp_filter = cm->mcomp_filter_type == SWITCHABLE + ? 
read_switchable_filter_type(pbi, r) + : cm->mcomp_filter_type; - if (ref1 > INTRA_FRAME) { - vp9_find_mv_refs(cm, xd, mi, xd->prev_mode_info_context, - ref1, mbmi->ref_mvs[ref1], cm->ref_frame_sign_bias); + if (ref1 > INTRA_FRAME) { + vp9_find_mv_refs(cm, xd, mi, xd->prev_mode_info_context, + ref1, mbmi->ref_mvs[ref1], cm->ref_frame_sign_bias, + mi_row, mi_col); - if (bsize < BLOCK_SIZE_SB8X8 || mbmi->mode != ZEROMV) { - vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref1], - &nearest_second, &nearby_second); - best_mv_second.as_int = mbmi->ref_mvs[ref1][0].as_int; - } + if (bsize < BLOCK_SIZE_SB8X8 || mbmi->mode != ZEROMV) { + vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref1], + &nearest_second, &nearby_second); + best_mv_second.as_int = mbmi->ref_mvs[ref1][0].as_int; } + } + if (bsize < BLOCK_SIZE_SB8X8) { + const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2 + const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; // 1 or 2 + int idx, idy; + for (idy = 0; idy < 2; idy += num_4x4_h) { + for (idx = 0; idx < 2; idx += num_4x4_w) { + int_mv blockmv, secondmv; + const int j = idy * 2 + idx; + const int b_mode = read_inter_mode(cm, r, inter_mode_ctx); - if (mbmi->sb_type < BLOCK_SIZE_SB8X8) { - for (idy = 0; idy < 2; idy += bh) { - for (idx = 0; idx < 2; idx += bw) { - int_mv blockmv, secondmv; - const int j = idy * 2 + idx; - const int blockmode = read_inter_mode(r, mv_ref_p); + if (b_mode == NEARESTMV || b_mode == NEARMV) { + vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest, &nearby, j, 0, + mi_row, mi_col); - vp9_accum_mv_refs(cm, blockmode, mbmi->mb_mode_context[ref0]); - if (blockmode == NEARESTMV || blockmode == NEARMV) { - vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest, &nearby, j, 0); - if (ref1 > 0) - vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest_second, - &nearby_second, j, 1); - } - - switch (blockmode) { - case NEWMV: - read_mv(r, &blockmv.as_mv, &best_mv.as_mv, nmvc, - &cm->counts.mv, xd->allow_high_precision_mv); - - if (ref1 > 0) - read_mv(r, &secondmv.as_mv, &best_mv_second.as_mv, nmvc, - &cm->counts.mv, xd->allow_high_precision_mv); - break; - case NEARESTMV: - blockmv.as_int = nearest.as_int; - if (ref1 > 0) - secondmv.as_int = nearest_second.as_int; - break; - case NEARMV: - blockmv.as_int = nearby.as_int; - if (ref1 > 0) - secondmv.as_int = nearby_second.as_int; - break; - case ZEROMV: - blockmv.as_int = 0; - if (ref1 > 0) - secondmv.as_int = 0; - break; - default: - assert(!"Invalid inter mode value"); - } - mi->bmi[j].as_mv[0].as_int = blockmv.as_int; if (ref1 > 0) - mi->bmi[j].as_mv[1].as_int = secondmv.as_int; - - if (bh == 2) - mi->bmi[j + 2] = mi->bmi[j]; - if (bw == 2) - mi->bmi[j + 1] = mi->bmi[j]; - mi->mbmi.mode = blockmode; + vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest_second, + &nearby_second, j, 1, + mi_row, mi_col); } - } - mv0->as_int = mi->bmi[3].as_mv[0].as_int; - mv1->as_int = mi->bmi[3].as_mv[1].as_int; - } else { - const int mb_to_top_edge = xd->mb_to_top_edge - LEFT_TOP_MARGIN; - const int mb_to_bottom_edge = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN; - const int mb_to_left_edge = xd->mb_to_left_edge - LEFT_TOP_MARGIN; - const int mb_to_right_edge = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN; - - switch (mbmi->mode) { - case NEARMV: - // Clip "next_nearest" so that it does not extend to far out of image - assign_and_clamp_mv(mv0, &nearby, mb_to_left_edge, - mb_to_right_edge, - mb_to_top_edge, - mb_to_bottom_edge); - if (ref1 > 0) - assign_and_clamp_mv(mv1, &nearby_second, mb_to_left_edge, - mb_to_right_edge, - mb_to_top_edge, - 
mb_to_bottom_edge); - break; - - case NEARESTMV: - // Clip "next_nearest" so that it does not extend to far out of image - assign_and_clamp_mv(mv0, &nearest, mb_to_left_edge, - mb_to_right_edge, - mb_to_top_edge, - mb_to_bottom_edge); - if (ref1 > 0) - assign_and_clamp_mv(mv1, &nearest_second, mb_to_left_edge, - mb_to_right_edge, - mb_to_top_edge, - mb_to_bottom_edge); - break; - - case ZEROMV: - mv0->as_int = 0; - if (ref1 > 0) - mv1->as_int = 0; - break; + switch (b_mode) { + case NEWMV: + read_mv(r, &blockmv.as_mv, &best_mv.as_mv, nmvc, + &cm->counts.mv, allow_hp); - case NEWMV: - read_mv(r, &mv0->as_mv, &best_mv.as_mv, nmvc, &cm->counts.mv, - xd->allow_high_precision_mv); - if (ref1 > 0) - read_mv(r, &mv1->as_mv, &best_mv_second.as_mv, nmvc, - &cm->counts.mv, xd->allow_high_precision_mv); - break; - default: - assert(!"Invalid inter mode value"); + if (ref1 > 0) + read_mv(r, &secondmv.as_mv, &best_mv_second.as_mv, nmvc, + &cm->counts.mv, allow_hp); + break; + case NEARESTMV: + blockmv.as_int = nearest.as_int; + if (ref1 > 0) + secondmv.as_int = nearest_second.as_int; + break; + case NEARMV: + blockmv.as_int = nearby.as_int; + if (ref1 > 0) + secondmv.as_int = nearby_second.as_int; + break; + case ZEROMV: + blockmv.as_int = 0; + if (ref1 > 0) + secondmv.as_int = 0; + break; + default: + assert(!"Invalid inter mode value"); + } + mi->bmi[j].as_mv[0].as_int = blockmv.as_int; + if (ref1 > 0) + mi->bmi[j].as_mv[1].as_int = secondmv.as_int; + + if (num_4x4_h == 2) + mi->bmi[j + 2] = mi->bmi[j]; + if (num_4x4_w == 2) + mi->bmi[j + 1] = mi->bmi[j]; + mi->mbmi.mode = b_mode; } } + + mv0->as_int = mi->bmi[3].as_mv[0].as_int; + mv1->as_int = mi->bmi[3].as_mv[1].as_int; } else { - mv0->as_int = 0; // required for left and above block mv - read_intra_block_modes(pbi, mi, r); + switch (mbmi->mode) { + case NEARMV: + mv0->as_int = nearby.as_int; + clamp_mv2(&mv0->as_mv, xd); + + if (ref1 > 0) { + mv1->as_int = nearby_second.as_int; + clamp_mv2(&mv1->as_mv, xd); + } + break; + + case NEARESTMV: + mv0->as_int = nearest.as_int; + clamp_mv2(&mv0->as_mv, xd); + + if (ref1 > 0) { + mv1->as_int = nearest_second.as_int; + clamp_mv2(&mv1->as_mv, xd); + } + break; + + case ZEROMV: + mv0->as_int = 0; + if (ref1 > 0) + mv1->as_int = 0; + break; + + case NEWMV: + read_mv(r, &mv0->as_mv, &best_mv.as_mv, nmvc, &cm->counts.mv, allow_hp); + if (ref1 > 0) + read_mv(r, &mv1->as_mv, &best_mv_second.as_mv, nmvc, &cm->counts.mv, + allow_hp); + break; + default: + assert(!"Invalid inter mode value"); + } } } +static void read_inter_frame_mode_info(VP9D_COMP *pbi, MODE_INFO *mi, + int mi_row, int mi_col, vp9_reader *r) { + VP9_COMMON *const cm = &pbi->common; + MB_MODE_INFO *const mbmi = &mi->mbmi; + int inter_block; + + mbmi->mv[0].as_int = 0; + mbmi->mv[1].as_int = 0; + mbmi->segment_id = read_inter_segment_id(pbi, mi_row, mi_col, r); + mbmi->mb_skip_coeff = read_skip_coeff(pbi, mbmi->segment_id, r); + inter_block = read_is_inter_block(pbi, mbmi->segment_id, r); + mbmi->txfm_size = read_tx_size(pbi, cm->tx_mode, mbmi->sb_type, + !mbmi->mb_skip_coeff || !inter_block, r); + + if (inter_block) + read_inter_block_mode_info(pbi, mi, mi_row, mi_col, r); + else + read_intra_block_mode_info(pbi, mi, r); +} + static void read_comp_pred(VP9_COMMON *cm, vp9_reader *r) { int i; @@ -690,9 +684,9 @@ void vp9_read_mode_info(VP9D_COMP* pbi, int mi_row, int mi_col, vp9_reader *r) { int x, y; if (cm->frame_type == KEY_FRAME || cm->intra_only) - read_intra_mode_info(pbi, mi, mi_row, mi_col, r); + read_intra_frame_mode_info(pbi, mi, mi_row, 
mi_col, r); else - read_inter_mode_info(pbi, mi, mi_row, mi_col, r); + read_inter_frame_mode_info(pbi, mi, mi_row, mi_col, r); for (y = 0; y < y_mis; y++) for (x = !y; x < x_mis; x++) diff --git a/libvpx/vp9/decoder/vp9_decodemv.h b/libvpx/vp9/decoder/vp9_decodemv.h index 4073d9e04..462d2e398 100644 --- a/libvpx/vp9/decoder/vp9_decodemv.h +++ b/libvpx/vp9/decoder/vp9_decodemv.h @@ -12,6 +12,7 @@ #define VP9_DECODER_VP9_DECODEMV_H_ #include "vp9/decoder/vp9_onyxd_int.h" +#include "vp9/decoder/vp9_dboolhuff.h" void vp9_prepare_read_mode_info(VP9D_COMP* pbi, vp9_reader *r); diff --git a/libvpx/vp9/decoder/vp9_decodframe.c b/libvpx/vp9/decoder/vp9_decodframe.c index ffec8ea44..feb602402 100644 --- a/libvpx/vp9/decoder/vp9_decodframe.c +++ b/libvpx/vp9/decoder/vp9_decodframe.c @@ -31,8 +31,11 @@ #include "vp9/decoder/vp9_detokenize.h" #include "vp9/decoder/vp9_decodemv.h" #include "vp9/decoder/vp9_dsubexp.h" +#include "vp9/decoder/vp9_idct_blk.h" #include "vp9/decoder/vp9_onyxd_int.h" #include "vp9/decoder/vp9_read_bit_buffer.h" +#include "vp9/decoder/vp9_thread.h" +#include "vp9/decoder/vp9_treereader.h" static int read_be32(const uint8_t *p) { return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]; @@ -59,17 +62,17 @@ static void read_tx_probs(struct tx_probs *tx_probs, vp9_reader *r) { int i, j; for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - for (j = 0; j < TX_SIZE_MAX_SB - 3; ++j) + for (j = 0; j < TX_SIZES - 3; ++j) if (vp9_read(r, VP9_MODE_UPDATE_PROB)) vp9_diff_update_prob(r, &tx_probs->p8x8[i][j]); for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - for (j = 0; j < TX_SIZE_MAX_SB - 2; ++j) + for (j = 0; j < TX_SIZES - 2; ++j) if (vp9_read(r, VP9_MODE_UPDATE_PROB)) vp9_diff_update_prob(r, &tx_probs->p16x16[i][j]); for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - for (j = 0; j < TX_SIZE_MAX_SB - 1; ++j) + for (j = 0; j < TX_SIZES - 1; ++j) if (vp9_read(r, VP9_MODE_UPDATE_PROB)) vp9_diff_update_prob(r, &tx_probs->p32x32[i][j]); } @@ -138,8 +141,8 @@ static void decode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize, const int mode = plane == 0 ? 
mi->mbmi.mode : mi->mbmi.uv_mode; - if (plane == 0 && mi->mbmi.sb_type < BLOCK_SIZE_SB8X8) { - assert(bsize == BLOCK_SIZE_SB8X8); + if (plane == 0 && mi->mbmi.sb_type < BLOCK_8X8) { + assert(bsize == BLOCK_8X8); b_mode = mi->bmi[raster_block].as_mode; } else { b_mode = mode; @@ -223,7 +226,7 @@ static void decode_modes_b(VP9D_COMP *pbi, int mi_row, int mi_col, vp9_reader *r, BLOCK_SIZE_TYPE bsize) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; - const int less8x8 = bsize < BLOCK_SIZE_SB8X8; + const int less8x8 = bsize < BLOCK_8X8; MB_MODE_INFO *mbmi; if (less8x8) @@ -234,12 +237,12 @@ static void decode_modes_b(VP9D_COMP *pbi, int mi_row, int mi_col, vp9_read_mode_info(pbi, mi_row, mi_col, r); if (less8x8) - bsize = BLOCK_SIZE_SB8X8; + bsize = BLOCK_8X8; // Has to be called after set_offsets mbmi = &xd->mode_info_context->mbmi; - if (mbmi->ref_frame[0] == INTRA_FRAME) { + if (!is_inter_block(mbmi)) { // Intra reconstruction decode_tokens(pbi, bsize, r); foreach_transformed_block(xd, bsize, decode_block_intra, xd); @@ -280,12 +283,12 @@ static void decode_modes_sb(VP9D_COMP *pbi, int mi_row, int mi_col, if (mi_row >= pc->mi_rows || mi_col >= pc->mi_cols) return; - if (bsize < BLOCK_SIZE_SB8X8) { + if (bsize < BLOCK_8X8) { if (xd->ab_index != 0) return; } else { int pl; - const int idx = check_bsize_coverage(pc, xd, mi_row, mi_col, bsize); + const int idx = check_bsize_coverage(pc, mi_row, mi_col, bsize); set_partition_seg_context(pc, xd, mi_row, mi_col); pl = partition_plane_context(xd, bsize); @@ -332,8 +335,8 @@ static void decode_modes_sb(VP9D_COMP *pbi, int mi_row, int mi_col, } // update partition context - if (bsize >= BLOCK_SIZE_SB8X8 && - (bsize == BLOCK_SIZE_SB8X8 || partition != PARTITION_SPLIT)) { + if (bsize >= BLOCK_8X8 && + (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) { set_partition_seg_context(pc, xd, mi_row, mi_col); update_partition_context(xd, subsize, bsize); } @@ -499,7 +502,7 @@ static INTERPOLATIONFILTERTYPE read_interp_filter_type( : vp9_rb_read_literal(rb, 2); } -static void read_frame_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb, +static void read_frame_size(struct vp9_read_bit_buffer *rb, int *width, int *height) { const int w = vp9_rb_read_literal(rb, 16) + 1; const int h = vp9_rb_read_literal(rb, 16) + 1; @@ -507,12 +510,11 @@ static void read_frame_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb, *height = h; } -static void setup_display_size(VP9D_COMP *pbi, struct vp9_read_bit_buffer *rb) { - VP9_COMMON *const cm = &pbi->common; +static void setup_display_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { cm->display_width = cm->width; cm->display_height = cm->height; if (vp9_rb_read_bit(rb)) - read_frame_size(cm, rb, &cm->display_width, &cm->display_height); + read_frame_size(rb, &cm->display_width, &cm->display_height); } static void apply_frame_size(VP9D_COMP *pbi, int width, int height) { @@ -548,10 +550,9 @@ static void apply_frame_size(VP9D_COMP *pbi, int width, int height) { static void setup_frame_size(VP9D_COMP *pbi, struct vp9_read_bit_buffer *rb) { - VP9_COMMON *const cm = &pbi->common; int width, height; - read_frame_size(cm, rb, &width, &height); - setup_display_size(pbi, rb); + read_frame_size(rb, &width, &height); + setup_display_size(&pbi->common, rb); apply_frame_size(pbi, width, height); } @@ -572,21 +573,29 @@ static void setup_frame_size_with_refs(VP9D_COMP *pbi, } if (!found) - read_frame_size(cm, rb, &width, &height); + read_frame_size(rb, &width, &height); if (!width || !height) 
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Referenced frame with invalid size"); - setup_display_size(pbi, rb); + setup_display_size(cm, rb); apply_frame_size(pbi, width, height); } static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) { + const int num_threads = pbi->oxcf.max_threads; VP9_COMMON *const pc = &pbi->common; int mi_row, mi_col; if (pbi->do_loopfilter_inline) { + if (num_threads > 1) { + LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; + lf_data->frame_buffer = &pbi->common.yv12_fb[pbi->common.new_fb_idx]; + lf_data->cm = pc; + lf_data->xd = pbi->mb; + lf_data->y_only = 0; + } vp9_loop_filter_frame_init(pc, &pbi->mb, pbi->mb.lf.filter_level); } @@ -597,21 +606,37 @@ static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) { vpx_memset(pc->left_seg_context, 0, sizeof(pc->left_seg_context)); for (mi_col = pc->cur_tile_mi_col_start; mi_col < pc->cur_tile_mi_col_end; mi_col += MI_BLOCK_SIZE) { - decode_modes_sb(pbi, mi_row, mi_col, r, BLOCK_SIZE_SB64X64); + decode_modes_sb(pbi, mi_row, mi_col, r, BLOCK_64X64); } if (pbi->do_loopfilter_inline) { - YV12_BUFFER_CONFIG *const fb = - &pbi->common.yv12_fb[pbi->common.new_fb_idx]; // delay the loopfilter by 1 macroblock row. const int lf_start = mi_row - MI_BLOCK_SIZE; if (lf_start < 0) continue; - vp9_loop_filter_rows(fb, pc, &pbi->mb, lf_start, mi_row, 0); + + if (num_threads > 1) { + LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; + + vp9_worker_sync(&pbi->lf_worker); + lf_data->start = lf_start; + lf_data->stop = mi_row; + pbi->lf_worker.hook = vp9_loop_filter_worker; + vp9_worker_launch(&pbi->lf_worker); + } else { + YV12_BUFFER_CONFIG *const fb = + &pbi->common.yv12_fb[pbi->common.new_fb_idx]; + vp9_loop_filter_rows(fb, pc, &pbi->mb, lf_start, mi_row, 0); + } } } if (pbi->do_loopfilter_inline) { YV12_BUFFER_CONFIG *const fb = &pbi->common.yv12_fb[pbi->common.new_fb_idx]; + if (num_threads > 1) { + // TODO(jzern): since the loop filter is delayed one mb row, this will be + // forced to wait for the last row scheduled in the for loop. 
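The inline loop-filter path in decode_tile() above stays one superblock row behind decoding, since the horizontal edge between two superblock rows can only be filtered once both rows are reconstructed. A simplified view of that schedule, with decode_sb_row() and filter_rows() as hypothetical placeholders rather than functions from this change:

    for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) {
      decode_sb_row(mi_row);                            /* reconstruct one SB row   */
      if (mi_row - MI_BLOCK_SIZE >= 0)
        filter_rows(mi_row - MI_BLOCK_SIZE, mi_row);    /* filter the previous row  */
    }
    filter_rows(mi_row - MI_BLOCK_SIZE, total_mi_rows); /* filter the delayed tail  */

When max_threads > 1 the per-row filter_rows() step is handed to lf_worker with vp9_worker_launch() instead of running inline, which is why the tail below first needs the vp9_worker_sync() that the TODO refers to.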
+ vp9_worker_sync(&pbi->lf_worker); + } vp9_loop_filter_rows(fb, pc, &pbi->mb, mi_row - MI_BLOCK_SIZE, pc->mi_rows, 0); } @@ -994,7 +1019,6 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { if (!keyframe && !pc->intra_only) { vp9_adapt_mode_probs(pc); - vp9_adapt_mode_context(pc); vp9_adapt_mv_probs(pc, xd->allow_high_precision_mv); } } diff --git a/libvpx/vp9/decoder/vp9_detokenize.c b/libvpx/vp9/decoder/vp9_detokenize.c index 01c1db0b7..002164307 100644 --- a/libvpx/vp9/decoder/vp9_detokenize.c +++ b/libvpx/vp9/decoder/vp9_detokenize.c @@ -15,8 +15,10 @@ #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_seg_common.h" +#include "vp9/decoder/vp9_dboolhuff.h" #include "vp9/decoder/vp9_detokenize.h" #include "vp9/decoder/vp9_onyxd_int.h" +#include "vp9/decoder/vp9_treereader.h" #define EOB_CONTEXT_NODE 0 #define ZERO_CONTEXT_NODE 1 @@ -73,7 +75,7 @@ DECLARE_ALIGNED(16, extern const uint8_t, #define WRITE_COEF_CONTINUE(val, token) \ { \ qcoeff_ptr[scan[c]] = vp9_read_and_apply_sign(r, val) * \ - dq[c > 0] / (1 + (txfm_size == TX_32X32)); \ + dq[c > 0] / (1 + (tx_size == TX_32X32)); \ INCREMENT_COUNT(token); \ c++; \ continue; \ @@ -88,33 +90,24 @@ DECLARE_ALIGNED(16, extern const uint8_t, static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, vp9_reader *r, int block_idx, PLANE_TYPE type, int seg_eob, int16_t *qcoeff_ptr, - TX_SIZE txfm_size, const int16_t *dq, + TX_SIZE tx_size, const int16_t *dq, ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L) { FRAME_CONTEXT *const fc = &cm->fc; FRAME_COUNTS *const counts = &cm->counts; ENTROPY_CONTEXT above_ec, left_ec; - int pt, c = 0; - int band; - vp9_prob (*coef_probs)[PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES]; + const int ref = is_inter_block(&xd->mode_info_context->mbmi); + int band, pt, c = 0; + vp9_prob (*coef_probs)[PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES] = + fc->coef_probs[tx_size][type][ref]; vp9_prob coef_probs_full[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; - uint8_t load_map[COEF_BANDS][PREV_COEF_CONTEXTS] = { - {0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0}, - }; - + uint8_t load_map[COEF_BANDS][PREV_COEF_CONTEXTS] = { { 0 } }; vp9_prob *prob; - vp9_coeff_count_model *coef_counts; - const int ref = xd->mode_info_context->mbmi.ref_frame[0] != INTRA_FRAME; + vp9_coeff_count_model *coef_counts = counts->coef[tx_size]; const int16_t *scan, *nb; uint8_t token_cache[1024]; const uint8_t * band_translate; - coef_probs = fc->coef_probs[txfm_size][type][ref]; - coef_counts = counts->coef[txfm_size]; - switch (txfm_size) { + + switch (tx_size) { default: case TX_4X4: { scan = get_scan_4x4(get_tx_type_4x4(type, xd, block_idx)); @@ -125,22 +118,22 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, } case TX_8X8: { scan = get_scan_8x8(get_tx_type_8x8(type, xd)); - above_ec = (A[0] + A[1]) != 0; - left_ec = (L[0] + L[1]) != 0; + above_ec = !!*(uint16_t *)A; + left_ec = !!*(uint16_t *)L; band_translate = vp9_coefband_trans_8x8plus; break; } case TX_16X16: { scan = get_scan_16x16(get_tx_type_16x16(type, xd)); - above_ec = (A[0] + A[1] + A[2] + A[3]) != 0; - left_ec = (L[0] + L[1] + L[2] + L[3]) != 0; + above_ec = !!*(uint32_t *)A; + left_ec = !!*(uint32_t *)L; band_translate = vp9_coefband_trans_8x8plus; break; } case TX_32X32: scan = vp9_default_scan_32x32; - above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0; - left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0; + above_ec = 
!!*(uint64_t *)A; + left_ec = !!*(uint64_t *)L; band_translate = vp9_coefband_trans_8x8plus; break; } @@ -157,7 +150,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, pt = get_coef_context(nb, token_cache, c); band = get_coef_band(band_translate, c); prob = coef_probs[band][pt]; - counts->eob_branch[txfm_size][type][ref][band][pt]++; + counts->eob_branch[tx_size][type][ref][band][pt]++; if (!vp9_read(r, prob[EOB_CONTEXT_NODE])) break; @@ -276,7 +269,7 @@ static void decode_block(int plane, int block, const int mod = bw - ss_tx_size - pd->subsampling_x; const int aoff = (off & ((1 << mod) - 1)) << ss_tx_size; const int loff = (off >> mod) << ss_tx_size; - + const int tx_size_in_blocks = 1 << ss_tx_size; ENTROPY_CONTEXT *A = pd->above_context + aoff; ENTROPY_CONTEXT *L = pd->left_context + loff; const int eob = decode_coefs(&arg->pbi->common, xd, arg->r, block, @@ -285,10 +278,11 @@ static void decode_block(int plane, int block, ss_tx_size, pd->dequant, A, L); if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) { - set_contexts_on_border(xd, bsize, plane, ss_tx_size, eob, aoff, loff, A, L); + set_contexts_on_border(xd, bsize, plane, tx_size_in_blocks, eob, aoff, loff, + A, L); } else { int pt; - for (pt = 0; pt < (1 << ss_tx_size); pt++) + for (pt = 0; pt < tx_size_in_blocks; pt++) A[pt] = L[pt] = eob > 0; } pd->eobs[block] = eob; diff --git a/libvpx/vp9/decoder/vp9_detokenize.h b/libvpx/vp9/decoder/vp9_detokenize.h index d46b59635..f98fe8d4c 100644 --- a/libvpx/vp9/decoder/vp9_detokenize.h +++ b/libvpx/vp9/decoder/vp9_detokenize.h @@ -13,6 +13,7 @@ #define VP9_DECODER_VP9_DETOKENIZE_H_ #include "vp9/decoder/vp9_onyxd_int.h" +#include "vp9/decoder/vp9_dboolhuff.h" int vp9_decode_tokens(VP9D_COMP* pbi, vp9_reader *r, BLOCK_SIZE_TYPE bsize); diff --git a/libvpx/vp9/decoder/vp9_idct_blk.c b/libvpx/vp9/decoder/vp9_idct_blk.c index 0217919da..395e636b8 100644 --- a/libvpx/vp9/decoder/vp9_idct_blk.c +++ b/libvpx/vp9/decoder/vp9_idct_blk.c @@ -93,15 +93,11 @@ void vp9_idct_add_8x8_c(int16_t *input, uint8_t *dest, int stride, int eob) { if (eob) { if (eob == 1) { // DC only DCT coefficient - int16_t in = input[0]; - int16_t out; - - // Note: the idct1 will need to be modified accordingly whenever - // vp9_short_idct8x8_c() is modified. - vp9_short_idct1_8x8_c(&in, &out); + vp9_short_idct8x8_1_add(input, dest, stride); input[0] = 0; - - vp9_add_constant_residual_8x8(out, dest, stride); + } else if (eob <= 10) { + vp9_short_idct10_8x8_add(input, dest, stride); + vpx_memset(input, 0, 128); } else { vp9_short_idct8x8_add(input, dest, stride); vpx_memset(input, 0, 128); @@ -127,14 +123,11 @@ void vp9_idct_add_16x16_c(int16_t *input, uint8_t *dest, int stride, int eob) { if (eob) { if (eob == 1) { /* DC only DCT coefficient. */ - int16_t in = input[0]; - int16_t out; - /* Note: the idct1 will need to be modified accordingly whenever - * vp9_short_idct16x16() is modified. 
*/ - vp9_short_idct1_16x16_c(&in, &out); + vp9_short_idct16x16_1_add(input, dest, stride); input[0] = 0; - - vp9_add_constant_residual_16x16(out, dest, stride); + } else if (eob <= 10) { + vp9_short_idct10_16x16_add(input, dest, stride); + vpx_memset(input, 0, 512); } else { vp9_short_idct16x16_add(input, dest, stride); vpx_memset(input, 0, 512); diff --git a/libvpx/vp9/decoder/vp9_onyxd_if.c b/libvpx/vp9/decoder/vp9_onyxd_if.c index cb7292006..5a01dd790 100644 --- a/libvpx/vp9/decoder/vp9_onyxd_if.c +++ b/libvpx/vp9/decoder/vp9_onyxd_if.c @@ -8,9 +8,9 @@ * be found in the AUTHORS file in the root of the source tree. */ - -#include <stdio.h> #include <assert.h> +#include <limits.h> +#include <stdio.h> #include "vp9/common/vp9_onyxc_int.h" #if CONFIG_POSTPROC @@ -114,7 +114,7 @@ VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) { if (!pbi) return NULL; - vpx_memset(pbi, 0, sizeof(VP9D_COMP)); + vp9_zero(*pbi); if (setjmp(pbi->common.error.jmp)) { pbi->common.error.setjmp = 0; @@ -141,6 +141,16 @@ VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) { pbi->common.error.setjmp = 0; pbi->decoded_key_frame = 0; + if (pbi->oxcf.max_threads > 1) { + vp9_worker_init(&pbi->lf_worker); + pbi->lf_worker.data1 = vpx_malloc(sizeof(LFWorkerData)); + pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker; + if (pbi->lf_worker.data1 == NULL || !vp9_worker_reset(&pbi->lf_worker)) { + vp9_remove_decompressor(pbi); + return NULL; + } + } + return pbi; } @@ -154,6 +164,8 @@ void vp9_remove_decompressor(VP9D_PTR ptr) { vpx_free(pbi->common.last_frame_seg_map); vp9_remove_common(&pbi->common); + vp9_worker_end(&pbi->lf_worker); + vpx_free(pbi->lf_worker.data1); vpx_free(pbi); } diff --git a/libvpx/vp9/decoder/vp9_onyxd_int.h b/libvpx/vp9/decoder/vp9_onyxd_int.h index 476006616..a051971a1 100644 --- a/libvpx/vp9/decoder/vp9_onyxd_int.h +++ b/libvpx/vp9/decoder/vp9_onyxd_int.h @@ -14,10 +14,8 @@ #include "./vpx_config.h" #include "vp9/common/vp9_onyxc_int.h" - -#include "vp9/decoder/vp9_idct_blk.h" #include "vp9/decoder/vp9_onyxd.h" -#include "vp9/decoder/vp9_treereader.h" +#include "vp9/decoder/vp9_thread.h" typedef struct VP9Decompressor { DECLARE_ALIGNED(16, MACROBLOCKD, mb); @@ -40,6 +38,7 @@ typedef struct VP9Decompressor { int initial_height; int do_loopfilter_inline; // apply loopfilter to available rows immediately + VP9Worker lf_worker; } VP9D_COMP; #endif // VP9_DECODER_VP9_TREEREADER_H_ diff --git a/libvpx/vp9/decoder/vp9_thread.c b/libvpx/vp9/decoder/vp9_thread.c new file mode 100644 index 000000000..dc3b68196 --- /dev/null +++ b/libvpx/vp9/decoder/vp9_thread.c @@ -0,0 +1,248 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. 
+// ----------------------------------------------------------------------------- +// +// Multi-threaded worker +// +// Original source: +// http://git.chromium.org/webm/libwebp.git +// 100644 blob eff8f2a8c20095aade3c292b0e9292dac6cb3587 src/utils/thread.c + + +#include <assert.h> +#include <string.h> // for memset() +#include "./vp9_thread.h" + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +#if CONFIG_MULTITHREAD + +#if defined(_WIN32) + +//------------------------------------------------------------------------------ +// simplistic pthread emulation layer + +#include <process.h> + +// _beginthreadex requires __stdcall +#define THREADFN unsigned int __stdcall +#define THREAD_RETURN(val) (unsigned int)((DWORD_PTR)val) + +static int pthread_create(pthread_t* const thread, const void* attr, + unsigned int (__stdcall *start)(void*), void* arg) { + (void)attr; + *thread = (pthread_t)_beginthreadex(NULL, /* void *security */ + 0, /* unsigned stack_size */ + start, + arg, + 0, /* unsigned initflag */ + NULL); /* unsigned *thrdaddr */ + if (*thread == NULL) return 1; + SetThreadPriority(*thread, THREAD_PRIORITY_ABOVE_NORMAL); + return 0; +} + +static int pthread_join(pthread_t thread, void** value_ptr) { + (void)value_ptr; + return (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0 || + CloseHandle(thread) == 0); +} + +// Mutex +static int pthread_mutex_init(pthread_mutex_t* const mutex, void* mutexattr) { + (void)mutexattr; + InitializeCriticalSection(mutex); + return 0; +} + +static int pthread_mutex_lock(pthread_mutex_t* const mutex) { + EnterCriticalSection(mutex); + return 0; +} + +static int pthread_mutex_unlock(pthread_mutex_t* const mutex) { + LeaveCriticalSection(mutex); + return 0; +} + +static int pthread_mutex_destroy(pthread_mutex_t* const mutex) { + DeleteCriticalSection(mutex); + return 0; +} + +// Condition +static int pthread_cond_destroy(pthread_cond_t* const condition) { + int ok = 1; + ok &= (CloseHandle(condition->waiting_sem_) != 0); + ok &= (CloseHandle(condition->received_sem_) != 0); + ok &= (CloseHandle(condition->signal_event_) != 0); + return !ok; +} + +static int pthread_cond_init(pthread_cond_t* const condition, void* cond_attr) { + (void)cond_attr; + condition->waiting_sem_ = CreateSemaphore(NULL, 0, 1, NULL); + condition->received_sem_ = CreateSemaphore(NULL, 0, 1, NULL); + condition->signal_event_ = CreateEvent(NULL, FALSE, FALSE, NULL); + if (condition->waiting_sem_ == NULL || + condition->received_sem_ == NULL || + condition->signal_event_ == NULL) { + pthread_cond_destroy(condition); + return 1; + } + return 0; +} + +static int pthread_cond_signal(pthread_cond_t* const condition) { + int ok = 1; + if (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) { + // a thread is waiting in pthread_cond_wait: allow it to be notified + ok = SetEvent(condition->signal_event_); + // wait until the event is consumed so the signaler cannot consume + // the event via its own pthread_cond_wait. 
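These emulated primitives only need to support the mutex-protected predicate loop used by thread_loop() and change_state() further down; both semaphores are created with a maximum count of 1, so the emulation assumes at most one thread waits on a condition at a time. For reference, the waiting pattern they are written for (a sketch, with the mutex, condition and flag assumed to be initialized elsewhere):

    static void wait_until_ready(pthread_mutex_t *mutex, pthread_cond_t *cond,
                                 const int *ready) {
      pthread_mutex_lock(mutex);
      while (!*ready)                     /* re-check the predicate after wakeup  */
        pthread_cond_wait(cond, mutex);   /* releases the mutex while blocked,
                                             re-acquires it before returning      */
      pthread_mutex_unlock(mutex);
    }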
+ ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) != + WAIT_OBJECT_0); + } + return !ok; +} + +static int pthread_cond_wait(pthread_cond_t* const condition, + pthread_mutex_t* const mutex) { + int ok; + // note that there is a consumer available so the signal isn't dropped in + // pthread_cond_signal + if (!ReleaseSemaphore(condition->waiting_sem_, 1, NULL)) + return 1; + // now unlock the mutex so pthread_cond_signal may be issued + pthread_mutex_unlock(mutex); + ok = (WaitForSingleObject(condition->signal_event_, INFINITE) == + WAIT_OBJECT_0); + ok &= ReleaseSemaphore(condition->received_sem_, 1, NULL); + pthread_mutex_lock(mutex); + return !ok; +} + +#else // _WIN32 +# define THREADFN void* +# define THREAD_RETURN(val) val +#endif + +//------------------------------------------------------------------------------ + +static THREADFN thread_loop(void *ptr) { // thread loop + VP9Worker* const worker = (VP9Worker*)ptr; + int done = 0; + while (!done) { + pthread_mutex_lock(&worker->mutex_); + while (worker->status_ == OK) { // wait in idling mode + pthread_cond_wait(&worker->condition_, &worker->mutex_); + } + if (worker->status_ == WORK) { + if (worker->hook) { + worker->had_error |= !worker->hook(worker->data1, worker->data2); + } + worker->status_ = OK; + } else if (worker->status_ == NOT_OK) { // finish the worker + done = 1; + } + // signal to the main thread that we're done (for Sync()) + pthread_cond_signal(&worker->condition_); + pthread_mutex_unlock(&worker->mutex_); + } + return THREAD_RETURN(NULL); // Thread is finished +} + +// main thread state control +static void change_state(VP9Worker* const worker, + VP9WorkerStatus new_status) { + // no-op when attempting to change state on a thread that didn't come up + if (worker->status_ < OK) return; + + pthread_mutex_lock(&worker->mutex_); + // wait for the worker to finish + while (worker->status_ != OK) { + pthread_cond_wait(&worker->condition_, &worker->mutex_); + } + // assign new status and release the working thread if needed + if (new_status != OK) { + worker->status_ = new_status; + pthread_cond_signal(&worker->condition_); + } + pthread_mutex_unlock(&worker->mutex_); +} + +#endif + +//------------------------------------------------------------------------------ + +void vp9_worker_init(VP9Worker* const worker) { + memset(worker, 0, sizeof(*worker)); + worker->status_ = NOT_OK; +} + +int vp9_worker_sync(VP9Worker* const worker) { +#if CONFIG_MULTITHREAD + change_state(worker, OK); +#endif + assert(worker->status_ <= OK); + return !worker->had_error; +} + +int vp9_worker_reset(VP9Worker* const worker) { + int ok = 1; + worker->had_error = 0; + if (worker->status_ < OK) { +#if CONFIG_MULTITHREAD + if (pthread_mutex_init(&worker->mutex_, NULL) || + pthread_cond_init(&worker->condition_, NULL)) { + return 0; + } + pthread_mutex_lock(&worker->mutex_); + ok = !pthread_create(&worker->thread_, NULL, thread_loop, worker); + if (ok) worker->status_ = OK; + pthread_mutex_unlock(&worker->mutex_); +#else + worker->status_ = OK; +#endif + } else if (worker->status_ > OK) { + ok = vp9_worker_sync(worker); + } + assert(!ok || (worker->status_ == OK)); + return ok; +} + +void vp9_worker_launch(VP9Worker* const worker) { +#if CONFIG_MULTITHREAD + change_state(worker, WORK); +#else + if (worker->hook) + worker->had_error |= !worker->hook(worker->data1, worker->data2); +#endif +} + +void vp9_worker_end(VP9Worker* const worker) { + if (worker->status_ >= OK) { +#if CONFIG_MULTITHREAD + change_state(worker, NOT_OK); + 
pthread_join(worker->thread_, NULL); + pthread_mutex_destroy(&worker->mutex_); + pthread_cond_destroy(&worker->condition_); +#else + worker->status_ = NOT_OK; +#endif + } + assert(worker->status_ == NOT_OK); +} + +//------------------------------------------------------------------------------ + +#if defined(__cplusplus) || defined(c_plusplus) +} // extern "C" +#endif diff --git a/libvpx/vp9/decoder/vp9_thread.h b/libvpx/vp9/decoder/vp9_thread.h new file mode 100644 index 000000000..a8f7e046a --- /dev/null +++ b/libvpx/vp9/decoder/vp9_thread.h @@ -0,0 +1,93 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Multi-threaded worker +// +// Original source: +// http://git.chromium.org/webm/libwebp.git +// 100644 blob 13a61a4c84194c3374080cbf03d881d3cd6af40d src/utils/thread.h + + +#ifndef VP9_DECODER_VP9_THREAD_H_ +#define VP9_DECODER_VP9_THREAD_H_ + +#include "vpx_config.h" + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +#if CONFIG_MULTITHREAD + +#if defined(_WIN32) + +#include <windows.h> +typedef HANDLE pthread_t; +typedef CRITICAL_SECTION pthread_mutex_t; +typedef struct { + HANDLE waiting_sem_; + HANDLE received_sem_; + HANDLE signal_event_; +} pthread_cond_t; + +#else + +#include <pthread.h> + +#endif /* _WIN32 */ +#endif /* CONFIG_MULTITHREAD */ + +// State of the worker thread object +typedef enum { + NOT_OK = 0, // object is unusable + OK, // ready to work + WORK // busy finishing the current task +} VP9WorkerStatus; + +// Function to be called by the worker thread. Takes two opaque pointers as +// arguments (data1 and data2), and should return false in case of error. +typedef int (*VP9WorkerHook)(void*, void*); + +// Synchronize object used to launch job in the worker thread +typedef struct { +#if CONFIG_MULTITHREAD + pthread_mutex_t mutex_; + pthread_cond_t condition_; + pthread_t thread_; +#endif + VP9WorkerStatus status_; + VP9WorkerHook hook; // hook to call + void* data1; // first argument passed to 'hook' + void* data2; // second argument passed to 'hook' + int had_error; // return value of the last call to 'hook' +} VP9Worker; + +// Must be called first, before any other method. +void vp9_worker_init(VP9Worker* const worker); +// Must be called to initialize the object and spawn the thread. Re-entrant. +// Will potentially launch the thread. Returns false in case of error. +int vp9_worker_reset(VP9Worker* const worker); +// Makes sure the previous work is finished. Returns true if worker->had_error +// was not set and no error condition was triggered by the working thread. +int vp9_worker_sync(VP9Worker* const worker); +// Triggers the thread to call hook() with data1 and data2 argument. These +// hook/data1/data2 can be changed at any time before calling this function, +// but not be changed afterward until the next call to vp9_worker_sync(). +void vp9_worker_launch(VP9Worker* const worker); +// Kill the thread and terminate the object. To use the object again, one +// must call vp9_worker_reset() again. 
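Taken together, the interface declared in this header is meant to be driven as init, reset, then any number of hook/launch/sync rounds, then end. A minimal usage sketch; my_hook, run_one_job and the NULL payloads are illustrative, only the VP9Worker type and the vp9_worker_* calls come from this header:

    /* assumes #include "vp9/decoder/vp9_thread.h" */
    static int my_hook(void *data1, void *data2) {
      (void)data1;
      (void)data2;
      return 1;                          /* non-zero means the job succeeded      */
    }

    static int run_one_job(void) {
      VP9Worker worker;
      int ok;
      vp9_worker_init(&worker);          /* must be called first                  */
      if (!vp9_worker_reset(&worker))    /* creates sync objects and the thread   */
        return 0;
      worker.hook = my_hook;
      worker.data1 = NULL;               /* first argument handed to the hook     */
      worker.data2 = NULL;               /* second argument handed to the hook    */
      vp9_worker_launch(&worker);        /* runs hook(data1, data2), asynchronously
                                            when CONFIG_MULTITHREAD is enabled    */
      /* ... other work on the calling thread ... */
      ok = vp9_worker_sync(&worker);     /* waits for the hook, 0 if it failed    */
      vp9_worker_end(&worker);           /* stops and joins the thread            */
      return ok;
    }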
+void vp9_worker_end(VP9Worker* const worker); + +//------------------------------------------------------------------------------ + +#if defined(__cplusplus) || defined(c_plusplus) +} // extern "C" +#endif + +#endif /* VP9_DECODER_VP9_THREAD_H_ */ diff --git a/libvpx/vp9/decoder/vp9_treereader.h b/libvpx/vp9/decoder/vp9_treereader.h index 4535688ea..710cc4cd0 100644 --- a/libvpx/vp9/decoder/vp9_treereader.h +++ b/libvpx/vp9/decoder/vp9_treereader.h @@ -15,7 +15,6 @@ #include "vp9/common/vp9_treecoder.h" #include "vp9/decoder/vp9_dboolhuff.h" -#define vp9_read_prob(r) ((vp9_prob)vp9_read_literal(r, 8)) #define vp9_read_and_apply_sign(r, value) (vp9_read_bit(r) ? -(value) : (value)) // Intent of tree data structure is to make decoding trivial. diff --git a/libvpx/vp9/encoder/vp9_bitstream.c b/libvpx/vp9/encoder/vp9_bitstream.c index ad0f6c531..98ef42074 100644 --- a/libvpx/vp9/encoder/vp9_bitstream.c +++ b/libvpx/vp9/encoder/vp9_bitstream.c @@ -44,16 +44,16 @@ unsigned __int64 Sectionbits[500]; int intra_mode_stats[VP9_INTRA_MODES] [VP9_INTRA_MODES] [VP9_INTRA_MODES]; -vp9_coeff_stats tree_update_hist[TX_SIZE_MAX_SB][BLOCK_TYPES]; +vp9_coeff_stats tree_update_hist[TX_SIZES][BLOCK_TYPES]; extern unsigned int active_section; #endif #ifdef MODE_STATS -int64_t tx_count_32x32p_stats[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB]; -int64_t tx_count_16x16p_stats[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB - 1]; -int64_t tx_count_8x8p_stats[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB - 2]; +int64_t tx_count_32x32p_stats[TX_SIZE_CONTEXTS][TX_SIZES]; +int64_t tx_count_16x16p_stats[TX_SIZE_CONTEXTS][TX_SIZES - 1]; +int64_t tx_count_8x8p_stats[TX_SIZE_CONTEXTS][TX_SIZES - 2]; int64_t switchable_interp_stats[VP9_SWITCHABLE_FILTERS+1] [VP9_SWITCHABLE_FILTERS]; @@ -70,17 +70,17 @@ void init_switchable_interp_stats() { static void update_tx_count_stats(VP9_COMMON *cm) { int i, j; for (i = 0; i < TX_SIZE_CONTEXTS; i++) { - for (j = 0; j < TX_SIZE_MAX_SB; j++) { + for (j = 0; j < TX_SIZES; j++) { tx_count_32x32p_stats[i][j] += cm->fc.tx_count_32x32p[i][j]; } } for (i = 0; i < TX_SIZE_CONTEXTS; i++) { - for (j = 0; j < TX_SIZE_MAX_SB - 1; j++) { + for (j = 0; j < TX_SIZES - 1; j++) { tx_count_16x16p_stats[i][j] += cm->fc.tx_count_16x16p[i][j]; } } for (i = 0; i < TX_SIZE_CONTEXTS; i++) { - for (j = 0; j < TX_SIZE_MAX_SB - 2; j++) { + for (j = 0; j < TX_SIZES - 2; j++) { tx_count_8x8p_stats[i][j] += cm->fc.tx_count_8x8p[i][j]; } } @@ -103,30 +103,30 @@ void write_tx_count_stats() { fclose(fp); printf( - "vp9_default_tx_count_32x32p[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB] = {\n"); + "vp9_default_tx_count_32x32p[TX_SIZE_CONTEXTS][TX_SIZES] = {\n"); for (i = 0; i < TX_SIZE_CONTEXTS; i++) { printf(" { "); - for (j = 0; j < TX_SIZE_MAX_SB; j++) { + for (j = 0; j < TX_SIZES; j++) { printf("%"PRId64", ", tx_count_32x32p_stats[i][j]); } printf("},\n"); } printf("};\n"); printf( - "vp9_default_tx_count_16x16p[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB-1] = {\n"); + "vp9_default_tx_count_16x16p[TX_SIZE_CONTEXTS][TX_SIZES-1] = {\n"); for (i = 0; i < TX_SIZE_CONTEXTS; i++) { printf(" { "); - for (j = 0; j < TX_SIZE_MAX_SB - 1; j++) { + for (j = 0; j < TX_SIZES - 1; j++) { printf("%"PRId64", ", tx_count_16x16p_stats[i][j]); } printf("},\n"); } printf("};\n"); printf( - "vp9_default_tx_count_8x8p[TX_SIZE_CONTEXTS][TX_SIZE_MAX_SB-2] = {\n"); + "vp9_default_tx_count_8x8p[TX_SIZE_CONTEXTS][TX_SIZES-2] = {\n"); for (i = 0; i < TX_SIZE_CONTEXTS; i++) { printf(" { "); - for (j = 0; j < TX_SIZE_MAX_SB - 2; j++) { + for (j = 0; j < TX_SIZES - 2; j++) { printf("%"PRId64", ", 
tx_count_8x8p_stats[i][j]); } printf("},\n"); @@ -169,7 +169,6 @@ void vp9_encode_unsigned_max(struct vp9_write_bit_buffer *wb, static void update_mode( vp9_writer *w, int n, - const struct vp9_token tok[/* n */], vp9_tree tree, vp9_prob Pnew[/* n-1 */], vp9_prob Pcur[/* n-1 */], @@ -194,20 +193,19 @@ static void update_mbintra_mode_probs(VP9_COMP* const cpi, unsigned int bct[VP9_INTRA_MODES - 1][2]; for (j = 0; j < BLOCK_SIZE_GROUPS; j++) - update_mode(bc, VP9_INTRA_MODES, vp9_intra_mode_encodings, - vp9_intra_mode_tree, pnew, + update_mode(bc, VP9_INTRA_MODES, vp9_intra_mode_tree, pnew, cm->fc.y_mode_prob[j], bct, (unsigned int *)cpi->y_mode_count[j]); } -static void write_selected_txfm_size(const VP9_COMP *cpi, TX_SIZE tx_size, - BLOCK_SIZE_TYPE bsize, vp9_writer *w) { +static void write_selected_tx_size(const VP9_COMP *cpi, TX_SIZE tx_size, + BLOCK_SIZE_TYPE bsize, vp9_writer *w) { const MACROBLOCKD *const xd = &cpi->mb.e_mbd; const vp9_prob *tx_probs = get_tx_probs2(xd, &cpi->common.fc.tx_probs); vp9_write(w, tx_size != TX_4X4, tx_probs[0]); - if (bsize >= BLOCK_SIZE_MB16X16 && tx_size != TX_4X4) { + if (bsize >= BLOCK_16X16 && tx_size != TX_4X4) { vp9_write(w, tx_size != TX_8X8, tx_probs[1]); - if (bsize >= BLOCK_SIZE_SB32X32 && tx_size != TX_8X8) + if (bsize >= BLOCK_32X32 && tx_size != TX_8X8) vp9_write(w, tx_size != TX_16X16, tx_probs[2]); } } @@ -265,12 +263,17 @@ static void update_switchable_interp_probs(VP9_COMP *const cpi, static void update_inter_mode_probs(VP9_COMMON *pc, vp9_writer* const bc) { int i, j; - for (i = 0; i < INTER_MODE_CONTEXTS; i++) { - for (j = 0; j < VP9_INTER_MODES - 1; j++) { + for (i = 0; i < INTER_MODE_CONTEXTS; ++i) { + unsigned int branch_ct[VP9_INTER_MODES - 1][2]; + vp9_prob new_prob[VP9_INTER_MODES - 1]; + + vp9_tree_probs_from_distribution(vp9_inter_mode_tree, + new_prob, branch_ct, + pc->counts.inter_mode[i], NEARESTMV); + + for (j = 0; j < VP9_INTER_MODES - 1; ++j) vp9_cond_prob_diff_update(bc, &pc->fc.inter_mode_probs[i][j], - VP9_MODE_UPDATE_PROB, - pc->counts.inter_mode[i][j]); - } + VP9_MODE_UPDATE_PROB, branch_ct[j]); } } @@ -393,8 +396,7 @@ static void encode_ref_frame(VP9_COMP *cpi, vp9_writer *bc) { // the reference frame is fully coded by the segment } -static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, - vp9_writer *bc, int mi_row, int mi_col) { +static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) { VP9_COMMON *const pc = &cpi->common; const nmv_context *nmvc = &pc->fc.nmvc; MACROBLOCK *const x = &cpi->mb; @@ -406,6 +408,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, const int segment_id = mi->segment_id; int skip_coeff; const BLOCK_SIZE_TYPE bsize = mi->sb_type; + const int allow_hp = xd->allow_high_precision_mv; x->partition_info = x->pi + (m - pc->mi); @@ -434,7 +437,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, if (bsize >= BLOCK_SIZE_SB8X8 && pc->tx_mode == TX_MODE_SELECT && !(rf != INTRA_FRAME && (skip_coeff || vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)))) { - write_selected_txfm_size(cpi, mi->txfm_size, bsize, bc); + write_selected_tx_size(cpi, mi->txfm_size, bsize, bc); } if (rf == INTRA_FRAME) { @@ -443,18 +446,17 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, #endif if (bsize >= BLOCK_SIZE_SB8X8) { - const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); - const int bsl = MIN(bwl, bhl); - write_intra_mode(bc, mode, pc->fc.y_mode_prob[MIN(3, bsl)]); + write_intra_mode(bc, mode, 
pc->fc.y_mode_prob[size_group_lookup[bsize]]); } else { int idx, idy; - int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; - int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; - for (idy = 0; idy < 2; idy += num_4x4_blocks_high) + const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; + const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; + for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { const MB_PREDICTION_MODE bm = m->bmi[idy * 2 + idx].as_mode; write_intra_mode(bc, bm, pc->fc.y_mode_prob[0]); } + } } write_intra_mode(bc, mi->uv_mode, pc->fc.uv_mode_prob[mode]); } else { @@ -470,7 +472,8 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) { if (bsize >= BLOCK_SIZE_SB8X8) { write_sb_mv_ref(bc, mode, mv_ref_p); - vp9_accum_mv_refs(&cpi->common, mode, mi->mb_mode_context[rf]); + ++pc->counts.inter_mode[mi->mb_mode_context[rf]] + [inter_mode_offset(mode)]; } } @@ -487,8 +490,8 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, int j; MB_PREDICTION_MODE blockmode; int_mv blockmv; - int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; - int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; + const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; + const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; int idx, idy; for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { @@ -496,19 +499,21 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, blockmode = x->partition_info->bmi[j].mode; blockmv = m->bmi[j].as_mv[0]; write_sb_mv_ref(bc, blockmode, mv_ref_p); - vp9_accum_mv_refs(&cpi->common, blockmode, mi->mb_mode_context[rf]); + ++pc->counts.inter_mode[mi->mb_mode_context[rf]] + [inter_mode_offset(blockmode)]; + if (blockmode == NEWMV) { #ifdef ENTROPY_STATS active_section = 11; #endif vp9_encode_mv(cpi, bc, &blockmv.as_mv, &mi->best_mv.as_mv, - nmvc, xd->allow_high_precision_mv); + nmvc, allow_hp); if (mi->ref_frame[1] > INTRA_FRAME) vp9_encode_mv(cpi, bc, &m->bmi[j].as_mv[1].as_mv, &mi->best_second_mv.as_mv, - nmvc, xd->allow_high_precision_mv); + nmvc, allow_hp); } } } @@ -516,21 +521,18 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, #ifdef ENTROPY_STATS active_section = 5; #endif - vp9_encode_mv(cpi, bc, - &mi->mv[0].as_mv, &mi->best_mv.as_mv, - nmvc, xd->allow_high_precision_mv); + vp9_encode_mv(cpi, bc, &mi->mv[0].as_mv, &mi->best_mv.as_mv, + nmvc, allow_hp); if (mi->ref_frame[1] > INTRA_FRAME) - vp9_encode_mv(cpi, bc, - &mi->mv[1].as_mv, &mi->best_second_mv.as_mv, - nmvc, xd->allow_high_precision_mv); + vp9_encode_mv(cpi, bc, &mi->mv[1].as_mv, &mi->best_second_mv.as_mv, + nmvc, allow_hp); } } } -static void write_mb_modes_kf(const VP9_COMP *cpi, - MODE_INFO *m, - vp9_writer *bc, int mi_row, int mi_col) { +static void write_mb_modes_kf(const VP9_COMP *cpi, MODE_INFO *m, + vp9_writer *bc) { const VP9_COMMON *const c = &cpi->common; const MACROBLOCKD *const xd = &cpi->mb.e_mbd; const int ym = m->mbmi.mode; @@ -543,7 +545,7 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, write_skip_coeff(cpi, segment_id, m, bc); if (m->mbmi.sb_type >= BLOCK_SIZE_SB8X8 && c->tx_mode == TX_MODE_SELECT) - write_selected_txfm_size(cpi, m->mbmi.txfm_size, m->mbmi.sb_type, bc); + write_selected_tx_size(cpi, m->mbmi.txfm_size, m->mbmi.sb_type, bc); if (m->mbmi.sb_type >= BLOCK_SIZE_SB8X8) { const 
MB_PREDICTION_MODE A = above_block_mode(m, 0, mis); @@ -552,11 +554,11 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, write_intra_mode(bc, ym, vp9_kf_y_mode_prob[A][L]); } else { int idx, idy; - int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[m->mbmi.sb_type]; - int num_4x4_blocks_high = num_4x4_blocks_high_lookup[m->mbmi.sb_type]; + const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[m->mbmi.sb_type]; + const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[m->mbmi.sb_type]; for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { - int i = idy * 2 + idx; + const int i = idy * 2 + idx; const MB_PREDICTION_MODE A = above_block_mode(m, i, mis); const MB_PREDICTION_MODE L = (xd->left_available || idx) ? left_block_mode(m, i) : DC_PRED; @@ -586,12 +588,12 @@ static void write_modes_b(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc, 1 << mi_height_log2(m->mbmi.sb_type), mi_col, 1 << mi_width_log2(m->mbmi.sb_type)); if ((cm->frame_type == KEY_FRAME) || cm->intra_only) { - write_mb_modes_kf(cpi, m, bc, mi_row, mi_col); + write_mb_modes_kf(cpi, m, bc); #ifdef ENTROPY_STATS active_section = 8; #endif } else { - pack_inter_mode_mvs(cpi, m, bc, mi_row, mi_col); + pack_inter_mode_mvs(cpi, m, bc); #ifdef ENTROPY_STATS active_section = 1; #endif @@ -625,7 +627,7 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc, if (bsize >= BLOCK_SIZE_SB8X8) { int pl; - const int idx = check_bsize_coverage(cm, xd, mi_row, mi_col, bsize); + const int idx = check_bsize_coverage(cm, mi_row, mi_col, bsize); set_partition_seg_context(cm, xd, mi_row, mi_col); pl = partition_plane_context(xd, bsize); // encode the partition information @@ -692,8 +694,7 @@ static void write_modes(VP9_COMP *cpi, vp9_writer* const bc, vp9_zero(c->left_seg_context); for (mi_col = c->cur_tile_mi_col_start; mi_col < c->cur_tile_mi_col_end; mi_col += MI_BLOCK_SIZE, m += MI_BLOCK_SIZE) - write_modes_sb(cpi, m, bc, tok, tok_end, mi_row, mi_col, - BLOCK_SIZE_SB64X64); + write_modes_sb(cpi, m, bc, tok, tok_end, mi_row, mi_col, BLOCK_64X64); } } @@ -726,12 +727,12 @@ static void print_prob_tree(vp9_coeff_probs *coef_probs, int block_types) { fclose(f); } -static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE txfm_size) { - vp9_coeff_probs_model *coef_probs = cpi->frame_coef_probs[txfm_size]; - vp9_coeff_count *coef_counts = cpi->coef_counts[txfm_size]; +static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size) { + vp9_coeff_probs_model *coef_probs = cpi->frame_coef_probs[tx_size]; + vp9_coeff_count *coef_counts = cpi->coef_counts[tx_size]; unsigned int (*eob_branch_ct)[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS] = - cpi->common.counts.eob_branch[txfm_size]; - vp9_coeff_stats *coef_branch_ct = cpi->frame_branch_ct[txfm_size]; + cpi->common.counts.eob_branch[tx_size]; + vp9_coeff_stats *coef_branch_ct = cpi->frame_branch_ct[tx_size]; vp9_prob full_probs[ENTROPY_NODES]; int i, j, k, l; @@ -756,9 +757,9 @@ static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE txfm_size) { if (!cpi->dummy_packing) { int t; for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) - context_counters[txfm_size][i][j][k][l][t] += + context_counters[tx_size][i][j][k][l][t] += coef_counts[i][j][k][l][t]; - context_counters[txfm_size][i][j][k][l][MAX_ENTROPY_TOKENS] += + context_counters[tx_size][i][j][k][l][MAX_ENTROPY_TOKENS] += eob_branch_ct[i][j][k][l]; } #endif @@ -1036,15 +1037,15 @@ static void encode_txfm_probs(VP9_COMP *cpi, vp9_writer *w) { // Probabilities if (cm->tx_mode == 
TX_MODE_SELECT) { int i, j; - unsigned int ct_8x8p[TX_SIZE_MAX_SB - 3][2]; - unsigned int ct_16x16p[TX_SIZE_MAX_SB - 2][2]; - unsigned int ct_32x32p[TX_SIZE_MAX_SB - 1][2]; + unsigned int ct_8x8p[TX_SIZES - 3][2]; + unsigned int ct_16x16p[TX_SIZES - 2][2]; + unsigned int ct_32x32p[TX_SIZES - 1][2]; for (i = 0; i < TX_SIZE_CONTEXTS; i++) { tx_counts_to_branch_counts_8x8(cm->counts.tx.p8x8[i], ct_8x8p); - for (j = 0; j < TX_SIZE_MAX_SB - 3; j++) + for (j = 0; j < TX_SIZES - 3; j++) vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p8x8[i][j], VP9_MODE_UPDATE_PROB, ct_8x8p[j]); } @@ -1052,14 +1053,14 @@ static void encode_txfm_probs(VP9_COMP *cpi, vp9_writer *w) { for (i = 0; i < TX_SIZE_CONTEXTS; i++) { tx_counts_to_branch_counts_16x16(cm->counts.tx.p16x16[i], ct_16x16p); - for (j = 0; j < TX_SIZE_MAX_SB - 2; j++) + for (j = 0; j < TX_SIZES - 2; j++) vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p16x16[i][j], VP9_MODE_UPDATE_PROB, ct_16x16p[j]); } for (i = 0; i < TX_SIZE_CONTEXTS; i++) { tx_counts_to_branch_counts_32x32(cm->counts.tx.p32x32[i], ct_32x32p); - for (j = 0; j < TX_SIZE_MAX_SB - 1; j++) + for (j = 0; j < TX_SIZES - 1; j++) vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p32x32[i][j], VP9_MODE_UPDATE_PROB, ct_32x32p[j]); } @@ -1422,7 +1423,7 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) { for (i = 0; i < NUM_PARTITION_CONTEXTS; ++i) { vp9_prob pnew[PARTITION_TYPES - 1]; unsigned int bct[PARTITION_TYPES - 1][2]; - update_mode(&header_bc, PARTITION_TYPES, vp9_partition_encodings, + update_mode(&header_bc, PARTITION_TYPES, vp9_partition_tree, pnew, fc->partition_prob[cm->frame_type][i], bct, (unsigned int *)cpi->partition_count[i]); diff --git a/libvpx/vp9/encoder/vp9_block.h b/libvpx/vp9/encoder/vp9_block.h index 4b49b17a2..3e377cf6f 100644 --- a/libvpx/vp9/encoder/vp9_block.h +++ b/libvpx/vp9/encoder/vp9_block.h @@ -47,7 +47,7 @@ typedef struct { int hybrid_pred_diff; int comp_pred_diff; int single_pred_diff; - int64_t txfm_rd_diff[NB_TXFM_MODES]; + int64_t tx_rd_diff[TX_MODES]; int64_t best_filter_diff[VP9_SWITCHABLE_FILTERS + 1]; // Bit flag for each mode whether it has high error in comparison to others. @@ -72,6 +72,11 @@ struct macroblock_plane { int16_t zbin_extra; }; +/* The [2] dimension is for whether we skip the EOB node (i.e. if previous + * coefficient in this block was zero) or not. */ +typedef unsigned int vp9_coeff_cost[BLOCK_TYPES][REF_TYPES][COEF_BANDS][2] + [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS]; + typedef struct macroblock MACROBLOCK; struct macroblock { struct macroblock_plane plane[MAX_MB_PLANE]; @@ -97,6 +102,7 @@ struct macroblock { int mv_best_ref_index[MAX_REF_FRAMES]; unsigned int max_mv_context[MAX_REF_FRAMES]; + unsigned int source_variance; int nmvjointcost[MV_JOINTS]; int nmvcosts[2][MV_VALS]; @@ -133,7 +139,7 @@ struct macroblock { unsigned char *active_ptr; // note that token_costs is the cost when eob node is skipped - vp9_coeff_count token_costs[TX_SIZE_MAX_SB][BLOCK_TYPES][2]; + vp9_coeff_cost token_costs[TX_SIZES]; int optimize; diff --git a/libvpx/vp9/encoder/vp9_encodeframe.c b/libvpx/vp9/encoder/vp9_encodeframe.c index 798adc1f3..66eae41da 100644 --- a/libvpx/vp9/encoder/vp9_encodeframe.c +++ b/libvpx/vp9/encoder/vp9_encodeframe.c @@ -60,11 +60,28 @@ static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x); * Eventually this should be replaced by custom no-reference routines, * which will be faster. 
*/ -static const uint8_t VP9_VAR_OFFS[16] = {128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128}; +static const uint8_t VP9_VAR_OFFS[64] = { + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128 +}; + +static unsigned int get_sb_variance(VP9_COMP *cpi, MACROBLOCK *x, + BLOCK_SIZE_TYPE bs) { + unsigned int var, sse; + var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, + x->plane[0].src.stride, + VP9_VAR_OFFS, 0, &sse); + return var >> num_pels_log2_lookup[bs]; +} // Original activity measure from Tim T's code. -static unsigned int tt_activity_measure(VP9_COMP *cpi, MACROBLOCK *x) { +static unsigned int tt_activity_measure(MACROBLOCK *x) { unsigned int act; unsigned int sse; /* TODO: This could also be done over smaller areas (8x8), but that would @@ -106,7 +123,7 @@ static unsigned int mb_activity_measure(VP9_COMP *cpi, MACROBLOCK *x, mb_activity = alt_activity_measure(cpi, x, use_dc_pred); } else { // Original activity measure from Tim T's code. - mb_activity = tt_activity_measure(cpi, x); + mb_activity = tt_activity_measure(x); } if (mb_activity < VP9_ACTIVITY_AVG_MIN) @@ -323,8 +340,8 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, int mb_mode_index = ctx->best_mode_index; const int mis = cpi->common.mode_info_stride; - const int mi_height = num_8x8_blocks_high_lookup[bsize]; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; assert(mi->mbmi.mode < MB_MODE_COUNT); assert(mb_mode_index < MAX_MODES); @@ -345,13 +362,13 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, } // FIXME(rbultje) I'm pretty sure this should go to the end of this block // (i.e. 
after the output_enabled) - if (bsize < BLOCK_SIZE_SB32X32) { - if (bsize < BLOCK_SIZE_MB16X16) - ctx->txfm_rd_diff[ALLOW_16X16] = ctx->txfm_rd_diff[ALLOW_8X8]; - ctx->txfm_rd_diff[ALLOW_32X32] = ctx->txfm_rd_diff[ALLOW_16X16]; + if (bsize < BLOCK_32X32) { + if (bsize < BLOCK_16X16) + ctx->tx_rd_diff[ALLOW_16X16] = ctx->tx_rd_diff[ALLOW_8X8]; + ctx->tx_rd_diff[ALLOW_32X32] = ctx->tx_rd_diff[ALLOW_16X16]; } - if (mbmi->ref_frame[0] != INTRA_FRAME && mbmi->sb_type < BLOCK_SIZE_SB8X8) { + if (is_inter_block(mbmi) && mbmi->sb_type < BLOCK_8X8) { *x->partition_info = ctx->partition_info; mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int; mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int; @@ -362,9 +379,8 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, return; if (!vp9_segfeature_active(&xd->seg, mbmi->segment_id, SEG_LVL_SKIP)) { - for (i = 0; i < NB_TXFM_MODES; i++) { - cpi->rd_tx_select_diff[i] += ctx->txfm_rd_diff[i]; - } + for (i = 0; i < TX_MODES; i++) + cpi->rd_tx_select_diff[i] += ctx->tx_rd_diff[i]; } if (cpi->common.frame_type == KEY_FRAME) { @@ -395,7 +411,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, } else { // Note how often each mode chosen as best cpi->mode_chosen_counts[mb_mode_index]++; - if (mbmi->ref_frame[0] != INTRA_FRAME + if (is_inter_block(mbmi) && (mbmi->sb_type < BLOCK_SIZE_SB8X8 || mbmi->mode == NEWMV)) { int_mv best_mv, best_second_mv; const MV_REFERENCE_FRAME rf1 = mbmi->ref_frame[0]; @@ -465,6 +481,7 @@ static void set_offsets(VP9_COMP *cpi, int mi_row, int mi_col, const int mb_row = mi_row >> 1; const int mb_col = mi_col >> 1; const int idx_map = mb_row * cm->mb_cols + mb_col; + const struct segmentation *const seg = &xd->seg; int i; // entropy context structures @@ -514,16 +531,16 @@ static void set_offsets(VP9_COMP *cpi, int mi_row, int mi_col, x->rdmult = cpi->RDMULT; /* segment ID */ - if (xd->seg.enabled) { - uint8_t *map = xd->seg.update_map ? cpi->segmentation_map - : cm->last_frame_seg_map; + if (seg->enabled) { + uint8_t *map = seg->update_map ? cpi->segmentation_map + : cm->last_frame_seg_map; mbmi->segment_id = vp9_get_segment_id(cm, map, bsize, mi_row, mi_col); vp9_mb_init_quantizer(cpi, x); - if (xd->seg.enabled && cpi->seg0_cnt > 0 - && !vp9_segfeature_active(&xd->seg, 0, SEG_LVL_REF_FRAME) - && vp9_segfeature_active(&xd->seg, 1, SEG_LVL_REF_FRAME)) { + if (seg->enabled && cpi->seg0_cnt > 0 + && !vp9_segfeature_active(seg, 0, SEG_LVL_REF_FRAME) + && vp9_segfeature_active(seg, 1, SEG_LVL_REF_FRAME)) { cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt; } else { const int y = mb_row & ~3; @@ -537,8 +554,11 @@ static void set_offsets(VP9_COMP *cpi, int mi_row, int mi_col, cpi->seg0_progress = ((y * mb_cols + x * 4 + p32 + p16 + tile_progress) << 16) / cm->MBs; } + + x->encode_breakout = cpi->segment_encode_breakout[mbmi->segment_id]; } else { mbmi->segment_id = 0; + x->encode_breakout = cpi->oxcf.encode_breakout; } } @@ -552,12 +572,17 @@ static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col, x->rd_search = 1; - if (bsize < BLOCK_SIZE_SB8X8) + if (bsize < BLOCK_SIZE_SB8X8) { + // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0 + // there is nothing to be done. 
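The source_variance value assigned a few lines below comes from get_sb_variance(), which runs the block through the per-size variance function against the flat 128-valued VP9_VAR_OFFS buffer and then normalizes by the pixel count. The arithmetic, written out as a plain scalar helper (hypothetical; the encoder actually dispatches through cpi->fn_ptr[bs].vf):

    /* Per-pixel variance of a w x h block around the flat value 128. */
    static unsigned int block_variance_vs_gray(const uint8_t *src, int stride,
                                               int w, int h) {
      int64_t sum = 0, sse = 0;
      int r, c;
      for (r = 0; r < h; ++r) {
        for (c = 0; c < w; ++c) {
          const int d = src[r * stride + c] - 128;  /* deviation from flat gray */
          sum += d;
          sse += d * d;
        }
      }
      /* variance = SSE - sum^2 / n, then divided once more by n per pixel */
      return (unsigned int)((sse - (sum * sum) / (w * h)) / (w * h));
    }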
if (xd->ab_index != 0) return; + } set_offsets(cpi, mi_row, mi_col, bsize); xd->mode_info_context->mbmi.sb_type = bsize; + + x->source_variance = get_sb_variance(cpi, x, bsize); if (cpi->oxcf.tuning == VP8_TUNE_SSIM) vp9_activity_masking(cpi, x); @@ -571,12 +596,12 @@ static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col, bsize, ctx, best_rd); } -static void update_stats(VP9_COMP *cpi, int mi_row, int mi_col) { - VP9_COMMON * const cm = &cpi->common; - MACROBLOCK * const x = &cpi->mb; - MACROBLOCKD * const xd = &x->e_mbd; +static void update_stats(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *mi = xd->mode_info_context; - MB_MODE_INFO * const mbmi = &mi->mbmi; + MB_MODE_INFO *const mbmi = &mi->mbmi; if (cm->frame_type != KEY_FRAME) { const int seg_ref_active = vp9_segfeature_active(&xd->seg, mbmi->segment_id, @@ -612,38 +637,38 @@ static void update_stats(VP9_COMP *cpi, int mi_row, int mi_col) { } // TODO(jingning): the variables used here are little complicated. need further -// refactoring on organizing the the temporary buffers, when recursive +// refactoring on organizing the temporary buffers, when recursive // partition down to 4x4 block size is enabled. static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { MACROBLOCKD * const xd = &x->e_mbd; switch (bsize) { - case BLOCK_SIZE_SB64X64: + case BLOCK_64X64: return &x->sb64_context; - case BLOCK_SIZE_SB64X32: + case BLOCK_64X32: return &x->sb64x32_context[xd->sb_index]; - case BLOCK_SIZE_SB32X64: + case BLOCK_32X64: return &x->sb32x64_context[xd->sb_index]; - case BLOCK_SIZE_SB32X32: + case BLOCK_32X32: return &x->sb32_context[xd->sb_index]; - case BLOCK_SIZE_SB32X16: + case BLOCK_32X16: return &x->sb32x16_context[xd->sb_index][xd->mb_index]; - case BLOCK_SIZE_SB16X32: + case BLOCK_16X32: return &x->sb16x32_context[xd->sb_index][xd->mb_index]; - case BLOCK_SIZE_MB16X16: + case BLOCK_16X16: return &x->mb_context[xd->sb_index][xd->mb_index]; - case BLOCK_SIZE_SB16X8: + case BLOCK_16X8: return &x->sb16x8_context[xd->sb_index][xd->mb_index][xd->b_index]; - case BLOCK_SIZE_SB8X16: + case BLOCK_8X16: return &x->sb8x16_context[xd->sb_index][xd->mb_index][xd->b_index]; - case BLOCK_SIZE_SB8X8: + case BLOCK_8X8: return &x->sb8x8_context[xd->sb_index][xd->mb_index][xd->b_index]; - case BLOCK_SIZE_SB8X4: + case BLOCK_8X4: return &x->sb8x4_context[xd->sb_index][xd->mb_index][xd->b_index]; - case BLOCK_SIZE_SB4X8: + case BLOCK_4X8: return &x->sb4x8_context[xd->sb_index][xd->mb_index][xd->b_index]; - case BLOCK_SIZE_AB4X4: + case BLOCK_4X4: return &x->ab4x4_context[xd->sb_index][xd->mb_index][xd->b_index]; default: assert(0); @@ -655,13 +680,13 @@ static BLOCK_SIZE_TYPE *get_sb_partitioning(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { MACROBLOCKD *xd = &x->e_mbd; switch (bsize) { - case BLOCK_SIZE_SB64X64: + case BLOCK_64X64: return &x->sb64_partitioning; - case BLOCK_SIZE_SB32X32: + case BLOCK_32X32: return &x->sb_partitioning[xd->sb_index]; - case BLOCK_SIZE_MB16X16: + case BLOCK_16X16: return &x->mb_partitioning[xd->sb_index][xd->mb_index]; - case BLOCK_SIZE_SB8X8: + case BLOCK_8X8: return &x->b_partitioning[xd->sb_index][xd->mb_index][xd->b_index]; default: assert(0); @@ -674,12 +699,12 @@ static void restore_context(VP9_COMP *cpi, int mi_row, int mi_col, ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8], BLOCK_SIZE_TYPE bsize) { - VP9_COMMON * const cm = &cpi->common; - MACROBLOCK * 
const x = &cpi->mb; - MACROBLOCKD * const xd = &x->e_mbd; + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; int p; - int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; - int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; + const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; + const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; int mi_width = num_8x8_blocks_wide_lookup[bsize]; int mi_height = num_8x8_blocks_high_lookup[bsize]; for (p = 0; p < MAX_MB_PLANE; p++) { @@ -705,12 +730,12 @@ static void save_context(VP9_COMP *cpi, int mi_row, int mi_col, ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8], BLOCK_SIZE_TYPE bsize) { - VP9_COMMON * const cm = &cpi->common; - MACROBLOCK * const x = &cpi->mb; - MACROBLOCKD * const xd = &x->e_mbd; + const VP9_COMMON *const cm = &cpi->common; + const MACROBLOCK *const x = &cpi->mb; + const MACROBLOCKD *const xd = &x->e_mbd; int p; - int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; - int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; + const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; + const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; int mi_width = num_8x8_blocks_wide_lookup[bsize]; int mi_height = num_8x8_blocks_high_lookup[bsize]; @@ -746,15 +771,18 @@ static void encode_b(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, int mi_col, if (sub_index != -1) *(get_sb_index(xd, bsize)) = sub_index; - if (bsize < BLOCK_SIZE_SB8X8) + if (bsize < BLOCK_SIZE_SB8X8) { + // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0 + // there is nothing to be done. if (xd->ab_index > 0) return; + } set_offsets(cpi, mi_row, mi_col, bsize); update_state(cpi, get_block_context(x, bsize), bsize, output_enabled); encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize); if (output_enabled) { - update_stats(cpi, mi_row, mi_col); + update_stats(cpi); (*tp)->token = EOSB_TOKEN; (*tp)++; @@ -776,7 +804,7 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, int mi_col, if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - c1 = BLOCK_SIZE_AB4X4; + c1 = BLOCK_4X4; if (bsize >= BLOCK_SIZE_SB8X8) { set_partition_seg_context(cm, xd, mi_row, mi_col); pl = partition_plane_context(xd, bsize); @@ -858,7 +886,7 @@ static void set_block_size(VP9_COMMON * const cm, MODE_INFO *m, int bhl = b_height_log2(bsize); int bsl = (bwl > bhl ? bwl : bhl); - int bs = (1 << bsl) / 2; // + int bs = (1 << bsl) / 2; // Block size in units of 8 pels. 
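As a worked example of the new comment above: for a 32x32 block, b_width_log2() and b_height_log2() are both 3 (block dimensions are counted in 4x4 units here), so bsl is 3 and bs = (1 << 3) / 2 = 4, i.e. the loop below walks a 4 x 4 patch of MODE_INFO cells, each covering 8 pels, for the full 32-pel extent.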
MODE_INFO *m2 = m + mi_row * mis + mi_col; for (row = 0; row < bs; row++) { for (col = 0; col < bs; col++) { @@ -906,28 +934,28 @@ typedef enum { static void tree_to_node(void *data, BLOCK_SIZE_TYPE block_size, vt_node *node) { int i; switch (block_size) { - case BLOCK_SIZE_SB64X64: { + case BLOCK_64X64: { v64x64 *vt = (v64x64 *) data; node->vt = &vt->vt; for (i = 0; i < 4; i++) node->split[i] = &vt->split[i].vt.none; break; } - case BLOCK_SIZE_SB32X32: { + case BLOCK_32X32: { v32x32 *vt = (v32x32 *) data; node->vt = &vt->vt; for (i = 0; i < 4; i++) node->split[i] = &vt->split[i].vt.none; break; } - case BLOCK_SIZE_MB16X16: { + case BLOCK_16X16: { v16x16 *vt = (v16x16 *) data; node->vt = &vt->vt; for (i = 0; i < 4; i++) node->split[i] = &vt->split[i].vt.none; break; } - case BLOCK_SIZE_SB8X8: { + case BLOCK_8X8: { v8x8 *vt = (v8x8 *) data; node->vt = &vt->vt; for (i = 0; i < 4; i++) @@ -1066,8 +1094,7 @@ static void choose_partitioning(VP9_COMP *cpi, MODE_INFO *m, int mi_row, int dp; int pixels_wide = 64, pixels_high = 64; - vpx_memset(&vt, 0, sizeof(vt)); - + vp9_zero(vt); set_offsets(cpi, mi_row, mi_col, BLOCK_SIZE_SB64X64); if (xd->mb_to_right_edge < 0) @@ -1087,7 +1114,8 @@ static void choose_partitioning(VP9_COMP *cpi, MODE_INFO *m, int mi_row, dp = 64; if (cm->frame_type != KEY_FRAME) { int_mv nearest_mv, near_mv; - YV12_BUFFER_CONFIG *ref_fb = &cm->yv12_fb[0]; + const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, LAST_FRAME)]; + YV12_BUFFER_CONFIG *ref_fb = &cm->yv12_fb[idx]; YV12_BUFFER_CONFIG *second_ref_fb = NULL; setup_pre_planes(xd, 0, ref_fb, mi_row, mi_col, @@ -1103,7 +1131,6 @@ static void choose_partitioning(VP9_COMP *cpi, MODE_INFO *m, int mi_row, vp9_build_inter_predictors_sby(xd, mi_row, mi_col, BLOCK_SIZE_SB64X64); d = xd->plane[0].dst.buf; dp = xd->plane[0].dst.stride; - } // Fill in the entire tree of 8x8 variances for splits. @@ -1130,32 +1157,32 @@ static void choose_partitioning(VP9_COMP *cpi, MODE_INFO *m, int mi_row, // values. for (i = 0; i < 4; i++) { for (j = 0; j < 4; j++) { - fill_variance_tree(&vt.split[i].split[j], BLOCK_SIZE_MB16X16); + fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16); } - fill_variance_tree(&vt.split[i], BLOCK_SIZE_SB32X32); + fill_variance_tree(&vt.split[i], BLOCK_32X32); } - fill_variance_tree(&vt, BLOCK_SIZE_SB64X64); + fill_variance_tree(&vt, BLOCK_64X64); // Now go through the entire structure, splitting every block size until // we get to one that's got a variance lower than our threshold, or we // hit 8x8. 
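The fill_variance_tree() calls above build each parent node from its four children by summing their error accumulators instead of rescanning pixels; the cascade that follows then keeps splitting any block whose variance stays above the threshold. Ignoring the v64x64/v32x32 node types and the fixed-point scaling the real code uses, the merge step amounts to:

    typedef struct { int64_t sum; int64_t sse; int count; } var_acc;  /* illustrative */

    static void merge_children(var_acc *parent, const var_acc child[4]) {
      int i;
      parent->sum = 0;
      parent->sse = 0;
      parent->count = 0;
      for (i = 0; i < 4; ++i) {
        parent->sum   += child[i].sum;    /* sum of (source - prediction) diffs */
        parent->sse   += child[i].sse;    /* sum of squared differences         */
        parent->count += child[i].count;  /* number of pixels covered           */
      }
    }

    static int64_t node_variance(const var_acc *a) {
      return (a->sse - (a->sum * a->sum) / a->count) / a->count;
    }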
- if (!set_vt_partitioning(cpi, &vt, m, BLOCK_SIZE_SB64X64, mi_row, mi_col, + if (!set_vt_partitioning(cpi, &vt, m, BLOCK_64X64, mi_row, mi_col, 4)) { for (i = 0; i < 4; ++i) { const int x32_idx = ((i & 1) << 2); const int y32_idx = ((i >> 1) << 2); - if (!set_vt_partitioning(cpi, &vt.split[i], m, BLOCK_SIZE_SB32X32, + if (!set_vt_partitioning(cpi, &vt.split[i], m, BLOCK_32X32, (mi_row + y32_idx), (mi_col + x32_idx), 2)) { for (j = 0; j < 4; ++j) { const int x16_idx = ((j & 1) << 1); const int y16_idx = ((j >> 1) << 1); if (!set_vt_partitioning(cpi, &vt.split[i].split[j], m, - BLOCK_SIZE_MB16X16, + BLOCK_16X16, (mi_row + y32_idx + y16_idx), (mi_col + x32_idx + x16_idx), 1)) { for (k = 0; k < 4; ++k) { const int x8_idx = (k & 1); const int y8_idx = (k >> 1); - set_block_size(cm, m, BLOCK_SIZE_SB8X8, mis, + set_block_size(cm, m, BLOCK_8X8, mis, (mi_row + y32_idx + y16_idx + y8_idx), (mi_col + x32_idx + x16_idx + x8_idx)); } @@ -1165,6 +1192,7 @@ static void choose_partitioning(VP9_COMP *cpi, MODE_INFO *m, int mi_row, } } } + static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize, int *rate, int64_t *dist, int do_recon) { @@ -1173,8 +1201,8 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, MACROBLOCKD *xd = &cpi->mb.e_mbd; const int mis = cm->mode_info_stride; int bsl = b_width_log2(bsize); - int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; - int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; + const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; + const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; int ms = num_4x4_blocks_wide / 2; int mh = num_4x4_blocks_high / 2; int bss = (1 << bsl) / 4; @@ -1191,7 +1219,7 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, int64_t none_dist = INT_MAX; int chosen_rate = INT_MAX; int64_t chosen_dist = INT_MAX; - BLOCK_SIZE_TYPE sub_subsize = BLOCK_SIZE_AB4X4; + BLOCK_SIZE_TYPE sub_subsize = BLOCK_4X4; int splits_below = 0; BLOCK_SIZE_TYPE bs_type = m->mbmi.sb_type; @@ -1203,6 +1231,8 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, subsize = get_subsize(bsize, partition); if (bsize < BLOCK_SIZE_SB8X8) { + // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0 + // there is nothing to be done. if (xd->ab_index != 0) { *rate = 0; *dist = 0; @@ -1213,6 +1243,10 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, } save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + x->fast_ms = 0; + x->pred_mv.as_int = 0; + x->subblock_ref = 0; + if (cpi->sf.adjust_partitioning_from_last_frame) { // Check if any of the sub blocks are further split. if (partition == PARTITION_SPLIT && subsize > BLOCK_SIZE_SB8X8) { @@ -1422,9 +1456,59 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, *dist = chosen_dist; } +static BLOCK_SIZE_TYPE min_partition_size[BLOCK_SIZE_TYPES] = + { BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, + BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, BLOCK_8X8, + BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16 }; +static BLOCK_SIZE_TYPE max_partition_size[BLOCK_SIZE_TYPES] = + { BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, + BLOCK_32X32, BLOCK_32X32, BLOCK_32X32, BLOCK_64X64, + BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64 }; + + +// Look at neighboring blocks and set a min and max partition size based on +// what they chose. 
+static void rd_auto_partition_range(VP9_COMP *cpi, + BLOCK_SIZE_TYPE * min_block_size, + BLOCK_SIZE_TYPE * max_block_size) { + MACROBLOCKD *const xd = &cpi->mb.e_mbd; + const MODE_INFO *const mi = xd->mode_info_context; + const MB_MODE_INFO *const above_mbmi = &mi[-xd->mode_info_stride].mbmi; + const MB_MODE_INFO *const left_mbmi = &mi[-1].mbmi; + const int left_in_image = xd->left_available && left_mbmi->mb_in_image; + const int above_in_image = xd->up_available && above_mbmi->mb_in_image; + + // Frequency check + if (cpi->sf.auto_min_max_partition_count <= 0) { + cpi->sf.auto_min_max_partition_count = + cpi->sf.auto_min_max_partition_interval; + *min_block_size = BLOCK_4X4; + *max_block_size = BLOCK_64X64; + return; + } else { + --cpi->sf.auto_min_max_partition_count; + } + + // Check for edge cases + if (!left_in_image && !above_in_image) { + *min_block_size = BLOCK_4X4; + *max_block_size = BLOCK_64X64; + } else if (!left_in_image) { + *min_block_size = min_partition_size[above_mbmi->sb_type]; + *max_block_size = max_partition_size[above_mbmi->sb_type]; + } else if (!above_in_image) { + *min_block_size = min_partition_size[left_mbmi->sb_type]; + *max_block_size = max_partition_size[left_mbmi->sb_type]; + } else { + *min_block_size = + min_partition_size[MIN(left_mbmi->sb_type, above_mbmi->sb_type)]; + *max_block_size = + max_partition_size[MAX(left_mbmi->sb_type, above_mbmi->sb_type)]; + } +} // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are -// unlikely to be selected depending on previously rate-distortion optimization +// unlikely to be selected depending on previous rate-distortion optimization // results, for encoding speed-up. static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize, int *rate, @@ -1444,20 +1528,22 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, (void) *tp_orig; - if (bsize < BLOCK_SIZE_SB8X8) + if (bsize < BLOCK_SIZE_SB8X8) { + // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0 + // there is nothing to be done. if (xd->ab_index != 0) { *rate = 0; *dist = 0; return; } + } assert(mi_height_log2(bsize) == mi_width_log2(bsize)); save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); // PARTITION_SPLIT - if (!cpi->sf.use_partitions_greater_than - || (cpi->sf.use_partitions_greater_than - && bsize > cpi->sf.greater_than_block_size)) { + if (!cpi->sf.auto_min_max_partition_size || + bsize >= cpi->sf.min_partition_size) { if (bsize > BLOCK_SIZE_SB8X8) { int r4 = 0; int64_t d4 = 0, sum_rd = 0; @@ -1500,41 +1586,39 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, } } + // Use 4 subblocks' motion estimation results to speed up current + // partition's checking. x->fast_ms = 0; x->pred_mv.as_int = 0; x->subblock_ref = 0; - // Use 4 subblocks' motion estimation results to speed up current - // partition's checking. - if (cpi->sf.using_small_partition_info) { + if (cpi->sf.using_small_partition_info && + (!cpi->sf.auto_min_max_partition_size || + (bsize <= cpi->sf.max_partition_size && + bsize >= cpi->sf.min_partition_size))) { // Only use 8x8 result for non HD videos. // int use_8x8 = (MIN(cpi->common.width, cpi->common.height) < 720) ? 
1 : 0; int use_8x8 = 1; if (cm->frame_type && !cpi->is_src_frame_alt_ref && - ((use_8x8 && bsize == BLOCK_SIZE_MB16X16) || - bsize == BLOCK_SIZE_SB32X32 || bsize == BLOCK_SIZE_SB64X64)) { + ((use_8x8 && bsize == BLOCK_16X16) || + bsize == BLOCK_32X32 || bsize == BLOCK_64X64)) { int ref0 = 0, ref1 = 0, ref2 = 0, ref3 = 0; + PICK_MODE_CONTEXT *block_context = NULL; - if (bsize == BLOCK_SIZE_MB16X16) { - ref0 = x->sb8x8_context[xd->sb_index][xd->mb_index][0].mic.mbmi. - ref_frame[0]; - ref1 = x->sb8x8_context[xd->sb_index][xd->mb_index][1].mic.mbmi. - ref_frame[0]; - ref2 = x->sb8x8_context[xd->sb_index][xd->mb_index][2].mic.mbmi. - ref_frame[0]; - ref3 = x->sb8x8_context[xd->sb_index][xd->mb_index][3].mic.mbmi. - ref_frame[0]; - } else if (bsize == BLOCK_SIZE_SB32X32) { - ref0 = x->mb_context[xd->sb_index][0].mic.mbmi.ref_frame[0]; - ref1 = x->mb_context[xd->sb_index][1].mic.mbmi.ref_frame[0]; - ref2 = x->mb_context[xd->sb_index][2].mic.mbmi.ref_frame[0]; - ref3 = x->mb_context[xd->sb_index][3].mic.mbmi.ref_frame[0]; + if (bsize == BLOCK_16X16) { + block_context = x->sb8x8_context[xd->sb_index][xd->mb_index]; + } else if (bsize == BLOCK_32X32) { + block_context = x->mb_context[xd->sb_index]; } else if (bsize == BLOCK_SIZE_SB64X64) { - ref0 = x->sb32_context[0].mic.mbmi.ref_frame[0]; - ref1 = x->sb32_context[1].mic.mbmi.ref_frame[0]; - ref2 = x->sb32_context[2].mic.mbmi.ref_frame[0]; - ref3 = x->sb32_context[3].mic.mbmi.ref_frame[0]; + block_context = x->sb32_context; + } + + if (block_context) { + ref0 = block_context[0].mic.mbmi.ref_frame[0]; + ref1 = block_context[1].mic.mbmi.ref_frame[0]; + ref2 = block_context[2].mic.mbmi.ref_frame[0]; + ref3 = block_context[3].mic.mbmi.ref_frame[0]; } // Currently, only consider 4 inter ref frames. @@ -1544,42 +1628,14 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, int d01, d23, d02, d13; // motion vector distance between 2 blocks // Get each subblock's motion vectors. - if (bsize == BLOCK_SIZE_MB16X16) { - mvr0 = x->sb8x8_context[xd->sb_index][xd->mb_index][0].mic.mbmi.mv[0]. - as_mv.row; - mvc0 = x->sb8x8_context[xd->sb_index][xd->mb_index][0].mic.mbmi.mv[0]. - as_mv.col; - mvr1 = x->sb8x8_context[xd->sb_index][xd->mb_index][1].mic.mbmi.mv[0]. - as_mv.row; - mvc1 = x->sb8x8_context[xd->sb_index][xd->mb_index][1].mic.mbmi.mv[0]. - as_mv.col; - mvr2 = x->sb8x8_context[xd->sb_index][xd->mb_index][2].mic.mbmi.mv[0]. - as_mv.row; - mvc2 = x->sb8x8_context[xd->sb_index][xd->mb_index][2].mic.mbmi.mv[0]. - as_mv.col; - mvr3 = x->sb8x8_context[xd->sb_index][xd->mb_index][3].mic.mbmi.mv[0]. - as_mv.row; - mvc3 = x->sb8x8_context[xd->sb_index][xd->mb_index][3].mic.mbmi.mv[0]. 
- as_mv.col; - } else if (bsize == BLOCK_SIZE_SB32X32) { - mvr0 = x->mb_context[xd->sb_index][0].mic.mbmi.mv[0].as_mv.row; - mvc0 = x->mb_context[xd->sb_index][0].mic.mbmi.mv[0].as_mv.col; - mvr1 = x->mb_context[xd->sb_index][1].mic.mbmi.mv[0].as_mv.row; - mvc1 = x->mb_context[xd->sb_index][1].mic.mbmi.mv[0].as_mv.col; - mvr2 = x->mb_context[xd->sb_index][2].mic.mbmi.mv[0].as_mv.row; - mvc2 = x->mb_context[xd->sb_index][2].mic.mbmi.mv[0].as_mv.col; - mvr3 = x->mb_context[xd->sb_index][3].mic.mbmi.mv[0].as_mv.row; - mvc3 = x->mb_context[xd->sb_index][3].mic.mbmi.mv[0].as_mv.col; - } else if (bsize == BLOCK_SIZE_SB64X64) { - mvr0 = x->sb32_context[0].mic.mbmi.mv[0].as_mv.row; - mvc0 = x->sb32_context[0].mic.mbmi.mv[0].as_mv.col; - mvr1 = x->sb32_context[1].mic.mbmi.mv[0].as_mv.row; - mvc1 = x->sb32_context[1].mic.mbmi.mv[0].as_mv.col; - mvr2 = x->sb32_context[2].mic.mbmi.mv[0].as_mv.row; - mvc2 = x->sb32_context[2].mic.mbmi.mv[0].as_mv.col; - mvr3 = x->sb32_context[3].mic.mbmi.mv[0].as_mv.row; - mvc3 = x->sb32_context[3].mic.mbmi.mv[0].as_mv.col; - } + mvr0 = block_context[0].mic.mbmi.mv[0].as_mv.row; + mvc0 = block_context[0].mic.mbmi.mv[0].as_mv.col; + mvr1 = block_context[1].mic.mbmi.mv[0].as_mv.row; + mvc1 = block_context[1].mic.mbmi.mv[0].as_mv.col; + mvr2 = block_context[2].mic.mbmi.mv[0].as_mv.row; + mvc2 = block_context[2].mic.mbmi.mv[0].as_mv.col; + mvr3 = block_context[3].mic.mbmi.mv[0].as_mv.row; + mvc3 = block_context[3].mic.mbmi.mv[0].as_mv.col; // Adjust sign if ref is alt_ref if (cm->ref_frame_sign_bias[ref0]) { @@ -1631,9 +1687,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, } } - if (!cpi->sf.use_partitions_less_than - || (cpi->sf.use_partitions_less_than - && bsize <= cpi->sf.less_than_block_size)) { + if (!cpi->sf.auto_min_max_partition_size || + bsize <= cpi->sf.max_partition_size) { int larger_is_better = 0; // PARTITION_NONE if ((mi_row + (ms >> 1) < cm->mi_rows) && @@ -1804,8 +1859,7 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, } // Examines 64x64 block and chooses a best reference frame -static void rd_pick_reference_frame(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, - int mi_col, int *rate, int64_t *dist) { +static void rd_pick_reference_frame(VP9_COMP *cpi, int mi_row, int mi_col) { VP9_COMMON * const cm = &cpi->common; MACROBLOCK * const x = &cpi->mb; MACROBLOCKD * const xd = &x->e_mbd; @@ -1836,23 +1890,7 @@ static void rd_pick_reference_frame(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, cpi->set_ref_frame_mask = 0; } - *rate = r; - *dist = d; - // RDCOST(x->rdmult, x->rddiv, r, d) - restore_context(cpi, mi_row, mi_col, a, l, sa, sl, BLOCK_SIZE_SB64X64); - - /*if (srate < INT_MAX && sdist < INT_MAX) - encode_sb(cpi, tp, mi_row, mi_col, 1, BLOCK_SIZE_SB64X64); - - if (bsize == BLOCK_SIZE_SB64X64) { - assert(tp_orig < *tp); - assert(srate < INT_MAX); - assert(sdist < INT_MAX); - } else { - assert(tp_orig == *tp); - } - */ } static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp, @@ -1877,10 +1915,8 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp, else cpi->unused_mode_skip_mask = 0xFFFFFFFFFFFFFE00; - if (cpi->sf.reference_masking) { - rd_pick_reference_frame(cpi, tp, mi_row, mi_col, - &dummy_rate, &dummy_dist); - } + if (cpi->sf.reference_masking) + rd_pick_reference_frame(cpi, mi_row, mi_col); if (cpi->sf.partition_by_variance || cpi->sf.use_lastframe_partitioning || cpi->sf.use_one_partition_size_always ) { @@ -1888,6 +1924,7 @@ static void encode_sb_row(VP9_COMP 
*cpi, int mi_row, TOKENEXTRA **tp, MODE_INFO *m = cm->mi + idx_str; MODE_INFO *p = cm->prev_mi + idx_str; + cpi->mb.source_variance = UINT_MAX; if (cpi->sf.use_one_partition_size_always) { set_offsets(cpi, mi_row, mi_col, BLOCK_SIZE_SB64X64); set_partitioning(cpi, m, cpi->sf.always_this_block_size); @@ -1904,6 +1941,12 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp, || cpi->common.show_frame == 0 || cpi->common.frame_type == KEY_FRAME || cpi->is_src_frame_alt_ref) { + // If required set upper and lower partition size limits + if (cpi->sf.auto_min_max_partition_size) { + rd_auto_partition_range(cpi, + &cpi->sf.min_partition_size, + &cpi->sf.max_partition_size); + } rd_pick_partition(cpi, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64, &dummy_rate, &dummy_dist, 1, INT64_MAX); } else { @@ -1913,6 +1956,12 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp, } } } else { + // If required set upper and lower partition size limits + if (cpi->sf.auto_min_max_partition_size) { + rd_auto_partition_range(cpi, &cpi->sf.min_partition_size, + &cpi->sf.max_partition_size); + } + rd_pick_partition(cpi, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64, &dummy_rate, &dummy_dist, 1, INT64_MAX); } @@ -2086,7 +2135,7 @@ static void encode_frame_internal(VP9_COMP *cpi) { } vpx_usec_timer_mark(&emr_timer); - cpi->time_encode_mb_row += vpx_usec_timer_elapsed(&emr_timer); + cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer); } if (cpi->sf.skip_encode_sb) { @@ -2203,13 +2252,13 @@ static void reset_skip_txfm_size_sb(VP9_COMP *cpi, MODE_INFO *mi, int n; assert(bwl < bsl && bhl < bsl); - if (bsize == BLOCK_SIZE_SB64X64) { - subsize = BLOCK_SIZE_SB32X32; - } else if (bsize == BLOCK_SIZE_SB32X32) { - subsize = BLOCK_SIZE_MB16X16; + if (bsize == BLOCK_64X64) { + subsize = BLOCK_32X32; + } else if (bsize == BLOCK_32X32) { + subsize = BLOCK_16X16; } else { - assert(bsize == BLOCK_SIZE_MB16X16); - subsize = BLOCK_SIZE_SB8X8; + assert(bsize == BLOCK_16X16); + subsize = BLOCK_8X8; } for (n = 0; n < 4; n++) { @@ -2267,7 +2316,7 @@ static void select_tx_mode(VP9_COMP *cpi) { } else { unsigned int total = 0; int i; - for (i = 0; i < TX_SIZE_MAX_SB; ++i) + for (i = 0; i < TX_SIZES; ++i) total += cpi->txfm_stepdown_count[i]; if (total) { double fraction = (double)cpi->txfm_stepdown_count[0] / total; @@ -2376,12 +2425,12 @@ void vp9_encode_frame(VP9_COMP *cpi) { (cpi->rd_filter_threshes[frame_type][i] + diff) / 2; } - for (i = 0; i < NB_TXFM_MODES; ++i) { + for (i = 0; i < TX_MODES; ++i) { int64_t pd = cpi->rd_tx_select_diff[i]; int diff; if (i == TX_MODE_SELECT) pd -= RDCOST(cpi->mb.rdmult, cpi->mb.rddiv, - 2048 * (TX_SIZE_MAX_SB - 1), 0); + 2048 * (TX_SIZES - 1), 0); diff = (int) (pd / cpi->common.MBs); cpi->rd_tx_select_threshes[frame_type][i] += diff; cpi->rd_tx_select_threshes[frame_type][i] /= 2; @@ -2527,7 +2576,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, // Increase zbin size to suppress noise cpi->zbin_mode_boost = 0; if (cpi->zbin_mode_boost_enabled) { - if (mbmi->ref_frame[0] != INTRA_FRAME) { + if (is_inter_block(mbmi)) { if (mbmi->mode == ZEROMV) { if (mbmi->ref_frame[0] != LAST_FRAME) cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; @@ -2600,7 +2649,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, if (output_enabled) { if (cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_SIZE_SB8X8 && - !(mbmi->ref_frame[0] != INTRA_FRAME && + !(is_inter_block(mbmi) && (mbmi->mb_skip_coeff || 
vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_SKIP)))) { const uint8_t context = vp9_get_pred_context_tx_size(xd); @@ -2609,14 +2658,14 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, int x, y; TX_SIZE sz = (cm->tx_mode == TX_MODE_SELECT) ? TX_32X32 : cm->tx_mode; // The new intra coding scheme requires no change of transform size - if (mi->mbmi.ref_frame[0] != INTRA_FRAME) { - if (sz == TX_32X32 && bsize < BLOCK_SIZE_SB32X32) + if (is_inter_block(&mi->mbmi)) { + if (sz == TX_32X32 && bsize < BLOCK_32X32) sz = TX_16X16; - if (sz == TX_16X16 && bsize < BLOCK_SIZE_MB16X16) + if (sz == TX_16X16 && bsize < BLOCK_16X16) sz = TX_8X8; - if (sz == TX_8X8 && bsize < BLOCK_SIZE_SB8X8) + if (sz == TX_8X8 && bsize < BLOCK_8X8) sz = TX_4X4; - } else if (bsize >= BLOCK_SIZE_SB8X8) { + } else if (bsize >= BLOCK_8X8) { sz = mbmi->txfm_size; } else { sz = TX_4X4; diff --git a/libvpx/vp9/encoder/vp9_encodeintra.c b/libvpx/vp9/encoder/vp9_encodeintra.c index d49e53258..edbd2d909 100644 --- a/libvpx/vp9/encoder/vp9_encodeintra.c +++ b/libvpx/vp9/encoder/vp9_encodeintra.c @@ -21,7 +21,7 @@ int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) { x->skip_encode = 0; mbmi->mode = DC_PRED; mbmi->ref_frame[0] = INTRA_FRAME; - mbmi->txfm_size = use_16x16_pred ? (mbmi->sb_type >= BLOCK_SIZE_MB16X16 ? + mbmi->txfm_size = use_16x16_pred ? (mbmi->sb_type >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4; vp9_encode_intra_block_y(&cpi->common, x, mbmi->sb_type); return vp9_get_mb_ss(x->plane[0].src_diff); diff --git a/libvpx/vp9/encoder/vp9_encodemb.c b/libvpx/vp9/encoder/vp9_encodemb.c index 66e35a991..40b0a4e5a 100644 --- a/libvpx/vp9/encoder/vp9_encodemb.c +++ b/libvpx/vp9/encoder/vp9_encodemb.c @@ -47,6 +47,27 @@ static void inverse_transform_b_4x4_add(MACROBLOCKD *xd, int eob, xd->inv_txm4x4_add(dqcoeff, dest, stride); } +static void inverse_transform_b_8x8_add(int eob, + int16_t *dqcoeff, uint8_t *dest, + int stride) { + if (eob <= 1) + vp9_short_idct8x8_1_add(dqcoeff, dest, stride); + else if (eob <= 10) + vp9_short_idct10_8x8_add(dqcoeff, dest, stride); + else + vp9_short_idct8x8_add(dqcoeff, dest, stride); +} + +static void inverse_transform_b_16x16_add(int eob, + int16_t *dqcoeff, uint8_t *dest, + int stride) { + if (eob <= 1) + vp9_short_idct16x16_1_add(dqcoeff, dest, stride); + else if (eob <= 10) + vp9_short_idct10_16x16_add(dqcoeff, dest, stride); + else + vp9_short_idct16x16_add(dqcoeff, dest, stride); +} static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int plane) { struct macroblock_plane *const p = &x->plane[plane]; @@ -120,12 +141,12 @@ static int trellis_get_coeff_context(const int16_t *scan, return pt; } -static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb, +static void optimize_b(MACROBLOCK *mb, int plane, int block, BLOCK_SIZE_TYPE bsize, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, TX_SIZE tx_size) { - const int ref = mb->e_mbd.mode_info_context->mbmi.ref_frame[0] != INTRA_FRAME; MACROBLOCKD *const xd = &mb->e_mbd; + const int ref = is_inter_block(&xd->mode_info_context->mbmi); vp9_token_state tokens[1025][2]; unsigned best_index[1025][2]; const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff, @@ -214,10 +235,10 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb, band = get_coef_band(band_translate, i + 1); pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache); rate0 += - mb->token_costs[tx_size][type][ref][0][band][pt] + mb->token_costs[tx_size][type][ref][band][0][pt] [tokens[next][0].token]; 
rate1 += - mb->token_costs[tx_size][type][ref][0][band][pt] + mb->token_costs[tx_size][type][ref][band][0][pt] [tokens[next][1].token]; } UPDATE_RD_COST(); @@ -265,12 +286,12 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb, band = get_coef_band(band_translate, i + 1); if (t0 != DCT_EOB_TOKEN) { pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache); - rate0 += mb->token_costs[tx_size][type][ref][!x][band][pt] + rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt] [tokens[next][0].token]; } if (t1 != DCT_EOB_TOKEN) { pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache); - rate1 += mb->token_costs[tx_size][type][ref][!x][band][pt] + rate1 += mb->token_costs[tx_size][type][ref][band][!x][pt] [tokens[next][1].token]; } } @@ -303,12 +324,12 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb, /* Update the cost of each path if we're past the EOB token. */ if (t0 != DCT_EOB_TOKEN) { tokens[next][0].rate += - mb->token_costs[tx_size][type][ref][1][band][0][t0]; + mb->token_costs[tx_size][type][ref][band][1][0][t0]; tokens[next][0].token = ZERO_TOKEN; } if (t1 != DCT_EOB_TOKEN) { tokens[next][1].rate += - mb->token_costs[tx_size][type][ref][1][band][0][t1]; + mb->token_costs[tx_size][type][ref][band][1][0][t1]; tokens[next][1].token = ZERO_TOKEN; } best_index[i][0] = best_index[i][1] = 0; @@ -325,8 +346,8 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb, error1 = tokens[next][1].error; t0 = tokens[next][0].token; t1 = tokens[next][1].token; - rate0 += mb->token_costs[tx_size][type][ref][0][band][pt][t0]; - rate1 += mb->token_costs[tx_size][type][ref][0][band][pt][t1]; + rate0 += mb->token_costs[tx_size][type][ref][band][0][pt][t0]; + rate1 += mb->token_costs[tx_size][type][ref][band][0][pt][t1]; UPDATE_RD_COST(); best = rd_cost1 < rd_cost0; final_eob = i0 - 1; @@ -351,7 +372,7 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb, } void vp9_optimize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, - int ss_txfrm_size, VP9_COMMON *cm, MACROBLOCK *mb, + int ss_txfrm_size, MACROBLOCK *mb, struct optimize_ctx *ctx) { MACROBLOCKD *const xd = &mb->e_mbd; int x, y; @@ -359,51 +380,61 @@ void vp9_optimize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, // find current entropy context txfrm_block_to_raster_xy(xd, bsize, plane, block, ss_txfrm_size, &x, &y); - optimize_b(cm, mb, plane, block, bsize, + optimize_b(mb, plane, block, bsize, &ctx->ta[plane][x], &ctx->tl[plane][y], ss_txfrm_size / 2); } static void optimize_block(int plane, int block, BLOCK_SIZE_TYPE bsize, int ss_txfrm_size, void *arg) { const struct encode_b_args* const args = arg; - vp9_optimize_b(plane, block, bsize, ss_txfrm_size, args->cm, args->x, - args->ctx); + vp9_optimize_b(plane, block, bsize, ss_txfrm_size, args->x, args->ctx); } -void vp9_optimize_init(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize, - struct optimize_ctx *ctx) { - int p; - - for (p = 0; p < MAX_MB_PLANE; p++) { - const struct macroblockd_plane* const plane = &xd->plane[p]; - const int bwl = b_width_log2(bsize) - plane->subsampling_x; - const int bhl = b_height_log2(bsize) - plane->subsampling_y; - const MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; - const TX_SIZE tx_size = p ? 
get_uv_tx_size(mbmi) - : mbmi->txfm_size; - int i, j; - - for (i = 0; i < 1 << bwl; i += 1 << tx_size) { - int c = 0; - ctx->ta[p][i] = 0; - for (j = 0; j < 1 << tx_size && !c; j++) { - c = ctx->ta[p][i] |= plane->above_context[i + j]; - } - } - for (i = 0; i < 1 << bhl; i += 1 << tx_size) { - int c = 0; - ctx->tl[p][i] = 0; - for (j = 0; j < 1 << tx_size && !c; j++) { - c = ctx->tl[p][i] |= plane->left_context[i + j]; - } - } +void optimize_init_b(int plane, BLOCK_SIZE_TYPE bsize, void *arg) { + const struct encode_b_args* const args = arg; + const MACROBLOCKD *xd = &args->x->e_mbd; + const struct macroblockd_plane* const pd = &xd->plane[plane]; + const int bwl = b_width_log2(bsize) - pd->subsampling_x; + const int bhl = b_height_log2(bsize) - pd->subsampling_y; + const int bw = 1 << bwl, bh = 1 << bhl; + const MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; + const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi) : mbmi->txfm_size; + int i; + + switch (tx_size) { + case TX_4X4: + vpx_memcpy(args->ctx->ta[plane], pd->above_context, + sizeof(ENTROPY_CONTEXT) * bw); + vpx_memcpy(args->ctx->tl[plane], pd->left_context, + sizeof(ENTROPY_CONTEXT) * bh); + break; + case TX_8X8: + for (i = 0; i < bw; i += 2) + args->ctx->ta[plane][i] = !!*(uint16_t *)&pd->above_context[i]; + for (i = 0; i < bh; i += 2) + args->ctx->tl[plane][i] = !!*(uint16_t *)&pd->left_context[i]; + break; + case TX_16X16: + for (i = 0; i < bw; i += 4) + args->ctx->ta[plane][i] = !!*(uint32_t *)&pd->above_context[i]; + for (i = 0; i < bh; i += 4) + args->ctx->tl[plane][i] = !!*(uint32_t *)&pd->left_context[i]; + break; + case TX_32X32: + for (i = 0; i < bw; i += 8) + args->ctx->ta[plane][i] = !!*(uint64_t *)&pd->above_context[i]; + for (i = 0; i < bh; i += 8) + args->ctx->tl[plane][i] = !!*(uint64_t *)&pd->left_context[i]; + break; + default: + assert(0); } } void vp9_optimize_sby(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { struct optimize_ctx ctx; struct encode_b_args arg = {cm, x, &ctx}; - vp9_optimize_init(&x->e_mbd, bsize, &ctx); + optimize_init_b(0, bsize, &arg); foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0, optimize_block, &arg); } @@ -411,7 +442,10 @@ void vp9_optimize_sbuv(VP9_COMMON *const cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { struct optimize_ctx ctx; struct encode_b_args arg = {cm, x, &ctx}; - vp9_optimize_init(&x->e_mbd, bsize, &ctx); + int i; + for (i = 1; i < MAX_MB_PLANE; ++i) + optimize_init_b(i, bsize, &arg); + foreach_transformed_block_uv(&x->e_mbd, bsize, optimize_block, &arg); } @@ -504,7 +538,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE_TYPE bsize, xform_quant(plane, block, bsize, ss_txfrm_size, arg); if (x->optimize) - vp9_optimize_b(plane, block, bsize, ss_txfrm_size, args->cm, x, args->ctx); + vp9_optimize_b(plane, block, bsize, ss_txfrm_size, x, args->ctx); if (x->skip_encode) return; @@ -516,10 +550,12 @@ static void encode_block(int plane, int block, BLOCK_SIZE_TYPE bsize, vp9_short_idct32x32_add(dqcoeff, dst, pd->dst.stride); break; case TX_16X16: - vp9_short_idct16x16_add(dqcoeff, dst, pd->dst.stride); + inverse_transform_b_16x16_add(pd->eobs[block], dqcoeff, dst, + pd->dst.stride); break; case TX_8X8: - vp9_short_idct8x8_add(dqcoeff, dst, pd->dst.stride); + inverse_transform_b_8x8_add(pd->eobs[block], dqcoeff, dst, + pd->dst.stride); break; case TX_4X4: // this is like vp9_short_idct4x4 but has a special case around eob<=1 @@ -553,7 +589,7 @@ void vp9_encode_sby(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { vp9_subtract_sby(x, bsize); if 
(x->optimize) - vp9_optimize_init(xd, bsize, &ctx); + optimize_init_b(0, bsize, &arg); foreach_transformed_block_in_plane(xd, bsize, 0, encode_block, &arg); } @@ -564,8 +600,11 @@ void vp9_encode_sbuv(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { struct encode_b_args arg = {cm, x, &ctx}; vp9_subtract_sbuv(x, bsize); - if (x->optimize) - vp9_optimize_init(xd, bsize, &ctx); + if (x->optimize) { + int i; + for (i = 1; i < MAX_MB_PLANE; ++i) + optimize_init_b(i, bsize, &arg); + } foreach_transformed_block_uv(xd, bsize, encode_block, &arg); } @@ -576,8 +615,12 @@ void vp9_encode_sb(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { struct encode_b_args arg = {cm, x, &ctx}; vp9_subtract_sb(x, bsize); - if (x->optimize) - vp9_optimize_init(xd, bsize, &ctx); + + if (x->optimize) { + int i; + for (i = 0; i < MAX_MB_PLANE; ++i) + optimize_init_b(i, bsize, &arg); + } foreach_transformed_block(xd, bsize, encode_block, &arg); } @@ -610,7 +653,7 @@ void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize, // if (x->optimize) // vp9_optimize_b(plane, block, bsize, ss_txfrm_size, - // args->cm, x, args->ctx); + // x, args->ctx); switch (tx_size) { case TX_32X32: @@ -661,7 +704,7 @@ void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize, pd->dequant, p->zbin_extra, eob, scan, iscan); if (!x->skip_encode && *eob) { if (tx_type == DCT_DCT) - vp9_short_idct16x16_add(dqcoeff, dst, pd->dst.stride); + inverse_transform_b_16x16_add(*eob, dqcoeff, dst, pd->dst.stride); else vp9_short_iht16x16_add(dqcoeff, dst, pd->dst.stride, tx_type); } @@ -690,7 +733,7 @@ void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize, pd->dequant, p->zbin_extra, eob, scan, iscan); if (!x->skip_encode && *eob) { if (tx_type == DCT_DCT) - vp9_short_idct8x8_add(dqcoeff, dst, pd->dst.stride); + inverse_transform_b_8x8_add(*eob, dqcoeff, dst, pd->dst.stride); else vp9_short_iht8x8_add(dqcoeff, dst, pd->dst.stride, tx_type); } @@ -699,11 +742,11 @@ void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize, tx_type = get_tx_type_4x4(pd->plane_type, xd, block); scan = get_scan_4x4(tx_type); iscan = get_iscan_4x4(tx_type); - if (mbmi->sb_type < BLOCK_SIZE_SB8X8 && plane == 0) { + if (mbmi->sb_type < BLOCK_8X8 && plane == 0) mode = xd->mode_info_context->bmi[block].as_mode; - } else { + else mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; - } + xoff = 4 * (block & twmask); yoff = 4 * (block >> twl); dst = pd->dst.buf + yoff * pd->dst.stride + xoff; @@ -725,8 +768,7 @@ void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize, // this is like vp9_short_idct4x4 but has a special case around eob<=1 // which is significant (not just an optimization) for the lossless // case. 
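[Editor's note — not part of the patch] The eob-based dispatch added earlier in this file (inverse_transform_b_8x8_add / inverse_transform_b_16x16_add) exists because a block with eob <= 1 has only its DC coefficient left after quantization, so the full NxN inverse transform collapses to adding one constant to every output pixel. A schematic version with made-up scaling; the real vp9_short_idct8x8_1_add / vp9_short_idct16x16_1_add kernels apply the exact DCT scale and rounding.

  /* Hypothetical sketch: DC-only inverse transform as a constant add. */
  static unsigned char clip_pixel_sketch(int v) {
    return (unsigned char)(v < 0 ? 0 : (v > 255 ? 255 : v));
  }

  static void dc_only_add_sketch(int dc, unsigned char *dest, int stride, int n) {
    const int offset = (dc + 8) >> 4;   /* schematic scale/round only */
    int r, c;
    for (r = 0; r < n; ++r)
      for (c = 0; c < n; ++c)
        dest[r * stride + c] = clip_pixel_sketch(dest[r * stride + c] + offset);
  }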
- inverse_transform_b_4x4_add(xd, *eob, dqcoeff, - dst, pd->dst.stride); + inverse_transform_b_4x4_add(xd, *eob, dqcoeff, dst, pd->dst.stride); else vp9_short_iht4x4_add(dqcoeff, dst, pd->dst.stride, tx_type); } diff --git a/libvpx/vp9/encoder/vp9_encodemb.h b/libvpx/vp9/encoder/vp9_encodemb.h index defaa48a3..f647fd979 100644 --- a/libvpx/vp9/encoder/vp9_encodemb.h +++ b/libvpx/vp9/encoder/vp9_encodemb.h @@ -33,10 +33,8 @@ struct encode_b_args { struct optimize_ctx *ctx; }; -void vp9_optimize_init(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize, - struct optimize_ctx *ctx); void vp9_optimize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, - int ss_txfrm_size, VP9_COMMON *cm, MACROBLOCK *x, + int ss_txfrm_size, MACROBLOCK *x, struct optimize_ctx *ctx); void vp9_optimize_sby(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); void vp9_optimize_sbuv(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); diff --git a/libvpx/vp9/encoder/vp9_encodemv.c b/libvpx/vp9/encoder/vp9_encodemv.c index 2f5e16ccf..1c6fa3a3d 100644 --- a/libvpx/vp9/encoder/vp9_encodemv.c +++ b/libvpx/vp9/encoder/vp9_encodemv.c @@ -478,7 +478,7 @@ void vp9_update_nmv_count(VP9_COMP *cpi, MACROBLOCK *x, const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type]; int idx, idy; - if (mbmi->sb_type < BLOCK_SIZE_SB8X8) { + if (mbmi->sb_type < BLOCK_8X8) { PARTITION_INFO *pi = x->partition_info; for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { diff --git a/libvpx/vp9/encoder/vp9_firstpass.c b/libvpx/vp9/encoder/vp9_firstpass.c index ec2e361ee..6ba2a4fc9 100644 --- a/libvpx/vp9/encoder/vp9_firstpass.c +++ b/libvpx/vp9/encoder/vp9_firstpass.c @@ -347,17 +347,17 @@ static void zz_motion_search(VP9_COMP *cpi, MACROBLOCK *x, YV12_BUFFER_CONFIG *r xd->plane[0].pre[0].buf = recon_buffer->y_buffer + recon_yoffset; switch (xd->mode_info_context->mbmi.sb_type) { - case BLOCK_SIZE_SB8X8: + case BLOCK_8X8: vp9_mse8x8(x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride, (unsigned int *)(best_motion_err)); break; - case BLOCK_SIZE_SB16X8: + case BLOCK_16X8: vp9_mse16x8(x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride, (unsigned int *)(best_motion_err)); break; - case BLOCK_SIZE_SB8X16: + case BLOCK_8X16: vp9_mse8x16(x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride, (unsigned int *)(best_motion_err)); @@ -403,13 +403,13 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, // override the default variance function to use MSE switch (xd->mode_info_context->mbmi.sb_type) { - case BLOCK_SIZE_SB8X8: + case BLOCK_8X8: v_fn_ptr.vf = vp9_mse8x8; break; - case BLOCK_SIZE_SB16X8: + case BLOCK_16X8: v_fn_ptr.vf = vp9_mse16x8; break; - case BLOCK_SIZE_SB8X16: + case BLOCK_8X16: v_fn_ptr.vf = vp9_mse8x16; break; default: @@ -549,15 +549,15 @@ void vp9_first_pass(VP9_COMP *cpi) { if (mb_col * 2 + 1 < cm->mi_cols) { if (mb_row * 2 + 1 < cm->mi_rows) { - xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_MB16X16; + xd->mode_info_context->mbmi.sb_type = BLOCK_16X16; } else { - xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_SB16X8; + xd->mode_info_context->mbmi.sb_type = BLOCK_16X8; } } else { if (mb_row * 2 + 1 < cm->mi_rows) { - xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_SB8X16; + xd->mode_info_context->mbmi.sb_type = BLOCK_8X16; } else { - xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_SB8X8; + xd->mode_info_context->mbmi.sb_type = 
BLOCK_8X8; } } xd->mode_info_context->mbmi.ref_frame[0] = INTRA_FRAME; @@ -1282,7 +1282,6 @@ static int detect_flash(VP9_COMP *cpi, int offset) { // Update the motion related elements to the GF arf boost calculation static void accumulate_frame_motion_stats( - VP9_COMP *cpi, FIRSTPASS_STATS *this_frame, double *this_frame_mv_in_out, double *mv_in_out_accumulator, @@ -1377,7 +1376,7 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset, break; // Update the motion related elements to the boost calculation - accumulate_frame_motion_stats(cpi, &this_frame, + accumulate_frame_motion_stats(&this_frame, &this_frame_mv_in_out, &mv_in_out_accumulator, &abs_mv_in_out_accumulator, &mv_ratio_accumulator); @@ -1413,7 +1412,7 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset, break; // Update the motion related elements to the boost calculation - accumulate_frame_motion_stats(cpi, &this_frame, + accumulate_frame_motion_stats(&this_frame, &this_frame_mv_in_out, &mv_in_out_accumulator, &abs_mv_in_out_accumulator, &mv_ratio_accumulator); @@ -1665,7 +1664,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { flash_detected = detect_flash(cpi, 0); // Update the motion related elements to the boost calculation - accumulate_frame_motion_stats(cpi, &next_frame, + accumulate_frame_motion_stats(&next_frame, &this_frame_mv_in_out, &mv_in_out_accumulator, &abs_mv_in_out_accumulator, &mv_ratio_accumulator); @@ -2139,8 +2138,7 @@ void vp9_second_pass(VP9_COMP *cpi) { adjust_active_maxq(cpi->active_worst_quality, tmp_q); } #endif - - vpx_memset(&this_frame, 0, sizeof(FIRSTPASS_STATS)); + vp9_zero(this_frame); if (EOF == input_stats(cpi, &this_frame)) return; @@ -2318,7 +2316,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { double kf_group_coded_err = 0.0; double recent_loop_decay[8] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; - vpx_memset(&next_frame, 0, sizeof(next_frame)); // assure clean + vp9_zero(next_frame); vp9_clear_system_state(); // __asm emms; start_position = cpi->twopass.stats_in; diff --git a/libvpx/vp9/encoder/vp9_mbgraph.c b/libvpx/vp9/encoder/vp9_mbgraph.c index 7d6db071d..154d31af6 100644 --- a/libvpx/vp9/encoder/vp9_mbgraph.c +++ b/libvpx/vp9/encoder/vp9_mbgraph.c @@ -63,7 +63,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, } vp9_set_mbmode_and_mvs(x, NEWMV, dst_mv); - vp9_build_inter_predictors_sby(xd, mb_row, mb_col, BLOCK_SIZE_MB16X16); + vp9_build_inter_predictors_sby(xd, mb_row, mb_col, BLOCK_16X16); best_err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].dst.buf, xd->plane[0].dst.stride, INT_MAX); @@ -77,9 +77,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, return best_err; } -static int do_16x16_motion_search(VP9_COMP *cpi, - int_mv *ref_mv, int_mv *dst_mv, - int buf_mb_y_offset, int mb_y_offset, +static int do_16x16_motion_search(VP9_COMP *cpi, int_mv *ref_mv, int_mv *dst_mv, int mb_row, int mb_col) { MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; @@ -118,9 +116,7 @@ static int do_16x16_motion_search(VP9_COMP *cpi, return err; } -static int do_16x16_zerozero_search(VP9_COMP *cpi, - int_mv *dst_mv, - int buf_mb_y_offset, int mb_y_offset) { +static int do_16x16_zerozero_search(VP9_COMP *cpi, int_mv *dst_mv) { MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; unsigned int err; @@ -210,7 +206,6 @@ static void update_mbgraph_mb_stats g_motion_error = do_16x16_motion_search(cpi, prev_golden_ref_mv, &stats->ref[GOLDEN_FRAME].m.mv, - 
mb_y_offset, gld_y_offset, mb_row, mb_col); stats->ref[GOLDEN_FRAME].err = g_motion_error; } else { @@ -224,8 +219,7 @@ static void update_mbgraph_mb_stats xd->plane[0].pre[0].buf = alt_ref->y_buffer + mb_y_offset; xd->plane[0].pre[0].stride = alt_ref->y_stride; a_motion_error = do_16x16_zerozero_search(cpi, - &stats->ref[ALTREF_FRAME].m.mv, - mb_y_offset, arf_y_offset); + &stats->ref[ALTREF_FRAME].m.mv); stats->ref[ALTREF_FRAME].err = a_motion_error; } else { @@ -248,8 +242,7 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi, int_mv arf_top_mv, gld_top_mv; MODE_INFO mi_local; - // Make sure the mi context starts in a consistent state. - memset(&mi_local, 0, sizeof(mi_local)); + vp9_zero(mi_local); // Set up limit values for motion vectors to prevent them extending outside the UMV borders arf_top_mv.as_int = 0; @@ -262,7 +255,7 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi, xd->plane[0].pre[0].stride = buf->y_stride; xd->plane[1].dst.stride = buf->uv_stride; xd->mode_info_context = &mi_local; - mi_local.mbmi.sb_type = BLOCK_SIZE_MB16X16; + mi_local.mbmi.sb_type = BLOCK_16X16; mi_local.mbmi.ref_frame[0] = LAST_FRAME; mi_local.mbmi.ref_frame[1] = NONE; diff --git a/libvpx/vp9/encoder/vp9_mcomp.c b/libvpx/vp9/encoder/vp9_mcomp.c index 0be98913e..88beee791 100644 --- a/libvpx/vp9/encoder/vp9_mcomp.c +++ b/libvpx/vp9/encoder/vp9_mcomp.c @@ -58,7 +58,7 @@ int vp9_init_search_range(VP9_COMP *cpi, int size) { } int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2], - int weight, int ishp) { + int weight) { MV v; v.row = mv->as_mv.row - ref->as_mv.row; v.col = mv->as_mv.col - ref->as_mv.col; @@ -68,7 +68,7 @@ int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2], } static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2], - int error_per_bit, int ishp) { + int error_per_bit) { if (mvcost) { MV v; v.row = mv->as_mv.row - ref->as_mv.row; @@ -269,7 +269,6 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, int maxc, minc, maxr, minr; int y_stride; int offset; - int usehp = xd->allow_high_precision_mv; uint8_t *y = xd->plane[0].pre[0].buf + (bestmv->as_mv.row) * xd->plane[0].pre[0].stride + @@ -300,8 +299,7 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, // calculate central point error besterr = vfp->vf(y, y_stride, z, src_stride, sse1); *distortion = besterr; - besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, - error_per_bit, xd->allow_high_precision_mv); + besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); // TODO: Each subsequent iteration checks at least one point in // common with the last iteration could be 2 ( if diag selected) @@ -371,13 +369,7 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, tc = bc; } - if (xd->allow_high_precision_mv) { - usehp = vp9_use_mv_hp(&ref_mv->as_mv); - } else { - usehp = 0; - } - - if (usehp) { + if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv)) { hstep >>= 1; while (--eighthiters) { CHECK_BETTER(left, tr, tc - hstep); @@ -451,7 +443,6 @@ int vp9_find_best_sub_pixel_comp(MACROBLOCK *x, int maxc, minc, maxr, minr; int y_stride; int offset; - int usehp = xd->allow_high_precision_mv; DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); uint8_t *y = xd->plane[0].pre[0].buf + @@ -490,8 +481,7 @@ int vp9_find_best_sub_pixel_comp(MACROBLOCK *x, comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); *distortion = besterr; - besterr += mv_err_cost(bestmv, 
ref_mv, mvjcost, mvcost, - error_per_bit, xd->allow_high_precision_mv); + besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); // Each subsequent iteration checks at least one point in // common with the last iteration could be 2 ( if diag selected) @@ -561,13 +551,7 @@ int vp9_find_best_sub_pixel_comp(MACROBLOCK *x, tc = bc; } - if (xd->allow_high_precision_mv) { - usehp = vp9_use_mv_hp(&ref_mv->as_mv); - } else { - usehp = 0; - } - - if (usehp) { + if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv)) { hstep >>= 1; while (--eighthiters) { CHECK_BETTER(left, tr, tc - hstep); @@ -638,7 +622,6 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, int thismse; int y_stride; MACROBLOCKD *xd = &x->e_mbd; - int usehp = xd->allow_high_precision_mv; uint8_t *y = xd->plane[0].pre[0].buf + (bestmv->as_mv.row) * xd->plane[0].pre[0].stride + @@ -654,15 +637,14 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, // calculate central point error bestmse = vfp->vf(y, y_stride, z, src_stride, sse1); *distortion = bestmse; - bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); // go left then right and check error this_mv.as_mv.row = startmv.as_mv.row; this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, src_stride, &sse); - left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, + error_per_bit); if (left < bestmse) { *bestmv = this_mv; @@ -674,7 +656,7 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, this_mv.as_mv.col += 8; thismse = vfp->svf_halfpix_h(y, y_stride, z, src_stride, &sse); right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit, xd->allow_high_precision_mv); + error_per_bit); if (right < bestmse) { *bestmv = this_mv; @@ -687,8 +669,7 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, this_mv.as_mv.col = startmv.as_mv.col; this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, src_stride, &sse); - up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); if (up < bestmse) { *bestmv = this_mv; @@ -699,8 +680,8 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, this_mv.as_mv.row += 8; thismse = vfp->svf_halfpix_v(y, y_stride, z, src_stride, &sse); - down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, + error_per_bit); if (down < bestmse) { *bestmv = this_mv; @@ -742,8 +723,8 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, break; } - diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, + error_per_bit); if (diag < bestmse) { *bestmv = this_mv; @@ -784,8 +765,8 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, src_stride, &sse); } - left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, + error_per_bit); if (left < bestmse) { *bestmv = this_mv; @@ -799,7 +780,7 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK 
*x, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, src_stride, &sse); right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit, xd->allow_high_precision_mv); + error_per_bit); if (right < bestmse) { *bestmv = this_mv; @@ -822,8 +803,7 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, z, src_stride, &sse); } - up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); if (up < bestmse) { *bestmv = this_mv; @@ -835,8 +815,9 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, this_mv.as_mv.row += 4; thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, src_stride, &sse); - down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, + error_per_bit); + if (down < bestmse) { *bestmv = this_mv; @@ -923,8 +904,8 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, break; } - diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, + error_per_bit); if (diag < bestmse) { *bestmv = this_mv; @@ -933,12 +914,7 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, *sse1 = sse; } - if (x->e_mbd.allow_high_precision_mv) { - usehp = vp9_use_mv_hp(&ref_mv->as_mv); - } else { - usehp = 0; - } - if (!usehp) + if (!(xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv))) return bestmse; /* Now do 1/8th pixel */ @@ -968,8 +944,8 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, z, src_stride, &sse); } - left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, + error_per_bit); if (left < bestmse) { *bestmv = this_mv; @@ -982,7 +958,7 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, src_stride, &sse); right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit, xd->allow_high_precision_mv); + error_per_bit); if (right < bestmse) { *bestmv = this_mv; @@ -1005,8 +981,7 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, SP(this_mv.as_mv.col), SP(7), z, src_stride, &sse); } - up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); if (up < bestmse) { *bestmv = this_mv; @@ -1019,8 +994,8 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, src_stride, &sse); - down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, + error_per_bit); if (down < bestmse) { *bestmv = this_mv; @@ -1107,8 +1082,8 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, break; } - diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, + error_per_bit); if (diag < bestmse) { *bestmv = this_mv; @@ -1153,15 +1128,14 @@ int vp9_find_best_half_pixel_step(MACROBLOCK *x, // calculate central point error bestmse = vfp->vf(y, y_stride, z, src_stride, sse1); *distortion = 
bestmse; - bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); // go left then right and check error this_mv.as_mv.row = startmv.as_mv.row; this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, src_stride, &sse); - left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, + error_per_bit); if (left < bestmse) { *bestmv = this_mv; @@ -1173,7 +1147,7 @@ int vp9_find_best_half_pixel_step(MACROBLOCK *x, this_mv.as_mv.col += 8; thismse = vfp->svf_halfpix_h(y, y_stride, z, src_stride, &sse); right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit, xd->allow_high_precision_mv); + error_per_bit); if (right < bestmse) { *bestmv = this_mv; @@ -1186,8 +1160,7 @@ int vp9_find_best_half_pixel_step(MACROBLOCK *x, this_mv.as_mv.col = startmv.as_mv.col; this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, src_stride, &sse); - up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); if (up < bestmse) { *bestmv = this_mv; @@ -1198,8 +1171,8 @@ int vp9_find_best_half_pixel_step(MACROBLOCK *x, this_mv.as_mv.row += 8; thismse = vfp->svf_halfpix_v(y, y_stride, z, src_stride, &sse); - down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, + error_per_bit); if (down < bestmse) { *bestmv = this_mv; @@ -1238,8 +1211,8 @@ int vp9_find_best_half_pixel_step(MACROBLOCK *x, break; } - diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, + error_per_bit); if (diag < bestmse) { *bestmv = this_mv; @@ -1326,7 +1299,8 @@ int vp9_hex_search fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; // adjust ref_mv to make sure it is within MV range - clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); + clamp_mv(&ref_mv->as_mv, + x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); br = ref_mv->as_mv.row; bc = ref_mv->as_mv.col; @@ -1482,7 +1456,8 @@ int vp9_diamond_search_sad_c(MACROBLOCK *x, fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); + clamp_mv(&ref_mv->as_mv, + x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); ref_row = ref_mv->as_mv.row; ref_col = ref_mv->as_mv.col; *num00 = 0; @@ -1580,11 +1555,9 @@ int vp9_diamond_search_sad_c(MACROBLOCK *x, if (bestsad == INT_MAX) return INT_MAX; - return - fn_ptr->vf(what, what_stride, best_address, in_what_stride, - (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, - xd->allow_high_precision_mv); + return fn_ptr->vf(what, what_stride, best_address, in_what_stride, + (unsigned int *)(&thissad)) + mv_err_cost(&this_mv, center_mv, mvjcost, + mvcost, x->errorperbit); } int vp9_diamond_search_sadx4(MACROBLOCK *x, @@ -1624,7 +1597,8 @@ int vp9_diamond_search_sadx4(MACROBLOCK *x, fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); + clamp_mv(&ref_mv->as_mv, + x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); ref_row = ref_mv->as_mv.row; ref_col = ref_mv->as_mv.col; *num00 = 0; @@ -1754,11 +1728,9 @@ int vp9_diamond_search_sadx4(MACROBLOCK *x, if (bestsad == INT_MAX) return INT_MAX; - return - fn_ptr->vf(what, what_stride, best_address, in_what_stride, - (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, - xd->allow_high_precision_mv); + return fn_ptr->vf(what, what_stride, best_address, in_what_stride, + (unsigned int *)(&thissad)) + mv_err_cost(&this_mv, + center_mv, mvjcost, mvcost, x->errorperbit); } /* do_refine: If last step (1-away) of n-step search doesn't pick the center @@ -1914,8 +1886,7 @@ int vp9_full_search_sad_c(MACROBLOCK *x, int_mv *ref_mv, return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, - xd->allow_high_precision_mv); + mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit); else return INT_MAX; } @@ -2042,8 +2013,7 @@ int vp9_full_search_sadx3(MACROBLOCK *x, int_mv *ref_mv, return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, - xd->allow_high_precision_mv); + mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit); else return INT_MAX; } @@ -2197,8 +2167,7 @@ int vp9_full_search_sadx8(MACROBLOCK *x, int_mv *ref_mv, return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, - xd->allow_high_precision_mv); + mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit); else return INT_MAX; } @@ -2274,8 +2243,7 @@ int vp9_refining_search_sad_c(MACROBLOCK *x, return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, - xd->allow_high_precision_mv); + mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit); else return INT_MAX; } @@ -2381,8 +2349,7 @@ int vp9_refining_search_sadx4(MACROBLOCK *x, return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, - xd->allow_high_precision_mv); + mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit); else return INT_MAX; } @@ -2472,12 +2439,10 @@ int vp9_refining_search_8p_c(MACROBLOCK *x, if (bestsad < INT_MAX) { // FIXME(rbultje, yunqing): add full-pixel averaging variance functions // so we don't have to use the subpixel with xoff=0,yoff=0 here. 
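[Editor's note — not part of the patch] Every search routine touched in this file scores a candidate the same way: prediction error (SAD or variance) plus a rate term for coding the motion-vector difference. The high-precision flag is no longer threaded through mv_err_cost()/vp9_mv_bit_cost(); instead, sub-pel precision only gates the extra 1/8-pel refinement via xd->allow_high_precision_mv && vp9_use_mv_hp(), as the hunks above show. A schematic of the cost combination; the exact bit-cost tables and rounding live in vp9_mv_bit_cost(), and the shift below is illustrative, not the library's constant.

  /* Hypothetical sketch: candidate score = error + lambda-weighted MV rate. */
  static int candidate_score_sketch(unsigned int pred_error,
                                    int mv_bit_cost, int error_per_bit) {
    const int mv_rate_term = (mv_bit_cost * error_per_bit + 4096) >> 13;
    return (int)pred_error + mv_rate_term;
  }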
- int besterr = fn_ptr->svaf(best_address, in_what_stride, 0, 0, + return fn_ptr->svaf(best_address, in_what_stride, 0, 0, what, what_stride, (unsigned int *)(&thissad), second_pred) + - mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, - xd->allow_high_precision_mv); - return besterr; + mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit); } else { return INT_MAX; } diff --git a/libvpx/vp9/encoder/vp9_mcomp.h b/libvpx/vp9/encoder/vp9_mcomp.h index c13ea7597..097d33c65 100644 --- a/libvpx/vp9/encoder/vp9_mcomp.h +++ b/libvpx/vp9/encoder/vp9_mcomp.h @@ -25,7 +25,7 @@ void vp9_clamp_mv_min_max(MACROBLOCK *x, int_mv *ref_mv); int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost, - int *mvcost[2], int weight, int ishp); + int *mvcost[2], int weight); void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride); void vp9_init3smotion_compensation(MACROBLOCK *x, int stride); diff --git a/libvpx/vp9/encoder/vp9_onyx_if.c b/libvpx/vp9/encoder/vp9_onyx_if.c index e5f1a5c2c..db039959f 100644 --- a/libvpx/vp9/encoder/vp9_onyx_if.c +++ b/libvpx/vp9/encoder/vp9_onyx_if.c @@ -243,16 +243,17 @@ void vp9_initialize_enc() { static void setup_features(VP9_COMP *cpi) { MACROBLOCKD *xd = &cpi->mb.e_mbd; - struct loopfilter *lf = &xd->lf; + struct loopfilter *const lf = &xd->lf; + struct segmentation *const seg = &xd->seg; // Set up default state for MB feature flags - xd->seg.enabled = 0; + seg->enabled = 0; - xd->seg.update_map = 0; - xd->seg.update_data = 0; - vpx_memset(xd->seg.tree_probs, 255, sizeof(xd->seg.tree_probs)); + seg->update_map = 0; + seg->update_data = 0; + vpx_memset(seg->tree_probs, 255, sizeof(seg->tree_probs)); - vp9_clearall_segfeatures(&xd->seg); + vp9_clearall_segfeatures(seg); lf->mode_ref_delta_enabled = 0; lf->mode_ref_delta_update = 0; @@ -324,6 +325,7 @@ static int compute_qdelta(VP9_COMP *cpi, double qstart, double qtarget) { static void configure_static_seg_features(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &cpi->mb.e_mbd; + struct segmentation *seg = &xd->seg; int high_q = (int)(cpi->avg_q > 48.0); int qi_delta; @@ -332,26 +334,26 @@ static void configure_static_seg_features(VP9_COMP *cpi) { if (cm->frame_type == KEY_FRAME) { // Clear down the global segmentation map vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); - xd->seg.update_map = 0; - xd->seg.update_data = 0; + seg->update_map = 0; + seg->update_data = 0; cpi->static_mb_pct = 0; // Disable segmentation vp9_disable_segmentation((VP9_PTR)cpi); // Clear down the segment features. - vp9_clearall_segfeatures(&xd->seg); + vp9_clearall_segfeatures(seg); } else if (cpi->refresh_alt_ref_frame) { // If this is an alt ref frame // Clear down the global segmentation map vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); - xd->seg.update_map = 0; - xd->seg.update_data = 0; + seg->update_map = 0; + seg->update_data = 0; cpi->static_mb_pct = 0; // Disable segmentation and individual segment features by default vp9_disable_segmentation((VP9_PTR)cpi); - vp9_clearall_segfeatures(&xd->seg); + vp9_clearall_segfeatures(seg); // Scan frames from current to arf frame. // This function re-enables segmentation if appropriate. @@ -359,45 +361,45 @@ static void configure_static_seg_features(VP9_COMP *cpi) { // If segmentation was enabled set those features needed for the // arf itself. 
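[Editor's note — not part of the patch] The segment features configured just below act as deltas (seg->abs_delta = SEGMENT_DELTADATA), so the ALT_Q value derived from compute_qdelta(cpi, avg_q, avg_q * 0.875) nudges segment 1 toward a quantizer roughly 12.5% below the recent average for the ARF. A schematic of how such a delta is consumed on the decode/quantize side; the real helpers are vp9_segfeature_active()/vp9_get_segdata(), and the 0..255 clamp assumes 8-bit VP9's q-index range.

  /* Hypothetical sketch: applying a SEG_LVL_ALT_Q delta in delta-data mode. */
  static int segment_qindex_sketch(int base_qindex, int alt_q_delta) {
    int q = base_qindex + alt_q_delta;   /* delta, not absolute, in this mode */
    if (q < 0) q = 0;
    if (q > 255) q = 255;                /* assumed 0..255 q-index range */
    return q;
  }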
- if (xd->seg.enabled) { - xd->seg.update_map = 1; - xd->seg.update_data = 1; + if (seg->enabled) { + seg->update_map = 1; + seg->update_data = 1; qi_delta = compute_qdelta(cpi, cpi->avg_q, (cpi->avg_q * 0.875)); - vp9_set_segdata(&xd->seg, 1, SEG_LVL_ALT_Q, (qi_delta - 2)); - vp9_set_segdata(&xd->seg, 1, SEG_LVL_ALT_LF, -2); + vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, (qi_delta - 2)); + vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2); - vp9_enable_segfeature(&xd->seg, 1, SEG_LVL_ALT_Q); - vp9_enable_segfeature(&xd->seg, 1, SEG_LVL_ALT_LF); + vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q); + vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_LF); // Where relevant assume segment data is delta data - xd->seg.abs_delta = SEGMENT_DELTADATA; + seg->abs_delta = SEGMENT_DELTADATA; } - } else if (xd->seg.enabled) { + } else if (seg->enabled) { // All other frames if segmentation has been enabled // First normal frame in a valid gf or alt ref group if (cpi->frames_since_golden == 0) { // Set up segment features for normal frames in an arf group if (cpi->source_alt_ref_active) { - xd->seg.update_map = 0; - xd->seg.update_data = 1; - xd->seg.abs_delta = SEGMENT_DELTADATA; + seg->update_map = 0; + seg->update_data = 1; + seg->abs_delta = SEGMENT_DELTADATA; qi_delta = compute_qdelta(cpi, cpi->avg_q, (cpi->avg_q * 1.125)); - vp9_set_segdata(&xd->seg, 1, SEG_LVL_ALT_Q, (qi_delta + 2)); - vp9_enable_segfeature(&xd->seg, 1, SEG_LVL_ALT_Q); + vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, (qi_delta + 2)); + vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q); - vp9_set_segdata(&xd->seg, 1, SEG_LVL_ALT_LF, -2); - vp9_enable_segfeature(&xd->seg, 1, SEG_LVL_ALT_LF); + vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2); + vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_LF); // Segment coding disabled for compred testing if (high_q || (cpi->static_mb_pct == 100)) { - vp9_set_segdata(&xd->seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME); - vp9_enable_segfeature(&xd->seg, 1, SEG_LVL_REF_FRAME); - vp9_enable_segfeature(&xd->seg, 1, SEG_LVL_SKIP); + vp9_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME); + vp9_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME); + vp9_enable_segfeature(seg, 1, SEG_LVL_SKIP); } } else { // Disable segmentation and clear down features if alt ref @@ -407,10 +409,10 @@ static void configure_static_seg_features(VP9_COMP *cpi) { vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); - xd->seg.update_map = 0; - xd->seg.update_data = 0; + seg->update_map = 0; + seg->update_data = 0; - vp9_clearall_segfeatures(&xd->seg); + vp9_clearall_segfeatures(seg); } } else if (cpi->is_src_frame_alt_ref) { // Special case where we are coding over the top of a previous @@ -418,28 +420,28 @@ static void configure_static_seg_features(VP9_COMP *cpi) { // Segment coding disabled for compred testing // Enable ref frame features for segment 0 as well - vp9_enable_segfeature(&xd->seg, 0, SEG_LVL_REF_FRAME); - vp9_enable_segfeature(&xd->seg, 1, SEG_LVL_REF_FRAME); + vp9_enable_segfeature(seg, 0, SEG_LVL_REF_FRAME); + vp9_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME); // All mbs should use ALTREF_FRAME - vp9_clear_segdata(&xd->seg, 0, SEG_LVL_REF_FRAME); - vp9_set_segdata(&xd->seg, 0, SEG_LVL_REF_FRAME, ALTREF_FRAME); - vp9_clear_segdata(&xd->seg, 1, SEG_LVL_REF_FRAME); - vp9_set_segdata(&xd->seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME); + vp9_clear_segdata(seg, 0, SEG_LVL_REF_FRAME); + vp9_set_segdata(seg, 0, SEG_LVL_REF_FRAME, ALTREF_FRAME); + vp9_clear_segdata(seg, 1, SEG_LVL_REF_FRAME); + vp9_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME); // Skip all MBs 
if high Q (0,0 mv and skip coeffs) if (high_q) { - vp9_enable_segfeature(&xd->seg, 0, SEG_LVL_SKIP); - vp9_enable_segfeature(&xd->seg, 1, SEG_LVL_SKIP); + vp9_enable_segfeature(seg, 0, SEG_LVL_SKIP); + vp9_enable_segfeature(seg, 1, SEG_LVL_SKIP); } // Enable data update - xd->seg.update_data = 1; + seg->update_data = 1; } else { // All other frames. // No updates.. leave things as they are. - xd->seg.update_map = 0; - xd->seg.update_data = 0; + seg->update_map = 0; + seg->update_data = 0; } } } @@ -718,7 +720,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->reduce_first_step_size = 0; sf->auto_mv_step_size = 0; sf->max_step_search_steps = MAX_MVSEARCH_STEPS; - sf->comp_inter_joint_search_thresh = BLOCK_SIZE_AB4X4; + sf->comp_inter_joint_search_thresh = BLOCK_4X4; sf->adaptive_rd_thresh = 0; sf->use_lastframe_partitioning = 0; sf->tx_size_search_method = USE_FULL_RD; @@ -731,10 +733,13 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->use_one_partition_size_always = 0; sf->less_rectangular_check = 0; sf->use_square_partition_only = 0; - sf->use_partitions_less_than = 0; - sf->less_than_block_size = BLOCK_SIZE_MB16X16; - sf->use_partitions_greater_than = 0; - sf->greater_than_block_size = BLOCK_SIZE_SB8X8; + sf->auto_min_max_partition_size = 0; + sf->auto_min_max_partition_interval = 0; + sf->auto_min_max_partition_count = 0; + // sf->use_max_partition_size = 0; + sf->max_partition_size = BLOCK_64X64; + // sf->use_min_partition_size = 0; + sf->min_partition_size = BLOCK_4X4; sf->adjust_partitioning_from_last_frame = 0; sf->last_partitioning_redo_frequency = 4; sf->disable_splitmv = 0; @@ -745,8 +750,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->use_uv_intra_rd_estimate = 0; sf->using_small_partition_info = 0; // Skip any mode not chosen at size < X for all sizes > X - // Hence BLOCK_SIZE_SB64X64 (skip is off) - sf->unused_mode_skip_lvl = BLOCK_SIZE_SB64X64; + // Hence BLOCK_64X64 (skip is off) + sf->unused_mode_skip_lvl = BLOCK_64X64; #if CONFIG_MULTIPLE_ARF // Switch segmentation off. @@ -769,8 +774,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) { #endif sf->use_avoid_tested_higherror = 1; sf->adaptive_rd_thresh = 1; - sf->last_chroma_intra_mode = TM_PRED; - if (speed == 1) { sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES; sf->less_rectangular_check = 1; @@ -784,14 +787,20 @@ void vp9_set_speed_features(VP9_COMP *cpi) { cpi->common.show_frame == 0); sf->disable_splitmv = (MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0; - sf->unused_mode_skip_lvl = BLOCK_SIZE_SB32X32; + sf->unused_mode_skip_lvl = BLOCK_32X32; sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA; - sf->last_chroma_intra_mode = H_PRED; + FLAG_SKIP_COMP_BESTINTRA | + FLAG_SKIP_INTRA_LOWVAR; + sf->use_uv_intra_rd_estimate = 1; sf->use_rd_breakout = 1; sf->skip_encode_sb = 1; sf->auto_mv_step_size = 1; + + sf->auto_min_max_partition_size = 1; + // sf->use_max_partition_size = 1; + // sf->use_min_partition_size = 1; + sf->auto_min_max_partition_interval = 1; } if (speed == 2) { sf->adjust_thresholds_by_speed = 1; @@ -801,7 +810,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->use_lastframe_partitioning = 1; sf->adjust_partitioning_from_last_frame = 1; sf->last_partitioning_redo_frequency = 3; - sf->unused_mode_skip_lvl = BLOCK_SIZE_SB32X32; + sf->unused_mode_skip_lvl = BLOCK_32X32; sf->tx_size_search_method = ((cpi->common.frame_type == KEY_FRAME || cpi->common.intra_only || cpi->common.show_frame == 0) ? 
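Note: the configure_static_seg_features() hunks above switch every access from xd->seg to a local struct segmentation pointer while keeping the same enable-feature / set-data pattern. The sketch below is a minimal, self-contained illustration of that pattern -- a per-segment bitmask of enabled features plus a per-segment data table -- using made-up names (segmentation_sketch, enable_segfeature, set_segdata, segfeature_active) and sizes, not the actual vp9_seg_common.h layout.

#include <string.h>

#define MAX_SEGMENTS 8
enum { SEG_LVL_ALT_Q = 0, SEG_LVL_ALT_LF, SEG_LVL_REF_FRAME, SEG_LVL_SKIP, SEG_LVL_MAX };

struct segmentation_sketch {
  unsigned char enabled, update_map, update_data, abs_delta;
  unsigned int feature_mask[MAX_SEGMENTS];            /* one bit per feature */
  short feature_data[MAX_SEGMENTS][SEG_LVL_MAX];      /* one value per feature */
};

static void enable_segfeature(struct segmentation_sketch *seg, int id, int f) {
  seg->feature_mask[id] |= 1u << f;
}

static void set_segdata(struct segmentation_sketch *seg, int id, int f, int v) {
  seg->feature_data[id][f] = (short)v;
}

static int segfeature_active(const struct segmentation_sketch *seg, int id, int f) {
  return seg->enabled && (seg->feature_mask[id] & (1u << f));
}

static void clearall_segfeatures(struct segmentation_sketch *seg) {
  memset(seg->feature_mask, 0, sizeof(seg->feature_mask));
  memset(seg->feature_data, 0, sizeof(seg->feature_data));
}

int main(void) {
  struct segmentation_sketch seg;
  memset(&seg, 0, sizeof(seg));
  seg.enabled = 1;
  seg.update_map = 1;
  seg.update_data = 1;
  /* Mirrors the alt-ref group hunk above: a quantizer delta and a loop-filter
   * delta stored on segment 1.  The real encoder derives the q delta from
   * compute_qdelta() rather than using a constant. */
  enable_segfeature(&seg, 1, SEG_LVL_ALT_Q);
  set_segdata(&seg, 1, SEG_LVL_ALT_Q, -8);
  enable_segfeature(&seg, 1, SEG_LVL_ALT_LF);
  set_segdata(&seg, 1, SEG_LVL_ALT_LF, -2);
  if (!segfeature_active(&seg, 1, SEG_LVL_ALT_Q))
    return 1;
  clearall_segfeatures(&seg);
  return 0;
}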
@@ -810,11 +819,13 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_COMP_REFMISMATCH; + FLAG_SKIP_COMP_REFMISMATCH | + FLAG_SKIP_INTRA_LOWVAR | + FLAG_EARLY_TERMINATE; sf->last_chroma_intra_mode = DC_PRED; + sf->use_uv_intra_rd_estimate = 1; sf->use_rd_breakout = 1; sf->skip_encode_sb = 1; - sf->use_uv_intra_rd_estimate = 1; sf->using_small_partition_info = 1; sf->disable_splitmv = (MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0; @@ -831,7 +842,9 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_COMP_REFMISMATCH; + FLAG_SKIP_COMP_REFMISMATCH | + FLAG_SKIP_INTRA_LOWVAR | + FLAG_EARLY_TERMINATE; sf->use_rd_breakout = 1; sf->skip_encode_sb = 1; sf->disable_splitmv = 1; @@ -840,7 +853,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { if (speed == 4) { sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES; sf->use_one_partition_size_always = 1; - sf->always_this_block_size = BLOCK_SIZE_MB16X16; + sf->always_this_block_size = BLOCK_16X16; sf->tx_size_search_method = ((cpi->common.frame_type == KEY_FRAME || cpi->common.intra_only || cpi->common.show_frame == 0) ? @@ -849,7 +862,9 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_COMP_REFMISMATCH; + FLAG_SKIP_COMP_REFMISMATCH | + FLAG_SKIP_INTRA_LOWVAR | + FLAG_EARLY_TERMINATE; sf->use_rd_breakout = 1; sf->optimize_coefficients = 0; sf->auto_mv_step_size = 1; @@ -861,15 +876,15 @@ void vp9_set_speed_features(VP9_COMP *cpi) { /* if (speed == 2) { sf->first_step = 0; - sf->comp_inter_joint_search_thresh = BLOCK_SIZE_SB8X8; - sf->use_partitions_less_than = 1; - sf->less_than_block_size = BLOCK_SIZE_MB16X16; + sf->comp_inter_joint_search_thresh = BLOCK_8X8; + sf->use_max_partition_size = 1; + sf->max_partition_size = BLOCK_16X16; } if (speed == 3) { sf->first_step = 0; - sf->comp_inter_joint_search_thresh = BLOCK_SIZE_SB8X8; - sf->use_partitions_greater_than = 1; - sf->greater_than_block_size = BLOCK_SIZE_SB8X8; + sf->comp_inter_joint_search_thresh = BLOCK_B8X8; + sf->use_min_partition_size = 1; + sf->min_partition_size = BLOCK_8X8; } */ @@ -1383,7 +1398,7 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { cm = &cpi->common; - vpx_memset(cpi, 0, sizeof(VP9_COMP)); + vp9_zero(*cpi); if (setjmp(cm->error.jmp)) { VP9_PTR ptr = ctx.ptr; @@ -1833,7 +1848,10 @@ void vp9_remove_compressor(VP9_PTR *ptr) { { printf("\n_pick_loop_filter_level:%d\n", cpi->time_pick_lpf / 1000); printf("\n_frames recive_data encod_mb_row compress_frame Total\n"); - printf("%6d %10ld %10ld %10ld %10ld\n", cpi->common.current_video_frame, cpi->time_receive_data / 1000, cpi->time_encode_mb_row / 1000, cpi->time_compress_data / 1000, (cpi->time_receive_data + cpi->time_compress_data) / 1000); + printf("%6d %10ld %10ld %10ld %10ld\n", cpi->common.current_video_frame, + cpi->time_receive_data / 1000, cpi->time_encode_sb_row / 1000, + cpi->time_compress_data / 1000, + (cpi->time_receive_data + cpi->time_compress_data) / 1000); } #endif @@ -2406,8 +2424,9 @@ static void update_reference_frames(VP9_COMP * const cpi) { static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { MACROBLOCKD *xd = &cpi->mb.e_mbd; + struct loopfilter *lf = &xd->lf; if (xd->lossless) { - xd->lf.filter_level = 0; + 
lf->filter_level = 0; } else { struct vpx_usec_timer timer; @@ -2421,9 +2440,9 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer); } - if (xd->lf.filter_level > 0) { - vp9_set_alt_lf_level(cpi, xd->lf.filter_level); - vp9_loop_filter_frame(cm, xd, xd->lf.filter_level, 0); + if (lf->filter_level > 0) { + vp9_set_alt_lf_level(cpi, lf->filter_level); + vp9_loop_filter_frame(cm, xd, lf->filter_level, 0); } vp9_extend_frame_inner_borders(cm->frame_to_show, @@ -2513,6 +2532,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, SPEED_FEATURES *sf = &cpi->sf; unsigned int max_mv_def = MIN(cpi->common.width, cpi->common.height); + struct segmentation *seg = &xd->seg; #if RESET_FOREACH_FILTER int q_low0; int q_high0; @@ -2612,9 +2632,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, setup_features(cpi); // If segmentation is enabled force a map update for key frames - if (xd->seg.enabled) { - xd->seg.update_map = 1; - xd->seg.update_data = 1; + if (seg->enabled) { + seg->update_map = 1; + seg->update_data = 1; } // The alternate reference frame cannot be active for a key frame @@ -2818,7 +2838,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } #endif loop_count = 0; - vpx_memset(cpi->rd_tx_select_threshes, 0, sizeof(cpi->rd_tx_select_threshes)); + vp9_zero(cpi->rd_tx_select_threshes); if (cm->frame_type != KEY_FRAME) { /* TODO: Decide this more intelligently */ @@ -3173,7 +3193,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, if (!cpi->common.error_resilient_mode && !cpi->common.frame_parallel_decoding_mode) { vp9_adapt_mode_probs(&cpi->common); - vp9_adapt_mode_context(&cpi->common); vp9_adapt_mv_probs(&cpi->common, cpi->mb.e_mbd.allow_high_precision_mv); } } @@ -3994,7 +4013,7 @@ int vp9_set_roimap(VP9_PTR comp, unsigned char *map, unsigned int rows, unsigned int threshold[MAX_SEGMENTS]) { VP9_COMP *cpi = (VP9_COMP *) comp; signed char feature_data[SEG_LVL_MAX][MAX_SEGMENTS]; - MACROBLOCKD *xd = &cpi->mb.e_mbd; + struct segmentation *seg = &cpi->mb.e_mbd.seg; int i; if (cpi->common.mb_rows != rows || cpi->common.mb_cols != cols) @@ -4021,14 +4040,14 @@ int vp9_set_roimap(VP9_PTR comp, unsigned char *map, unsigned int rows, // Enable the loop and quant changes in the feature mask for (i = 0; i < MAX_SEGMENTS; i++) { if (delta_q[i]) - vp9_enable_segfeature(&xd->seg, i, SEG_LVL_ALT_Q); + vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q); else - vp9_disable_segfeature(&xd->seg, i, SEG_LVL_ALT_Q); + vp9_disable_segfeature(seg, i, SEG_LVL_ALT_Q); if (delta_lf[i]) - vp9_enable_segfeature(&xd->seg, i, SEG_LVL_ALT_LF); + vp9_enable_segfeature(seg, i, SEG_LVL_ALT_LF); else - vp9_disable_segfeature(&xd->seg, i, SEG_LVL_ALT_LF); + vp9_disable_segfeature(seg, i, SEG_LVL_ALT_LF); } // Initialise the feature data structure diff --git a/libvpx/vp9/encoder/vp9_onyx_int.h b/libvpx/vp9/encoder/vp9_onyx_int.h index 0798927bd..c258829c2 100644 --- a/libvpx/vp9/encoder/vp9_onyx_int.h +++ b/libvpx/vp9/encoder/vp9_onyx_int.h @@ -77,7 +77,7 @@ typedef struct { // 0 = ZERO_MV, MV signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; - vp9_coeff_probs_model coef_probs[TX_SIZE_MAX_SB][BLOCK_TYPES]; + vp9_coeff_probs_model coef_probs[TX_SIZES][BLOCK_TYPES]; vp9_prob y_mode_prob[4][VP9_INTRA_MODES - 1]; vp9_prob uv_mode_prob[VP9_INTRA_MODES][VP9_INTRA_MODES - 1]; @@ -145,6 +145,8 @@ typedef struct { // const MODE_DEFINITION vp9_mode_order[MAX_MODES] used in the rd code. 
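Note: several hunks in this change replace raw vpx_memset(ptr, 0, sizeof(...)) calls with vp9_zero(obj) (the compressor struct, rd_tx_select_threshes, and later the BEST_SEG_INFO reset). Presumably vp9_zero is a sizeof-based wrapper; the macro below is a sketch of that idea rather than a quote of the libvpx header.

#include <string.h>

/* Sketch of a sizeof-based zeroing macro in the spirit of vp9_zero(): taking
 * the object rather than a pointer lets sizeof pick up the whole type, which
 * avoids the memset(ptr, 0, sizeof(ptr)) class of mistake the raw calls were
 * exposed to. */
#define zero_struct(obj) memset(&(obj), 0, sizeof(obj))

struct rd_threshes { int v[4][8]; };

int main(void) {
  struct rd_threshes threshes;
  zero_struct(threshes);   /* same effect as memset(&threshes, 0, sizeof(threshes)) */
  return threshes.v[3][7];
}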
typedef enum { THR_NEARESTMV, + THR_DC, + THR_NEARESTA, THR_NEARESTG, THR_NEWMV, @@ -152,8 +154,6 @@ typedef enum { THR_NEARMV, THR_COMP_NEARESTGA, - THR_DC, - THR_NEWG, THR_NEWA, THR_NEARA, @@ -224,6 +224,10 @@ typedef enum { // skips oblique intra modes at angles 27, 63, 117, 153 if the best // intra so far is not one of the neighboring directions FLAG_SKIP_INTRA_DIRMISMATCH = 16, + + // skips intra modes other than DC_PRED if the source variance + // is small + FLAG_SKIP_INTRA_LOWVAR = 32, } MODE_SEARCH_SKIP_LOGIC; typedef struct { @@ -258,10 +262,13 @@ typedef struct { int unused_mode_skip_lvl; int reference_masking; BLOCK_SIZE_TYPE always_this_block_size; - int use_partitions_greater_than; - BLOCK_SIZE_TYPE greater_than_block_size; - int use_partitions_less_than; - BLOCK_SIZE_TYPE less_than_block_size; + int auto_min_max_partition_size; + int auto_min_max_partition_interval; + int auto_min_max_partition_count; + BLOCK_SIZE_TYPE min_partition_size; + BLOCK_SIZE_TYPE max_partition_size; + // int use_min_partition_size; // not used in code + // int use_max_partition_size; int adjust_partitioning_from_last_frame; int last_partitioning_redo_frequency; int disable_splitmv; @@ -370,9 +377,9 @@ typedef struct VP9_COMP { unsigned int single_ref_count[REF_CONTEXTS][2][2]; unsigned int comp_ref_count[REF_CONTEXTS][2]; - int64_t rd_tx_select_diff[NB_TXFM_MODES]; + int64_t rd_tx_select_diff[TX_MODES]; // FIXME(rbultje) can this overflow? - int rd_tx_select_threshes[4][NB_TXFM_MODES]; + int rd_tx_select_threshes[4][TX_MODES]; int64_t rd_filter_diff[VP9_SWITCHABLE_FILTERS + 1]; int64_t rd_filter_threshes[4][VP9_SWITCHABLE_FILTERS + 1]; @@ -457,9 +464,9 @@ typedef struct VP9_COMP { nmv_context_counts NMVcount; - vp9_coeff_count coef_counts[TX_SIZE_MAX_SB][BLOCK_TYPES]; - vp9_coeff_probs_model frame_coef_probs[TX_SIZE_MAX_SB][BLOCK_TYPES]; - vp9_coeff_stats frame_branch_ct[TX_SIZE_MAX_SB][BLOCK_TYPES]; + vp9_coeff_count coef_counts[TX_SIZES][BLOCK_TYPES]; + vp9_coeff_probs_model frame_coef_probs[TX_SIZES][BLOCK_TYPES]; + vp9_coeff_stats frame_branch_ct[TX_SIZES][BLOCK_TYPES]; int gfu_boost; int last_boost; @@ -527,7 +534,7 @@ typedef struct VP9_COMP { uint64_t time_receive_data; uint64_t time_compress_data; uint64_t time_pick_lpf; - uint64_t time_encode_mb_row; + uint64_t time_encode_sb_row; struct twopass_rc { unsigned int section_intra_rating; @@ -619,7 +626,7 @@ typedef struct VP9_COMP { unsigned int switchable_interp_count[VP9_SWITCHABLE_FILTERS + 1] [VP9_SWITCHABLE_FILTERS]; - unsigned int txfm_stepdown_count[TX_SIZE_MAX_SB]; + unsigned int txfm_stepdown_count[TX_SIZES]; int initial_width; int initial_height; diff --git a/libvpx/vp9/encoder/vp9_rdopt.c b/libvpx/vp9/encoder/vp9_rdopt.c index 843cf3f03..2d932500e 100644 --- a/libvpx/vp9/encoder/vp9_rdopt.c +++ b/libvpx/vp9/encoder/vp9_rdopt.c @@ -54,6 +54,8 @@ DECLARE_ALIGNED(16, extern const uint8_t, const MODE_DEFINITION vp9_mode_order[MAX_MODES] = { {NEARESTMV, LAST_FRAME, NONE}, + {DC_PRED, INTRA_FRAME, NONE}, + {NEARESTMV, ALTREF_FRAME, NONE}, {NEARESTMV, GOLDEN_FRAME, NONE}, {NEWMV, LAST_FRAME, NONE}, @@ -61,8 +63,6 @@ const MODE_DEFINITION vp9_mode_order[MAX_MODES] = { {NEARMV, LAST_FRAME, NONE}, {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME}, - {DC_PRED, INTRA_FRAME, NONE}, - {NEWMV, GOLDEN_FRAME, NONE}, {NEWMV, ALTREF_FRAME, NONE}, {NEARMV, ALTREF_FRAME, NONE}, @@ -109,7 +109,7 @@ static int rd_thresh_block_size_factor[BLOCK_SIZE_TYPES] = #define MAX_RD_THRESH_FREQ_FACT 32 #define MAX_RD_THRESH_FREQ_INC 1 -static void 
fill_token_costs(vp9_coeff_count (*c)[BLOCK_TYPES][2], +static void fill_token_costs(vp9_coeff_cost *c, vp9_coeff_probs_model (*p)[BLOCK_TYPES]) { int i, j, k, l; TX_SIZE t; @@ -120,12 +120,12 @@ static void fill_token_costs(vp9_coeff_count (*c)[BLOCK_TYPES][2], for (l = 0; l < PREV_COEF_CONTEXTS; l++) { vp9_prob probs[ENTROPY_NODES]; vp9_model_to_full_probs(p[t][i][j][k][l], probs); - vp9_cost_tokens((int *)c[t][i][j][0][k][l], probs, + vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs, vp9_coef_tree); - vp9_cost_tokens_skip((int *)c[t][i][j][1][k][l], probs, + vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs, vp9_coef_tree); - assert(c[t][i][j][0][k][l][DCT_EOB_TOKEN] == - c[t][i][j][1][k][l][DCT_EOB_TOKEN]); + assert(c[t][i][j][k][0][l][DCT_EOB_TOKEN] == + c[t][i][j][k][1][l][DCT_EOB_TOKEN]); } } @@ -453,7 +453,7 @@ static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize, int *out_rate_sum, int64_t *out_dist_sum, int *out_skip) { int t = 4, j, k; - BLOCK_SIZE_TYPE bs = BLOCK_SIZE_AB4X4; + BLOCK_SIZE_TYPE bs = BLOCK_4X4; struct macroblock_plane *const p = &x->plane[0]; struct macroblockd_plane *const pd = &xd->plane[0]; const int width = plane_block_width(bsize, pd); @@ -513,14 +513,19 @@ int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, return error; } -static const int16_t band_counts[TX_SIZE_MAX_SB][8] = { - { 1, 2, 3, 4, 3, 16 - 13 }, - { 1, 2, 3, 4, 11, 64 - 21 }, - { 1, 2, 3, 4, 11, 256 - 21 }, - { 1, 2, 3, 4, 11, 1024 - 21 }, +/* The trailing '0' is a terminator which is used inside cost_coeffs() to + * decide whether to include cost of a trailing EOB node or not (i.e. we + * can skip this if the last coefficient in this transform block, e.g. the + * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block, + * were non-zero). 
*/ +static const int16_t band_counts[TX_SIZES][8] = { + { 1, 2, 3, 4, 3, 16 - 13, 0 }, + { 1, 2, 3, 4, 11, 64 - 21, 0 }, + { 1, 2, 3, 4, 11, 256 - 21, 0 }, + { 1, 2, 3, 4, 11, 1024 - 21, 0 }, }; -static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, +static INLINE int cost_coeffs(MACROBLOCK *mb, int plane, int block, PLANE_TYPE type, ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L, TX_SIZE tx_size, @@ -528,11 +533,11 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, MACROBLOCKD *const xd = &mb->e_mbd; MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; int pt, c, cost; - const int16_t *band_count = band_counts[tx_size]; + const int16_t *band_count = &band_counts[tx_size][1]; const int eob = xd->plane[plane].eobs[block]; const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16); const int ref = mbmi->ref_frame[0] != INTRA_FRAME; - unsigned int (*token_costs)[COEF_BANDS][PREV_COEF_CONTEXTS] + unsigned int (*token_costs)[2][PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS] = mb->token_costs[tx_size][type][ref]; ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L; uint8_t token_cache[1024]; @@ -552,13 +557,14 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, cost = token_costs[0][0][pt][DCT_EOB_TOKEN]; c = 0; } else { - int v, prev_t, band = 1, band_left = band_count[1]; + int v, prev_t, band_left = *band_count++; // dc token v = qcoeff_ptr[0]; prev_t = vp9_dct_value_tokens_ptr[v].token; - cost = token_costs[0][0][pt][prev_t] + vp9_dct_value_cost_ptr[v]; + cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v]; token_cache[0] = vp9_pt_energy_class[prev_t]; + ++token_costs; // ac tokens for (c = 1; c < eob; c++) { @@ -568,18 +574,19 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, v = qcoeff_ptr[rc]; t = vp9_dct_value_tokens_ptr[v].token; pt = get_coef_context(nb, token_cache, c); - cost += token_costs[!prev_t][band][pt][t] + vp9_dct_value_cost_ptr[v]; + cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v]; token_cache[rc] = vp9_pt_energy_class[t]; prev_t = t; if (!--band_left) { - band_left = band_count[++band]; + band_left = *band_count++; + ++token_costs; } } // eob token - if (band < 6) { + if (band_left) { pt = get_coef_context(nb, token_cache, c); - cost += token_costs[0][band][pt][DCT_EOB_TOKEN]; + cost += (*token_costs)[0][pt][DCT_EOB_TOKEN]; } } @@ -639,7 +646,7 @@ static void rate_block(int plane, int block, BLOCK_SIZE_TYPE bsize, txfrm_block_to_raster_xy(xd, bsize, plane, block, args->tx_size * 2, &x_idx, &y_idx); - args->rate += cost_coeffs(args->cm, args->x, plane, block, + args->rate += cost_coeffs(args->x, plane, block, xd->plane[plane].plane_type, args->t_above + x_idx, args->t_left + y_idx, args->tx_size, args->scan, args->nb); @@ -831,7 +838,7 @@ static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x, int64_t ref_best_rd, BLOCK_SIZE_TYPE bs) { const TX_SIZE max_txfm_size = TX_32X32 - - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16); + - (bs < BLOCK_32X32) - (bs < BLOCK_16X16); VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; @@ -859,25 +866,25 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int (*r)[2], int *rate, int64_t *d, int64_t *distortion, int *s, int *skip, - int64_t txfm_cache[NB_TXFM_MODES], + int64_t tx_cache[TX_MODES], BLOCK_SIZE_TYPE bs) { - const TX_SIZE max_txfm_size = TX_32X32 - - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16); + const 
TX_SIZE max_tx_size = TX_32X32 + - (bs < BLOCK_32X32) - (bs < BLOCK_16X16); VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd); - int64_t rd[TX_SIZE_MAX_SB][2]; + int64_t rd[TX_SIZES][2]; int n, m; int s0, s1; const vp9_prob *tx_probs = get_tx_probs2(xd, &cm->fc.tx_probs); - for (n = TX_4X4; n <= max_txfm_size; n++) { + for (n = TX_4X4; n <= max_tx_size; n++) { r[n][1] = r[n][0]; if (r[n][0] == INT_MAX) continue; - for (m = 0; m <= n - (n == max_txfm_size); m++) { + for (m = 0; m <= n - (n == max_tx_size); m++) { if (m == n) r[n][1] += vp9_cost_zero(tx_probs[m]); else @@ -889,7 +896,7 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, s0 = vp9_cost_bit(skip_prob, 0); s1 = vp9_cost_bit(skip_prob, 1); - for (n = TX_4X4; n <= max_txfm_size; n++) { + for (n = TX_4X4; n <= max_tx_size; n++) { if (d[n] == INT64_MAX) { rd[n][0] = rd[n][1] = INT64_MAX; continue; @@ -902,13 +909,13 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, } } - if (max_txfm_size == TX_32X32 && + if (max_tx_size == TX_32X32 && (cm->tx_mode == ALLOW_32X32 || (cm->tx_mode == TX_MODE_SELECT && rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] && rd[TX_32X32][1] < rd[TX_4X4][1]))) { mbmi->txfm_size = TX_32X32; - } else if (max_txfm_size >= TX_16X16 && + } else if (max_tx_size >= TX_16X16 && (cm->tx_mode == ALLOW_16X16 || cm->tx_mode == ALLOW_32X32 || (cm->tx_mode == TX_MODE_SELECT && @@ -928,34 +935,34 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, *rate = r[mbmi->txfm_size][cm->tx_mode == TX_MODE_SELECT]; *skip = s[mbmi->txfm_size]; - txfm_cache[ONLY_4X4] = rd[TX_4X4][0]; - txfm_cache[ALLOW_8X8] = rd[TX_8X8][0]; - txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0]; - txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0]; - if (max_txfm_size == TX_32X32 && + tx_cache[ONLY_4X4] = rd[TX_4X4][0]; + tx_cache[ALLOW_8X8] = rd[TX_8X8][0]; + tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0]; + tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0]; + if (max_tx_size == TX_32X32 && rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] && rd[TX_32X32][1] < rd[TX_4X4][1]) - txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1]; - else if (max_txfm_size >= TX_16X16 && + tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1]; + else if (max_tx_size >= TX_16X16 && rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1]) - txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1]; + tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1]; else - txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ? + tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ? 
rd[TX_4X4][1] : rd[TX_8X8][1]; - if (max_txfm_size == TX_32X32 && + if (max_tx_size == TX_32X32 && rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] && rd[TX_32X32][1] < rd[TX_4X4][1]) { cpi->txfm_stepdown_count[0]++; - } else if (max_txfm_size >= TX_16X16 && + } else if (max_tx_size >= TX_16X16 && rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1]) { - cpi->txfm_stepdown_count[max_txfm_size - TX_16X16]++; + cpi->txfm_stepdown_count[max_tx_size - TX_16X16]++; } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) { - cpi->txfm_stepdown_count[max_txfm_size - TX_8X8]++; + cpi->txfm_stepdown_count[max_tx_size - TX_8X8]++; } else { - cpi->txfm_stepdown_count[max_txfm_size - TX_4X4]++; + cpi->txfm_stepdown_count[max_tx_size - TX_4X4]++; } } @@ -967,16 +974,16 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE_TYPE bs, int *model_used) { const TX_SIZE max_txfm_size = TX_32X32 - - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16); + - (bs < BLOCK_32X32) - (bs < BLOCK_16X16); VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd); - int64_t rd[TX_SIZE_MAX_SB][2]; + int64_t rd[TX_SIZES][2]; int n, m; int s0, s1; - double scale_rd[TX_SIZE_MAX_SB] = {1.73, 1.44, 1.20, 1.00}; - // double scale_r[TX_SIZE_MAX_SB] = {2.82, 2.00, 1.41, 1.00}; + double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00}; + // double scale_r[TX_SIZES] = {2.82, 2.00, 1.41, 1.00}; const vp9_prob *tx_probs = get_tx_probs2(xd, &cm->fc.tx_probs); @@ -1065,11 +1072,11 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x, static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *distortion, int *skip, int64_t *psse, BLOCK_SIZE_TYPE bs, - int64_t txfm_cache[NB_TXFM_MODES], + int64_t txfm_cache[TX_MODES], int64_t ref_best_rd) { VP9_COMMON *const cm = &cpi->common; - int r[TX_SIZE_MAX_SB][2], s[TX_SIZE_MAX_SB]; - int64_t d[TX_SIZE_MAX_SB], sse[TX_SIZE_MAX_SB]; + int r[TX_SIZES][2], s[TX_SIZES]; + int64_t d[TX_SIZES], sse[TX_SIZES]; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; @@ -1080,7 +1087,7 @@ static void super_block_yrd(VP9_COMP *cpi, if (cpi->sf.tx_size_search_method == USE_LARGESTALL || (cpi->sf.tx_size_search_method != USE_FULL_RD && mbmi->ref_frame[0] == INTRA_FRAME)) { - vpx_memset(txfm_cache, 0, NB_TXFM_MODES * sizeof(int64_t)); + vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t)); choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse, ref_best_rd, bs); if (psse) @@ -1090,49 +1097,47 @@ static void super_block_yrd(VP9_COMP *cpi, if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER && mbmi->ref_frame[0] > INTRA_FRAME) { - int model_used[TX_SIZE_MAX_SB] = {1, 1, 1, 1}; - if (bs >= BLOCK_SIZE_SB32X32) { - if (model_used[TX_32X32]) { + int model_used[TX_SIZES] = {1, 1, 1, 1}; + if (bs >= BLOCK_32X32) { + if (model_used[TX_32X32]) model_rd_for_sb_y_tx(cpi, bs, TX_32X32, x, xd, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]); - } else { + else super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], &sse[TX_32X32], INT64_MAX, bs, TX_32X32); - } } - if (bs >= BLOCK_SIZE_MB16X16) { - if (model_used[TX_16X16]) { + if (bs >= BLOCK_16X16) { + if (model_used[TX_16X16]) model_rd_for_sb_y_tx(cpi, bs, TX_16X16, x, xd, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16]); - } else { + else super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], 
&d[TX_16X16], &s[TX_16X16], &sse[TX_16X16], INT64_MAX, bs, TX_16X16); - } } - if (model_used[TX_8X8]) { + if (model_used[TX_8X8]) model_rd_for_sb_y_tx(cpi, bs, TX_8X8, x, xd, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8]); - } else { + else super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], &sse[TX_8X8], INT64_MAX, bs, TX_8X8); - } - if (model_used[TX_4X4]) { + + if (model_used[TX_4X4]) model_rd_for_sb_y_tx(cpi, bs, TX_4X4, x, xd, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4]); - } else { + else super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], &sse[TX_4X4], INT64_MAX, bs, TX_4X4); - } + choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s, skip, sse, ref_best_rd, bs, model_used); } else { - if (bs >= BLOCK_SIZE_SB32X32) + if (bs >= BLOCK_32X32) super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], &sse[TX_32X32], ref_best_rd, bs, TX_32X32); - if (bs >= BLOCK_SIZE_MB16X16) + if (bs >= BLOCK_16X16) super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16], &sse[TX_16X16], ref_best_rd, bs, TX_16X16); @@ -1174,28 +1179,30 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int *bestrate, int *bestratey, int64_t *bestdistortion, - BLOCK_SIZE_TYPE bsize) { + BLOCK_SIZE_TYPE bsize, + int64_t rd_thresh) { MB_PREDICTION_MODE mode; MACROBLOCKD *xd = &x->e_mbd; - int64_t best_rd = INT64_MAX; + int64_t best_rd = rd_thresh; int rate = 0; int64_t distortion; - VP9_COMMON *const cm = &cpi->common; struct macroblock_plane *p = &x->plane[0]; struct macroblockd_plane *pd = &xd->plane[0]; const int src_stride = p->src.stride; const int dst_stride = pd->dst.stride; - uint8_t *src, *dst; + uint8_t *src_init = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, ib, + p->src.buf, src_stride); + uint8_t *dst_init = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, ib, + pd->dst.buf, dst_stride); int16_t *src_diff, *coeff; ENTROPY_CONTEXT ta[2], tempa[2]; ENTROPY_CONTEXT tl[2], templ[2]; TX_TYPE tx_type = DCT_DCT; - TX_TYPE best_tx_type = DCT_DCT; - int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; - int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; + const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; + const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; int idx, idy, block; - DECLARE_ALIGNED(16, int16_t, best_dqcoeff[4][16]); + uint8_t best_dst[8 * 8]; assert(ib < 4); @@ -1223,17 +1230,15 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, for (idx = 0; idx < num_4x4_blocks_wide; ++idx) { int64_t ssz; const int16_t *scan; + uint8_t *src = src_init + idx * 4 + idy * 4 * src_stride; + uint8_t *dst = dst_init + idx * 4 + idy * 4 * dst_stride; block = ib + idy * 2 + idx; xd->mode_info_context->bmi[block].as_mode = mode; - src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block, - p->src.buf, src_stride); src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, block, p->src_diff); coeff = BLOCK_OFFSET(x->plane[0].coeff, block, 16); - dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block, - pd->dst.buf, dst_stride); - vp9_predict_intra_block(xd, block, b_width_log2(BLOCK_SIZE_SB8X8), + vp9_predict_intra_block(xd, block, 1, TX_4X4, mode, x->skip_encode ? src : dst, x->skip_encode ? 
src_stride : dst_stride, @@ -1252,12 +1257,14 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, } scan = get_scan_4x4(get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block)); - ratey += cost_coeffs(cm, x, 0, block, PLANE_TYPE_Y_WITH_DC, + ratey += cost_coeffs(x, 0, block, PLANE_TYPE_Y_WITH_DC, tempa + idx, templ + idy, TX_4X4, scan, vp9_get_coef_neighbors_handle(scan)); distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block, 16), 16, &ssz) >> 2; + if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) + goto next; if (tx_type != DCT_DCT) vp9_short_iht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block, 16), @@ -1277,61 +1284,40 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, *bestdistortion = distortion; best_rd = this_rd; *best_mode = mode; - best_tx_type = tx_type; vpx_memcpy(a, tempa, sizeof(tempa)); vpx_memcpy(l, templ, sizeof(templ)); - for (idy = 0; idy < num_4x4_blocks_high; ++idy) { - for (idx = 0; idx < num_4x4_blocks_wide; ++idx) { - block = ib + idy * 2 + idx; - vpx_memcpy(best_dqcoeff[idy * 2 + idx], - BLOCK_OFFSET(pd->dqcoeff, block, 16), - sizeof(best_dqcoeff[0])); - } - } + for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) + vpx_memcpy(best_dst + idy * 8, dst_init + idy * dst_stride, + num_4x4_blocks_wide * 4); } + next: + {} } - if (x->skip_encode) + if (best_rd >= rd_thresh || x->skip_encode) return best_rd; - for (idy = 0; idy < num_4x4_blocks_high; ++idy) { - for (idx = 0; idx < num_4x4_blocks_wide; ++idx) { - block = ib + idy * 2 + idx; - xd->mode_info_context->bmi[block].as_mode = *best_mode; - src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block, - p->src.buf, src_stride); - dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block, - pd->dst.buf, dst_stride); - - vp9_predict_intra_block(xd, block, b_width_log2(BLOCK_SIZE_SB8X8), TX_4X4, - *best_mode, - x->skip_encode ? src : dst, - x->skip_encode ? src_stride : dst_stride, - dst, dst_stride); - // inverse transform - if (best_tx_type != DCT_DCT) - vp9_short_iht4x4_add(best_dqcoeff[idy * 2 + idx], dst, - dst_stride, best_tx_type); - else - xd->inv_txm4x4_add(best_dqcoeff[idy * 2 + idx], dst, - dst_stride); - } - } + for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) + vpx_memcpy(dst_init + idy * dst_stride, best_dst + idy * 8, + num_4x4_blocks_wide * 4); return best_rd; } -static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, - int *Rate, int *rate_y, - int64_t *Distortion, int64_t best_rd) { +static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP * const cpi, + MACROBLOCK * const mb, + int * const rate, + int * const rate_y, + int64_t * const distortion, + int64_t best_rd) { int i, j; MACROBLOCKD *const xd = &mb->e_mbd; BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type; - int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; - int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; + const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; + const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; int idx, idy; int cost = 0; - int64_t distortion = 0; + int64_t total_distortion = 0; int tot_rate_y = 0; int64_t total_rd = 0; ENTROPY_CONTEXT t_above[4], t_left[4]; @@ -1343,12 +1329,13 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, bmode_costs = mb->mbmode_cost; + // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block. 
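Note: the rd_pick_intra4x4block() changes above make the search threshold-driven: best_rd now starts at the caller-supplied rd_thresh, the per-block loop bails out with goto next as soon as the accumulated RDCOST can no longer win, and a return value still >= rd_thresh tells the caller nothing useful was found. A stripped-down sketch of that control flow, with a made-up cost model (rd_cost_so_far) standing in for the rate/distortion accumulation:

#include <stdint.h>

#define NUM_MODES 10

static int64_t rd_cost_so_far(int mode, int step) {
  return (int64_t)(mode + 1) * (step + 1) * 25;     /* pretend partial RD cost */
}

static int64_t pick_best_mode(int64_t rd_thresh, int *best_mode) {
  int64_t best_rd = rd_thresh;                      /* nothing beats this yet */
  int mode;
  for (mode = 0; mode < NUM_MODES; ++mode) {
    int64_t this_rd = 0;
    int step;
    for (step = 0; step < 4; ++step) {
      this_rd = rd_cost_so_far(mode, step);
      if (this_rd >= best_rd)
        goto next;                                  /* cannot beat best_rd */
    }
    best_rd = this_rd;
    *best_mode = mode;
  next:
    ;
  }
  return best_rd;                                   /* == rd_thresh on failure */
}

int main(void) {
  int best_mode = -1;
  return pick_best_mode(1000, &best_mode) < 1000 ? 0 : 1;
}

The following hunk threads the same idea through the caller, handing each sub-block search the budget that is still left (best_rd - total_rd) and giving up on the whole 8x8 block as soon as one sub-block cannot fit inside it.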
for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { const int mis = xd->mode_info_stride; MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode); int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry); - int64_t UNINITIALIZED_IS_SAFE(d); + int64_t UNINITIALIZED_IS_SAFE(d), this_rd; i = idy * 2 + idx; if (cpi->common.frame_type == KEY_FRAME) { @@ -1359,11 +1346,16 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, bmode_costs = mb->y_mode_costs[A][L]; } - total_rd += rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs, - t_above + idx, t_left + idy, - &r, &ry, &d, bsize); + this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs, + t_above + idx, t_left + idy, + &r, &ry, &d, bsize, + best_rd - total_rd); + if (this_rd >= best_rd - total_rd) + return INT64_MAX; + + total_rd += this_rd; cost += r; - distortion += d; + total_distortion += d; tot_rate_y += ry; mic->bmi[i].as_mode = best_mode; @@ -1377,19 +1369,19 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, } } - *Rate = cost; + *rate = cost; *rate_y = tot_rate_y; - *Distortion = distortion; + *distortion = total_distortion; xd->mode_info_context->mbmi.mode = mic->bmi[3].as_mode; - return RDCOST(mb->rdmult, mb->rddiv, cost, distortion); + return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion); } static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable, BLOCK_SIZE_TYPE bsize, - int64_t txfm_cache[NB_TXFM_MODES], + int64_t tx_cache[TX_MODES], int64_t best_rd) { MB_PREDICTION_MODE mode; MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); @@ -1400,14 +1392,13 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, int i; int *bmode_costs = x->mbmode_cost; - if (cpi->sf.tx_size_search_method == USE_FULL_RD) { - for (i = 0; i < NB_TXFM_MODES; i++) - txfm_cache[i] = INT64_MAX; - } + if (cpi->sf.tx_size_search_method == USE_FULL_RD) + for (i = 0; i < TX_MODES; i++) + tx_cache[i] = INT64_MAX; - /* Y Search for 32x32 intra prediction mode */ + /* Y Search for intra prediction mode */ for (mode = DC_PRED; mode <= TM_PRED; mode++) { - int64_t local_txfm_cache[NB_TXFM_MODES]; + int64_t local_tx_cache[TX_MODES]; MODE_INFO *const mic = xd->mode_info_context; const int mis = xd->mode_info_stride; @@ -1421,7 +1412,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, x->e_mbd.mode_info_context->mbmi.mode = mode; super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL, - bsize, local_txfm_cache, best_rd); + bsize, local_tx_cache, best_rd); if (this_rate_tokenonly == INT_MAX) continue; @@ -1440,11 +1431,11 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, } if (cpi->sf.tx_size_search_method == USE_FULL_RD && this_rd < INT64_MAX) { - for (i = 0; i < NB_TXFM_MODES; i++) { - int64_t adj_rd = this_rd + local_txfm_cache[i] - - local_txfm_cache[cpi->common.tx_mode]; - if (adj_rd < txfm_cache[i]) { - txfm_cache[i] = adj_rd; + for (i = 0; i < TX_MODES; i++) { + const int64_t adj_rd = this_rd + local_tx_cache[i] - + local_tx_cache[cpi->common.tx_mode]; + if (adj_rd < tx_cache[i]) { + tx_cache[i] = adj_rd; } } } @@ -1537,8 +1528,6 @@ static int64_t rd_sbuv_dcpred(VP9_COMP *cpi, MACROBLOCK *x, x->intra_uv_mode_cost[cpi->common.frame_type][DC_PRED]; this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *distortion); - x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; - return this_rd; } @@ -1609,8 
+1598,8 @@ static int labels2mode(MACROBLOCK *x, int i, MB_MODE_INFO * mbmi = &mic->mbmi; int cost = 0, thismvcost = 0; int idx, idy; - int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type]; - int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type]; + const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type]; + const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type]; /* We have to be careful retrieving previously-encoded motion vectors. Ones from this macroblock have to be pulled from the BLOCKD array @@ -1623,12 +1612,11 @@ static int labels2mode(MACROBLOCK *x, int i, case NEWMV: this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int; thismvcost = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost, - 102, xd->allow_high_precision_mv); + 102); if (mbmi->ref_frame[1] > 0) { this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int; thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv, - mvjcost, mvcost, 102, - xd->allow_high_precision_mv); + mvjcost, mvcost, 102); } break; case NEARESTMV: @@ -1678,11 +1666,12 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl) { int k; - VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; - BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type; - const int width = plane_block_width(bsize, &xd->plane[0]); - const int height = plane_block_height(bsize, &xd->plane[0]); + struct macroblockd_plane *const pd = &xd->plane[0]; + MODE_INFO *const mi = xd->mode_info_context; + const BLOCK_SIZE_TYPE bsize = mi->mbmi.sb_type; + const int width = plane_block_width(bsize, pd); + const int height = plane_block_height(bsize, pd); int idx, idy; const int src_stride = x->plane[0].src.stride; uint8_t* const src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, @@ -1692,39 +1681,33 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, x->plane[0].src_diff); int16_t* coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, i); uint8_t* const pre = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, - xd->plane[0].pre[0].buf, - xd->plane[0].pre[0].stride); + pd->pre[0].buf, + pd->pre[0].stride); uint8_t* const dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, - xd->plane[0].dst.buf, - xd->plane[0].dst.stride); + pd->dst.buf, + pd->dst.stride); int64_t thisdistortion = 0, thissse = 0; int thisrate = 0; - vp9_build_inter_predictor(pre, - xd->plane[0].pre[0].stride, - dst, - xd->plane[0].dst.stride, - &xd->mode_info_context->bmi[i].as_mv[0], + vp9_build_inter_predictor(pre, pd->pre[0].stride, + dst, pd->dst.stride, + &mi->bmi[i].as_mv[0].as_mv, &xd->scale_factor[0], - width, height, 0, &xd->subpix, - MV_PRECISION_Q3); + width, height, 0, &xd->subpix, MV_PRECISION_Q3); - if (xd->mode_info_context->mbmi.ref_frame[1] > 0) { + if (mi->mbmi.ref_frame[1] > 0) { uint8_t* const second_pre = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, - xd->plane[0].pre[1].buf, - xd->plane[0].pre[1].stride); - vp9_build_inter_predictor(second_pre, xd->plane[0].pre[1].stride, - dst, xd->plane[0].dst.stride, - &xd->mode_info_context->bmi[i].as_mv[1], + pd->pre[1].buf, pd->pre[1].stride); + vp9_build_inter_predictor(second_pre, pd->pre[1].stride, + dst, pd->dst.stride, + &mi->bmi[i].as_mv[1].as_mv, &xd->scale_factor[1], - width, height, 1, - &xd->subpix, MV_PRECISION_Q3); + width, height, 1, &xd->subpix, MV_PRECISION_Q3); } - vp9_subtract_block(height, width, src_diff, 8, - src, src_stride, - dst, xd->plane[0].dst.stride); + vp9_subtract_block(height, 
width, src_diff, 8, src, src_stride, + dst, pd->dst.stride); k = i; for (idy = 0; idy < height / 4; ++idy) { @@ -1737,11 +1720,10 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, k); x->fwd_txm4x4(src_diff, coeff, 16); x->quantize_b_4x4(x, k, DCT_DCT, 16); - thisdistortion += vp9_block_error(coeff, - BLOCK_OFFSET(xd->plane[0].dqcoeff, - k, 16), 16, &ssz); + thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k, 16), + 16, &ssz); thissse += ssz; - thisrate += cost_coeffs(cm, x, 0, k, PLANE_TYPE_Y_WITH_DC, + thisrate += cost_coeffs(x, 0, k, PLANE_TYPE_Y_WITH_DC, ta + (k & 1), tl + (k >> 1), TX_4X4, vp9_default_scan_4x4, @@ -1836,8 +1818,8 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, int label_mv_thresh; int segmentyrate = 0; BLOCK_SIZE_TYPE bsize = mbmi->sb_type; - int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; - int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; + const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; + const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; vp9_variance_fn_ptr_t *v_fn_ptr; ENTROPY_CONTEXT t_above[2], t_left[2]; BEST_SEG_INFO *bsi = bsi_buf + filter_idx; @@ -1871,12 +1853,12 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd, &frame_mv[NEARESTMV][mbmi->ref_frame[0]], &frame_mv[NEARMV][mbmi->ref_frame[0]], - i, 0); + i, 0, mi_row, mi_col); if (mbmi->ref_frame[1] > 0) vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd, &frame_mv[NEARESTMV][mbmi->ref_frame[1]], &frame_mv[NEARMV][mbmi->ref_frame[1]], - i, 1); + i, 1, mi_row, mi_col); // search for the best motion vector on this segment for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { @@ -1984,7 +1966,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, // Should we do a full search (best quality only) if (cpi->compressor_speed == 0) { /* Check if mvp_full is within the range. */ - clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, + clamp_mv(&mvp_full.as_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); thissme = cpi->full_search_sad(x, &mvp_full, @@ -2204,7 +2186,7 @@ static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, MB_MODE_INFO *mbmi = &mi->mbmi; int mode_idx; - vpx_memset(bsi, 0, sizeof(*bsi)); + vp9_zero(*bsi); bsi->segment_rd = best_rd; bsi->ref_mv = best_ref_mv; @@ -2358,7 +2340,7 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int_mv *ref_mv, int_mv *second_ref_mv, int64_t comp_pred_diff[NB_PREDICTION_TYPES], - int64_t txfm_size_diff[NB_TXFM_MODES], + int64_t tx_size_diff[TX_MODES], int64_t best_filter_diff[VP9_SWITCHABLE_FILTERS + 1]) { MACROBLOCKD *const xd = &x->e_mbd; @@ -2380,7 +2362,7 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, // FIXME(rbultje) does this memcpy the whole array? 
I believe sizeof() // doesn't actually work this way - memcpy(ctx->txfm_rd_diff, txfm_size_diff, sizeof(ctx->txfm_rd_diff)); + memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff)); memcpy(ctx->best_filter_diff, best_filter_diff, sizeof(*best_filter_diff) * (VP9_SWITCHABLE_FILTERS + 1)); } @@ -2444,7 +2426,7 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, xd->prev_mode_info_context, frame_type, mbmi->ref_mvs[frame_type], - cpi->common.ref_frame_sign_bias); + cpi->common.ref_frame_sign_bias, mi_row, mi_col); // Candidate refinement carried out at encoder and decoder vp9_find_best_ref_mvs(xd, @@ -2469,7 +2451,7 @@ static YV12_BUFFER_CONFIG *get_scaled_ref_frame(VP9_COMP *cpi, int ref_frame) { return scaled_ref_frame; } -static INLINE int get_switchable_rate(VP9_COMMON *cm, MACROBLOCK *x) { +static INLINE int get_switchable_rate(MACROBLOCK *x) { MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; @@ -2575,7 +2557,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, } *rate_mv = vp9_mv_bit_cost(tmp_mv, &ref_mv, x->nmvjointcost, x->mvcost, - 96, xd->allow_high_precision_mv); + 96); if (scaled_ref_frame) { int i; for (i = 0; i < MAX_MB_PLANE; i++) @@ -2663,7 +2645,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, vp9_build_inter_predictor(ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw, - &frame_mv[refs[!id]], + &frame_mv[refs[!id]].as_mv, &xd->scale_factor[!id], pw, ph, 0, &xd->subpix, MV_PRECISION_Q3); @@ -2730,12 +2712,10 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, } *rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]], &mbmi->ref_mvs[refs[0]][0], - x->nmvjointcost, x->mvcost, 96, - x->e_mbd.allow_high_precision_mv); + x->nmvjointcost, x->mvcost, 96); *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]], &mbmi->ref_mvs[refs[1]][0], - x->nmvjointcost, x->mvcost, 96, - x->e_mbd.allow_high_precision_mv); + x->nmvjointcost, x->mvcost, 96); vpx_free(second_pred); } @@ -2775,46 +2755,36 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int orig_dst_stride[MAX_MB_PLANE]; int rs = 0; - switch (this_mode) { + if (this_mode == NEWMV) { int rate_mv; - case NEWMV: - if (is_comp_pred) { - // Initialize mv using single prediction mode result. - frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int; - frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int; + if (is_comp_pred) { + // Initialize mv using single prediction mode result. 
+ frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int; + frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int; - if (cpi->sf.comp_inter_joint_search_thresh <= bsize) { - joint_motion_search(cpi, x, bsize, frame_mv, - mi_row, mi_col, single_newmv, &rate_mv); - } else { - rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]], - &mbmi->ref_mvs[refs[0]][0], - x->nmvjointcost, x->mvcost, 96, - x->e_mbd.allow_high_precision_mv); - rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]], - &mbmi->ref_mvs[refs[1]][0], - x->nmvjointcost, x->mvcost, 96, - x->e_mbd.allow_high_precision_mv); - } - if (frame_mv[refs[0]].as_int == INVALID_MV || - frame_mv[refs[1]].as_int == INVALID_MV) - return INT64_MAX; - *rate2 += rate_mv; + if (cpi->sf.comp_inter_joint_search_thresh <= bsize) { + joint_motion_search(cpi, x, bsize, frame_mv, + mi_row, mi_col, single_newmv, &rate_mv); } else { - int_mv tmp_mv; - single_motion_search(cpi, x, bsize, mi_row, mi_col, - &tmp_mv, &rate_mv); - *rate2 += rate_mv; - frame_mv[refs[0]].as_int = - xd->mode_info_context->bmi[0].as_mv[0].as_int = tmp_mv.as_int; - single_newmv[refs[0]].as_int = tmp_mv.as_int; + rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]], + &mbmi->ref_mvs[refs[0]][0], + x->nmvjointcost, x->mvcost, 96); + rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]], + &mbmi->ref_mvs[refs[1]][0], + x->nmvjointcost, x->mvcost, 96); } - break; - case NEARMV: - case NEARESTMV: - case ZEROMV: - default: - break; + if (frame_mv[refs[0]].as_int == INVALID_MV || + frame_mv[refs[1]].as_int == INVALID_MV) + return INT64_MAX; + *rate2 += rate_mv; + } else { + int_mv tmp_mv; + single_motion_search(cpi, x, bsize, mi_row, mi_col, &tmp_mv, &rate_mv); + *rate2 += rate_mv; + frame_mv[refs[0]].as_int = + xd->mode_info_context->bmi[0].as_mv[0].as_int = tmp_mv.as_int; + single_newmv[refs[0]].as_int = tmp_mv.as_int; + } } // if we're near/nearest and mv == 0,0, compare to zeromv @@ -2856,10 +2826,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < num_refs; ++i) { cur_mv[i] = frame_mv[refs[i]]; // Clip "next_nearest" so that it does not extend to far out of image - if (this_mode == NEWMV) - assert(!clamp_mv2(&cur_mv[i], xd)); - else - clamp_mv2(&cur_mv[i], xd); + if (this_mode != NEWMV) + clamp_mv2(&cur_mv[i].as_mv, xd); if (mv_check_bounds(x, &cur_mv[i])) return INT64_MAX; @@ -2918,7 +2886,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, const int is_intpel_interp = intpel_mv; mbmi->interp_filter = filter; vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); - rs = get_switchable_rate(cm, x); + rs = get_switchable_rate(x); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); if (interpolating_intpel_seen && is_intpel_interp) { @@ -2995,11 +2963,11 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, xd->plane[i].dst.stride = orig_dst_stride[i]; } } - // Set the appripriate filter + // Set the appropriate filter mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ? cm->mcomp_filter_type : *best_filter; vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); - rs = (cm->mcomp_filter_type == SWITCHABLE ? get_switchable_rate(cm, x) : 0); + rs = cm->mcomp_filter_type == SWITCHABLE ? 
get_switchable_rate(x) : 0; if (pred_exists) { if (best_needs_copy) { @@ -3033,55 +3001,82 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } if (cpi->common.mcomp_filter_type == SWITCHABLE) - *rate2 += get_switchable_rate(cm, x); - - if (cpi->active_map_enabled && x->active_ptr[0] == 0) - x->skip = 1; - else if (x->encode_breakout) { - const BLOCK_SIZE_TYPE y_size = get_plane_block_size(bsize, &xd->plane[0]); - const BLOCK_SIZE_TYPE uv_size = get_plane_block_size(bsize, &xd->plane[1]); - - unsigned int var, sse; - int threshold = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1] >> 4); - - - if (threshold < x->encode_breakout) - threshold = x->encode_breakout; - - var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].dst.buf, xd->plane[0].dst.stride, - &sse); - - if ((int)sse < threshold) { - unsigned int q2dc = xd->plane[0].dequant[0]; - // If there is no codeable 2nd order dc - // or a very small uniform pixel change change - if ((sse - var < q2dc * q2dc >> 4) || - (sse / 2 > var && sse - var < 64)) { - // Check u and v to make sure skip is ok - int sse2; - unsigned int sse2u, sse2v; - var = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf, - x->plane[1].src.stride, - xd->plane[1].dst.buf, - xd->plane[1].dst.stride, &sse2u); - var = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf, - x->plane[2].src.stride, - xd->plane[2].dst.buf, - xd->plane[2].dst.stride, &sse2v); - sse2 = sse2u + sse2v; - - if (sse2 * 2 < threshold) { - x->skip = 1; - *distortion = sse + sse2; - *rate2 = 500; - - // for best yrd calculation - *rate_uv = 0; - *distortion_uv = sse2; - - *disable_skip = 1; - this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion); + *rate2 += get_switchable_rate(x); + + if (!is_comp_pred) { + if (cpi->active_map_enabled && x->active_ptr[0] == 0) + x->skip = 1; + else if (x->encode_breakout) { + const BLOCK_SIZE_TYPE y_size = get_plane_block_size(bsize, &xd->plane[0]); + const BLOCK_SIZE_TYPE uv_size = get_plane_block_size(bsize, + &xd->plane[1]); + unsigned int var, sse; + // Skipping threshold for ac. + unsigned int thresh_ac; + // The encode_breakout input + unsigned int encode_breakout = x->encode_breakout << 4; + + // Calculate threshold according to dequant value. + thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9; + + // Set a maximum for threshold to avoid big PSNR loss in low bitrate case. + if (thresh_ac > 36000) + thresh_ac = 36000; + + // Use encode_breakout input if it is bigger than internal threshold. + if (thresh_ac < encode_breakout) + thresh_ac = encode_breakout; + + var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride, + xd->plane[0].dst.buf, + xd->plane[0].dst.stride, &sse); + + // Adjust threshold according to partition size. 
+ thresh_ac >>= 8 - (b_width_log2_lookup[bsize] + + b_height_log2_lookup[bsize]); + + // Y skipping condition checking + if (sse < thresh_ac || sse == 0) { + // Skipping threshold for dc + unsigned int thresh_dc; + + thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6); + + // dc skipping checking + if ((sse - var) < thresh_dc || sse == var) { + unsigned int sse_u, sse_v; + unsigned int var_u, var_v; + + var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf, + x->plane[1].src.stride, + xd->plane[1].dst.buf, + xd->plane[1].dst.stride, &sse_u); + + // U skipping condition checking + if ((sse_u * 4 < thresh_ac || sse_u == 0) && + (sse_u - var_u < thresh_dc || sse_u == var_u)) { + var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf, + x->plane[2].src.stride, + xd->plane[2].dst.buf, + xd->plane[2].dst.stride, &sse_v); + + // V skipping condition checking + if ((sse_v * 4 < thresh_ac || sse_v == 0) && + (sse_v - var_v < thresh_dc || sse_v == var_v)) { + x->skip = 1; + + *rate2 = 500; + *rate_uv = 0; + + // Scaling factor for SSE from spatial domain to frequency domain + // is 16. Adjust distortion accordingly. + *distortion_uv = (sse_u + sse_v) << 4; + *distortion = (sse << 4) + *distortion_uv; + + *disable_skip = 1; + this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion); + } + } } } } @@ -3133,15 +3128,13 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *const xd = &x->e_mbd; int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0; int y_skip = 0, uv_skip; - int64_t dist_y = 0, dist_uv = 0, txfm_cache[NB_TXFM_MODES]; - + int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 }; x->skip_encode = 0; - vpx_memset(&txfm_cache, 0, sizeof(txfm_cache)); ctx->skip = 0; xd->mode_info_context->mbmi.ref_frame[0] = INTRA_FRAME; if (bsize >= BLOCK_SIZE_SB8X8) { if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, - &dist_y, &y_skip, bsize, txfm_cache, + &dist_y, &y_skip, bsize, tx_cache, best_rd) >= best_rd) { *returnrate = INT_MAX; return; @@ -3150,8 +3143,8 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, &dist_uv, &uv_skip, bsize); } else { y_skip = 0; - if (rd_pick_intra4x4mby_modes(cpi, x, &rate_y, &rate_y_tokenonly, - &dist_y, best_rd) >= best_rd) { + if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly, + &dist_y, best_rd) >= best_rd) { *returnrate = INT_MAX; return; } @@ -3163,17 +3156,15 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly + vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 1); *returndist = dist_y + (dist_uv >> 2); - memset(ctx->txfm_rd_diff, 0, sizeof(ctx->txfm_rd_diff)); + vp9_zero(ctx->tx_rd_diff); } else { int i; *returnrate = rate_y + rate_uv + vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 0); *returndist = dist_y + (dist_uv >> 2); - if (cpi->sf.tx_size_search_method == USE_FULL_RD) { - for (i = 0; i < NB_TXFM_MODES; i++) { - ctx->txfm_rd_diff[i] = txfm_cache[i] - txfm_cache[cm->tx_mode]; - } - } + if (cpi->sf.tx_size_search_method == USE_FULL_RD) + for (i = 0; i < TX_MODES; i++) + ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode]; } ctx->mic = *xd->mode_info_context; @@ -3189,9 +3180,10 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; + const struct segmentation *seg = &xd->seg; const BLOCK_SIZE_TYPE block_size = get_plane_block_size(bsize, &xd->plane[0]); 
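Note: the new encode_breakout path above derives its skip thresholds from the dequantizer values: an AC SSE threshold (dequant[1]^2 / 9, capped at 36000, floored by the user's encode_breakout << 4, then scaled by block area) and a DC threshold (dequant[0]^2 >> 6), with the surviving SSE values shifted left by 4 when stored as distortion because SSE moves from the spatial to the transform domain. The helper below reproduces just that arithmetic with made-up inputs; b_width_log2/b_height_log2 are assumed to count in 4x4 units, matching the lookup tables used above.

#include <stdio.h>

static unsigned skip_thresh_ac(unsigned dequant_ac, unsigned encode_breakout,
                               int b_width_log2, int b_height_log2) {
  unsigned thresh_ac = dequant_ac * dequant_ac / 9;
  if (thresh_ac > 36000)
    thresh_ac = 36000;                   /* cap to limit PSNR loss at low rates */
  if (thresh_ac < (encode_breakout << 4))
    thresh_ac = encode_breakout << 4;    /* honour the user's breakout setting */
  /* Larger partitions get a proportionally larger SSE budget. */
  thresh_ac >>= 8 - (b_width_log2 + b_height_log2);
  return thresh_ac;
}

static unsigned skip_thresh_dc(unsigned dequant_dc) {
  return (dequant_dc * dequant_dc) >> 6;
}

int main(void) {
  /* Example inputs: a 64x64 block (log2 widths 4 + 4 in 4x4 units => shift 0),
   * ac dequant 80, dc dequant 60, encode_breakout 100. */
  printf("ac=%u dc=%u\n", skip_thresh_ac(80, 100, 4, 4), skip_thresh_dc(60));
  return 0;
}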
MB_PREDICTION_MODE this_mode; - MV_REFERENCE_FRAME ref_frame; + MV_REFERENCE_FRAME ref_frame, second_ref_frame; unsigned char segment_id = xd->mode_info_context->mbmi.segment_id; int comp_pred, i; int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; @@ -3205,8 +3197,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, cpi->alt_fb_idx}; int64_t best_rd = best_rd_so_far; int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise - int64_t best_txfm_rd[NB_TXFM_MODES]; - int64_t best_txfm_diff[NB_TXFM_MODES]; + int64_t best_tx_rd[TX_MODES]; + int64_t best_tx_diff[TX_MODES]; int64_t best_pred_diff[NB_PREDICTION_TYPES]; int64_t best_pred_rd[NB_PREDICTION_TYPES]; int64_t best_filter_rd[VP9_SWITCHABLE_FILTERS + 1]; @@ -3222,10 +3214,10 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // MB_PREDICTION_MODE best_inter_mode = ZEROMV; MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME; INTERPOLATIONFILTERTYPE tmp_best_filter = SWITCHABLE; - int rate_uv_intra[TX_SIZE_MAX_SB], rate_uv_tokenonly[TX_SIZE_MAX_SB]; - int64_t dist_uv[TX_SIZE_MAX_SB]; - int skip_uv[TX_SIZE_MAX_SB]; - MB_PREDICTION_MODE mode_uv[TX_SIZE_MAX_SB]; + int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES]; + int64_t dist_uv[TX_SIZES]; + int skip_uv[TX_SIZES]; + MB_PREDICTION_MODE mode_uv[TX_SIZES]; struct scale_factors scale_factor[4]; unsigned int ref_frame_mask = 0; unsigned int mode_mask = 0; @@ -3254,7 +3246,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, ctx->frames_with_high_error = 0; ctx->modes_with_high_error = 0; - xd->mode_info_context->mbmi.segment_id = segment_id; estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp, &comp_mode_p); vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); @@ -3262,16 +3253,17 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < NB_PREDICTION_TYPES; ++i) best_pred_rd[i] = INT64_MAX; - for (i = 0; i < NB_TXFM_MODES; i++) - best_txfm_rd[i] = INT64_MAX; + for (i = 0; i < TX_MODES; i++) + best_tx_rd[i] = INT64_MAX; for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) best_filter_rd[i] = INT64_MAX; - for (i = 0; i < TX_SIZE_MAX_SB; i++) + for (i = 0; i < TX_SIZES; i++) rate_uv_intra[i] = INT_MAX; *returnrate = INT_MAX; - // Create a mask set to 1 for each frame used by a smaller resolution. + // Create a mask set to 1 for each reference frame used by a smaller + // resolution. if (cpi->sf.use_avoid_tested_higherror) { switch (block_size) { case BLOCK_64X64: @@ -3321,24 +3313,26 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int rate2 = 0, rate_y = 0, rate_uv = 0; int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0; int skippable; - int64_t txfm_cache[NB_TXFM_MODES]; + int64_t tx_cache[TX_MODES]; int i; int this_skip2 = 0; int64_t total_sse = INT_MAX; int early_term = 0; - for (i = 0; i < NB_TXFM_MODES; ++i) - txfm_cache[i] = INT64_MAX; + for (i = 0; i < TX_MODES; ++i) + tx_cache[i] = INT64_MAX; + x->skip = 0; this_mode = vp9_mode_order[mode_index].mode; ref_frame = vp9_mode_order[mode_index].ref_frame; + second_ref_frame = vp9_mode_order[mode_index].second_ref_frame; - // Slip modes that have been masked off but always consider first mode. - if ( mode_index && (bsize > cpi->sf.unused_mode_skip_lvl) && + // Skip modes that have been masked off but always consider first mode. 
+ if (mode_index && (bsize > cpi->sf.unused_mode_skip_lvl) && (cpi->unused_mode_skip_mask & (1 << mode_index)) ) continue; - // Skip if the current refernce frame has been masked off + // Skip if the current reference frame has been masked off if (cpi->sf.reference_masking && !cpi->set_ref_frame_mask && (cpi->ref_frame_mask & (1 << ref_frame))) continue; @@ -3351,12 +3345,10 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // Do not allow compound prediction if the segment level reference // frame feature is in use as in this case there can only be one reference. - if ((vp9_mode_order[mode_index].second_ref_frame > INTRA_FRAME) && - vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_REF_FRAME)) + if ((second_ref_frame > INTRA_FRAME) && + vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue; - x->skip = 0; - // Skip some checking based on small partitions' result. if (x->fast_ms > 1 && !ref_frame) continue; @@ -3370,51 +3362,49 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (!(mode_mask & (1 << this_mode))) { continue; } - if (vp9_mode_order[mode_index].second_ref_frame != NONE - && !(ref_frame_mask - & (1 << vp9_mode_order[mode_index].second_ref_frame))) { + if (second_ref_frame != NONE + && !(ref_frame_mask & (1 << second_ref_frame))) { continue; } } mbmi->ref_frame[0] = ref_frame; - mbmi->ref_frame[1] = vp9_mode_order[mode_index].second_ref_frame; + mbmi->ref_frame[1] = second_ref_frame; if (!(ref_frame == INTRA_FRAME || (cpi->ref_frame_flags & flag_list[ref_frame]))) { continue; } - if (!(mbmi->ref_frame[1] == NONE - || (cpi->ref_frame_flags & flag_list[mbmi->ref_frame[1]]))) { + if (!(second_ref_frame == NONE + || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) { continue; } - comp_pred = mbmi->ref_frame[1] > INTRA_FRAME; + comp_pred = second_ref_frame > INTRA_FRAME; if (comp_pred) { if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME) continue; if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) - if (vp9_mode_order[mode_index].ref_frame != best_inter_ref_frame && - vp9_mode_order[mode_index].second_ref_frame != best_inter_ref_frame) + if (ref_frame != best_inter_ref_frame && + second_ref_frame != best_inter_ref_frame) continue; } // TODO(jingning, jkoleszar): scaling reference frame not supported for // SPLITMV. 
- if (mbmi->ref_frame[0] > 0 && - (scale_factor[mbmi->ref_frame[0]].x_scale_fp != VP9_REF_NO_SCALE || - scale_factor[mbmi->ref_frame[0]].y_scale_fp != VP9_REF_NO_SCALE) && + if (ref_frame > 0 && + (scale_factor[ref_frame].x_scale_fp != VP9_REF_NO_SCALE || + scale_factor[ref_frame].y_scale_fp != VP9_REF_NO_SCALE) && this_mode == SPLITMV) continue; - if (mbmi->ref_frame[1] > 0 && - (scale_factor[mbmi->ref_frame[1]].x_scale_fp != VP9_REF_NO_SCALE || - scale_factor[mbmi->ref_frame[1]].y_scale_fp != VP9_REF_NO_SCALE) && + if (second_ref_frame > 0 && + (scale_factor[second_ref_frame].x_scale_fp != VP9_REF_NO_SCALE || + scale_factor[second_ref_frame].y_scale_fp != VP9_REF_NO_SCALE) && this_mode == SPLITMV) continue; - set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1], - scale_factor); + set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor); mbmi->mode = this_mode; mbmi->uv_mode = DC_PRED; @@ -3431,46 +3421,43 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, continue; if (comp_pred) { - if (!(cpi->ref_frame_flags & flag_list[mbmi->ref_frame[1]])) + if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue; - set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1], - scale_factor); + set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor); mode_excluded = mode_excluded ? mode_excluded : cm->comp_pred_mode == SINGLE_PREDICTION_ONLY; } else { - // mbmi->ref_frame[1] = vp9_mode_order[mode_index].ref_frame[1]; - if (ref_frame != INTRA_FRAME) { - if (mbmi->ref_frame[1] != INTRA_FRAME) - mode_excluded = - mode_excluded ? - mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY; + if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) { + mode_excluded = + mode_excluded ? + mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY; } } - // Select predictors + // Select prediction reference frames. for (i = 0; i < MAX_MB_PLANE; i++) { xd->plane[i].pre[0] = yv12_mb[ref_frame][i]; if (comp_pred) - xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i]; + xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i]; } // If the segment reference frame feature is enabled.... // then do nothing if the current ref frame is not allowed.. - if (vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_REF_FRAME) && - vp9_get_segdata(&xd->seg, segment_id, SEG_LVL_REF_FRAME) != + if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) && + vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) { continue; // If the segment skip feature is enabled.... // then do nothing if the current mode is not allowed.. - } else if (vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_SKIP) && + } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) && (this_mode != ZEROMV && ref_frame != INTRA_FRAME)) { continue; // Disable this drop out case if the ref frame // segment level feature is enabled for this segment. This is to // prevent the possibility that we end up unable to pick any mode. - } else if (!vp9_segfeature_active(&xd->seg, segment_id, + } else if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) { // Only consider ZEROMV/ALTREF_FRAME for alt ref frame, // unless ARNR filtering is enabled in which case we want @@ -3506,9 +3493,11 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, continue; */ + // I4X4_PRED is only considered for block sizes less than 8x8. 
mbmi->txfm_size = TX_4X4; - rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, - &distortion_y, INT64_MAX); + if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y, + &distortion_y, best_rd) >= best_rd) + continue; rate2 += rate; rate2 += intra_cost_penalty; distortion2 += distortion_y; @@ -3524,11 +3513,21 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, distortion2 += dist_uv[TX_4X4]; distortion_uv = dist_uv[TX_4X4]; mbmi->uv_mode = mode_uv[TX_4X4]; - txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); - for (i = 0; i < NB_TXFM_MODES; ++i) - txfm_cache[i] = txfm_cache[ONLY_4X4]; + tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); + for (i = 0; i < TX_MODES; ++i) + tx_cache[i] = tx_cache[ONLY_4X4]; } else if (ref_frame == INTRA_FRAME) { TX_SIZE uv_tx; + // Disable intra modes other than DC_PRED for blocks with low variance + // Threshold for intra skipping based on source variance + // TODO(debargha): Specialize the threshold for super block sizes + static const int skip_intra_var_thresh[BLOCK_SIZE_TYPES] = { + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + }; + if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) && + this_mode != DC_PRED && + x->source_variance < skip_intra_var_thresh[mbmi->sb_type]) + continue; // Only search the oblique modes if the best so far is // one of the neighboring directional modes if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) && @@ -3541,7 +3540,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, continue; } super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL, - bsize, txfm_cache, best_rd); + bsize, tx_cache, best_rd); if (rate_y == INT_MAX) continue; @@ -3564,7 +3563,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, rate2 += intra_cost_penalty; distortion2 = distortion_y + distortion_uv; } else if (this_mode == SPLITMV) { - const int is_comp_pred = mbmi->ref_frame[1] > 0; + const int is_comp_pred = second_ref_frame > 0; int rate; int64_t distortion; int64_t this_rd_thresh; @@ -3574,7 +3573,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int tmp_best_skippable = 0; int switchable_filter_index; int_mv *second_ref = is_comp_pred ? - &mbmi->ref_mvs[mbmi->ref_frame[1]][0] : NULL; + &mbmi->ref_mvs[second_ref_frame][0] : NULL; union b_mode_info tmp_best_bmodes[16]; MB_MODE_INFO tmp_best_mbmode; PARTITION_INFO tmp_best_partition; @@ -3586,16 +3585,15 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME) continue; if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) - if (vp9_mode_order[mode_index].ref_frame != best_inter_ref_frame && - vp9_mode_order[mode_index].second_ref_frame != - best_inter_ref_frame) + if (ref_frame != best_inter_ref_frame && + second_ref_frame != best_inter_ref_frame) continue; } - this_rd_thresh = (mbmi->ref_frame[0] == LAST_FRAME) ? + this_rd_thresh = (ref_frame == LAST_FRAME) ? cpi->rd_threshes[bsize][THR_NEWMV] : cpi->rd_threshes[bsize][THR_NEWA]; - this_rd_thresh = (mbmi->ref_frame[0] == GOLDEN_FRAME) ? + this_rd_thresh = (ref_frame == GOLDEN_FRAME) ? 
cpi->rd_threshes[bsize][THR_NEWG] : this_rd_thresh; xd->mode_info_context->mbmi.txfm_size = TX_4X4; @@ -3610,7 +3608,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); tmp_rd = rd_pick_best_mbsegmentation(cpi, x, - &mbmi->ref_mvs[mbmi->ref_frame[0]][0], + &mbmi->ref_mvs[ref_frame][0], second_ref, best_yrd, &rate, &rate_y, &distortion, @@ -3622,7 +3620,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (tmp_rd == INT64_MAX) continue; cpi->rd_filter_cache[switchable_filter_index] = tmp_rd; - rs = get_switchable_rate(cm, x); + rs = get_switchable_rate(x); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS], tmp_rd + rs_rd); @@ -3672,7 +3670,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // Handles the special case when a filter that is not in the // switchable list (bilinear, 6-tap) is indicated at the frame level tmp_rd = rd_pick_best_mbsegmentation(cpi, x, - &mbmi->ref_mvs[mbmi->ref_frame[0]][0], + &mbmi->ref_mvs[ref_frame][0], second_ref, best_yrd, &rate, &rate_y, &distortion, @@ -3684,7 +3682,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, continue; } else { if (cpi->common.mcomp_filter_type == SWITCHABLE) { - int rs = get_switchable_rate(cm, x); + int rs = get_switchable_rate(x); tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0); } tmp_rd = tmp_best_rdu; @@ -3703,7 +3701,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, distortion2 += distortion; if (cpi->common.mcomp_filter_type == SWITCHABLE) - rate2 += get_switchable_rate(cm, x); + rate2 += get_switchable_rate(x); if (!mode_excluded) { if (is_comp_pred) @@ -3728,15 +3726,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, skippable = skippable && uv_skippable; total_sse += uv_sse; - txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); - for (i = 0; i < NB_TXFM_MODES; ++i) - txfm_cache[i] = txfm_cache[ONLY_4X4]; + tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); + for (i = 0; i < TX_MODES; ++i) + tx_cache[i] = tx_cache[ONLY_4X4]; } } else { - compmode_cost = vp9_cost_bit(comp_mode_p, - mbmi->ref_frame[1] > INTRA_FRAME); + compmode_cost = vp9_cost_bit(comp_mode_p, second_ref_frame > INTRA_FRAME); this_rd = handle_inter_mode(cpi, x, bsize, - txfm_cache, + tx_cache, &rate2, &distortion2, &skippable, &rate_y, &distortion_y, &rate_uv, &distortion_uv, @@ -3754,10 +3751,10 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // Estimate the reference frame signaling cost and add it // to the rolling cost variable. - if (mbmi->ref_frame[1] > INTRA_FRAME) { - rate2 += ref_costs_comp[mbmi->ref_frame[0]]; + if (second_ref_frame > INTRA_FRAME) { + rate2 += ref_costs_comp[ref_frame]; } else { - rate2 += ref_costs_single[mbmi->ref_frame[0]]; + rate2 += ref_costs_single[ref_frame]; } if (!disable_skip) { @@ -3766,7 +3763,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // necessary adjustment for rate. Ignore if skip is coded at // segment level as the cost wont have been added in. // Is Mb level skip allowed (i.e. not coded at segment level). 
- const int mb_skip_allowed = !vp9_segfeature_active(&xd->seg, segment_id, + const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP); if (skippable && bsize >= BLOCK_SIZE_SB8X8) { @@ -3787,8 +3784,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, rate2 += prob_skip_cost; } } - } else if (mb_skip_allowed && ref_frame != INTRA_FRAME && - !xd->lossless) { + } else if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) { if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) < RDCOST(x->rdmult, x->rddiv, 0, total_sse)) { // Add in the cost of the no skip flag. @@ -3835,7 +3831,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // best_inter_mode = xd->mode_info_context->mbmi.mode; } - if (!disable_skip && mbmi->ref_frame[0] == INTRA_FRAME) { + if (!disable_skip && ref_frame == INTRA_FRAME) { for (i = 0; i < NB_PREDICTION_TYPES; ++i) best_pred_rd[i] = MIN(best_pred_rd[i], this_rd); for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) @@ -3848,9 +3844,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, || distortion2 < mode_distortions[this_mode]) { mode_distortions[this_mode] = distortion2; } - if (frame_distortions[mbmi->ref_frame[0]] == -1 - || distortion2 < frame_distortions[mbmi->ref_frame[0]]) { - frame_distortions[mbmi->ref_frame[0]] = distortion2; + if (frame_distortions[ref_frame] == -1 + || distortion2 < frame_distortions[ref_frame]) { + frame_distortions[ref_frame] = distortion2; } } @@ -3858,8 +3854,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (this_rd < best_rd || x->skip) { if (!mode_excluded) { // Note index of best mode so far - const int qstep = xd->plane[0].dequant[1]; - best_mode_index = mode_index; if (ref_frame == INTRA_FRAME) { @@ -3882,9 +3876,19 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // TODO(debargha): enhance this test with a better distortion prediction // based on qp, activity mask and history - if (cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) - if (ref_frame > INTRA_FRAME && distortion2 * 4 < qstep * qstep) + if (cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) { + const int qstep = xd->plane[0].dequant[1]; + // TODO(debargha): Enhance this by specializing for each mode_index + int scale = 4; + if (x->source_variance < UINT_MAX) { + const int var_adjust = (x->source_variance < 16); + scale -= var_adjust; + } + if (ref_frame > INTRA_FRAME && + distortion2 * scale < qstep * qstep) { early_term = 1; + } + } } #if 0 // Testing this mode gave rise to an improvement in best error score. 
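For reference, the early-termination check added in the hunk above compares the accumulated distortion against the squared dequantizer step, with the scale tightened for very flat source blocks. A minimal standalone sketch of that predicate (the helper name and parameters are illustrative, not part of the patch):

    #include <limits.h>
    #include <stdint.h>

    /* Sketch of the FLAG_EARLY_TERMINATE test: qstep corresponds to
     * xd->plane[0].dequant[1] and source_variance to x->source_variance
     * in the patch; is_inter_ref is true for non-intra reference frames. */
    static int should_terminate_early(int64_t distortion, int qstep,
                                      unsigned int source_variance,
                                      int is_inter_ref) {
      int scale = 4;
      if (source_variance < UINT_MAX) {
        /* Flat blocks (variance < 16) get a slightly stricter threshold. */
        scale -= (source_variance < 16);
      }
      return is_inter_ref && distortion * scale < (int64_t)qstep * qstep;
    }
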
@@ -3912,7 +3916,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } /* keep record of best compound/single-only prediction */ - if (!disable_skip && mbmi->ref_frame[0] != INTRA_FRAME) { + if (!disable_skip && ref_frame != INTRA_FRAME) { int single_rd, hybrid_rd, single_rate, hybrid_rate; if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) { @@ -3926,10 +3930,10 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2); hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); - if (mbmi->ref_frame[1] <= INTRA_FRAME && + if (second_ref_frame <= INTRA_FRAME && single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) { best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd; - } else if (mbmi->ref_frame[1] > INTRA_FRAME && + } else if (second_ref_frame > INTRA_FRAME && single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) { best_pred_rd[COMP_PREDICTION_ONLY] = single_rd; } @@ -3938,7 +3942,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } /* keep record of best filter type */ - if (!mode_excluded && !disable_skip && mbmi->ref_frame[0] != INTRA_FRAME && + if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME && cm->mcomp_filter_type != BILINEAR) { int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ? VP9_SWITCHABLE_FILTERS : @@ -3958,34 +3962,35 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } /* keep record of best txfm size */ - if (bsize < BLOCK_SIZE_SB32X32) { - if (bsize < BLOCK_SIZE_MB16X16) { + if (bsize < BLOCK_32X32) { + if (bsize < BLOCK_16X16) { if (this_mode == SPLITMV || this_mode == I4X4_PRED) - txfm_cache[ALLOW_8X8] = txfm_cache[ONLY_4X4]; - txfm_cache[ALLOW_16X16] = txfm_cache[ALLOW_8X8]; + tx_cache[ALLOW_8X8] = tx_cache[ONLY_4X4]; + tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8]; } - txfm_cache[ALLOW_32X32] = txfm_cache[ALLOW_16X16]; + tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16]; } if (!mode_excluded && this_rd != INT64_MAX) { - for (i = 0; i < NB_TXFM_MODES; i++) { + for (i = 0; i < TX_MODES; i++) { int64_t adj_rd = INT64_MAX; if (this_mode != I4X4_PRED) { - adj_rd = this_rd + txfm_cache[i] - txfm_cache[cm->tx_mode]; + adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode]; } else { adj_rd = this_rd; } - if (adj_rd < best_txfm_rd[i]) - best_txfm_rd[i] = adj_rd; + if (adj_rd < best_tx_rd[i]) + best_tx_rd[i] = adj_rd; } } if (early_term) break; - if (x->skip && !mode_excluded) + if (x->skip && !comp_pred) break; } + if (best_rd >= best_rd_so_far) return INT64_MAX; @@ -4044,7 +4049,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, (cm->mcomp_filter_type == best_mbmode.interp_filter) || (best_mbmode.ref_frame[0] == INTRA_FRAME)); - // Updating rd_thresh_freq_fact[] here means that the differnt + // Updating rd_thresh_freq_fact[] here means that the different // partition/block sizes are handled independently based on the best // choice for the current partition. 
It may well be better to keep a scaled // best rd so far value and update rd_thresh_freq_fact based on the mode/size @@ -4126,14 +4131,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } if (!x->skip) { - for (i = 0; i < NB_TXFM_MODES; i++) { - if (best_txfm_rd[i] == INT64_MAX) - best_txfm_diff[i] = 0; + for (i = 0; i < TX_MODES; i++) { + if (best_tx_rd[i] == INT64_MAX) + best_tx_diff[i] = 0; else - best_txfm_diff[i] = best_rd - best_txfm_rd[i]; + best_tx_diff[i] = best_rd - best_tx_rd[i]; } } else { - vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff)); + vpx_memset(best_tx_diff, 0, sizeof(best_tx_diff)); } set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1], @@ -4143,7 +4148,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, &mbmi->ref_mvs[mbmi->ref_frame[0]][0], &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]][0], - best_pred_diff, best_txfm_diff, best_filter_diff); + best_pred_diff, best_tx_diff, best_filter_diff); return best_rd; } diff --git a/libvpx/vp9/encoder/vp9_segmentation.c b/libvpx/vp9/encoder/vp9_segmentation.c index ef84cc5c0..9564edc84 100644 --- a/libvpx/vp9/encoder/vp9_segmentation.c +++ b/libvpx/vp9/encoder/vp9_segmentation.c @@ -57,8 +57,7 @@ void vp9_set_segment_data(VP9_PTR ptr, } // Based on set of segment counts calculate a probability tree -static void calc_segtree_probs(MACROBLOCKD *xd, int *segcounts, - vp9_prob *segment_tree_probs) { +static void calc_segtree_probs(int *segcounts, vp9_prob *segment_tree_probs) { // Work out probabilities of each segment const int c01 = segcounts[0] + segcounts[1]; const int c23 = segcounts[2] + segcounts[3]; @@ -75,7 +74,7 @@ static void calc_segtree_probs(MACROBLOCKD *xd, int *segcounts, } // Based on set of segment counts and probabilities calculate a cost estimate -static int cost_segmap(MACROBLOCKD *xd, int *segcounts, vp9_prob *probs) { +static int cost_segmap(int *segcounts, vp9_prob *probs) { const int c01 = segcounts[0] + segcounts[1]; const int c23 = segcounts[2] + segcounts[3]; const int c45 = segcounts[4] + segcounts[5]; @@ -189,13 +188,13 @@ static void count_segs_sb(VP9_COMP *cpi, MODE_INFO *mi, int n; assert(bwl < bsl && bhl < bsl); - if (bsize == BLOCK_SIZE_SB64X64) { - subsize = BLOCK_SIZE_SB32X32; - } else if (bsize == BLOCK_SIZE_SB32X32) { - subsize = BLOCK_SIZE_MB16X16; + if (bsize == BLOCK_64X64) { + subsize = BLOCK_32X32; + } else if (bsize == BLOCK_32X32) { + subsize = BLOCK_16X16; } else { - assert(bsize == BLOCK_SIZE_MB16X16); - subsize = BLOCK_SIZE_SB8X8; + assert(bsize == BLOCK_16X16); + subsize = BLOCK_8X8; } for (n = 0; n < 4; n++) { @@ -211,7 +210,7 @@ static void count_segs_sb(VP9_COMP *cpi, MODE_INFO *mi, void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &cpi->mb.e_mbd; + struct segmentation *seg = &cpi->mb.e_mbd.seg; int no_pred_cost; int t_pred_cost = INT_MAX; @@ -231,8 +230,8 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { // Set default state for the segment tree probabilities and the // temporal coding probabilities - vpx_memset(xd->seg.tree_probs, 255, sizeof(xd->seg.tree_probs)); - vpx_memset(xd->seg.pred_probs, 255, sizeof(xd->seg.pred_probs)); + vpx_memset(seg->tree_probs, 255, sizeof(seg->tree_probs)); + vpx_memset(seg->pred_probs, 255, sizeof(seg->pred_probs)); vpx_memset(no_pred_segcounts, 0, sizeof(no_pred_segcounts)); vpx_memset(t_unpred_seg_counts, 0, sizeof(t_unpred_seg_counts)); @@ -249,21 +248,21 @@ void 
vp9_choose_segmap_coding_method(VP9_COMP *cpi) { for (mi_col = cm->cur_tile_mi_col_start; mi_col < cm->cur_tile_mi_col_end; mi_col += 8, mi += 8) count_segs_sb(cpi, mi, no_pred_segcounts, temporal_predictor_count, - t_unpred_seg_counts, mi_row, mi_col, BLOCK_SIZE_SB64X64); + t_unpred_seg_counts, mi_row, mi_col, BLOCK_64X64); } } // Work out probability tree for coding segments without prediction // and the cost. - calc_segtree_probs(xd, no_pred_segcounts, no_pred_tree); - no_pred_cost = cost_segmap(xd, no_pred_segcounts, no_pred_tree); + calc_segtree_probs(no_pred_segcounts, no_pred_tree); + no_pred_cost = cost_segmap(no_pred_segcounts, no_pred_tree); // Key frames cannot use temporal prediction if (cm->frame_type != KEY_FRAME) { // Work out probability tree for coding those segments not // predicted using the temporal method and the cost. - calc_segtree_probs(xd, t_unpred_seg_counts, t_pred_tree); - t_pred_cost = cost_segmap(xd, t_unpred_seg_counts, t_pred_tree); + calc_segtree_probs(t_unpred_seg_counts, t_pred_tree); + t_pred_cost = cost_segmap(t_unpred_seg_counts, t_pred_tree); // Add in the cost of the signalling for each prediction context for (i = 0; i < PREDICTION_PROBS; i++) { @@ -280,11 +279,11 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { // Now choose which coding method to use. if (t_pred_cost < no_pred_cost) { - xd->seg.temporal_update = 1; - vpx_memcpy(xd->seg.tree_probs, t_pred_tree, sizeof(t_pred_tree)); - vpx_memcpy(xd->seg.pred_probs, t_nopred_prob, sizeof(t_nopred_prob)); + seg->temporal_update = 1; + vpx_memcpy(seg->tree_probs, t_pred_tree, sizeof(t_pred_tree)); + vpx_memcpy(seg->pred_probs, t_nopred_prob, sizeof(t_nopred_prob)); } else { - xd->seg.temporal_update = 0; - vpx_memcpy(xd->seg.tree_probs, no_pred_tree, sizeof(no_pred_tree)); + seg->temporal_update = 0; + vpx_memcpy(seg->tree_probs, no_pred_tree, sizeof(no_pred_tree)); } } diff --git a/libvpx/vp9/encoder/vp9_temporal_filter.c b/libvpx/vp9/encoder/vp9_temporal_filter.c index 821b7c6ca..a692c010e 100644 --- a/libvpx/vp9/encoder/vp9_temporal_filter.c +++ b/libvpx/vp9/encoder/vp9_temporal_filter.c @@ -40,10 +40,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, int mv_col, uint8_t *pred) { const int which_mv = 0; - int_mv mv; - - mv.as_mv.row = mv_row; - mv.as_mv.col = mv_col; + MV mv = { mv_row, mv_col }; vp9_build_inter_predictor(y_mb_ptr, stride, &pred[0], 16, diff --git a/libvpx/vp9/encoder/vp9_tokenize.c b/libvpx/vp9/encoder/vp9_tokenize.c index 4b9c6c8b4..caa89b218 100644 --- a/libvpx/vp9/encoder/vp9_tokenize.c +++ b/libvpx/vp9/encoder/vp9_tokenize.c @@ -25,8 +25,8 @@ compressions, then generating vp9_context.c = initial stats. */ #ifdef ENTROPY_STATS -vp9_coeff_accum context_counters[TX_SIZE_MAX_SB][BLOCK_TYPES]; -extern vp9_coeff_stats tree_update_hist[TX_SIZE_MAX_SB][BLOCK_TYPES]; +vp9_coeff_accum context_counters[TX_SIZES][BLOCK_TYPES]; +extern vp9_coeff_stats tree_update_hist[TX_SIZES][BLOCK_TYPES]; #endif /* ENTROPY_STATS */ DECLARE_ALIGNED(16, extern const uint8_t, @@ -40,7 +40,7 @@ const int *vp9_dct_value_cost_ptr; static void fill_value_tokens() { TOKENVALUE *const t = dct_value_tokens + DCT_MAX_VALUE; - vp9_extra_bit *const e = vp9_extra_bits; + const vp9_extra_bit *const e = vp9_extra_bits; int i = -DCT_MAX_VALUE; int sign = 1; @@ -69,7 +69,7 @@ static void fill_value_tokens() { // initialize the cost for extra bits for all possible coefficient value. 
{ int cost = 0; - vp9_extra_bit *p = vp9_extra_bits + t[i].token; + const vp9_extra_bit *p = vp9_extra_bits + t[i].token; if (p->base_val) { const int extra = t[i].extra; @@ -95,18 +95,40 @@ struct tokenize_b_args { MACROBLOCKD *xd; TOKENEXTRA **tp; TX_SIZE tx_size; - int dry_run; }; +static void set_entropy_context_b(int plane, int block, BLOCK_SIZE_TYPE bsize, + int ss_txfrm_size, void *arg) { + struct tokenize_b_args* const args = arg; + TX_SIZE tx_size = ss_txfrm_size >> 1; + MACROBLOCKD *xd = args->xd; + const int bwl = b_width_log2(bsize); + const int off = block >> (2 * tx_size); + const int mod = bwl - tx_size - xd->plane[plane].subsampling_x; + const int aoff = (off & ((1 << mod) - 1)) << tx_size; + const int loff = (off >> mod) << tx_size; + ENTROPY_CONTEXT *A = xd->plane[plane].above_context + aoff; + ENTROPY_CONTEXT *L = xd->plane[plane].left_context + loff; + const int eob = xd->plane[plane].eobs[block]; + const int tx_size_in_blocks = 1 << tx_size; + + if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) { + set_contexts_on_border(xd, bsize, plane, tx_size_in_blocks, eob, aoff, loff, + A, L); + } else { + vpx_memset(A, eob > 0, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks); + vpx_memset(L, eob > 0, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks); + } +} + static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, int ss_txfrm_size, void *arg) { struct tokenize_b_args* const args = arg; VP9_COMP *cpi = args->cpi; MACROBLOCKD *xd = args->xd; TOKENEXTRA **tp = args->tp; - TX_SIZE tx_size = ss_txfrm_size / 2; - int dry_run = args->dry_run; - + const TX_SIZE tx_size = ss_txfrm_size >> 1; + const int tx_size_in_blocks = 1 << tx_size; MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; int pt; /* near block/prev token context index */ int c = 0, rc = 0; @@ -114,9 +136,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, const int eob = xd->plane[plane].eobs[block]; const PLANE_TYPE type = xd->plane[plane].plane_type; const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16); - const BLOCK_SIZE_TYPE sb_type = (mbmi->sb_type < BLOCK_SIZE_SB8X8) ? 
- BLOCK_SIZE_SB8X8 : mbmi->sb_type; - const int bwl = b_width_log2(sb_type); + const int bwl = b_width_log2(bsize); const int off = block >> (2 * tx_size); const int mod = bwl - tx_size - xd->plane[plane].subsampling_x; const int aoff = (off & ((1 << mod) - 1)) << tx_size; @@ -128,7 +148,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, const int16_t *scan, *nb; vp9_coeff_count *counts; vp9_coeff_probs_model *coef_probs; - const int ref = mbmi->ref_frame[0] != INTRA_FRAME; + const int ref = is_inter_block(mbmi); ENTROPY_CONTEXT above_ec, left_ec; uint8_t token_cache[1024]; const uint8_t *band_translate; @@ -146,22 +166,22 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, band_translate = vp9_coefband_trans_4x4; break; case TX_8X8: - above_ec = (A[0] + A[1]) != 0; - left_ec = (L[0] + L[1]) != 0; + above_ec = !!*(uint16_t *)A; + left_ec = !!*(uint16_t *)L; seg_eob = 64; scan = get_scan_8x8(get_tx_type_8x8(type, xd)); band_translate = vp9_coefband_trans_8x8plus; break; case TX_16X16: - above_ec = (A[0] + A[1] + A[2] + A[3]) != 0; - left_ec = (L[0] + L[1] + L[2] + L[3]) != 0; + above_ec = !!*(uint32_t *)A; + left_ec = !!*(uint32_t *)L; seg_eob = 256; scan = get_scan_16x16(get_tx_type_16x16(type, xd)); band_translate = vp9_coefband_trans_8x8plus; break; case TX_32X32: - above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0; - left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0; + above_ec = !!*(uint64_t *)A; + left_ec = !!*(uint64_t *)L; seg_eob = 1024; scan = vp9_default_scan_32x32; band_translate = vp9_coefband_trans_8x8plus; @@ -198,22 +218,21 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, assert(vp9_coef_encodings[t->token].len - t->skip_eob_node > 0); - if (!dry_run) { - ++counts[type][ref][band][pt][token]; - if (!t->skip_eob_node) - ++cpi->common.counts.eob_branch[tx_size][type][ref][band][pt]; - } - token_cache[scan[c]] = vp9_pt_energy_class[token]; + ++counts[type][ref][band][pt][token]; + if (!t->skip_eob_node) + ++cpi->common.counts.eob_branch[tx_size][type][ref][band][pt]; + + token_cache[rc] = vp9_pt_energy_class[token]; ++t; } while (c < eob && ++c < seg_eob); *tp = t; if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) { - set_contexts_on_border(xd, bsize, plane, tx_size, c, aoff, loff, A, L); + set_contexts_on_border(xd, bsize, plane, tx_size_in_blocks, c, aoff, loff, + A, L); } else { - for (pt = 0; pt < (1 << tx_size); pt++) { - A[pt] = L[pt] = c > 0; - } + vpx_memset(A, c > 0, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks); + vpx_memset(L, c > 0, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks); } } @@ -257,8 +276,7 @@ void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, const int mb_skip_context = vp9_get_pred_context_mbskip(xd); const int skip_inc = !vp9_segfeature_active(&xd->seg, mbmi->segment_id, SEG_LVL_SKIP); - const TX_SIZE txfm_size = mbmi->txfm_size; - struct tokenize_b_args arg = { cpi, xd, t, txfm_size, dry_run }; + struct tokenize_b_args arg = {cpi, xd, t, mbmi->txfm_size}; mbmi->mb_skip_coeff = vp9_sb_is_skippable(xd, bsize); if (mbmi->mb_skip_coeff) { @@ -270,13 +288,13 @@ void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, return; } - if (!dry_run) + if (!dry_run) { cm->counts.mbskip[mb_skip_context][0] += skip_inc; - - foreach_transformed_block(xd, bsize, tokenize_b, &arg); - - if (dry_run) + foreach_transformed_block(xd, bsize, tokenize_b, &arg); + } else { + foreach_transformed_block(xd, bsize, set_entropy_context_b, &arg); *t = 
t_backup; + } } #ifdef ENTROPY_STATS diff --git a/libvpx/vp9/encoder/vp9_tokenize.h b/libvpx/vp9/encoder/vp9_tokenize.h index bc7d9352e..968bec75e 100644 --- a/libvpx/vp9/encoder/vp9_tokenize.h +++ b/libvpx/vp9/encoder/vp9_tokenize.h @@ -43,7 +43,7 @@ void vp9_tokenize_sb(struct VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, void init_context_counters(); void print_context_counters(); -extern vp9_coeff_accum context_counters[TX_SIZE_MAX_SB][BLOCK_TYPES]; +extern vp9_coeff_accum context_counters[TX_SIZES][BLOCK_TYPES]; #endif extern const int *vp9_dct_value_cost_ptr; diff --git a/libvpx/vp9/vp9_common.mk b/libvpx/vp9/vp9_common.mk index 5a0c1c958..b2b2a80a7 100644 --- a/libvpx/vp9/vp9_common.mk +++ b/libvpx/vp9/vp9_common.mk @@ -74,7 +74,6 @@ VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_loopfilter_intrin_ss VP9_COMMON_SRCS-$(CONFIG_POSTPROC) += common/vp9_postproc.h VP9_COMMON_SRCS-$(CONFIG_POSTPROC) += common/vp9_postproc.c VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_loopfilter_mmx.asm -VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_copy_sse2.asm VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_intrapred_sse2.asm VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_intrapred_ssse3.asm VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_ssse3.asm @@ -83,6 +82,10 @@ VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_postproc_mmx.asm VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_postproc_sse2.asm endif +ifeq ($(USE_X86INC),yes) +VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_copy_sse2.asm +endif + VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve_neon.c @@ -91,5 +94,6 @@ VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve8_avg_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_dc_only_idct_add_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct8x8_add_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_mb_lpf_neon$(ASM) $(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.sh)) diff --git a/libvpx/vp9/vp9cx.mk b/libvpx/vp9/vp9cx.mk index dee83c9e4..288c0d829 100644 --- a/libvpx/vp9/vp9cx.mk +++ b/libvpx/vp9/vp9cx.mk @@ -83,11 +83,15 @@ VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_impl_sse2.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad_sse2.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance.asm -VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subtract_sse2.asm -VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance_impl_sse2.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm VP9_CX_SRCS-$(HAVE_SSE3) += encoder/x86/vp9_sad_sse3.asm + +ifeq ($(USE_X86INC),yes) +VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm +VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subtract_sse2.asm +endif + ifeq ($(ARCH_X86_64),yes) VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3.asm endif diff --git a/libvpx/vp9/vp9dx.mk b/libvpx/vp9/vp9dx.mk index 6cad29329..be3afe835 100644 --- a/libvpx/vp9/vp9dx.mk +++ b/libvpx/vp9/vp9dx.mk @@ -28,6 +28,8 @@ VP9_DX_SRCS-yes += decoder/vp9_decodemv.h VP9_DX_SRCS-yes += decoder/vp9_detokenize.h VP9_DX_SRCS-yes += decoder/vp9_onyxd.h VP9_DX_SRCS-yes += decoder/vp9_onyxd_int.h +VP9_DX_SRCS-yes += decoder/vp9_thread.c +VP9_DX_SRCS-yes += decoder/vp9_thread.h VP9_DX_SRCS-yes 
+= decoder/vp9_treereader.h VP9_DX_SRCS-yes += decoder/vp9_onyxd_if.c VP9_DX_SRCS-yes += decoder/vp9_idct_blk.c diff --git a/libvpx/vpx_scale/generic/yv12config.c b/libvpx/vpx_scale/generic/yv12config.c index b18155be6..259204065 100644 --- a/libvpx/vpx_scale/generic/yv12config.c +++ b/libvpx/vpx_scale/generic/yv12config.c @@ -60,7 +60,7 @@ int vp8_yv12_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, /* Only support allocating buffers that have a border that's a multiple * of 32. The border restriction is required to get 16-byte alignment of - * the start of the chroma rows without intoducing an arbitrary gap + * the start of the chroma rows without introducing an arbitrary gap * between planes, which would break the semantics of things like * vpx_img_set_rect(). */ if (border & 0x1f) @@ -158,7 +158,7 @@ int vp9_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, /* Only support allocating buffers that have a border that's a multiple * of 32. The border restriction is required to get 16-byte alignment of - * the start of the chroma rows without intoducing an arbitrary gap + * the start of the chroma rows without introducing an arbitrary gap * between planes, which would break the semantics of things like * vpx_img_set_rect(). */ if (border & 0x1f) diff --git a/mips-dspr2/libvpx_srcs.txt b/mips-dspr2/libvpx_srcs.txt index d75620883..299d615be 100644 --- a/mips-dspr2/libvpx_srcs.txt +++ b/mips-dspr2/libvpx_srcs.txt @@ -203,6 +203,8 @@ vp9/decoder/vp9_onyxd.h vp9/decoder/vp9_onyxd_if.c vp9/decoder/vp9_onyxd_int.h vp9/decoder/vp9_read_bit_buffer.h +vp9/decoder/vp9_thread.c +vp9/decoder/vp9_thread.h vp9/decoder/vp9_treereader.h vp9/vp9_common.mk vp9/vp9_dx_iface.c diff --git a/mips-dspr2/vp9_rtcd.h b/mips-dspr2/vp9_rtcd.h index 0752f4590..d6dc6bfb6 100644 --- a/mips-dspr2/vp9_rtcd.h +++ b/mips-dspr2/vp9_rtcd.h @@ -14,9 +14,7 @@ #include "vpx/vpx_integer.h" #include "vp9/common/vp9_enums.h" -struct loop_filter_info; struct macroblockd; -struct loop_filter_info; /* Encoder forward decls */ struct macroblock; @@ -260,14 +258,17 @@ void vp9_short_idct4x4_1_add_c(int16_t *input, uint8_t *dest, int dest_stride); void vp9_short_idct4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride); #define vp9_short_idct4x4_add vp9_short_idct4x4_add_c +void vp9_short_idct8x8_1_add_c(int16_t *input, uint8_t *dest, int dest_stride); +#define vp9_short_idct8x8_1_add vp9_short_idct8x8_1_add_c + void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride); #define vp9_short_idct8x8_add vp9_short_idct8x8_add_c void vp9_short_idct10_8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride); #define vp9_short_idct10_8x8_add vp9_short_idct10_8x8_add_c -void vp9_short_idct1_8x8_c(int16_t *input, int16_t *output); -#define vp9_short_idct1_8x8 vp9_short_idct1_8x8_c +void vp9_short_idct16x16_1_add_c(int16_t *input, uint8_t *dest, int dest_stride); +#define vp9_short_idct16x16_1_add vp9_short_idct16x16_1_add_c void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride); #define vp9_short_idct16x16_add vp9_short_idct16x16_add_c @@ -275,18 +276,12 @@ void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride); void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride); #define vp9_short_idct10_16x16_add vp9_short_idct10_16x16_add_c -void vp9_short_idct1_16x16_c(int16_t *input, int16_t *output); -#define vp9_short_idct1_16x16 vp9_short_idct1_16x16_c - void vp9_short_idct32x32_add_c(int16_t *input, uint8_t *dest, int dest_stride); #define 
vp9_short_idct32x32_add vp9_short_idct32x32_add_c void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output); #define vp9_short_idct1_32x32 vp9_short_idct1_32x32_c -void vp9_short_idct10_32x32_add_c(int16_t *input, uint8_t *dest, int dest_stride); -#define vp9_short_idct10_32x32_add vp9_short_idct10_32x32_add_c - void vp9_short_iht4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride, int tx_type); #define vp9_short_iht4x4_add vp9_short_iht4x4_add_c diff --git a/mips-dspr2/vpx_config.h b/mips-dspr2/vpx_config.h index 13a092db0..e85b676d4 100644 --- a/mips-dspr2/vpx_config.h +++ b/mips-dspr2/vpx_config.h @@ -39,6 +39,7 @@ #define CONFIG_INSTALL_BINS 1 #define CONFIG_INSTALL_LIBS 1 #define CONFIG_INSTALL_SRCS 0 +#define CONFIG_USE_X86INC 1 #define CONFIG_DEBUG 0 #define CONFIG_GPROF 0 #define CONFIG_GCOV 0 diff --git a/mips/libvpx_srcs.txt b/mips/libvpx_srcs.txt index 402ac2420..055f5fb5d 100644 --- a/mips/libvpx_srcs.txt +++ b/mips/libvpx_srcs.txt @@ -197,6 +197,8 @@ vp9/decoder/vp9_onyxd.h vp9/decoder/vp9_onyxd_if.c vp9/decoder/vp9_onyxd_int.h vp9/decoder/vp9_read_bit_buffer.h +vp9/decoder/vp9_thread.c +vp9/decoder/vp9_thread.h vp9/decoder/vp9_treereader.h vp9/vp9_common.mk vp9/vp9_dx_iface.c diff --git a/mips/vp9_rtcd.h b/mips/vp9_rtcd.h index 0752f4590..d6dc6bfb6 100644 --- a/mips/vp9_rtcd.h +++ b/mips/vp9_rtcd.h @@ -14,9 +14,7 @@ #include "vpx/vpx_integer.h" #include "vp9/common/vp9_enums.h" -struct loop_filter_info; struct macroblockd; -struct loop_filter_info; /* Encoder forward decls */ struct macroblock; @@ -260,14 +258,17 @@ void vp9_short_idct4x4_1_add_c(int16_t *input, uint8_t *dest, int dest_stride); void vp9_short_idct4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride); #define vp9_short_idct4x4_add vp9_short_idct4x4_add_c +void vp9_short_idct8x8_1_add_c(int16_t *input, uint8_t *dest, int dest_stride); +#define vp9_short_idct8x8_1_add vp9_short_idct8x8_1_add_c + void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride); #define vp9_short_idct8x8_add vp9_short_idct8x8_add_c void vp9_short_idct10_8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride); #define vp9_short_idct10_8x8_add vp9_short_idct10_8x8_add_c -void vp9_short_idct1_8x8_c(int16_t *input, int16_t *output); -#define vp9_short_idct1_8x8 vp9_short_idct1_8x8_c +void vp9_short_idct16x16_1_add_c(int16_t *input, uint8_t *dest, int dest_stride); +#define vp9_short_idct16x16_1_add vp9_short_idct16x16_1_add_c void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride); #define vp9_short_idct16x16_add vp9_short_idct16x16_add_c @@ -275,18 +276,12 @@ void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride); void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride); #define vp9_short_idct10_16x16_add vp9_short_idct10_16x16_add_c -void vp9_short_idct1_16x16_c(int16_t *input, int16_t *output); -#define vp9_short_idct1_16x16 vp9_short_idct1_16x16_c - void vp9_short_idct32x32_add_c(int16_t *input, uint8_t *dest, int dest_stride); #define vp9_short_idct32x32_add vp9_short_idct32x32_add_c void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output); #define vp9_short_idct1_32x32 vp9_short_idct1_32x32_c -void vp9_short_idct10_32x32_add_c(int16_t *input, uint8_t *dest, int dest_stride); -#define vp9_short_idct10_32x32_add vp9_short_idct10_32x32_add_c - void vp9_short_iht4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride, int tx_type); #define vp9_short_iht4x4_add vp9_short_iht4x4_add_c diff --git a/mips/vpx_config.h 
b/mips/vpx_config.h index 51ea388f1..7db47f873 100644 --- a/mips/vpx_config.h +++ b/mips/vpx_config.h @@ -39,6 +39,7 @@ #define CONFIG_INSTALL_BINS 1 #define CONFIG_INSTALL_LIBS 1 #define CONFIG_INSTALL_SRCS 0 +#define CONFIG_USE_X86INC 1 #define CONFIG_DEBUG 0 #define CONFIG_GPROF 0 #define CONFIG_GCOV 0
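As an aside on the tokenizer changes earlier in this patch: the per-coefficient context sums such as (A[0] + A[1]) != 0 are replaced by single wider loads, and the above/left contexts are refilled with memset over 1 << tx_size entries. A small sketch of that pattern, assuming the usual one-byte ENTROPY_CONTEXT type (helper names are illustrative, and the aliasing casts mirror the patch, which assumes suitable alignment):

    #include <stdint.h>
    #include <string.h>

    typedef int8_t ENTROPY_CONTEXT;

    /* "Any nonzero context?" via one load, as in the TX_8X8 / TX_16X16 /
     * TX_32X32 cases of tokenize_b (2, 4 or 8 contexts respectively). */
    static int any_ctx_8x8(const ENTROPY_CONTEXT *a)   { return !!*(const uint16_t *)a; }
    static int any_ctx_16x16(const ENTROPY_CONTEXT *a) { return !!*(const uint32_t *)a; }
    static int any_ctx_32x32(const ENTROPY_CONTEXT *a) { return !!*(const uint64_t *)a; }

    /* Refill the above/left contexts for a transform block away from the
     * frame border, mirroring the vpx_memset calls in the patch. */
    static void fill_contexts(ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                              int tx_size_in_blocks, int has_coeffs) {
      memset(a, has_coeffs, sizeof(*a) * tx_size_in_blocks);
      memset(l, has_coeffs, sizeof(*l) * tx_size_in_blocks);
    }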