diff options
author | Johann <johannkoenig@google.com> | 2016-07-21 12:09:52 -0700 |
---|---|---|
committer | Johann <johannkoenig@google.com> | 2016-07-21 12:09:52 -0700 |
commit | 68e1c830ade592be74773e249bf94e2bbfb50de7 (patch) | |
tree | 08299f7deb6079690f0a3d2118ef3882fa77bdc6 /libvpx | |
parent | 96ebd06cb9832f583f7c181ec886eade209524df (diff) | |
download | libvpx-68e1c830ade592be74773e249bf94e2bbfb50de7.tar.gz |
Update external/libvpx to v1.6.0
Change-Id: I9425a3d3c3524d43823bc89f9f03556420c3dd42
Diffstat (limited to 'libvpx')
328 files changed, 19210 insertions, 14738 deletions
diff --git a/libvpx/.mailmap b/libvpx/.mailmap index 42f3617b0..94cb1ecfe 100644 --- a/libvpx/.mailmap +++ b/libvpx/.mailmap @@ -1,27 +1,28 @@ Adrian Grange <agrange@google.com> -Adrian Grange <agrange@google.com> <agrange@agrange-macbookpro.roam.corp.google.com> Aℓex Converse <aconverse@google.com> Aℓex Converse <aconverse@google.com> <alex.converse@gmail.com> Alexis Ballier <aballier@gentoo.org> <alexis.ballier@gmail.com> Alpha Lam <hclam@google.com> <hclam@chromium.org> +Daniele Castagna <dcastagna@chromium.org> <dcastagna@google.com> Deb Mukherjee <debargha@google.com> Erik Niemeyer <erik.a.niemeyer@intel.com> <erik.a.niemeyer@gmail.com> Guillaume Martres <gmartres@google.com> <smarter3@gmail.com> Hangyu Kuang <hkuang@google.com> -Hangyu Kuang <hkuang@google.com> <hkuang@hkuang-macbookpro.roam.corp.google.com> Hui Su <huisu@google.com> Jacky Chen <jackychen@google.com> Jim Bankoski <jimbankoski@google.com> Johann Koenig <johannkoenig@google.com> Johann Koenig <johannkoenig@google.com> <johann.koenig@duck.com> -Johann Koenig <johannkoenig@google.com> <johannkoenig@dhcp-172-19-7-52.mtv.corp.google.com> Johann Koenig <johannkoenig@google.com> <johann.koenig@gmail.com> +Johann Koenig <johannkoenig@google.com> <johannkoenig@chromium.org> John Koleszar <jkoleszar@google.com> Joshua Litt <joshualitt@google.com> <joshualitt@chromium.org> Marco Paniconi <marpan@google.com> Marco Paniconi <marpan@google.com> <marpan@chromium.org> Pascal Massimino <pascal.massimino@gmail.com> Paul Wilkins <paulwilkins@google.com> +Peter de Rivaz <peter.derivaz@gmail.com> +Peter de Rivaz <peter.derivaz@gmail.com> <peter.derivaz@argondesign.com> Ralph Giles <giles@xiph.org> <giles@entropywave.com> Ralph Giles <giles@xiph.org> <giles@mozilla.com> Ronald S. 
Bultje <rsbultje@gmail.com> <rbultje@google.com> @@ -29,8 +30,8 @@ Sami Pietilä <samipietila@google.com> Tamar Levy <tamar.levy@intel.com> Tamar Levy <tamar.levy@intel.com> <levytamar82@gmail.com> Tero Rintaluoma <teror@google.com> <tero.rintaluoma@on2.com> -Timothy B. Terriberry <tterribe@xiph.org> Tim Terriberry <tterriberry@mozilla.com> +Timothy B. Terriberry <tterribe@xiph.org> <tterriberry@mozilla.com> Tom Finegan <tomfinegan@google.com> Tom Finegan <tomfinegan@google.com> <tomfinegan@chromium.org> Yaowu Xu <yaowu@google.com> <yaowu@xuyaowu.com> -Yaowu Xu <yaowu@google.com> <yaowu@YAOWU2-W.ad.corp.google.com> +Yaowu Xu <yaowu@google.com> <Yaowu Xu> diff --git a/libvpx/AUTHORS b/libvpx/AUTHORS index f89b6776a..fcd5c534a 100644 --- a/libvpx/AUTHORS +++ b/libvpx/AUTHORS @@ -24,6 +24,7 @@ changjun.yang <changjun.yang@intel.com> Charles 'Buck' Krasic <ckrasic@google.com> chm <chm@rock-chips.com> Christian Duvivier <cduvivier@google.com> +Daniele Castagna <dcastagna@chromium.org> Daniel Kang <ddkang@google.com> Deb Mukherjee <debargha@google.com> Dim Temp <dimtemp0@gmail.com> @@ -56,10 +57,12 @@ James Zern <jzern@google.com> Jan Gerber <j@mailb.org> Jan Kratochvil <jan.kratochvil@redhat.com> Janne Salonen <jsalonen@google.com> +Jean-Yves Avenard <jyavenard@mozilla.com> Jeff Faust <jfaust@google.com> Jeff Muizelaar <jmuizelaar@mozilla.com> Jeff Petkau <jpet@chromium.org> Jia Jia <jia.jia@linaro.org> +Jian Zhou <zhoujian@google.com> Jim Bankoski <jimbankoski@google.com> Jingning Han <jingning@google.com> Joey Parrish <joeyparrish@google.com> @@ -74,6 +77,7 @@ Justin Clift <justin@salasaga.org> Justin Lebar <justin.lebar@gmail.com> KO Myung-Hun <komh@chollian.net> Lawrence Velázquez <larryv@macports.org> +Linfeng Zhang <linfengz@google.com> Lou Quillio <louquillio@google.com> Luca Barbato <lu_zero@gentoo.org> Makoto Kato <makoto.kt@gmail.com> @@ -107,9 +111,11 @@ Rob Bradford <rob@linux.intel.com> Ronald S. 
Bultje <rsbultje@gmail.com> Rui Ueyama <ruiu@google.com> Sami Pietilä <samipietila@google.com> +Sasi Inguva <isasi@google.com> Scott Graham <scottmg@chromium.org> Scott LaVarnway <slavarnway@google.com> Sean McGovern <gseanmcg@gmail.com> +Sergey Kolomenkin <kolomenkin@gmail.com> Sergey Ulanov <sergeyu@chromium.org> Shimon Doodkin <helpmepro1@gmail.com> Shunyao Li <shunyaoli@google.com> @@ -126,8 +132,10 @@ Timothy B. Terriberry <tterribe@xiph.org> Tom Finegan <tomfinegan@google.com> Vignesh Venkatasubramanian <vigneshv@google.com> Yaowu Xu <yaowu@google.com> +Yi Luo <luoyi@google.com> Yongzhe Wang <yongzhe@google.com> Yunqing Wang <yunqingwang@google.com> +Yury Gitman <yuryg@google.com> Zoe Liu <zoeliu@google.com> Google Inc. The Mozilla Foundation diff --git a/libvpx/CHANGELOG b/libvpx/CHANGELOG index 7746cc6c4..795d395f9 100644 --- a/libvpx/CHANGELOG +++ b/libvpx/CHANGELOG @@ -1,3 +1,33 @@ +2016-07-20 v1.6.0 "Khaki Campbell Duck" + This release improves upon the VP9 encoder and speeds up the encoding and + decoding processes. + + - Upgrading: + This release is ABI incompatible with 1.5.0 due to a new 'color_range' enum + in vpx_image and some minor changes to the VP8_COMP structure. + + The default key frame interval for VP9 has changed from 128 to 9999. + + - Enhancement: + A core focus has been performance for low end Intel processors. SSSE3 + instructions such as 'pshufb' have been avoided and instructions have been + reordered to better accommodate the more constrained pipelines. + + As a result, devices based on Celeron processors have seen substantial + decoding improvements. From Indian Runner Duck to Javan Whistling Duck, + decoding speed improved between 10 and 30%. Between Javan Whistling Duck + and Khaki Campbell Duck, it improved another 10 to 15%. + + While Celeron benefited most, Core-i5 also improved 5% and 10% between the + respective releases. + + Realtime performance for WebRTC for both speed and quality has received a + lot of attention. 
+ + - Bug Fixes: + A number of fuzzing issues, found variously by Mozilla, Chromium and others, + have been fixed and we strongly recommend updating. + 2015-11-09 v1.5.0 "Javan Whistling Duck" This release improves upon the VP9 encoder and speeds up the encoding and decoding processes. diff --git a/libvpx/README b/libvpx/README index 979440eb7..a8e6aebcd 100644 --- a/libvpx/README +++ b/libvpx/README @@ -1,4 +1,4 @@ -README - 23 March 2015 +README - 20 July 2016 Welcome to the WebM VP8/VP9 Codec SDK! @@ -47,7 +47,6 @@ COMPILING THE APPLICATIONS/LIBRARIES: --help output of the configure script. As of this writing, the list of available targets is: - armv6-darwin-gcc armv6-linux-rvct armv6-linux-gcc armv6-none-rvct diff --git a/libvpx/build/make/Android.mk b/libvpx/build/make/Android.mk index df01dece6..9eb6dd280 100644 --- a/libvpx/build/make/Android.mk +++ b/libvpx/build/make/Android.mk @@ -174,9 +174,6 @@ endif ifeq ($(CONFIG_VP9), yes) $$(rtcd_dep_template_SRCS): vp9_rtcd.h endif -ifeq ($(CONFIG_VP10), yes) -$$(rtcd_dep_template_SRCS): vp10_rtcd.h -endif $$(rtcd_dep_template_SRCS): vpx_scale_rtcd.h $$(rtcd_dep_template_SRCS): vpx_dsp_rtcd.h diff --git a/libvpx/build/make/Makefile b/libvpx/build/make/Makefile index 3081a9268..3e8c02490 100644 --- a/libvpx/build/make/Makefile +++ b/libvpx/build/make/Makefile @@ -119,29 +119,25 @@ utiltest: test-no-data-check:: exampletest-no-data-check utiltest-no-data-check: -# Add compiler flags for intrinsic files +# Force to realign stack always on OS/2 ifeq ($(TOOLCHAIN), x86-os2-gcc) -STACKREALIGN=-mstackrealign -else -STACKREALIGN= +CFLAGS += -mstackrealign endif $(BUILD_PFX)%_mmx.c.d: CFLAGS += -mmmx $(BUILD_PFX)%_mmx.c.o: CFLAGS += -mmmx -$(BUILD_PFX)%_sse2.c.d: CFLAGS += -msse2 $(STACKREALIGN) -$(BUILD_PFX)%_sse2.c.o: CFLAGS += -msse2 $(STACKREALIGN) -$(BUILD_PFX)%_sse3.c.d: CFLAGS += -msse3 $(STACKREALIGN) -$(BUILD_PFX)%_sse3.c.o: CFLAGS += -msse3 $(STACKREALIGN) -$(BUILD_PFX)%_ssse3.c.d: CFLAGS += -mssse3 
$(STACKREALIGN) -$(BUILD_PFX)%_ssse3.c.o: CFLAGS += -mssse3 $(STACKREALIGN) -$(BUILD_PFX)%_sse4.c.d: CFLAGS += -msse4.1 $(STACKREALIGN) -$(BUILD_PFX)%_sse4.c.o: CFLAGS += -msse4.1 $(STACKREALIGN) -$(BUILD_PFX)%_avx.c.d: CFLAGS += -mavx $(STACKREALIGN) -$(BUILD_PFX)%_avx.c.o: CFLAGS += -mavx $(STACKREALIGN) -$(BUILD_PFX)%_avx2.c.d: CFLAGS += -mavx2 $(STACKREALIGN) -$(BUILD_PFX)%_avx2.c.o: CFLAGS += -mavx2 $(STACKREALIGN) -$(BUILD_PFX)%vp9_reconintra.c.d: CFLAGS += $(STACKREALIGN) -$(BUILD_PFX)%vp9_reconintra.c.o: CFLAGS += $(STACKREALIGN) +$(BUILD_PFX)%_sse2.c.d: CFLAGS += -msse2 +$(BUILD_PFX)%_sse2.c.o: CFLAGS += -msse2 +$(BUILD_PFX)%_sse3.c.d: CFLAGS += -msse3 +$(BUILD_PFX)%_sse3.c.o: CFLAGS += -msse3 +$(BUILD_PFX)%_ssse3.c.d: CFLAGS += -mssse3 +$(BUILD_PFX)%_ssse3.c.o: CFLAGS += -mssse3 +$(BUILD_PFX)%_sse4.c.d: CFLAGS += -msse4.1 +$(BUILD_PFX)%_sse4.c.o: CFLAGS += -msse4.1 +$(BUILD_PFX)%_avx.c.d: CFLAGS += -mavx +$(BUILD_PFX)%_avx.c.o: CFLAGS += -mavx +$(BUILD_PFX)%_avx2.c.d: CFLAGS += -mavx2 +$(BUILD_PFX)%_avx2.c.o: CFLAGS += -mavx2 $(BUILD_PFX)%.c.d: %.c $(if $(quiet),@echo " [DEP] $@") diff --git a/libvpx/build/make/configure.sh b/libvpx/build/make/configure.sh index c592b6385..4f0071bb5 100755 --- a/libvpx/build/make/configure.sh +++ b/libvpx/build/make/configure.sh @@ -185,6 +185,7 @@ add_extralibs() { # # Boolean Manipulation Functions # + enable_feature(){ set_all yes $* } @@ -201,6 +202,20 @@ disabled(){ eval test "x\$$1" = "xno" } +enable_codec(){ + enabled "${1}" || echo " enabling ${1}" + enable_feature "${1}" + + is_in "${1}" vp8 vp9 && enable_feature "${1}_encoder" "${1}_decoder" +} + +disable_codec(){ + disabled "${1}" || echo " disabling ${1}" + disable_feature "${1}" + + is_in "${1}" vp8 vp9 && disable_feature "${1}_encoder" "${1}_decoder" +} + # Iterates through positional parameters, checks to confirm the parameter has # not been explicitly (force) disabled, and enables the setting controlled by # the parameter when the setting is not disabled. 
@@ -521,22 +536,20 @@ process_common_cmdline() { ;; --enable-?*|--disable-?*) eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'` - if echo "${ARCH_EXT_LIST}" | grep "^ *$option\$" >/dev/null; then + if is_in ${option} ${ARCH_EXT_LIST}; then [ $action = "disable" ] && RTCD_OPTIONS="${RTCD_OPTIONS}--disable-${option} " elif [ $action = "disable" ] && ! disabled $option ; then - echo "${CMDLINE_SELECT}" | grep "^ *$option\$" >/dev/null || - die_unknown $opt + is_in ${option} ${CMDLINE_SELECT} || die_unknown $opt log_echo " disabling $option" elif [ $action = "enable" ] && ! enabled $option ; then - echo "${CMDLINE_SELECT}" | grep "^ *$option\$" >/dev/null || - die_unknown $opt + is_in ${option} ${CMDLINE_SELECT} || die_unknown $opt log_echo " enabling $option" fi ${action}_feature $option ;; --require-?*) eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'` - if echo "${ARCH_EXT_LIST}" none | grep "^ *$option\$" >/dev/null; then + if is_in ${option} ${ARCH_EXT_LIST}; then RTCD_OPTIONS="${RTCD_OPTIONS}${opt} " else die_unknown $opt @@ -638,16 +651,39 @@ show_darwin_sdk_major_version() { xcrun --sdk $1 --show-sdk-version 2>/dev/null | cut -d. -f1 } +# Print the Xcode version. +show_xcode_version() { + xcodebuild -version | head -n1 | cut -d' ' -f2 +} + +# Fails when Xcode version is less than 6.3. +check_xcode_minimum_version() { + xcode_major=$(show_xcode_version | cut -f1 -d.) + xcode_minor=$(show_xcode_version | cut -f2 -d.) 
+ xcode_min_major=6 + xcode_min_minor=3 + if [ ${xcode_major} -lt ${xcode_min_major} ]; then + return 1 + fi + if [ ${xcode_major} -eq ${xcode_min_major} ] \ + && [ ${xcode_minor} -lt ${xcode_min_minor} ]; then + return 1 + fi +} + process_common_toolchain() { if [ -z "$toolchain" ]; then gcctarget="${CHOST:-$(gcc -dumpmachine 2> /dev/null)}" # detect tgt_isa case "$gcctarget" in + aarch64*) + tgt_isa=arm64 + ;; armv6*) tgt_isa=armv6 ;; - armv7*-hardfloat*) + armv7*-hardfloat* | armv7*-gnueabihf | arm-*-gnueabihf) tgt_isa=armv7 float_abi=hard ;; @@ -688,6 +724,10 @@ process_common_toolchain() { tgt_isa=x86_64 tgt_os=darwin14 ;; + *darwin15*) + tgt_isa=x86_64 + tgt_os=darwin15 + ;; x86_64*mingw32*) tgt_os=win64 ;; @@ -744,7 +784,14 @@ process_common_toolchain() { enabled shared && soft_enable pic # Minimum iOS version for all target platforms (darwin and iphonesimulator). - IOS_VERSION_MIN="6.0" + # Shared library framework builds are only possible on iOS 8 and later. + if enabled shared; then + IOS_VERSION_OPTIONS="--enable-shared" + IOS_VERSION_MIN="8.0" + else + IOS_VERSION_OPTIONS="" + IOS_VERSION_MIN="6.0" + fi # Handle darwin variants. Newer SDKs allow targeting older # platforms, so use the newest one available. 
@@ -795,6 +842,10 @@ process_common_toolchain() { add_cflags "-mmacosx-version-min=10.10" add_ldflags "-mmacosx-version-min=10.10" ;; + *-darwin15-*) + add_cflags "-mmacosx-version-min=10.11" + add_ldflags "-mmacosx-version-min=10.11" + ;; *-iphonesimulator-*) add_cflags "-miphoneos-version-min=${IOS_VERSION_MIN}" add_ldflags "-miphoneos-version-min=${IOS_VERSION_MIN}" @@ -869,7 +920,6 @@ process_common_toolchain() { case ${tgt_cc} in gcc) - CROSS=${CROSS:-arm-none-linux-gnueabi-} link_with_cc=gcc setup_gnu_toolchain arch_int=${tgt_isa##armv} @@ -891,6 +941,9 @@ EOF check_add_cflags -mfpu=neon #-ftree-vectorize check_add_asflags -mfpu=neon fi + elif [ ${tgt_isa} = "arm64" ] || [ ${tgt_isa} = "armv8" ]; then + check_add_cflags -march=armv8-a + check_add_asflags -march=armv8-a else check_add_cflags -march=${tgt_isa} check_add_asflags -march=${tgt_isa} @@ -958,6 +1011,10 @@ EOF ;; android*) + if [ -z "${sdk_path}" ]; then + die "Must specify --sdk-path for Android builds." + fi + SDK_PATH=${sdk_path} COMPILER_LOCATION=`find "${SDK_PATH}" \ -name "arm-linux-androideabi-gcc*" -print -quit` @@ -979,8 +1036,10 @@ EOF awk '{ print $1 }' | tail -1` fi - add_cflags "--sysroot=${alt_libc}" - add_ldflags "--sysroot=${alt_libc}" + if [ -d "${alt_libc}" ]; then + add_cflags "--sysroot=${alt_libc}" + add_ldflags "--sysroot=${alt_libc}" + fi # linker flag that routes around a CPU bug in some # Cortex-A8 implementations (NDK Dev Guide) @@ -1006,18 +1065,7 @@ EOF NM="$(${XCRUN_FIND} nm)" RANLIB="$(${XCRUN_FIND} ranlib)" AS_SFX=.s - - # Special handling of ld for armv6 because libclang_rt.ios.a does - # not contain armv6 support in Apple's clang package: - # Apple LLVM version 5.1 (clang-503.0.40) (based on LLVM 3.4svn). - # TODO(tomfinegan): Remove this. Our minimum iOS version (6.0) - # renders support for armv6 unnecessary because the 3GS and up - # support neon. 
- if [ "${tgt_isa}" = "armv6" ]; then - LD="$(${XCRUN_FIND} ld)" - else - LD="${CXX:-$(${XCRUN_FIND} ld)}" - fi + LD="${CXX:-$(${XCRUN_FIND} ld)}" # ASFLAGS is written here instead of using check_add_asflags # because we need to overwrite all of ASFLAGS and purge the @@ -1043,6 +1091,19 @@ EOF [ -d "${try_dir}" ] && add_ldflags -L"${try_dir}" done + case ${tgt_isa} in + armv7|armv7s|armv8|arm64) + if enabled neon && ! check_xcode_minimum_version; then + soft_disable neon + log_echo " neon disabled: upgrade Xcode (need v6.3+)." + if enabled neon_asm; then + soft_disable neon_asm + log_echo " neon_asm disabled: upgrade Xcode (need v6.3+)." + fi + fi + ;; + esac + asm_conversion_cmd="${source_path}/build/make/ads2gas_apple.pl" if [ "$(show_darwin_sdk_major_version iphoneos)" -gt 8 ]; then @@ -1057,7 +1118,7 @@ EOF if enabled rvct; then # Check if we have CodeSourcery GCC in PATH. Needed for # libraries - hash arm-none-linux-gnueabi-gcc 2>&- || \ + which arm-none-linux-gnueabi-gcc 2>&- || \ die "Couldn't find CodeSourcery GCC from PATH" # Use armcc as a linker to enable translation of @@ -1098,7 +1159,7 @@ EOF check_add_ldflags -mfp64 ;; i6400) - check_add_cflags -mips64r6 -mabi=64 -funroll-loops -msched-weight + check_add_cflags -mips64r6 -mabi=64 -funroll-loops -msched-weight check_add_cflags -mload-store-pairs -mhard-float -mfp64 check_add_asflags -mips64r6 -mabi=64 -mhard-float -mfp64 check_add_ldflags -mips64r6 -mabi=64 -mfp64 @@ -1125,7 +1186,7 @@ EOF CC=${CC:-${CROSS}gcc} CXX=${CXX:-${CROSS}g++} LD=${LD:-${CROSS}gcc} - CROSS=${CROSS:-g} + CROSS=${CROSS-g} ;; os2) disable_feature pic @@ -1178,6 +1239,12 @@ EOF soft_disable avx2 ;; esac + case $vc_version in + 7|8|9) + echo "${tgt_cc} omits stdint.h, disabling webm-io..." + soft_disable webm_io + ;; + esac ;; esac @@ -1198,33 +1265,43 @@ EOF soft_enable runtime_cpu_detect # We can't use 'check_cflags' until the compiler is configured and CC is # populated. 
- check_gcc_machine_option mmx - check_gcc_machine_option sse - check_gcc_machine_option sse2 - check_gcc_machine_option sse3 - check_gcc_machine_option ssse3 - check_gcc_machine_option sse4 sse4_1 - check_gcc_machine_option avx - check_gcc_machine_option avx2 - - case "${AS}" in - auto|"") - which nasm >/dev/null 2>&1 && AS=nasm - which yasm >/dev/null 2>&1 && AS=yasm - if [ "${AS}" = nasm ] ; then - # Apple ships version 0.98 of nasm through at least Xcode 6. Revisit - # this check if they start shipping a compatible version. - apple=`nasm -v | grep "Apple"` - [ -n "${apple}" ] \ - && echo "Unsupported version of nasm: ${apple}" \ - && AS="" + for ext in ${ARCH_EXT_LIST_X86}; do + # disable higher order extensions to simplify asm dependencies + if [ "$disable_exts" = "yes" ]; then + if ! disabled $ext; then + RTCD_OPTIONS="${RTCD_OPTIONS}--disable-${ext} " + disable_feature $ext fi - [ "${AS}" = auto ] || [ -z "${AS}" ] \ - && die "Neither yasm nor nasm have been found." \ - "See the prerequisites section in the README for more info." - ;; - esac - log_echo " using $AS" + elif disabled $ext; then + disable_exts="yes" + else + # use the shortened version for the flag: sse4_1 -> sse4 + check_gcc_machine_option ${ext%_*} $ext + fi + done + + if enabled external_build; then + log_echo " skipping assembler detection" + else + case "${AS}" in + auto|"") + which nasm >/dev/null 2>&1 && AS=nasm + which yasm >/dev/null 2>&1 && AS=yasm + if [ "${AS}" = nasm ] ; then + # Apple ships version 0.98 of nasm through at least Xcode 6. Revisit + # this check if they start shipping a compatible version. + apple=`nasm -v | grep "Apple"` + [ -n "${apple}" ] \ + && echo "Unsupported version of nasm: ${apple}" \ + && AS="" + fi + [ "${AS}" = auto ] || [ -z "${AS}" ] \ + && die "Neither yasm nor nasm have been found." \ + "See the prerequisites section in the README for more info." 
+ ;; + esac + log_echo " using $AS" + fi [ "${AS##*/}" = nasm ] && add_asflags -Ox AS_SFX=.asm case ${tgt_os} in diff --git a/libvpx/build/make/gen_msvs_proj.sh b/libvpx/build/make/gen_msvs_proj.sh index 0cf335b3d..2b91fbfbc 100755 --- a/libvpx/build/make/gen_msvs_proj.sh +++ b/libvpx/build/make/gen_msvs_proj.sh @@ -193,7 +193,7 @@ for opt in "$@"; do done # Make one call to fix_path for file_list to improve performance. -fix_file_list +fix_file_list file_list outfile=${outfile:-/dev/stdout} guid=${guid:-`generate_uuid`} diff --git a/libvpx/build/make/gen_msvs_vcxproj.sh b/libvpx/build/make/gen_msvs_vcxproj.sh index 182ea28fa..e98611d10 100755 --- a/libvpx/build/make/gen_msvs_vcxproj.sh +++ b/libvpx/build/make/gen_msvs_vcxproj.sh @@ -211,7 +211,7 @@ for opt in "$@"; do done # Make one call to fix_path for file_list to improve performance. -fix_file_list +fix_file_list file_list outfile=${outfile:-/dev/stdout} guid=${guid:-`generate_uuid`} diff --git a/libvpx/build/make/ios-Info.plist b/libvpx/build/make/ios-Info.plist new file mode 100644 index 000000000..d157b11a0 --- /dev/null +++ b/libvpx/build/make/ios-Info.plist @@ -0,0 +1,37 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> +<dict> + <key>CFBundleDevelopmentRegion</key> + <string>en</string> + <key>CFBundleExecutable</key> + <string>VPX</string> + <key>CFBundleIdentifier</key> + <string>org.webmproject.VPX</string> + <key>CFBundleInfoDictionaryVersion</key> + <string>6.0</string> + <key>CFBundleName</key> + <string>VPX</string> + <key>CFBundlePackageType</key> + <string>FMWK</string> + <key>CFBundleShortVersionString</key> + <string>${VERSION}</string> + <key>CFBundleSignature</key> + <string>????</string> + <key>CFBundleSupportedPlatforms</key> + <array> + <string>iPhoneOS</string> + </array> + <key>CFBundleVersion</key> + <string>${VERSION}</string> + <key>MinimumOSVersion</key> + 
<string>${IOS_VERSION_MIN}</string> + <key>UIDeviceFamily</key> + <array> + <integer>1</integer> + <integer>2</integer> + </array> + <key>VPXFullVersion</key> + <string>${FULLVERSION}</string> +</dict> +</plist> diff --git a/libvpx/build/make/iosbuild.sh b/libvpx/build/make/iosbuild.sh index 6f7180d08..c703f22b0 100755 --- a/libvpx/build/make/iosbuild.sh +++ b/libvpx/build/make/iosbuild.sh @@ -24,16 +24,20 @@ CONFIGURE_ARGS="--disable-docs --disable-unit-tests" DIST_DIR="_dist" FRAMEWORK_DIR="VPX.framework" +FRAMEWORK_LIB="VPX.framework/VPX" HEADER_DIR="${FRAMEWORK_DIR}/Headers/vpx" SCRIPT_DIR=$(dirname "$0") LIBVPX_SOURCE_DIR=$(cd ${SCRIPT_DIR}/../..; pwd) LIPO=$(xcrun -sdk iphoneos${SDK} -find lipo) ORIG_PWD="$(pwd)" -TARGETS="arm64-darwin-gcc - armv7-darwin-gcc - armv7s-darwin-gcc - x86-iphonesimulator-gcc - x86_64-iphonesimulator-gcc" +ARM_TARGETS="arm64-darwin-gcc + armv7-darwin-gcc + armv7s-darwin-gcc" +SIM_TARGETS="x86-iphonesimulator-gcc + x86_64-iphonesimulator-gcc" +OSX_TARGETS="x86-darwin15-gcc + x86_64-darwin15-gcc" +TARGETS="${ARM_TARGETS} ${SIM_TARGETS}" # Configures for the target specified by $1, and invokes make with the dist # target using $DIST_DIR as the distribution output directory. @@ -134,6 +138,44 @@ create_vpx_framework_config_shim() { printf "#endif // ${include_guard}" >> "${config_file}" } +# Verifies that $FRAMEWORK_LIB fat library contains requested builds. +verify_framework_targets() { + local requested_cpus="" + local cpu="" + + # Extract CPU from full target name. + for target; do + cpu="${target%%-*}" + if [ "${cpu}" = "x86" ]; then + # lipo -info outputs i386 for libvpx x86 targets. + cpu="i386" + fi + requested_cpus="${requested_cpus}${cpu} " + done + + # Get target CPUs present in framework library. + local targets_built=$(${LIPO} -info ${FRAMEWORK_LIB}) + + # $LIPO -info outputs a string like the following: + # Architectures in the fat file: $FRAMEWORK_LIB <architectures> + # Capture only the architecture strings. 
+ targets_built=${targets_built##*: } + + # Sort CPU strings to make the next step a simple string compare. + local actual=$(echo ${targets_built} | tr " " "\n" | sort | tr "\n" " ") + local requested=$(echo ${requested_cpus} | tr " " "\n" | sort | tr "\n" " ") + + vlog "Requested ${FRAMEWORK_LIB} CPUs: ${requested}" + vlog "Actual ${FRAMEWORK_LIB} CPUs: ${actual}" + + if [ "${requested}" != "${actual}" ]; then + elog "Actual ${FRAMEWORK_LIB} targets do not match requested target list." + elog " Requested target CPUs: ${requested}" + elog " Actual target CPUs: ${actual}" + return 1 + fi +} + # Configures and builds each target specified by $1, and then builds # VPX.framework. build_framework() { @@ -154,7 +196,12 @@ build_framework() { for target in ${targets}; do build_target "${target}" target_dist_dir="${BUILD_ROOT}/${target}/${DIST_DIR}" - lib_list="${lib_list} ${target_dist_dir}/lib/libvpx.a" + if [ "${ENABLE_SHARED}" = "yes" ]; then + local suffix="dylib" + else + local suffix="a" + fi + lib_list="${lib_list} ${target_dist_dir}/lib/libvpx.${suffix}" done cd "${ORIG_PWD}" @@ -173,13 +220,25 @@ build_framework() { # Copy in vpx_version.h. cp -p "${BUILD_ROOT}/${target}/vpx_version.h" "${HEADER_DIR}" - vlog "Created fat library ${FRAMEWORK_DIR}/VPX containing:" + if [ "${ENABLE_SHARED}" = "yes" ]; then + # Adjust the dylib's name so dynamic linking in apps works as expected. + install_name_tool -id '@rpath/VPX.framework/VPX' ${FRAMEWORK_DIR}/VPX + + # Copy in Info.plist. + cat "${SCRIPT_DIR}/ios-Info.plist" \ + | sed "s/\${FULLVERSION}/${FULLVERSION}/g" \ + | sed "s/\${VERSION}/${VERSION}/g" \ + | sed "s/\${IOS_VERSION_MIN}/${IOS_VERSION_MIN}/g" \ + > "${FRAMEWORK_DIR}/Info.plist" + fi + + # Confirm VPX.framework/VPX contains the targets requested. 
+ verify_framework_targets ${targets} + + vlog "Created fat library ${FRAMEWORK_LIB} containing:" for lib in ${lib_list}; do vlog " $(echo ${lib} | awk -F / '{print $2, $NF}')" done - - # TODO(tomfinegan): Verify that expected targets are included within - # VPX.framework/VPX via lipo -info. } # Trap function. Cleans up the subtree used to build all targets contained in @@ -197,15 +256,28 @@ cleanup() { fi } +print_list() { + local indent="$1" + shift + local list="$@" + for entry in ${list}; do + echo "${indent}${entry}" + done +} + iosbuild_usage() { cat << EOF Usage: ${0##*/} [arguments] --help: Display this message and exit. + --enable-shared: Build a dynamic framework for use on iOS 8 or later. --extra-configure-args <args>: Extra args to pass when configuring libvpx. + --macosx: Uses darwin15 targets instead of iphonesimulator targets for x86 + and x86_64. Allows linking to framework when builds target MacOSX + instead of iOS. --preserve-build-output: Do not delete the build directory. --show-build-output: Show output from each library build. --targets <targets>: Override default target list. Defaults: - ${TARGETS} +$(print_list " " ${TARGETS}) --test-link: Confirms all targets can be linked. Functionally identical to passing --enable-examples via --extra-configure-args. 
--verbose: Output information about the environment and each stage of the @@ -236,6 +308,9 @@ while [ -n "$1" ]; do iosbuild_usage exit ;; + --enable-shared) + ENABLE_SHARED=yes + ;; --preserve-build-output) PRESERVE_BUILD_OUTPUT=yes ;; @@ -249,6 +324,9 @@ while [ -n "$1" ]; do TARGETS="$2" shift ;; + --macosx) + TARGETS="${ARM_TARGETS} ${OSX_TARGETS}" + ;; --verbose) VERBOSE=yes ;; @@ -260,6 +338,21 @@ while [ -n "$1" ]; do shift done +if [ "${ENABLE_SHARED}" = "yes" ]; then + CONFIGURE_ARGS="--enable-shared ${CONFIGURE_ARGS}" +fi + +FULLVERSION=$("${SCRIPT_DIR}"/version.sh --bare "${LIBVPX_SOURCE_DIR}") +VERSION=$(echo "${FULLVERSION}" | sed -E 's/^v([0-9]+\.[0-9]+\.[0-9]+).*$/\1/') + +if [ "$ENABLE_SHARED" = "yes" ]; then + IOS_VERSION_OPTIONS="--enable-shared" + IOS_VERSION_MIN="8.0" +else + IOS_VERSION_OPTIONS="" + IOS_VERSION_MIN="6.0" +fi + if [ "${VERBOSE}" = "yes" ]; then cat << EOF BUILD_ROOT=${BUILD_ROOT} @@ -267,16 +360,24 @@ cat << EOF CONFIGURE_ARGS=${CONFIGURE_ARGS} EXTRA_CONFIGURE_ARGS=${EXTRA_CONFIGURE_ARGS} FRAMEWORK_DIR=${FRAMEWORK_DIR} + FRAMEWORK_LIB=${FRAMEWORK_LIB} HEADER_DIR=${HEADER_DIR} LIBVPX_SOURCE_DIR=${LIBVPX_SOURCE_DIR} LIPO=${LIPO} MAKEFLAGS=${MAKEFLAGS} ORIG_PWD=${ORIG_PWD} PRESERVE_BUILD_OUTPUT=${PRESERVE_BUILD_OUTPUT} - TARGETS="${TARGETS}" + TARGETS="$(print_list "" ${TARGETS})" + ENABLE_SHARED=${ENABLE_SHARED} + OSX_TARGETS="${OSX_TARGETS}" + SIM_TARGETS="${SIM_TARGETS}" + SCRIPT_DIR="${SCRIPT_DIR}" + FULLVERSION="${FULLVERSION}" + VERSION="${VERSION}" + IOS_VERSION_MIN="${IOS_VERSION_MIN}" EOF fi build_framework "${TARGETS}" echo "Successfully built '${FRAMEWORK_DIR}' for:" -echo " ${TARGETS}" +print_list "" ${TARGETS} diff --git a/libvpx/build/make/msvs_common.sh b/libvpx/build/make/msvs_common.sh index 90c14888c..88f1cf9b5 100755 --- a/libvpx/build/make/msvs_common.sh +++ b/libvpx/build/make/msvs_common.sh @@ -39,11 +39,12 @@ fix_path() { } # Corrects the paths in file_list in one pass for efficiency. 
+# $1 is the name of the array to be modified. fix_file_list() { - # TODO(jzern): this could be more generic and take the array as a param. - files=$(fix_path "${file_list[@]}") + declare -n array_ref=$1 + files=$(fix_path "${array_ref[@]}") local IFS=$'\n' - file_list=($files) + array_ref=($files) } generate_uuid() { diff --git a/libvpx/build/make/version.sh b/libvpx/build/make/version.sh index b340142c9..696752777 100755 --- a/libvpx/build/make/version.sh +++ b/libvpx/build/make/version.sh @@ -24,8 +24,9 @@ out_file=${2} id=${3:-VERSION_STRING} git_version_id="" -if [ -d "${source_path}/.git" ]; then +if [ -e "${source_path}/.git" ]; then # Source Path is a git working copy. Check for local modifications. + # Note that git submodules may have a file as .git, not a directory. export GIT_DIR="${source_path}/.git" git_version_id=`git describe --match=v[0-9]* 2>/dev/null` fi diff --git a/libvpx/configure b/libvpx/configure index a40f3abb6..f82ee046b 100755 --- a/libvpx/configure +++ b/libvpx/configure @@ -35,9 +35,11 @@ Advanced options: ${toggle_debug_libs} in/exclude debug version of libraries ${toggle_static_msvcrt} use static MSVCRT (VS builds only) ${toggle_vp9_highbitdepth} use VP9 high bit depth (10/12) profiles + ${toggle_better_hw_compatibility} + enable encoder to produce streams with better + hardware decoder compatibility ${toggle_vp8} VP8 codec support ${toggle_vp9} VP9 codec support - ${toggle_vp10} VP10 codec support ${toggle_internal_stats} output of encoder internal stats for debug, if supported (encoders) ${toggle_postproc} postprocessing ${toggle_vp9_postproc} vp9 specific postprocessing @@ -95,11 +97,11 @@ EOF # all_platforms is a list of all supported target platforms. Maintain # alphabetically by architecture, generic-gnu last. 
-all_platforms="${all_platforms} armv6-darwin-gcc" +all_platforms="${all_platforms} arm64-darwin-gcc" +all_platforms="${all_platforms} arm64-linux-gcc" all_platforms="${all_platforms} armv6-linux-rvct" all_platforms="${all_platforms} armv6-linux-gcc" all_platforms="${all_platforms} armv6-none-rvct" -all_platforms="${all_platforms} arm64-darwin-gcc" all_platforms="${all_platforms} armv7-android-gcc" #neon Cortex-A8 all_platforms="${all_platforms} armv7-darwin-gcc" #neon Cortex-A8 all_platforms="${all_platforms} armv7-linux-rvct" #neon Cortex-A8 @@ -109,6 +111,7 @@ all_platforms="${all_platforms} armv7-win32-vs11" all_platforms="${all_platforms} armv7-win32-vs12" all_platforms="${all_platforms} armv7-win32-vs14" all_platforms="${all_platforms} armv7s-darwin-gcc" +all_platforms="${all_platforms} armv8-linux-gcc" all_platforms="${all_platforms} mips32-linux-gcc" all_platforms="${all_platforms} mips64-linux-gcc" all_platforms="${all_platforms} sparc-solaris-gcc" @@ -122,6 +125,7 @@ all_platforms="${all_platforms} x86-darwin11-gcc" all_platforms="${all_platforms} x86-darwin12-gcc" all_platforms="${all_platforms} x86-darwin13-gcc" all_platforms="${all_platforms} x86-darwin14-gcc" +all_platforms="${all_platforms} x86-darwin15-gcc" all_platforms="${all_platforms} x86-iphonesimulator-gcc" all_platforms="${all_platforms} x86-linux-gcc" all_platforms="${all_platforms} x86-linux-icc" @@ -142,6 +146,7 @@ all_platforms="${all_platforms} x86_64-darwin11-gcc" all_platforms="${all_platforms} x86_64-darwin12-gcc" all_platforms="${all_platforms} x86_64-darwin13-gcc" all_platforms="${all_platforms} x86_64-darwin14-gcc" +all_platforms="${all_platforms} x86_64-darwin15-gcc" all_platforms="${all_platforms} x86_64-iphonesimulator-gcc" all_platforms="${all_platforms} x86_64-linux-gcc" all_platforms="${all_platforms} x86_64-linux-icc" @@ -190,12 +195,8 @@ if [ ${doxy_major:-0} -ge 1 ]; then fi # disable codecs when their source directory does not exist -[ -d "${source_path}/vp8" ] || 
disable_feature vp8 -[ -d "${source_path}/vp9" ] || disable_feature vp9 -[ -d "${source_path}/vp10" ] || disable_feature vp10 - -# disable vp10 codec by default -disable_feature vp10 +[ -d "${source_path}/vp8" ] || disable_codec vp8 +[ -d "${source_path}/vp9" ] || disable_codec vp9 # install everything except the sources, by default. sources will have # to be enabled when doing dist builds, since that's no longer a common @@ -217,13 +218,10 @@ CODECS=" vp8_decoder vp9_encoder vp9_decoder - vp10_encoder - vp10_decoder " CODEC_FAMILIES=" vp8 vp9 - vp10 " ARCH_LIST=" @@ -232,6 +230,16 @@ ARCH_LIST=" x86 x86_64 " +ARCH_EXT_LIST_X86=" + mmx + sse + sse2 + sse3 + ssse3 + sse4_1 + avx + avx2 +" ARCH_EXT_LIST=" edsp media @@ -243,21 +251,12 @@ ARCH_EXT_LIST=" msa mips64 - mmx - sse - sse2 - sse3 - ssse3 - sse4_1 - avx - avx2 + ${ARCH_EXT_LIST_X86} " HAVE_LIST=" ${ARCH_EXT_LIST} vpx_ports - stdint_h pthread_h - sys_mman_h unistd_h " EXPERIMENT_LIST=" @@ -317,6 +316,7 @@ CONFIG_LIST=" vp9_temporal_denoising coefficient_range_checking vp9_highbitdepth + better_hw_compatibility experimental size_limit ${EXPERIMENT_LIST} @@ -375,6 +375,7 @@ CMDLINE_SELECT=" temporal_denoising vp9_temporal_denoising coefficient_range_checking + better_hw_compatibility vp9_highbitdepth experimental " @@ -383,15 +384,19 @@ process_cmdline() { for opt do optval="${opt#*=}" case "$opt" in - --disable-codecs) for c in ${CODECS}; do disable_feature $c; done ;; + --disable-codecs) + for c in ${CODEC_FAMILIES}; do disable_codec $c; done + ;; --enable-?*|--disable-?*) eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'` - if echo "${EXPERIMENT_LIST}" | grep "^ *$option\$" >/dev/null; then + if is_in ${option} ${EXPERIMENT_LIST}; then if enabled experimental; then ${action}_feature $option else log_echo "Ignoring $opt -- not in experimental mode." 
fi + elif is_in ${option} "${CODECS} ${CODEC_FAMILIES}"; then + ${action}_codec ${option} else process_common_cmdline $opt fi @@ -405,14 +410,6 @@ process_cmdline() { post_process_cmdline() { c="" - # If the codec family is disabled, disable all components of that family. - # If the codec family is enabled, enable all components of that family. - log_echo "Configuring selected codecs" - for c in ${CODECS}; do - disabled ${c%%_*} && disable_feature ${c} - enabled ${c%%_*} && enable_feature ${c} - done - # Enable all detected codecs, if they haven't been disabled for c in ${CODECS}; do soft_enable $c; done @@ -507,13 +504,18 @@ process_detect() { # Can only build shared libs on a subset of platforms. Doing this check # here rather than at option parse time because the target auto-detect # magic happens after the command line has been parsed. - if ! enabled linux && ! enabled os2; then + case "${tgt_os}" in + linux|os2|darwin*|iphonesimulator*) + # Supported platforms + ;; + *) if enabled gnu; then echo "--enable-shared is only supported on ELF; assuming this is OK" else - die "--enable-shared only supported on ELF and OS/2 for now" + die "--enable-shared only supported on ELF, OS/2, and Darwin for now" fi - fi + ;; + esac fi if [ -z "$CC" ] || enabled external_build; then echo "Bypassing toolchain for environment detection." @@ -540,16 +542,12 @@ process_detect() { # Specialize windows and POSIX environments. case $toolchain in *-win*-*) - case $header-$toolchain in - stdint*-gcc) true;; - *) false;; - esac && enable_feature $var - ;; + # Don't check for any headers in Windows builds. + false + ;; *) case $header in - stdint.h) true;; pthread.h) true;; - sys/mman.h) true;; unistd.h) true;; *) false;; esac && enable_feature $var @@ -565,9 +563,7 @@ process_detect() { int main(void) {return 0;} EOF # check system headers - check_header stdint.h check_header pthread.h - check_header sys/mman.h check_header unistd.h # for sysconf(3) and friends. 
check_header vpx/vpx_integer.h -I${source_path} && enable_feature vpx_ports @@ -598,7 +594,11 @@ process_toolchain() { ;; *) check_add_cflags -Wunused-but-set-variable ;; esac - enabled extra_warnings || check_add_cflags -Wno-unused-function + if enabled mips || [ -z "${INLINE}" ]; then + enabled extra_warnings || check_add_cflags -Wno-unused-function + else + check_add_cflags -Wunused-function + fi fi if enabled icc; then diff --git a/libvpx/examples.mk b/libvpx/examples.mk index f10bec68c..c891a5496 100644 --- a/libvpx/examples.mk +++ b/libvpx/examples.mk @@ -36,21 +36,30 @@ LIBYUV_SRCS += third_party/libyuv/include/libyuv/basic_types.h \ third_party/libyuv/source/scale_neon64.cc \ third_party/libyuv/source/scale_win.cc \ -LIBWEBM_COMMON_SRCS += third_party/libwebm/webmids.hpp +LIBWEBM_COMMON_SRCS += third_party/libwebm/common/hdr_util.cc \ + third_party/libwebm/common/hdr_util.h \ + third_party/libwebm/common/webmids.h -LIBWEBM_MUXER_SRCS += third_party/libwebm/mkvmuxer.cpp \ - third_party/libwebm/mkvmuxerutil.cpp \ - third_party/libwebm/mkvwriter.cpp \ - third_party/libwebm/mkvmuxer.hpp \ - third_party/libwebm/mkvmuxertypes.hpp \ - third_party/libwebm/mkvmuxerutil.hpp \ - third_party/libwebm/mkvparser.hpp \ - third_party/libwebm/mkvwriter.hpp +LIBWEBM_MUXER_SRCS += third_party/libwebm/mkvmuxer/mkvmuxer.cc \ + third_party/libwebm/mkvmuxer/mkvmuxerutil.cc \ + third_party/libwebm/mkvmuxer/mkvwriter.cc \ + third_party/libwebm/mkvmuxer/mkvmuxer.h \ + third_party/libwebm/mkvmuxer/mkvmuxertypes.h \ + third_party/libwebm/mkvmuxer/mkvmuxerutil.h \ + third_party/libwebm/mkvparser/mkvparser.h \ + third_party/libwebm/mkvmuxer/mkvwriter.h + +LIBWEBM_PARSER_SRCS = third_party/libwebm/mkvparser/mkvparser.cc \ + third_party/libwebm/mkvparser/mkvreader.cc \ + third_party/libwebm/mkvparser/mkvparser.h \ + third_party/libwebm/mkvparser/mkvreader.h + +# Add compile flags and include path for libwebm sources. 
+ifeq ($(CONFIG_WEBM_IO),yes) + CXXFLAGS += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS + INC_PATH-yes += $(SRC_PATH_BARE)/third_party/libwebm +endif -LIBWEBM_PARSER_SRCS = third_party/libwebm/mkvparser.cpp \ - third_party/libwebm/mkvreader.cpp \ - third_party/libwebm/mkvparser.hpp \ - third_party/libwebm/mkvreader.hpp # List of examples to build. UTILS are tools meant for distribution # while EXAMPLES demonstrate specific portions of the API. @@ -70,6 +79,7 @@ ifeq ($(CONFIG_LIBYUV),yes) endif ifeq ($(CONFIG_WEBM_IO),yes) vpxdec.SRCS += $(LIBWEBM_COMMON_SRCS) + vpxdec.SRCS += $(LIBWEBM_MUXER_SRCS) vpxdec.SRCS += $(LIBWEBM_PARSER_SRCS) vpxdec.SRCS += webmdec.cc webmdec.h endif @@ -93,6 +103,7 @@ endif ifeq ($(CONFIG_WEBM_IO),yes) vpxenc.SRCS += $(LIBWEBM_COMMON_SRCS) vpxenc.SRCS += $(LIBWEBM_MUXER_SRCS) + vpxenc.SRCS += $(LIBWEBM_PARSER_SRCS) vpxenc.SRCS += webmenc.cc webmenc.h endif vpxenc.GUID = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1 diff --git a/libvpx/examples/simple_encoder.c b/libvpx/examples/simple_encoder.c index a30772973..64f0a0137 100644 --- a/libvpx/examples/simple_encoder.c +++ b/libvpx/examples/simple_encoder.c @@ -109,8 +109,8 @@ static const char *exec_name; void usage_exit(void) { fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile> " - "<keyframe-interval> [<error-resilient>]\nSee comments in " - "simple_encoder.c for more information.\n", + "<keyframe-interval> <error-resilient> <frames to encode>\n" + "See comments in simple_encoder.c for more information.\n", exec_name); exit(EXIT_FAILURE); } @@ -147,6 +147,7 @@ static int encode_frame(vpx_codec_ctx_t *codec, return got_pkts; } +// TODO(tomfinegan): Improve command line parsing and add args for bitrate/fps. 
int main(int argc, char **argv) { FILE *infile = NULL; vpx_codec_ctx_t codec; @@ -157,12 +158,11 @@ int main(int argc, char **argv) { VpxVideoInfo info = {0}; VpxVideoWriter *writer = NULL; const VpxInterface *encoder = NULL; - const int fps = 30; // TODO(dkovalev) add command line argument - const int bitrate = 200; // kbit/s TODO(dkovalev) add command line argument + const int fps = 30; + const int bitrate = 200; int keyframe_interval = 0; - - // TODO(dkovalev): Add some simple command line parsing code to make the - // command line more flexible. + int max_frames = 0; + int frames_encoded = 0; const char *codec_arg = NULL; const char *width_arg = NULL; const char *height_arg = NULL; @@ -172,7 +172,7 @@ int main(int argc, char **argv) { exec_name = argv[0]; - if (argc < 7) + if (argc != 9) die("Invalid number of arguments"); codec_arg = argv[1]; @@ -181,6 +181,7 @@ int main(int argc, char **argv) { infile_arg = argv[4]; outfile_arg = argv[5]; keyframe_interval_arg = argv[6]; + max_frames = strtol(argv[8], NULL, 0); encoder = get_vpx_encoder_by_name(codec_arg); if (!encoder) @@ -219,7 +220,7 @@ int main(int argc, char **argv) { cfg.g_timebase.num = info.time_base.numerator; cfg.g_timebase.den = info.time_base.denominator; cfg.rc_target_bitrate = bitrate; - cfg.g_error_resilient = argc > 7 ? strtol(argv[7], NULL, 0) : 0; + cfg.g_error_resilient = strtol(argv[7], NULL, 0); writer = vpx_video_writer_open(outfile_arg, kContainerIVF, &info); if (!writer) @@ -237,6 +238,9 @@ int main(int argc, char **argv) { if (keyframe_interval > 0 && frame_count % keyframe_interval == 0) flags |= VPX_EFLAG_FORCE_KF; encode_frame(&codec, &raw, frame_count++, flags, writer); + frames_encoded++; + if (max_frames > 0 && frames_encoded >= max_frames) + break; } // Flush encoder. 
diff --git a/libvpx/examples/twopass_encoder.c b/libvpx/examples/twopass_encoder.c index aecc11d3f..15a6617cd 100644 --- a/libvpx/examples/twopass_encoder.c +++ b/libvpx/examples/twopass_encoder.c @@ -59,7 +59,9 @@ static const char *exec_name; void usage_exit(void) { - fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile>\n", + fprintf(stderr, + "Usage: %s <codec> <width> <height> <infile> <outfile> " + "<frame limit>\n", exec_name); exit(EXIT_FAILURE); } @@ -129,7 +131,8 @@ static int encode_frame(vpx_codec_ctx_t *ctx, static vpx_fixed_buf_t pass0(vpx_image_t *raw, FILE *infile, const VpxInterface *encoder, - const vpx_codec_enc_cfg_t *cfg) { + const vpx_codec_enc_cfg_t *cfg, + int max_frames) { vpx_codec_ctx_t codec; int frame_count = 0; vpx_fixed_buf_t stats = {NULL, 0}; @@ -142,6 +145,8 @@ static vpx_fixed_buf_t pass0(vpx_image_t *raw, ++frame_count; get_frame_stats(&codec, raw, frame_count, 1, 0, VPX_DL_GOOD_QUALITY, &stats); + if (max_frames > 0 && frame_count >= max_frames) + break; } // Flush encoder. @@ -159,7 +164,8 @@ static void pass1(vpx_image_t *raw, FILE *infile, const char *outfile_name, const VpxInterface *encoder, - const vpx_codec_enc_cfg_t *cfg) { + const vpx_codec_enc_cfg_t *cfg, + int max_frames) { VpxVideoInfo info = { encoder->fourcc, cfg->g_w, @@ -181,6 +187,9 @@ static void pass1(vpx_image_t *raw, while (vpx_img_read(raw, infile)) { ++frame_count; encode_frame(&codec, raw, frame_count, 1, 0, VPX_DL_GOOD_QUALITY, writer); + + if (max_frames > 0 && frame_count >= max_frames) + break; } // Flush encoder. 
@@ -213,11 +222,14 @@ int main(int argc, char **argv) { const char *const height_arg = argv[3]; const char *const infile_arg = argv[4]; const char *const outfile_arg = argv[5]; + int max_frames = 0; exec_name = argv[0]; - if (argc != 6) + if (argc != 7) die("Invalid number of arguments."); + max_frames = strtol(argv[6], NULL, 0); + encoder = get_vpx_encoder_by_name(codec_arg); if (!encoder) die("Unsupported codec."); @@ -249,13 +261,13 @@ int main(int argc, char **argv) { // Pass 0 cfg.g_pass = VPX_RC_FIRST_PASS; - stats = pass0(&raw, infile, encoder, &cfg); + stats = pass0(&raw, infile, encoder, &cfg, max_frames); // Pass 1 rewind(infile); cfg.g_pass = VPX_RC_LAST_PASS; cfg.rc_twopass_stats_in = stats; - pass1(&raw, infile, outfile_arg, encoder, &cfg); + pass1(&raw, infile, outfile_arg, encoder, &cfg, max_frames); free(stats.buf); vpx_img_free(&raw); diff --git a/libvpx/examples/vp8_multi_resolution_encoder.c b/libvpx/examples/vp8_multi_resolution_encoder.c index 2b032049c..fc775ef7c 100644 --- a/libvpx/examples/vp8_multi_resolution_encoder.c +++ b/libvpx/examples/vp8_multi_resolution_encoder.c @@ -29,13 +29,6 @@ #include <math.h> #include <assert.h> #include <sys/time.h> -#if USE_POSIX_MMAP -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/mman.h> -#include <fcntl.h> -#include <unistd.h> -#endif #include "vpx_ports/vpx_timer.h" #include "vpx/vpx_encoder.h" #include "vpx/vp8cx.h" @@ -354,8 +347,7 @@ int main(int argc, char **argv) double psnr_totals[NUM_ENCODERS][4] = {{0,0}}; int psnr_count[NUM_ENCODERS] = {0}; - double cx_time = 0; - struct timeval tv1, tv2, difftv; + int64_t cx_time = 0; /* Set the required target bitrates for each resolution level. 
* If target bitrate for highest-resolution level is set to 0, @@ -589,6 +581,7 @@ int main(int argc, char **argv) while(frame_avail || got_data) { + struct vpx_usec_timer timer; vpx_codec_iter_t iter[NUM_ENCODERS]={NULL}; const vpx_codec_cx_pkt_t *pkt[NUM_ENCODERS]; @@ -643,18 +636,18 @@ int main(int argc, char **argv) vpx_codec_control(&codec[i], VP8E_SET_TEMPORAL_LAYER_ID, layer_id); } - gettimeofday(&tv1, NULL); /* Encode each frame at multi-levels */ /* Note the flags must be set to 0 in the encode call if they are set for each frame with the vpx_codec_control(), as done above. */ + vpx_usec_timer_start(&timer); if(vpx_codec_encode(&codec[0], frame_avail? &raw[0] : NULL, frame_cnt, 1, 0, arg_deadline)) { die_codec(&codec[0], "Failed to encode frame"); } - gettimeofday(&tv2, NULL); - timersub(&tv2, &tv1, &difftv); - cx_time += (double)(difftv.tv_sec * 1000000 + difftv.tv_usec); + vpx_usec_timer_mark(&timer); + cx_time += vpx_usec_timer_elapsed(&timer); + for (i=NUM_ENCODERS-1; i>=0 ; i--) { got_data = 0; @@ -693,8 +686,10 @@ int main(int argc, char **argv) frame_cnt++; } printf("\n"); - printf("FPS for encoding %d %f %f \n", frame_cnt, (float)cx_time / 1000000, - 1000000 * (double)frame_cnt / (double)cx_time); + printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n", + frame_cnt, + 1000 * (float)cx_time / ((double)frame_cnt * 1000000), + 1000000 * (double)frame_cnt / (double)cx_time); fclose(infile); diff --git a/libvpx/examples/vp9_spatial_svc_encoder.c b/libvpx/examples/vp9_spatial_svc_encoder.c index b26e98734..271ab704b 100644 --- a/libvpx/examples/vp9_spatial_svc_encoder.c +++ b/libvpx/examples/vp9_spatial_svc_encoder.c @@ -30,6 +30,7 @@ #include "vpx/vp8cx.h" #include "vpx/vpx_encoder.h" #include "../vpxstats.h" +#include "vp9/encoder/vp9_encoder.h" #define OUTPUT_RC_STATS 1 static const arg_def_t skip_frames_arg = @@ -408,7 +409,10 @@ static void set_rate_control_stats(struct RateControlStats *rc, for (tl = 0; tl < cfg->ts_number_layers;
++tl) { const int layer = sl * cfg->ts_number_layers + tl; const int tlayer0 = sl * cfg->ts_number_layers; - rc->layer_framerate[layer] = + if (cfg->ts_number_layers == 1) + rc->layer_framerate[layer] = framerate; + else + rc->layer_framerate[layer] = framerate / cfg->ts_rate_decimator[tl]; if (tl > 0) { rc->layer_pfb[layer] = 1000.0 * @@ -714,6 +718,7 @@ int main(int argc, const char **argv) { // TODO(marpan): Should rename the "VP9E_TEMPORAL_LAYERING_MODE_BYPASS" // mode to "VP9E_LAYERING_MODE_BYPASS". if (svc_ctx.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { + layer_id.spatial_layer_id = 0; // Example for 2 temporal layers. if (frame_cnt % 2 == 0) layer_id.temporal_layer_id = 0; @@ -729,6 +734,12 @@ int main(int argc, const char **argv) { &ref_frame_config); vpx_codec_control(&codec, VP9E_SET_SVC_REF_FRAME_CONFIG, &ref_frame_config); + // Keep track of input frames, to account for frame drops in rate control + // stats/metrics. + for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { + ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers + + layer_id.temporal_layer_id]; + } } vpx_usec_timer_start(&timer); @@ -739,6 +750,7 @@ int main(int argc, const char **argv) { cx_time += vpx_usec_timer_elapsed(&timer); printf("%s", vpx_svc_get_message(&svc_ctx)); + fflush(stdout); if (res != VPX_CODEC_OK) { die_codec(&codec, "Failed to encode frame"); } @@ -746,6 +758,7 @@ int main(int argc, const char **argv) { while ((cx_pkt = vpx_codec_get_cx_data(&codec, &iter)) != NULL) { switch (cx_pkt->kind) { case VPX_CODEC_CX_FRAME_PKT: { + SvcInternal_t *const si = (SvcInternal_t *)svc_ctx.internal; if (cx_pkt->data.frame.sz > 0) { #if OUTPUT_RC_STATS uint32_t sizes[8]; @@ -761,9 +774,16 @@ int main(int argc, const char **argv) { vpx_codec_control(&codec, VP9E_GET_SVC_LAYER_ID, &layer_id); parse_superframe_index(cx_pkt->data.frame.buf, cx_pkt->data.frame.sz, sizes, &count); - for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { - ++rc.layer_input_frames[sl * 
enc_cfg.ts_number_layers + - layer_id.temporal_layer_id]; + // Note computing input_layer_frames here won't account for frame + // drops in rate control stats. + // TODO(marpan): Fix this for non-bypass mode so we can get stats + // for dropped frames. + if (svc_ctx.temporal_layering_mode != + VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { + for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { + ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers + + layer_id.temporal_layer_id]; + } } for (tl = layer_id.temporal_layer_id; tl < enc_cfg.ts_number_layers; ++tl) { @@ -834,6 +854,8 @@ int main(int argc, const char **argv) { printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received, !!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY), (int)cx_pkt->data.frame.sz, (int)cx_pkt->data.frame.pts); + if (enc_cfg.ss_number_layers == 1 && enc_cfg.ts_number_layers == 1) + si->bytes_sum[0] += (int)cx_pkt->data.frame.sz; ++frames_received; break; } @@ -854,6 +876,16 @@ int main(int argc, const char **argv) { pts += frame_duration; } } + + // Compensate for the extra frame count for the bypass mode. + if (svc_ctx.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { + for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { + const int layer = sl * enc_cfg.ts_number_layers + + layer_id.temporal_layer_id; + --rc.layer_input_frames[layer]; + } + } + printf("Processed %d frames\n", frame_cnt); fclose(infile); #if OUTPUT_RC_STATS diff --git a/libvpx/examples/vpx_temporal_svc_encoder.c b/libvpx/examples/vpx_temporal_svc_encoder.c index 5adda9eeb..e6c09fb71 100644 --- a/libvpx/examples/vpx_temporal_svc_encoder.c +++ b/libvpx/examples/vpx_temporal_svc_encoder.c @@ -41,7 +41,7 @@ enum denoiserState { kDenoiserOnAdaptive }; -static int mode_to_num_layers[12] = {1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3}; +static int mode_to_num_layers[13] = {1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3, 3}; // For rate control encoding stats. 
struct RateControlMetrics { @@ -432,7 +432,32 @@ static void set_temporal_layer_pattern(int layering_mode, layer_flags[7] = layer_flags[3]; break; } - case 11: + case 11: { + // 3-layers structure with one reference frame. + // This works same as temporal_layering_mode 3. + // This was added to compare with vp9_spatial_svc_encoder. + + // 3-layers, 4-frame period. + int ids[4] = {0, 2, 1, 2}; + cfg->ts_periodicity = 4; + *flag_periodicity = 4; + cfg->ts_number_layers = 3; + cfg->ts_rate_decimator[0] = 4; + cfg->ts_rate_decimator[1] = 2; + cfg->ts_rate_decimator[2] = 1; + memcpy(cfg->ts_layer_id, ids, sizeof(ids)); + // 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled. + layer_flags[0] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; + layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; + layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF; + layer_flags[3] = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF; + break; + } + case 12: default: { // 3-layers structure as in case 10, but no sync/refresh points for // layer 1 and 2. 
@@ -530,7 +555,7 @@ int main(int argc, char **argv) { } layering_mode = strtol(argv[10], NULL, 0); - if (layering_mode < 0 || layering_mode > 12) { + if (layering_mode < 0 || layering_mode > 12) { die("Invalid layering mode (0..12) %s", argv[10]); } @@ -690,7 +715,7 @@ int main(int argc, char **argv) { vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed); vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3); vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0); - vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0); + vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, kDenoiserOff); vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1); vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, 0); vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1)); diff --git a/libvpx/ivfdec.c b/libvpx/ivfdec.c index 6dcd66f73..7fc25a0e8 100644 --- a/libvpx/ivfdec.c +++ b/libvpx/ivfdec.c @@ -23,7 +23,7 @@ static void fix_framerate(int *num, int *den) { // we can guess the framerate using only the timebase in this // case. Other files would require reading ahead to guess the // timebase, like we do for webm. - if (*num < 1000) { + if (*den > 0 && *den < 1000000000 && *num > 0 && *num < 1000) { // Correct for the factor of 2 applied to the timebase in the encoder.
if (*num & 1) *den *= 2; diff --git a/libvpx/libs.mk b/libvpx/libs.mk index f28d84a55..9a6092a51 100644 --- a/libvpx/libs.mk +++ b/libvpx/libs.mk @@ -109,40 +109,6 @@ endif VP9_PREFIX=vp9/ $(BUILD_PFX)$(VP9_PREFIX)%.c.o: CFLAGS += -Wextra -# VP10 make file -ifeq ($(CONFIG_VP10),yes) - VP10_PREFIX=vp10/ - include $(SRC_PATH_BARE)/$(VP10_PREFIX)vp10_common.mk -endif - -ifeq ($(CONFIG_VP10_ENCODER),yes) - VP10_PREFIX=vp10/ - include $(SRC_PATH_BARE)/$(VP10_PREFIX)vp10cx.mk - CODEC_SRCS-yes += $(addprefix $(VP10_PREFIX),$(call enabled,VP10_CX_SRCS)) - CODEC_EXPORTS-yes += $(addprefix $(VP10_PREFIX),$(VP10_CX_EXPORTS)) - CODEC_SRCS-yes += $(VP10_PREFIX)vp10cx.mk vpx/vp8.h vpx/vp8cx.h - INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h - INSTALL-LIBS-$(CONFIG_SPATIAL_SVC) += include/vpx/svc_context.h - INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP10_PREFIX)/% - CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h - CODEC_DOC_SECTIONS += vp9 vp9_encoder -endif - -ifeq ($(CONFIG_VP10_DECODER),yes) - VP10_PREFIX=vp10/ - include $(SRC_PATH_BARE)/$(VP10_PREFIX)vp10dx.mk - CODEC_SRCS-yes += $(addprefix $(VP10_PREFIX),$(call enabled,VP10_DX_SRCS)) - CODEC_EXPORTS-yes += $(addprefix $(VP10_PREFIX),$(VP10_DX_EXPORTS)) - CODEC_SRCS-yes += $(VP10_PREFIX)vp10dx.mk vpx/vp8.h vpx/vp8dx.h - INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8dx.h - INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP10_PREFIX)/% - CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8dx.h - CODEC_DOC_SECTIONS += vp9 vp9_decoder -endif - -VP10_PREFIX=vp10/ -$(BUILD_PFX)$(VP10_PREFIX)%.c.o: CFLAGS += -Wextra - ifeq ($(CONFIG_ENCODERS),yes) CODEC_DOC_SECTIONS += encoder endif @@ -183,6 +149,9 @@ INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += third_party/x86inc/x86inc.asm endif CODEC_EXPORTS-yes += vpx/exports_com CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc +ifeq ($(CONFIG_SPATIAL_SVC),yes) +CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_spatial_svc +endif CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec INSTALL-LIBS-yes 
+= include/vpx/vpx_codec.h @@ -260,7 +229,7 @@ OBJS-yes += $(LIBVPX_OBJS) LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a $(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS) -SO_VERSION_MAJOR := 3 +SO_VERSION_MAJOR := 4 SO_VERSION_MINOR := 0 SO_VERSION_PATCH := 0 ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS)) @@ -270,6 +239,12 @@ EXPORT_FILE := libvpx.syms LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, \ libvpx.dylib ) else +ifeq ($(filter iphonesimulator%,$(TGT_OS)),$(TGT_OS)) +LIBVPX_SO := libvpx.$(SO_VERSION_MAJOR).dylib +SHARED_LIB_SUF := .dylib +EXPORT_FILE := libvpx.syms +LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, libvpx.dylib) +else ifeq ($(filter os2%,$(TGT_OS)),$(TGT_OS)) LIBVPX_SO := libvpx$(SO_VERSION_MAJOR).dll SHARED_LIB_SUF := _dll.a @@ -285,6 +260,7 @@ LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, \ libvpx.so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR)) endif endif +endif LIBS-$(CONFIG_SHARED) += $(BUILD_PFX)$(LIBVPX_SO)\ $(notdir $(LIBVPX_SO_SYMLINKS)) \ @@ -394,6 +370,12 @@ $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm $(shell $(SRC_PATH_BARE)/build/make/version.sh "$(SRC_PATH_BARE)" $(BUILD_PFX)vpx_version.h) CLEAN-OBJS += $(BUILD_PFX)vpx_version.h +# +# Add include path for libwebm sources. 
+# +ifeq ($(CONFIG_WEBM_IO),yes) + CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/libwebm +endif ## ## libvpx test directives @@ -429,12 +411,10 @@ testdata:: $(LIBVPX_TEST_DATA) if [ -n "$${sha1sum}" ]; then\ set -e;\ echo "Checking test data:";\ - if [ -n "$(LIBVPX_TEST_DATA)" ]; then\ - for f in $(call enabled,LIBVPX_TEST_DATA); do\ - grep $$f $(SRC_PATH_BARE)/test/test-data.sha1 |\ - (cd $(LIBVPX_TEST_DATA_PATH); $${sha1sum} -c);\ - done; \ - fi; \ + for f in $(call enabled,LIBVPX_TEST_DATA); do\ + grep $$f $(SRC_PATH_BARE)/test/test-data.sha1 |\ + (cd $(LIBVPX_TEST_DATA_PATH); $${sha1sum} -c);\ + done; \ else\ echo "Skipping test data integrity check, sha1sum not found.";\ fi @@ -471,6 +451,7 @@ test_libvpx.$(VCPROJ_SFX): $(LIBVPX_TEST_SRCS) vpx.$(VCPROJ_SFX) gtest.$(VCPROJ_ $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \ --out=$@ $(INTERNAL_CFLAGS) $(CFLAGS) \ -I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" \ + $(if $(CONFIG_WEBM_IO),-I"$(SRC_PATH_BARE)/third_party/libwebm") \ -L. -l$(CODEC_LIB) -l$(GTEST_LIB) $^ PROJECTS-$(CONFIG_MSVS) += test_libvpx.$(VCPROJ_SFX) diff --git a/libvpx/md5_utils.c b/libvpx/md5_utils.c index f4f893a2d..a9b979a41 100644 --- a/libvpx/md5_utils.c +++ b/libvpx/md5_utils.c @@ -150,12 +150,23 @@ MD5Final(md5byte digest[16], struct MD5Context *ctx) { #define MD5STEP(f,w,x,y,z,in,s) \ (w += f(x,y,z) + in, w = (w<<s | w>>(32-s)) + x) +#if defined(__clang__) && defined(__has_attribute) +#if __has_attribute(no_sanitize) +#define VPX_NO_UNSIGNED_OVERFLOW_CHECK \ + __attribute__((no_sanitize("unsigned-integer-overflow"))) +#endif +#endif + +#ifndef VPX_NO_UNSIGNED_OVERFLOW_CHECK +#define VPX_NO_UNSIGNED_OVERFLOW_CHECK +#endif + /* * The core of the MD5 algorithm, this alters an existing MD5 hash to * reflect the addition of 16 longwords of new data. MD5Update blocks * the data and converts bytes into longwords for this routine. 
*/ -void +VPX_NO_UNSIGNED_OVERFLOW_CHECK void MD5Transform(UWORD32 buf[4], UWORD32 const in[16]) { register UWORD32 a, b, c, d; @@ -238,4 +249,6 @@ MD5Transform(UWORD32 buf[4], UWORD32 const in[16]) { buf[3] += d; } +#undef VPX_NO_UNSIGNED_OVERFLOW_CHECK + #endif diff --git a/libvpx/test/acm_random.h b/libvpx/test/acm_random.h index ff5c93ea1..b94b6e195 100644 --- a/libvpx/test/acm_random.h +++ b/libvpx/test/acm_random.h @@ -32,6 +32,12 @@ class ACMRandom { return (value >> 15) & 0xffff; } + int16_t Rand9Signed(void) { + // Use 9 bits: values between 255 (0x0FF) and -256 (0x100). + const uint32_t value = random_.Generate(512); + return static_cast<int16_t>(value) - 256; + } + uint8_t Rand8(void) { const uint32_t value = random_.Generate(testing::internal::Random::kMaxRange); diff --git a/libvpx/test/active_map_test.cc b/libvpx/test/active_map_test.cc index 022199519..dc3de7213 100644 --- a/libvpx/test/active_map_test.cc +++ b/libvpx/test/active_map_test.cc @@ -85,5 +85,5 @@ TEST_P(ActiveMapTest, Test) { VP9_INSTANTIATE_TEST_CASE(ActiveMapTest, ::testing::Values(::libvpx_test::kRealTime), - ::testing::Range(0, 6)); + ::testing::Range(0, 9)); } // namespace diff --git a/libvpx/test/add_noise_test.cc b/libvpx/test/add_noise_test.cc new file mode 100644 index 000000000..e9945c409 --- /dev/null +++ b/libvpx/test/add_noise_test.cc @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2016 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include <math.h> +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "./vpx_dsp_rtcd.h" +#include "vpx/vpx_integer.h" +#include "vpx_mem/vpx_mem.h" + +namespace { + +// TODO(jimbankoski): make width and height integers not unsigned. +typedef void (*AddNoiseFunc)(unsigned char *start, char *noise, + char blackclamp[16], char whiteclamp[16], + char bothclamp[16], unsigned int width, + unsigned int height, int pitch); + +class AddNoiseTest + : public ::testing::TestWithParam<AddNoiseFunc> { + public: + virtual void TearDown() { + libvpx_test::ClearSystemState(); + } + virtual ~AddNoiseTest() {} +}; + +double stddev6(char a, char b, char c, char d, char e, char f) { + const double n = (a + b + c + d + e + f) / 6.0; + const double v = ((a - n) * (a - n) + (b - n) * (b - n) + (c - n) * (c - n) + + (d - n) * (d - n) + (e - n) * (e - n) + (f - n) * (f - n)) / + 6.0; + return sqrt(v); +} + +// TODO(jimbankoski): The following 2 functions are duplicated in each codec. +// For now the vp9 one has been copied into the test as is. We should normalize +// these in vpx_dsp and not have 3 copies of these unless there is different +// noise we add for each codec. + +double gaussian(double sigma, double mu, double x) { + return 1 / (sigma * sqrt(2.0 * 3.14159265)) * + (exp(-(x - mu) * (x - mu) / (2 * sigma * sigma))); +} + +int setup_noise(int size_noise, char *noise) { + char char_dist[300]; + const int ai = 4; + const int qi = 24; + const double sigma = ai + .5 + .6 * (63 - qi) / 63.0; + + /* set up a lookup table of 256 entries that matches + * a gaussian distribution with sigma determined by q. 
+ */ + int next = 0; + + for (int i = -32; i < 32; i++) { + int a_i = (int) (0.5 + 256 * gaussian(sigma, 0, i)); + + if (a_i) { + for (int j = 0; j < a_i; j++) { + char_dist[next + j] = (char)(i); + } + + next = next + a_i; + } + } + + for (; next < 256; next++) + char_dist[next] = 0; + + for (int i = 0; i < size_noise; i++) { + noise[i] = char_dist[rand() & 0xff]; // NOLINT + } + + // Returns the most negative value in distribution. + return char_dist[0]; +} + +TEST_P(AddNoiseTest, CheckNoiseAdded) { + DECLARE_ALIGNED(16, char, blackclamp[16]); + DECLARE_ALIGNED(16, char, whiteclamp[16]); + DECLARE_ALIGNED(16, char, bothclamp[16]); + const int width = 64; + const int height = 64; + const int image_size = width * height; + char noise[3072]; + + const int clamp = setup_noise(3072, noise); + for (int i = 0; i < 16; i++) { + blackclamp[i] = -clamp; + whiteclamp[i] = -clamp; + bothclamp[i] = -2 * clamp; + } + + uint8_t *const s = reinterpret_cast<uint8_t *>(vpx_calloc(image_size, 1)); + memset(s, 99, image_size); + + ASM_REGISTER_STATE_CHECK(GetParam()(s, noise, blackclamp, whiteclamp, + bothclamp, width, height, width)); + + // Check to make sure we don't end up having either the same or no added + // noise either vertically or horizontally. + for (int i = 0; i < image_size - 6 * width - 6; ++i) { + const double hd = stddev6(s[i] - 99, s[i + 1] - 99, s[i + 2] - 99, + s[i + 3] - 99, s[i + 4] - 99, s[i + 5] - 99); + const double vd = stddev6(s[i] - 99, s[i + width] - 99, + s[i + 2 * width] - 99, s[i + 3 * width] - 99, + s[i + 4 * width] - 99, s[i + 5 * width] - 99); + + EXPECT_NE(hd, 0); + EXPECT_NE(vd, 0); + } + + // Initialize pixels in the image to 255 and check for roll over. + memset(s, 255, image_size); + + ASM_REGISTER_STATE_CHECK(GetParam()(s, noise, blackclamp, whiteclamp, + bothclamp, width, height, width)); + + // Check to make sure don't roll over. 
+ for (int i = 0; i < image_size; ++i) { + EXPECT_GT((int)s[i], 10) << "i = " << i; + } + + // Initialize pixels in the image to 0 and check for roll under. + memset(s, 0, image_size); + + ASM_REGISTER_STATE_CHECK(GetParam()(s, noise, blackclamp, whiteclamp, + bothclamp, width, height, width)); + + // Check to make sure don't roll under. + for (int i = 0; i < image_size; ++i) { + EXPECT_LT((int)s[i], 245) << "i = " << i; + } + + vpx_free(s); +} + +TEST_P(AddNoiseTest, CheckCvsAssembly) { + DECLARE_ALIGNED(16, char, blackclamp[16]); + DECLARE_ALIGNED(16, char, whiteclamp[16]); + DECLARE_ALIGNED(16, char, bothclamp[16]); + const int width = 64; + const int height = 64; + const int image_size = width * height; + char noise[3072]; + + const int clamp = setup_noise(3072, noise); + for (int i = 0; i < 16; i++) { + blackclamp[i] = -clamp; + whiteclamp[i] = -clamp; + bothclamp[i] = -2 * clamp; + } + + uint8_t *const s = reinterpret_cast<uint8_t *>(vpx_calloc(image_size, 1)); + uint8_t *const d = reinterpret_cast<uint8_t *>(vpx_calloc(image_size, 1)); + + memset(s, 99, image_size); + memset(d, 99, image_size); + + srand(0); + ASM_REGISTER_STATE_CHECK(GetParam()(s, noise, blackclamp, whiteclamp, + bothclamp, width, height, width)); + srand(0); + ASM_REGISTER_STATE_CHECK(vpx_plane_add_noise_c(d, noise, blackclamp, + whiteclamp, bothclamp, + width, height, width)); + + for (int i = 0; i < image_size; ++i) { + EXPECT_EQ((int)s[i], (int)d[i]) << "i = " << i; + } + + vpx_free(d); + vpx_free(s); +} + +INSTANTIATE_TEST_CASE_P(C, AddNoiseTest, + ::testing::Values(vpx_plane_add_noise_c)); + +#if HAVE_SSE2 +INSTANTIATE_TEST_CASE_P(SSE2, AddNoiseTest, + ::testing::Values(vpx_plane_add_noise_sse2)); +#endif + +#if HAVE_MSA +INSTANTIATE_TEST_CASE_P(MSA, AddNoiseTest, + ::testing::Values(vpx_plane_add_noise_msa)); +#endif +} // namespace diff --git a/libvpx/test/altref_test.cc b/libvpx/test/altref_test.cc index af25b7285..d9f83d8cd 100644 --- a/libvpx/test/altref_test.cc +++ 
b/libvpx/test/altref_test.cc @@ -14,6 +14,8 @@ #include "test/util.h" namespace { +#if CONFIG_VP8_ENCODER + // lookahead range: [kLookAheadMin, kLookAheadMax). const int kLookAheadMin = 5; const int kLookAheadMax = 26; @@ -63,7 +65,95 @@ TEST_P(AltRefTest, MonotonicTimestamps) { EXPECT_GE(altref_count(), 1); } - VP8_INSTANTIATE_TEST_CASE(AltRefTest, ::testing::Range(kLookAheadMin, kLookAheadMax)); + +#endif // CONFIG_VP8_ENCODER + +class AltRefForcedKeyTestLarge + : public ::libvpx_test::EncoderTest, + public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> { + protected: + AltRefForcedKeyTestLarge() + : EncoderTest(GET_PARAM(0)), + encoding_mode_(GET_PARAM(1)), + cpu_used_(GET_PARAM(2)), + forced_kf_frame_num_(1), + frame_num_(0) {} + virtual ~AltRefForcedKeyTestLarge() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(encoding_mode_); + cfg_.rc_end_usage = VPX_VBR; + cfg_.g_threads = 0; + } + + virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, + ::libvpx_test::Encoder *encoder) { + if (video->frame() == 0) { + encoder->Control(VP8E_SET_CPUUSED, cpu_used_); + encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1); + // override test default for tile columns if necessary. +#if CONFIG_VP9_ENCODER + if (GET_PARAM(0) == &libvpx_test::kVP9) { + encoder->Control(VP9E_SET_TILE_COLUMNS, 6); + } +#endif + } + frame_flags_ = + (video->frame() == forced_kf_frame_num_) ? 
VPX_EFLAG_FORCE_KF : 0; + } + + virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { + if (frame_num_ == forced_kf_frame_num_) { + ASSERT_TRUE(!!(pkt->data.frame.flags & VPX_FRAME_IS_KEY)) + << "Frame #" << frame_num_ << " isn't a keyframe!"; + } + ++frame_num_; + } + + ::libvpx_test::TestMode encoding_mode_; + int cpu_used_; + unsigned int forced_kf_frame_num_; + unsigned int frame_num_; +}; + +TEST_P(AltRefForcedKeyTestLarge, Frame1IsKey) { + const vpx_rational timebase = { 1, 30 }; + const int lag_values[] = { 3, 15, 25, -1 }; + + forced_kf_frame_num_ = 1; + for (int i = 0; lag_values[i] != -1; ++i) { + frame_num_ = 0; + cfg_.g_lag_in_frames = lag_values[i]; + libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + timebase.den, timebase.num, 0, 30); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + } +} + +TEST_P(AltRefForcedKeyTestLarge, ForcedFrameIsKey) { + const vpx_rational timebase = { 1, 30 }; + const int lag_values[] = { 3, 15, 25, -1 }; + + for (int i = 0; lag_values[i] != -1; ++i) { + frame_num_ = 0; + forced_kf_frame_num_ = lag_values[i] - 1; + cfg_.g_lag_in_frames = lag_values[i]; + libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + timebase.den, timebase.num, 0, 30); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + } +} + +VP8_INSTANTIATE_TEST_CASE( + AltRefForcedKeyTestLarge, + ::testing::Values(::libvpx_test::kOnePassGood), + ::testing::Range(0, 9)); + +VP9_INSTANTIATE_TEST_CASE( + AltRefForcedKeyTestLarge, + ::testing::Values(::libvpx_test::kOnePassGood), + ::testing::Range(0, 9)); } // namespace diff --git a/libvpx/test/vp9_avg_test.cc b/libvpx/test/avg_test.cc index d38313116..44d8dd7db 100644 --- a/libvpx/test/vp9_avg_test.cc +++ b/libvpx/test/avg_test.cc @@ -15,9 +15,7 @@ #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_config.h" -#if CONFIG_VP9_ENCODER -#include "./vp9_rtcd.h" -#endif +#include "./vpx_dsp_rtcd.h" #include "test/acm_random.h" #include 
"test/clear_system_state.h" @@ -57,19 +55,19 @@ class AverageTestBase : public ::testing::Test { } // Sum Pixels - unsigned int ReferenceAverage8x8(const uint8_t* source, int pitch ) { + unsigned int ReferenceAverage8x8(const uint8_t* source, int pitch) { unsigned int average = 0; for (int h = 0; h < 8; ++h) for (int w = 0; w < 8; ++w) - average += source[h * source_stride_ + w]; + average += source[h * pitch + w]; return ((average + 32) >> 6); } - unsigned int ReferenceAverage4x4(const uint8_t* source, int pitch ) { + unsigned int ReferenceAverage4x4(const uint8_t* source, int pitch) { unsigned int average = 0; for (int h = 0; h < 4; ++h) for (int w = 0; w < 4; ++w) - average += source[h * source_stride_ + w]; + average += source[h * pitch + w]; return ((average + 8) >> 4); } @@ -194,6 +192,48 @@ class IntProColTest int16_t sum_c_; }; +typedef int (*SatdFunc)(const int16_t *coeffs, int length); +typedef std::tr1::tuple<int, SatdFunc> SatdTestParam; + +class SatdTest + : public ::testing::Test, + public ::testing::WithParamInterface<SatdTestParam> { + protected: + virtual void SetUp() { + satd_size_ = GET_PARAM(0); + satd_func_ = GET_PARAM(1); + rnd_.Reset(ACMRandom::DeterministicSeed()); + src_ = reinterpret_cast<int16_t*>( + vpx_memalign(16, sizeof(*src_) * satd_size_)); + ASSERT_TRUE(src_ != NULL); + } + + virtual void TearDown() { + libvpx_test::ClearSystemState(); + vpx_free(src_); + } + + void FillConstant(const int16_t val) { + for (int i = 0; i < satd_size_; ++i) src_[i] = val; + } + + void FillRandom() { + for (int i = 0; i < satd_size_; ++i) src_[i] = rnd_.Rand16(); + } + + void Check(const int expected) { + int total; + ASM_REGISTER_STATE_CHECK(total = satd_func_(src_, satd_size_)); + EXPECT_EQ(expected, total); + } + + int satd_size_; + + private: + int16_t *src_; + SatdFunc satd_func_; + ACMRandom rnd_; +}; uint8_t* AverageTestBase::source_data_ = NULL; @@ -246,69 +286,126 @@ TEST_P(IntProColTest, Random) { RunComparison(); } + +TEST_P(SatdTest, 
MinValue) { + const int kMin = -32640; + const int expected = -kMin * satd_size_; + FillConstant(kMin); + Check(expected); +} + +TEST_P(SatdTest, MaxValue) { + const int kMax = 32640; + const int expected = kMax * satd_size_; + FillConstant(kMax); + Check(expected); +} + +TEST_P(SatdTest, Random) { + int expected; + switch (satd_size_) { + case 16: expected = 205298; break; + case 64: expected = 1113950; break; + case 256: expected = 4268415; break; + case 1024: expected = 16954082; break; + default: + FAIL() << "Invalid satd size (" << satd_size_ + << ") valid: 16/64/256/1024"; + } + FillRandom(); + Check(expected); +} + using std::tr1::make_tuple; INSTANTIATE_TEST_CASE_P( C, AverageTest, ::testing::Values( - make_tuple(16, 16, 1, 8, &vp9_avg_8x8_c), - make_tuple(16, 16, 1, 4, &vp9_avg_4x4_c))); + make_tuple(16, 16, 1, 8, &vpx_avg_8x8_c), + make_tuple(16, 16, 1, 4, &vpx_avg_4x4_c))); + +INSTANTIATE_TEST_CASE_P( + C, SatdTest, + ::testing::Values( + make_tuple(16, &vpx_satd_c), + make_tuple(64, &vpx_satd_c), + make_tuple(256, &vpx_satd_c), + make_tuple(1024, &vpx_satd_c))); #if HAVE_SSE2 INSTANTIATE_TEST_CASE_P( SSE2, AverageTest, ::testing::Values( - make_tuple(16, 16, 0, 8, &vp9_avg_8x8_sse2), - make_tuple(16, 16, 5, 8, &vp9_avg_8x8_sse2), - make_tuple(32, 32, 15, 8, &vp9_avg_8x8_sse2), - make_tuple(16, 16, 0, 4, &vp9_avg_4x4_sse2), - make_tuple(16, 16, 5, 4, &vp9_avg_4x4_sse2), - make_tuple(32, 32, 15, 4, &vp9_avg_4x4_sse2))); + make_tuple(16, 16, 0, 8, &vpx_avg_8x8_sse2), + make_tuple(16, 16, 5, 8, &vpx_avg_8x8_sse2), + make_tuple(32, 32, 15, 8, &vpx_avg_8x8_sse2), + make_tuple(16, 16, 0, 4, &vpx_avg_4x4_sse2), + make_tuple(16, 16, 5, 4, &vpx_avg_4x4_sse2), + make_tuple(32, 32, 15, 4, &vpx_avg_4x4_sse2))); INSTANTIATE_TEST_CASE_P( SSE2, IntProRowTest, ::testing::Values( - make_tuple(16, &vp9_int_pro_row_sse2, &vp9_int_pro_row_c), - make_tuple(32, &vp9_int_pro_row_sse2, &vp9_int_pro_row_c), - make_tuple(64, &vp9_int_pro_row_sse2, &vp9_int_pro_row_c))); + 
make_tuple(16, &vpx_int_pro_row_sse2, &vpx_int_pro_row_c), + make_tuple(32, &vpx_int_pro_row_sse2, &vpx_int_pro_row_c), + make_tuple(64, &vpx_int_pro_row_sse2, &vpx_int_pro_row_c))); INSTANTIATE_TEST_CASE_P( SSE2, IntProColTest, ::testing::Values( - make_tuple(16, &vp9_int_pro_col_sse2, &vp9_int_pro_col_c), - make_tuple(32, &vp9_int_pro_col_sse2, &vp9_int_pro_col_c), - make_tuple(64, &vp9_int_pro_col_sse2, &vp9_int_pro_col_c))); + make_tuple(16, &vpx_int_pro_col_sse2, &vpx_int_pro_col_c), + make_tuple(32, &vpx_int_pro_col_sse2, &vpx_int_pro_col_c), + make_tuple(64, &vpx_int_pro_col_sse2, &vpx_int_pro_col_c))); + +INSTANTIATE_TEST_CASE_P( + SSE2, SatdTest, + ::testing::Values( + make_tuple(16, &vpx_satd_sse2), + make_tuple(64, &vpx_satd_sse2), + make_tuple(256, &vpx_satd_sse2), + make_tuple(1024, &vpx_satd_sse2))); #endif #if HAVE_NEON INSTANTIATE_TEST_CASE_P( NEON, AverageTest, ::testing::Values( - make_tuple(16, 16, 0, 8, &vp9_avg_8x8_neon), - make_tuple(16, 16, 5, 8, &vp9_avg_8x8_neon), - make_tuple(32, 32, 15, 8, &vp9_avg_8x8_neon))); + make_tuple(16, 16, 0, 8, &vpx_avg_8x8_neon), + make_tuple(16, 16, 5, 8, &vpx_avg_8x8_neon), + make_tuple(32, 32, 15, 8, &vpx_avg_8x8_neon), + make_tuple(16, 16, 0, 4, &vpx_avg_4x4_neon), + make_tuple(16, 16, 5, 4, &vpx_avg_4x4_neon), + make_tuple(32, 32, 15, 4, &vpx_avg_4x4_neon))); INSTANTIATE_TEST_CASE_P( NEON, IntProRowTest, ::testing::Values( - make_tuple(16, &vp9_int_pro_row_neon, &vp9_int_pro_row_c), - make_tuple(32, &vp9_int_pro_row_neon, &vp9_int_pro_row_c), - make_tuple(64, &vp9_int_pro_row_neon, &vp9_int_pro_row_c))); + make_tuple(16, &vpx_int_pro_row_neon, &vpx_int_pro_row_c), + make_tuple(32, &vpx_int_pro_row_neon, &vpx_int_pro_row_c), + make_tuple(64, &vpx_int_pro_row_neon, &vpx_int_pro_row_c))); INSTANTIATE_TEST_CASE_P( NEON, IntProColTest, ::testing::Values( - make_tuple(16, &vp9_int_pro_col_neon, &vp9_int_pro_col_c), - make_tuple(32, &vp9_int_pro_col_neon, &vp9_int_pro_col_c), - make_tuple(64, 
&vp9_int_pro_col_neon, &vp9_int_pro_col_c))); + make_tuple(16, &vpx_int_pro_col_neon, &vpx_int_pro_col_c), + make_tuple(32, &vpx_int_pro_col_neon, &vpx_int_pro_col_c), + make_tuple(64, &vpx_int_pro_col_neon, &vpx_int_pro_col_c))); + +INSTANTIATE_TEST_CASE_P( + NEON, SatdTest, + ::testing::Values( + make_tuple(16, &vpx_satd_neon), + make_tuple(64, &vpx_satd_neon), + make_tuple(256, &vpx_satd_neon), + make_tuple(1024, &vpx_satd_neon))); #endif #if HAVE_MSA INSTANTIATE_TEST_CASE_P( MSA, AverageTest, ::testing::Values( - make_tuple(16, 16, 0, 8, &vp9_avg_8x8_msa), - make_tuple(16, 16, 5, 8, &vp9_avg_8x8_msa), - make_tuple(32, 32, 15, 8, &vp9_avg_8x8_msa), - make_tuple(16, 16, 0, 4, &vp9_avg_4x4_msa), - make_tuple(16, 16, 5, 4, &vp9_avg_4x4_msa), - make_tuple(32, 32, 15, 4, &vp9_avg_4x4_msa))); + make_tuple(16, 16, 0, 8, &vpx_avg_8x8_msa), + make_tuple(16, 16, 5, 8, &vpx_avg_8x8_msa), + make_tuple(32, 32, 15, 8, &vpx_avg_8x8_msa), + make_tuple(16, 16, 0, 4, &vpx_avg_4x4_msa), + make_tuple(16, 16, 5, 4, &vpx_avg_4x4_msa), + make_tuple(32, 32, 15, 4, &vpx_avg_4x4_msa))); #endif } // namespace diff --git a/libvpx/test/borders_test.cc b/libvpx/test/borders_test.cc index 6592375f8..bd3ac39f8 100644 --- a/libvpx/test/borders_test.cc +++ b/libvpx/test/borders_test.cc @@ -52,7 +52,7 @@ TEST_P(BordersTest, TestEncodeHighBitrate) { // extend into the border and test the border condition. 
cfg_.g_lag_in_frames = 25; cfg_.rc_2pass_vbr_minsection_pct = 5; - cfg_.rc_2pass_vbr_minsection_pct = 2000; + cfg_.rc_2pass_vbr_maxsection_pct = 2000; cfg_.rc_target_bitrate = 2000; cfg_.rc_max_quantizer = 10; @@ -80,7 +80,4 @@ TEST_P(BordersTest, TestLowBitrate) { VP9_INSTANTIATE_TEST_CASE(BordersTest, ::testing::Values( ::libvpx_test::kTwoPassGood)); - -VP10_INSTANTIATE_TEST_CASE(BordersTest, ::testing::Values( - ::libvpx_test::kTwoPassGood)); } // namespace diff --git a/libvpx/test/byte_alignment_test.cc b/libvpx/test/byte_alignment_test.cc index aa4b78b9a..3a808b046 100644 --- a/libvpx/test/byte_alignment_test.cc +++ b/libvpx/test/byte_alignment_test.cc @@ -21,14 +21,14 @@ namespace { +#if CONFIG_WEBM_IO + const int kLegacyByteAlignment = 0; const int kLegacyYPlaneByteAlignment = 32; const int kNumPlanesToCheck = 3; const char kVP9TestFile[] = "vp90-2-02-size-lf-1920x1080.webm"; const char kVP9Md5File[] = "vp90-2-02-size-lf-1920x1080.webm.md5"; -#if CONFIG_WEBM_IO - struct ByteAlignmentTestParam { int byte_alignment; vpx_codec_err_t expected_value; diff --git a/libvpx/test/codec_factory.h b/libvpx/test/codec_factory.h index 09c9cf984..429d40d81 100644 --- a/libvpx/test/codec_factory.h +++ b/libvpx/test/codec_factory.h @@ -13,10 +13,10 @@ #include "./vpx_config.h" #include "vpx/vpx_decoder.h" #include "vpx/vpx_encoder.h" -#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER +#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER #include "vpx/vp8cx.h" #endif -#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER || CONFIG_VP10_DECODER +#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER #include "vpx/vp8dx.h" #endif @@ -233,8 +233,6 @@ class VP9CodecFactory : public CodecFactory { int usage) const { #if CONFIG_VP9_ENCODER return vpx_codec_enc_config_default(&vpx_codec_vp9_cx_algo, cfg, usage); -#elif CONFIG_VP10_ENCODER - return vpx_codec_enc_config_default(&vpx_codec_vp10_cx_algo, cfg, usage); #else return VPX_CODEC_INCAPABLE; #endif @@ -253,96 +251,5 @@ const 
libvpx_test::VP9CodecFactory kVP9; #define VP9_INSTANTIATE_TEST_CASE(test, ...) #endif // CONFIG_VP9 -/* - * VP10 Codec Definitions - */ -#if CONFIG_VP10 -class VP10Decoder : public Decoder { - public: - VP10Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline) - : Decoder(cfg, deadline) {} - - VP10Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag, - unsigned long deadline) // NOLINT - : Decoder(cfg, flag, deadline) {} - - protected: - virtual vpx_codec_iface_t* CodecInterface() const { -#if CONFIG_VP10_DECODER - return &vpx_codec_vp10_dx_algo; -#else - return NULL; -#endif - } -}; - -class VP10Encoder : public Encoder { - public: - VP10Encoder(vpx_codec_enc_cfg_t cfg, unsigned long deadline, - const unsigned long init_flags, TwopassStatsStore *stats) - : Encoder(cfg, deadline, init_flags, stats) {} - - protected: - virtual vpx_codec_iface_t* CodecInterface() const { -#if CONFIG_VP10_ENCODER - return &vpx_codec_vp10_cx_algo; -#else - return NULL; -#endif - } -}; - -class VP10CodecFactory : public CodecFactory { - public: - VP10CodecFactory() : CodecFactory() {} - - virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg, - unsigned long deadline) const { - return CreateDecoder(cfg, 0, deadline); - } - - virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg, - const vpx_codec_flags_t flags, - unsigned long deadline) const { // NOLINT -#if CONFIG_VP10_DECODER - return new VP10Decoder(cfg, flags, deadline); -#else - return NULL; -#endif - } - - virtual Encoder* CreateEncoder(vpx_codec_enc_cfg_t cfg, - unsigned long deadline, - const unsigned long init_flags, - TwopassStatsStore *stats) const { -#if CONFIG_VP10_ENCODER - return new VP10Encoder(cfg, deadline, init_flags, stats); -#else - return NULL; -#endif - } - - virtual vpx_codec_err_t DefaultEncoderConfig(vpx_codec_enc_cfg_t *cfg, - int usage) const { -#if CONFIG_VP10_ENCODER - return vpx_codec_enc_config_default(&vpx_codec_vp10_cx_algo, cfg, usage); -#else - return VPX_CODEC_INCAPABLE; -#endif - } 
-}; - -const libvpx_test::VP10CodecFactory kVP10; - -#define VP10_INSTANTIATE_TEST_CASE(test, ...)\ - INSTANTIATE_TEST_CASE_P(VP10, test, \ - ::testing::Combine( \ - ::testing::Values(static_cast<const libvpx_test::CodecFactory*>( \ - &libvpx_test::kVP10)), \ - __VA_ARGS__)) -#else -#define VP10_INSTANTIATE_TEST_CASE(test, ...) -#endif // CONFIG_VP10 - } // namespace libvpx_test #endif // TEST_CODEC_FACTORY_H_ diff --git a/libvpx/test/convolve_test.cc b/libvpx/test/convolve_test.cc index 08267882d..73b0edb99 100644 --- a/libvpx/test/convolve_test.cc +++ b/libvpx/test/convolve_test.cc @@ -69,6 +69,21 @@ struct ConvolveFunctions { typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam; +#define ALL_SIZES(convolve_fn) \ + make_tuple(4, 4, &convolve_fn), \ + make_tuple(8, 4, &convolve_fn), \ + make_tuple(4, 8, &convolve_fn), \ + make_tuple(8, 8, &convolve_fn), \ + make_tuple(16, 8, &convolve_fn), \ + make_tuple(8, 16, &convolve_fn), \ + make_tuple(16, 16, &convolve_fn), \ + make_tuple(32, 16, &convolve_fn), \ + make_tuple(16, 32, &convolve_fn), \ + make_tuple(32, 32, &convolve_fn), \ + make_tuple(64, 32, &convolve_fn), \ + make_tuple(32, 64, &convolve_fn), \ + make_tuple(64, 64, &convolve_fn) + // Reference 8-tap subpixel filter, slightly modified to fit into this test. #define VP9_FILTER_WEIGHT 128 #define VP9_FILTER_SHIFT 7 @@ -103,7 +118,8 @@ void filter_block2d_8_c(const uint8_t *src_ptr, // and filter_max_width = 16 // uint8_t intermediate_buffer[71 * kMaxDimension]; - const int intermediate_next_stride = 1 - intermediate_height * output_width; + const int intermediate_next_stride = + 1 - static_cast<int>(intermediate_height * output_width); // Horizontal pass (src -> transposed intermediate). 
uint8_t *output_ptr = intermediate_buffer; @@ -215,7 +231,8 @@ void highbd_filter_block2d_8_c(const uint16_t *src_ptr, * and filter_max_width = 16 */ uint16_t intermediate_buffer[71 * kMaxDimension]; - const int intermediate_next_stride = 1 - intermediate_height * output_width; + const int intermediate_next_stride = + 1 - static_cast<int>(intermediate_height * output_width); // Horizontal pass (src -> transposed intermediate). { @@ -279,8 +296,7 @@ void highbd_block2d_average_c(uint16_t *src, uint16_t *output_ptr, unsigned int output_stride, unsigned int output_width, - unsigned int output_height, - int bd) { + unsigned int output_height) { unsigned int i, j; for (i = 0; i < output_height; ++i) { for (j = 0; j < output_width; ++j) { @@ -306,7 +322,7 @@ void highbd_filter_average_block2d_8_c(const uint16_t *src_ptr, highbd_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64, output_width, output_height, bd); highbd_block2d_average_c(tmp, 64, dst_ptr, dst_stride, - output_width, output_height, bd); + output_width, output_height); } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -1035,20 +1051,6 @@ const ConvolveFunctions convolve8_c( wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8, wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8); -INSTANTIATE_TEST_CASE_P(C_8, ConvolveTest, ::testing::Values( - make_tuple(4, 4, &convolve8_c), - make_tuple(8, 4, &convolve8_c), - make_tuple(4, 8, &convolve8_c), - make_tuple(8, 8, &convolve8_c), - make_tuple(16, 8, &convolve8_c), - make_tuple(8, 16, &convolve8_c), - make_tuple(16, 16, &convolve8_c), - make_tuple(32, 16, &convolve8_c), - make_tuple(16, 32, &convolve8_c), - make_tuple(32, 32, &convolve8_c), - make_tuple(64, 32, &convolve8_c), - make_tuple(32, 64, &convolve8_c), - make_tuple(64, 64, &convolve8_c))); const ConvolveFunctions convolve10_c( wrap_convolve_copy_c_10, wrap_convolve_avg_c_10, wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10, @@ -1057,20 +1059,6 
@@ const ConvolveFunctions convolve10_c( wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10, wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10, wrap_convolve8_avg_c_10, 10); -INSTANTIATE_TEST_CASE_P(C_10, ConvolveTest, ::testing::Values( - make_tuple(4, 4, &convolve10_c), - make_tuple(8, 4, &convolve10_c), - make_tuple(4, 8, &convolve10_c), - make_tuple(8, 8, &convolve10_c), - make_tuple(16, 8, &convolve10_c), - make_tuple(8, 16, &convolve10_c), - make_tuple(16, 16, &convolve10_c), - make_tuple(32, 16, &convolve10_c), - make_tuple(16, 32, &convolve10_c), - make_tuple(32, 32, &convolve10_c), - make_tuple(64, 32, &convolve10_c), - make_tuple(32, 64, &convolve10_c), - make_tuple(64, 64, &convolve10_c))); const ConvolveFunctions convolve12_c( wrap_convolve_copy_c_12, wrap_convolve_avg_c_12, wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12, @@ -1079,23 +1067,13 @@ const ConvolveFunctions convolve12_c( wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12, wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12, wrap_convolve8_avg_c_12, 12); -INSTANTIATE_TEST_CASE_P(C_12, ConvolveTest, ::testing::Values( - make_tuple(4, 4, &convolve12_c), - make_tuple(8, 4, &convolve12_c), - make_tuple(4, 8, &convolve12_c), - make_tuple(8, 8, &convolve12_c), - make_tuple(16, 8, &convolve12_c), - make_tuple(8, 16, &convolve12_c), - make_tuple(16, 16, &convolve12_c), - make_tuple(32, 16, &convolve12_c), - make_tuple(16, 32, &convolve12_c), - make_tuple(32, 32, &convolve12_c), - make_tuple(64, 32, &convolve12_c), - make_tuple(32, 64, &convolve12_c), - make_tuple(64, 64, &convolve12_c))); +const ConvolveParam kArrayConvolve_c[] = { + ALL_SIZES(convolve8_c), + ALL_SIZES(convolve10_c), + ALL_SIZES(convolve12_c) +}; #else - const ConvolveFunctions convolve8_c( vpx_convolve_copy_c, vpx_convolve_avg_c, vpx_convolve8_horiz_c, vpx_convolve8_avg_horiz_c, @@ -1104,22 +1082,10 @@ const ConvolveFunctions convolve8_c( vpx_scaled_horiz_c, 
vpx_scaled_avg_horiz_c, vpx_scaled_vert_c, vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0); - -INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values( - make_tuple(4, 4, &convolve8_c), - make_tuple(8, 4, &convolve8_c), - make_tuple(4, 8, &convolve8_c), - make_tuple(8, 8, &convolve8_c), - make_tuple(16, 8, &convolve8_c), - make_tuple(8, 16, &convolve8_c), - make_tuple(16, 16, &convolve8_c), - make_tuple(32, 16, &convolve8_c), - make_tuple(16, 32, &convolve8_c), - make_tuple(32, 32, &convolve8_c), - make_tuple(64, 32, &convolve8_c), - make_tuple(32, 64, &convolve8_c), - make_tuple(64, 64, &convolve8_c))); +const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(convolve8_c) }; #endif +INSTANTIATE_TEST_CASE_P(C, ConvolveTest, + ::testing::ValuesIn(kArrayConvolve_c)); #if HAVE_SSE2 && ARCH_X86_64 #if CONFIG_VP9_HIGHBITDEPTH @@ -1159,46 +1125,11 @@ const ConvolveFunctions convolve12_sse2( wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12, wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12, wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12); -INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values( - make_tuple(4, 4, &convolve8_sse2), - make_tuple(8, 4, &convolve8_sse2), - make_tuple(4, 8, &convolve8_sse2), - make_tuple(8, 8, &convolve8_sse2), - make_tuple(16, 8, &convolve8_sse2), - make_tuple(8, 16, &convolve8_sse2), - make_tuple(16, 16, &convolve8_sse2), - make_tuple(32, 16, &convolve8_sse2), - make_tuple(16, 32, &convolve8_sse2), - make_tuple(32, 32, &convolve8_sse2), - make_tuple(64, 32, &convolve8_sse2), - make_tuple(32, 64, &convolve8_sse2), - make_tuple(64, 64, &convolve8_sse2), - make_tuple(4, 4, &convolve10_sse2), - make_tuple(8, 4, &convolve10_sse2), - make_tuple(4, 8, &convolve10_sse2), - make_tuple(8, 8, &convolve10_sse2), - make_tuple(16, 8, &convolve10_sse2), - make_tuple(8, 16, &convolve10_sse2), - make_tuple(16, 16, &convolve10_sse2), - make_tuple(32, 16, &convolve10_sse2), - make_tuple(16, 32, 
&convolve10_sse2), - make_tuple(32, 32, &convolve10_sse2), - make_tuple(64, 32, &convolve10_sse2), - make_tuple(32, 64, &convolve10_sse2), - make_tuple(64, 64, &convolve10_sse2), - make_tuple(4, 4, &convolve12_sse2), - make_tuple(8, 4, &convolve12_sse2), - make_tuple(4, 8, &convolve12_sse2), - make_tuple(8, 8, &convolve12_sse2), - make_tuple(16, 8, &convolve12_sse2), - make_tuple(8, 16, &convolve12_sse2), - make_tuple(16, 16, &convolve12_sse2), - make_tuple(32, 16, &convolve12_sse2), - make_tuple(16, 32, &convolve12_sse2), - make_tuple(32, 32, &convolve12_sse2), - make_tuple(64, 32, &convolve12_sse2), - make_tuple(32, 64, &convolve12_sse2), - make_tuple(64, 64, &convolve12_sse2))); +const ConvolveParam kArrayConvolve_sse2[] = { + ALL_SIZES(convolve8_sse2), + ALL_SIZES(convolve10_sse2), + ALL_SIZES(convolve12_sse2) +}; #else const ConvolveFunctions convolve8_sse2( #if CONFIG_USE_X86INC @@ -1213,21 +1144,10 @@ const ConvolveFunctions convolve8_sse2( vpx_scaled_vert_c, vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0); -INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values( - make_tuple(4, 4, &convolve8_sse2), - make_tuple(8, 4, &convolve8_sse2), - make_tuple(4, 8, &convolve8_sse2), - make_tuple(8, 8, &convolve8_sse2), - make_tuple(16, 8, &convolve8_sse2), - make_tuple(8, 16, &convolve8_sse2), - make_tuple(16, 16, &convolve8_sse2), - make_tuple(32, 16, &convolve8_sse2), - make_tuple(16, 32, &convolve8_sse2), - make_tuple(32, 32, &convolve8_sse2), - make_tuple(64, 32, &convolve8_sse2), - make_tuple(32, 64, &convolve8_sse2), - make_tuple(64, 64, &convolve8_sse2))); +const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2) }; #endif // CONFIG_VP9_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, + ::testing::ValuesIn(kArrayConvolve_sse2)); #endif #if HAVE_SSSE3 @@ -1238,22 +1158,11 @@ const ConvolveFunctions convolve8_ssse3( vpx_convolve8_ssse3, vpx_convolve8_avg_ssse3, vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, 
vpx_scaled_vert_c, vpx_scaled_avg_vert_c, - vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0); + vpx_scaled_2d_ssse3, vpx_scaled_avg_2d_c, 0); -INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values( - make_tuple(4, 4, &convolve8_ssse3), - make_tuple(8, 4, &convolve8_ssse3), - make_tuple(4, 8, &convolve8_ssse3), - make_tuple(8, 8, &convolve8_ssse3), - make_tuple(16, 8, &convolve8_ssse3), - make_tuple(8, 16, &convolve8_ssse3), - make_tuple(16, 16, &convolve8_ssse3), - make_tuple(32, 16, &convolve8_ssse3), - make_tuple(16, 32, &convolve8_ssse3), - make_tuple(32, 32, &convolve8_ssse3), - make_tuple(64, 32, &convolve8_ssse3), - make_tuple(32, 64, &convolve8_ssse3), - make_tuple(64, 64, &convolve8_ssse3))); +const ConvolveParam kArrayConvolve8_ssse3[] = { ALL_SIZES(convolve8_ssse3) }; +INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, + ::testing::ValuesIn(kArrayConvolve8_ssse3)); #endif #if HAVE_AVX2 && HAVE_SSSE3 @@ -1266,20 +1175,9 @@ const ConvolveFunctions convolve8_avx2( vpx_scaled_vert_c, vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0); -INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values( - make_tuple(4, 4, &convolve8_avx2), - make_tuple(8, 4, &convolve8_avx2), - make_tuple(4, 8, &convolve8_avx2), - make_tuple(8, 8, &convolve8_avx2), - make_tuple(8, 16, &convolve8_avx2), - make_tuple(16, 8, &convolve8_avx2), - make_tuple(16, 16, &convolve8_avx2), - make_tuple(32, 16, &convolve8_avx2), - make_tuple(16, 32, &convolve8_avx2), - make_tuple(32, 32, &convolve8_avx2), - make_tuple(64, 32, &convolve8_avx2), - make_tuple(32, 64, &convolve8_avx2), - make_tuple(64, 64, &convolve8_avx2))); +const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES(convolve8_avx2) }; +INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, + ::testing::ValuesIn(kArrayConvolve8_avx2)); #endif // HAVE_AVX2 && HAVE_SSSE3 #if HAVE_NEON @@ -1303,20 +1201,9 @@ const ConvolveFunctions convolve8_neon( vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0); #endif // HAVE_NEON_ASM 
-INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values( - make_tuple(4, 4, &convolve8_neon), - make_tuple(8, 4, &convolve8_neon), - make_tuple(4, 8, &convolve8_neon), - make_tuple(8, 8, &convolve8_neon), - make_tuple(16, 8, &convolve8_neon), - make_tuple(8, 16, &convolve8_neon), - make_tuple(16, 16, &convolve8_neon), - make_tuple(32, 16, &convolve8_neon), - make_tuple(16, 32, &convolve8_neon), - make_tuple(32, 32, &convolve8_neon), - make_tuple(64, 32, &convolve8_neon), - make_tuple(32, 64, &convolve8_neon), - make_tuple(64, 64, &convolve8_neon))); +const ConvolveParam kArrayConvolve8_neon[] = { ALL_SIZES(convolve8_neon) }; +INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, + ::testing::ValuesIn(kArrayConvolve8_neon)); #endif // HAVE_NEON #if HAVE_DSPR2 @@ -1329,21 +1216,10 @@ const ConvolveFunctions convolve8_dspr2( vpx_scaled_vert_c, vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0); -INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values( - make_tuple(4, 4, &convolve8_dspr2), - make_tuple(8, 4, &convolve8_dspr2), - make_tuple(4, 8, &convolve8_dspr2), - make_tuple(8, 8, &convolve8_dspr2), - make_tuple(16, 8, &convolve8_dspr2), - make_tuple(8, 16, &convolve8_dspr2), - make_tuple(16, 16, &convolve8_dspr2), - make_tuple(32, 16, &convolve8_dspr2), - make_tuple(16, 32, &convolve8_dspr2), - make_tuple(32, 32, &convolve8_dspr2), - make_tuple(64, 32, &convolve8_dspr2), - make_tuple(32, 64, &convolve8_dspr2), - make_tuple(64, 64, &convolve8_dspr2))); -#endif +const ConvolveParam kArrayConvolve8_dspr2[] = { ALL_SIZES(convolve8_dspr2) }; +INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, + ::testing::ValuesIn(kArrayConvolve8_dspr2)); +#endif // HAVE_DSPR2 #if HAVE_MSA const ConvolveFunctions convolve8_msa( @@ -1355,19 +1231,8 @@ const ConvolveFunctions convolve8_msa( vpx_scaled_vert_c, vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0); -INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest, ::testing::Values( - make_tuple(4, 4, &convolve8_msa), - 
make_tuple(8, 4, &convolve8_msa), - make_tuple(4, 8, &convolve8_msa), - make_tuple(8, 8, &convolve8_msa), - make_tuple(16, 8, &convolve8_msa), - make_tuple(8, 16, &convolve8_msa), - make_tuple(16, 16, &convolve8_msa), - make_tuple(32, 16, &convolve8_msa), - make_tuple(16, 32, &convolve8_msa), - make_tuple(32, 32, &convolve8_msa), - make_tuple(64, 32, &convolve8_msa), - make_tuple(32, 64, &convolve8_msa), - make_tuple(64, 64, &convolve8_msa))); +const ConvolveParam kArrayConvolve8_msa[] = { ALL_SIZES(convolve8_msa) }; +INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest, + ::testing::ValuesIn(kArrayConvolve8_msa)); #endif // HAVE_MSA } // namespace diff --git a/libvpx/test/cpu_speed_test.cc b/libvpx/test/cpu_speed_test.cc index 8baa2f9c8..2cad30fbb 100644 --- a/libvpx/test/cpu_speed_test.cc +++ b/libvpx/test/cpu_speed_test.cc @@ -26,7 +26,8 @@ class CpuSpeedTest : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), set_cpu_used_(GET_PARAM(2)), - min_psnr_(kMaxPSNR) {} + min_psnr_(kMaxPSNR), + tune_content_(VP9E_CONTENT_DEFAULT) {} virtual ~CpuSpeedTest() {} virtual void SetUp() { @@ -49,6 +50,7 @@ class CpuSpeedTest ::libvpx_test::Encoder *encoder) { if (video->frame() == 1) { encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_); + encoder->Control(VP9E_SET_TUNE_CONTENT, tune_content_); if (encoding_mode_ != ::libvpx_test::kRealTime) { encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1); encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7); @@ -66,6 +68,7 @@ class CpuSpeedTest ::libvpx_test::TestMode encoding_mode_; int set_cpu_used_; double min_psnr_; + int tune_content_; }; TEST_P(CpuSpeedTest, TestQ0) { @@ -74,7 +77,7 @@ TEST_P(CpuSpeedTest, TestQ0) { // the encoder to producing lots of big partitions which will likely // extend into the border and test the border condition. 
cfg_.rc_2pass_vbr_minsection_pct = 5; - cfg_.rc_2pass_vbr_minsection_pct = 2000; + cfg_.rc_2pass_vbr_maxsection_pct = 2000; cfg_.rc_target_bitrate = 400; cfg_.rc_max_quantizer = 0; cfg_.rc_min_quantizer = 0; @@ -92,7 +95,7 @@ TEST_P(CpuSpeedTest, TestScreencastQ0) { ::libvpx_test::Y4mVideoSource video("screendata.y4m", 0, 25); cfg_.g_timebase = video.timebase(); cfg_.rc_2pass_vbr_minsection_pct = 5; - cfg_.rc_2pass_vbr_minsection_pct = 2000; + cfg_.rc_2pass_vbr_maxsection_pct = 2000; cfg_.rc_target_bitrate = 400; cfg_.rc_max_quantizer = 0; cfg_.rc_min_quantizer = 0; @@ -103,13 +106,28 @@ TEST_P(CpuSpeedTest, TestScreencastQ0) { EXPECT_GE(min_psnr_, kMaxPSNR); } +TEST_P(CpuSpeedTest, TestTuneScreen) { + ::libvpx_test::Y4mVideoSource video("screendata.y4m", 0, 25); + cfg_.g_timebase = video.timebase(); + cfg_.rc_2pass_vbr_minsection_pct = 5; + cfg_.rc_2pass_vbr_minsection_pct = 2000; + cfg_.rc_target_bitrate = 2000; + cfg_.rc_max_quantizer = 63; + cfg_.rc_min_quantizer = 0; + tune_content_ = VP9E_CONTENT_SCREEN; + + init_flags_ = VPX_CODEC_USE_PSNR; + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + TEST_P(CpuSpeedTest, TestEncodeHighBitrate) { // Validate that this non multiple of 64 wide clip encodes and decodes // without a mismatch when passing in a very low max q. This pushes // the encoder to producing lots of big partitions which will likely // extend into the border and test the border condition. cfg_.rc_2pass_vbr_minsection_pct = 5; - cfg_.rc_2pass_vbr_minsection_pct = 2000; + cfg_.rc_2pass_vbr_maxsection_pct = 2000; cfg_.rc_target_bitrate = 12000; cfg_.rc_max_quantizer = 10; cfg_.rc_min_quantizer = 0; @@ -125,7 +143,7 @@ TEST_P(CpuSpeedTest, TestLowBitrate) { // when passing in a very high min q. This pushes the encoder to producing // lots of small partitions which might will test the other condition. 
cfg_.rc_2pass_vbr_minsection_pct = 5; - cfg_.rc_2pass_vbr_minsection_pct = 2000; + cfg_.rc_2pass_vbr_maxsection_pct = 2000; cfg_.rc_target_bitrate = 200; cfg_.rc_min_quantizer = 40; @@ -140,9 +158,4 @@ VP9_INSTANTIATE_TEST_CASE( ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood, ::libvpx_test::kRealTime), ::testing::Range(0, 9)); - -VP10_INSTANTIATE_TEST_CASE( - CpuSpeedTest, - ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood), - ::testing::Range(0, 3)); } // namespace diff --git a/libvpx/test/datarate_test.cc b/libvpx/test/datarate_test.cc index b6cae7903..220cbf3a3 100644 --- a/libvpx/test/datarate_test.cc +++ b/libvpx/test/datarate_test.cc @@ -90,7 +90,7 @@ class DatarateTestLarge : public ::libvpx_test::EncoderTest, << pkt->data.frame.pts; } - const size_t frame_size_in_bits = pkt->data.frame.sz * 8; + const int64_t frame_size_in_bits = pkt->data.frame.sz * 8; // Subtract from the buffer the bits associated with a played back frame. bits_in_buffer_model_ -= frame_size_in_bits; @@ -135,7 +135,7 @@ class DatarateTestLarge : public ::libvpx_test::EncoderTest, double duration_; double file_datarate_; double effective_datarate_; - size_t bits_in_last_frame_; + int64_t bits_in_last_frame_; int denoiser_on_; int denoiser_offon_test_; int denoiser_offon_period_; @@ -450,7 +450,28 @@ class DatarateTestVP9Large : public ::libvpx_test::EncoderTest, int denoiser_offon_period_; }; -// Check basic rate targeting, +// Check basic rate targeting for VBR mode. 
+TEST_P(DatarateTestVP9Large, BasicRateTargetingVBR) { + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.g_error_resilient = 0; + cfg_.rc_end_usage = VPX_VBR; + cfg_.g_lag_in_frames = 0; + + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 300); + for (int i = 400; i <= 800; i += 400) { + cfg_.rc_target_bitrate = i; + ResetModel(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.75) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.25) + << " The datarate for the file is greater than target by too much!"; + } +} + +// Check basic rate targeting for CBR, TEST_P(DatarateTestVP9Large, BasicRateTargeting) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; @@ -474,7 +495,7 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting) { } } -// Check basic rate targeting, +// Check basic rate targeting for CBR. TEST_P(DatarateTestVP9Large, BasicRateTargeting444) { ::libvpx_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 140); @@ -519,6 +540,9 @@ TEST_P(DatarateTestVP9Large, ChangingDropFrameThresh) { cfg_.rc_end_usage = VPX_CBR; cfg_.rc_target_bitrate = 200; cfg_.g_lag_in_frames = 0; + // TODO(marpan): Investigate datarate target failures with a smaller keyframe + // interval (128). 
+ cfg_.kf_max_dist = 9999; ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 140); @@ -538,7 +562,7 @@ TEST_P(DatarateTestVP9Large, ChangingDropFrameThresh) { << " The first dropped frame for drop_thresh " << i << " > first dropped frame for drop_thresh " << i - kDropFrameThreshTestStep; - ASSERT_GE(num_drops_, last_num_drops * 0.90) + ASSERT_GE(num_drops_, last_num_drops * 0.85) << " The number of dropped frames for drop_thresh " << i << " < number of dropped frames for drop_thresh " << i - kDropFrameThreshTestStep; @@ -770,14 +794,10 @@ class DatarateOnePassCbrSvc : public ::libvpx_test::EncoderTest, ::libvpx_test::Encoder *encoder) { if (video->frame() == 0) { int i; - for (i = 0; i < 2; ++i) { + for (i = 0; i < VPX_MAX_LAYERS; ++i) { svc_params_.max_quantizers[i] = 63; svc_params_.min_quantizers[i] = 0; } - svc_params_.scaling_factor_num[0] = 144; - svc_params_.scaling_factor_den[0] = 288; - svc_params_.scaling_factor_num[1] = 288; - svc_params_.scaling_factor_den[1] = 288; encoder->Control(VP9E_SET_SVC, 1); encoder->Control(VP9E_SET_SVC_PARAMETERS, &svc_params_); encoder->Control(VP8E_SET_CPUUSED, speed_setting_); @@ -814,8 +834,6 @@ class DatarateOnePassCbrSvc : public ::libvpx_test::EncoderTest, if (bits_total_) { const double file_size_in_kb = bits_total_ / 1000.; // bits per kilobit duration_ = (last_pts_ + 1) * timebase_; - effective_datarate_ = (bits_total_ - bits_in_last_frame_) / 1000.0 - / (cfg_.rc_buf_initial_sz / 1000.0 + duration_); file_datarate_ = file_size_in_kb / duration_; } } @@ -839,7 +857,6 @@ class DatarateOnePassCbrSvc : public ::libvpx_test::EncoderTest, int64_t bits_total_; double duration_; double file_datarate_; - double effective_datarate_; size_t bits_in_last_frame_; vpx_svc_extra_cfg_t svc_params_; int speed_setting_; @@ -850,8 +867,7 @@ static void assign_layer_bitrates(vpx_codec_enc_cfg_t *const enc_cfg, const vpx_svc_extra_cfg_t *svc_params, int spatial_layers, int temporal_layers, - int 
temporal_layering_mode, - unsigned int total_rate) { + int temporal_layering_mode) { int sl, spatial_layer_target; float total = 0; float alloc_ratio[VPX_MAX_LAYERS] = {0}; @@ -885,7 +901,7 @@ static void assign_layer_bitrates(vpx_codec_enc_cfg_t *const enc_cfg, // Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and // 3 temporal layers. Run CIF clip with 1 thread. -TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc) { +TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SpatialLayers) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; @@ -905,31 +921,71 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc) { svc_params_.scaling_factor_den[0] = 288; svc_params_.scaling_factor_num[1] = 288; svc_params_.scaling_factor_den[1] = 288; - // TODO(wonkap/marpan): No frame drop for now, we need to implement correct - // frame dropping for SVC. - cfg_.rc_dropframe_thresh = 0; + cfg_.rc_dropframe_thresh = 10; + cfg_.kf_max_dist = 9999; ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 200); // TODO(wonkap/marpan): Check that effective_datarate for each layer hits the - // layer target_bitrate. Also check if test can pass at lower bitrate (~200k). - for (int i = 400; i <= 800; i += 200) { + // layer target_bitrate. 
+ for (int i = 200; i <= 800; i += 200) { cfg_.rc_target_bitrate = i; ResetModel(); assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers, - cfg_.ts_number_layers, cfg_.temporal_layering_mode, - cfg_.rc_target_bitrate); + cfg_.ts_number_layers, cfg_.temporal_layering_mode); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.85) + ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.85) << " The datarate for the file exceeds the target by too much!"; ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15) << " The datarate for the file is lower than the target by too much!"; - EXPECT_EQ(GetMismatchFrames(), (unsigned int) 0); + EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames()); + } +} + +// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and 3 +// temporal layers. Run CIF clip with 1 thread, and few short key frame periods. +TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SpatialLayersSmallKf) { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = VPX_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.ss_number_layers = 2; + cfg_.ts_number_layers = 3; + cfg_.ts_rate_decimator[0] = 4; + cfg_.ts_rate_decimator[1] = 2; + cfg_.ts_rate_decimator[2] = 1; + cfg_.g_error_resilient = 1; + cfg_.g_threads = 1; + cfg_.temporal_layering_mode = 3; + svc_params_.scaling_factor_num[0] = 144; + svc_params_.scaling_factor_den[0] = 288; + svc_params_.scaling_factor_num[1] = 288; + svc_params_.scaling_factor_den[1] = 288; + cfg_.rc_dropframe_thresh = 10; + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 200); + cfg_.rc_target_bitrate = 400; + // For this 3 temporal layer case, pattern repeats every 4 frames, so choose + // 4 key neighboring key frame periods (so key frame will land on 0-2-1-2). 
+ for (int j = 64; j <= 67; j++) { + cfg_.kf_max_dist = j; + ResetModel(); + assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers, + cfg_.ts_number_layers, cfg_.temporal_layering_mode); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.85) + << " The datarate for the file exceeds the target by too much!"; + ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15) + << " The datarate for the file is lower than the target by too much!"; + EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames()); } } // Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and // 3 temporal layers. Run HD clip with 4 threads. -TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc4threads) { +TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SpatialLayers4threads) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; @@ -949,30 +1005,152 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc4threads) { svc_params_.scaling_factor_den[0] = 288; svc_params_.scaling_factor_num[1] = 288; svc_params_.scaling_factor_den[1] = 288; - // TODO(wonkap/marpan): No frame drop for now, we need to implement correct - // frame dropping for SVC. 
- cfg_.rc_dropframe_thresh = 0; + cfg_.rc_dropframe_thresh = 10; + cfg_.kf_max_dist = 9999; ::libvpx_test::I420VideoSource video("niklas_1280_720_30.y4m", 1280, 720, 30, 1, 0, 300); cfg_.rc_target_bitrate = 800; ResetModel(); assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers, - cfg_.ts_number_layers, cfg_.temporal_layering_mode, - cfg_.rc_target_bitrate); + cfg_.ts_number_layers, cfg_.temporal_layering_mode); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.85) + ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.85) << " The datarate for the file exceeds the target by too much!"; ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15) << " The datarate for the file is lower than the target by too much!"; - EXPECT_EQ(GetMismatchFrames(), (unsigned int) 0); + EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames()); +} + +// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and +// 3 temporal layers. Run CIF clip with 1 thread. 
+TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SpatialLayers) { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = VPX_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.ss_number_layers = 3; + cfg_.ts_number_layers = 3; + cfg_.ts_rate_decimator[0] = 4; + cfg_.ts_rate_decimator[1] = 2; + cfg_.ts_rate_decimator[2] = 1; + cfg_.g_error_resilient = 1; + cfg_.g_threads = 1; + cfg_.temporal_layering_mode = 3; + svc_params_.scaling_factor_num[0] = 72; + svc_params_.scaling_factor_den[0] = 288; + svc_params_.scaling_factor_num[1] = 144; + svc_params_.scaling_factor_den[1] = 288; + svc_params_.scaling_factor_num[2] = 288; + svc_params_.scaling_factor_den[2] = 288; + cfg_.rc_dropframe_thresh = 10; + cfg_.kf_max_dist = 9999; + ::libvpx_test::I420VideoSource video("niklas_1280_720_30.y4m", 1280, 720, + 30, 1, 0, 300); + cfg_.rc_target_bitrate = 800; + ResetModel(); + assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers, + cfg_.ts_number_layers, cfg_.temporal_layering_mode); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.85) + << " The datarate for the file exceeds the target by too much!"; + ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.22) + << " The datarate for the file is lower than the target by too much!"; + EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames()); +} + +// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and 3 +// temporal layers. Run CIF clip with 1 thread, and few short key frame periods. 
+TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SpatialLayersSmallKf) { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = VPX_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.ss_number_layers = 3; + cfg_.ts_number_layers = 3; + cfg_.ts_rate_decimator[0] = 4; + cfg_.ts_rate_decimator[1] = 2; + cfg_.ts_rate_decimator[2] = 1; + cfg_.g_error_resilient = 1; + cfg_.g_threads = 1; + cfg_.temporal_layering_mode = 3; + svc_params_.scaling_factor_num[0] = 72; + svc_params_.scaling_factor_den[0] = 288; + svc_params_.scaling_factor_num[1] = 144; + svc_params_.scaling_factor_den[1] = 288; + svc_params_.scaling_factor_num[2] = 288; + svc_params_.scaling_factor_den[2] = 288; + cfg_.rc_dropframe_thresh = 10; + ::libvpx_test::I420VideoSource video("niklas_1280_720_30.y4m", 1280, 720, + 30, 1, 0, 300); + cfg_.rc_target_bitrate = 800; + // For this 3 temporal layer case, pattern repeats every 4 frames, so choose + // 4 key neighboring key frame periods (so key frame will land on 0-2-1-2). + for (int j = 32; j <= 35; j++) { + cfg_.kf_max_dist = j; + ResetModel(); + assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers, + cfg_.ts_number_layers, cfg_.temporal_layering_mode); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.85) + << " The datarate for the file exceeds the target by too much!"; + ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.30) + << " The datarate for the file is lower than the target by too much!"; + EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames()); + } +} + +// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and +// 3 temporal layers. Run HD clip with 4 threads. 
+TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SpatialLayers4threads) { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = VPX_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.ss_number_layers = 3; + cfg_.ts_number_layers = 3; + cfg_.ts_rate_decimator[0] = 4; + cfg_.ts_rate_decimator[1] = 2; + cfg_.ts_rate_decimator[2] = 1; + cfg_.g_error_resilient = 1; + cfg_.g_threads = 4; + cfg_.temporal_layering_mode = 3; + svc_params_.scaling_factor_num[0] = 72; + svc_params_.scaling_factor_den[0] = 288; + svc_params_.scaling_factor_num[1] = 144; + svc_params_.scaling_factor_den[1] = 288; + svc_params_.scaling_factor_num[2] = 288; + svc_params_.scaling_factor_den[2] = 288; + cfg_.rc_dropframe_thresh = 10; + cfg_.kf_max_dist = 9999; + ::libvpx_test::I420VideoSource video("niklas_1280_720_30.y4m", 1280, 720, + 30, 1, 0, 300); + cfg_.rc_target_bitrate = 800; + ResetModel(); + assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers, + cfg_.ts_number_layers, cfg_.temporal_layering_mode); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.85) + << " The datarate for the file exceeds the target by too much!"; + ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.22) + << " The datarate for the file is lower than the target by too much!"; + EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames()); } VP8_INSTANTIATE_TEST_CASE(DatarateTestLarge, ALL_TEST_MODES); VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9Large, ::testing::Values(::libvpx_test::kOnePassGood, ::libvpx_test::kRealTime), - ::testing::Range(2, 7)); + ::testing::Range(2, 9)); VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvc, ::testing::Values(::libvpx_test::kRealTime), - ::testing::Range(5, 8)); + ::testing::Range(5, 9)); } // namespace diff --git a/libvpx/test/dct16x16_test.cc b/libvpx/test/dct16x16_test.cc index 332210daa..ddaf9395b 100644 --- 
a/libvpx/test/dct16x16_test.cc +++ b/libvpx/test/dct16x16_test.cc @@ -276,12 +276,12 @@ void idct16x16_12(const tran_low_t *in, uint8_t *out, int stride) { } void idct16x16_10_ref(const tran_low_t *in, uint8_t *out, int stride, - int tx_type) { + int /*tx_type*/) { idct16x16_10(in, out, stride); } void idct16x16_12_ref(const tran_low_t *in, uint8_t *out, int stride, - int tx_type) { + int /*tx_type*/) { idct16x16_12(in, out, stride); } @@ -293,6 +293,7 @@ void iht16x16_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) { vp9_highbd_iht16x16_256_add_c(in, out, stride, tx_type, 12); } +#if HAVE_SSE2 void idct16x16_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) { vpx_highbd_idct16x16_10_add_c(in, out, stride, 10); } @@ -301,7 +302,6 @@ void idct16x16_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) { vpx_highbd_idct16x16_10_add_c(in, out, stride, 12); } -#if HAVE_SSE2 void idct16x16_256_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) { vpx_highbd_idct16x16_256_add_sse2(in, out, stride, 10); } @@ -373,10 +373,10 @@ class Trans16x16TestBase { for (int j = 0; j < kNumCoeffs; ++j) { #if CONFIG_VP9_HIGHBITDEPTH - const uint32_t diff = + const int32_t diff = bit_depth_ == VPX_BITS_8 ? 
dst[j] - src[j] : dst16[j] - src16[j]; #else - const uint32_t diff = dst[j] - src[j]; + const int32_t diff = dst[j] - src[j]; #endif const uint32_t error = diff * diff; if (max_error < error) @@ -778,7 +778,7 @@ class InvTrans16x16DCT virtual void TearDown() { libvpx_test::ClearSystemState(); } protected: - void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {} + void RunFwdTxfm(int16_t * /*in*/, tran_low_t * /*out*/, int /*stride*/) {} void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) { inv_txfm_(out, dst, stride); } @@ -792,6 +792,67 @@ TEST_P(InvTrans16x16DCT, CompareReference) { CompareInvReference(ref_txfm_, thresh_); } +class PartialTrans16x16Test + : public ::testing::TestWithParam< + std::tr1::tuple<FdctFunc, vpx_bit_depth_t> > { + public: + virtual ~PartialTrans16x16Test() {} + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + bit_depth_ = GET_PARAM(1); + } + + virtual void TearDown() { libvpx_test::ClearSystemState(); } + + protected: + vpx_bit_depth_t bit_depth_; + FdctFunc fwd_txfm_; +}; + +TEST_P(PartialTrans16x16Test, Extremes) { +#if CONFIG_VP9_HIGHBITDEPTH + const int16_t maxval = + static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_)); +#else + const int16_t maxval = 255; +#endif + const int minval = -maxval; + DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]); + + for (int i = 0; i < kNumCoeffs; ++i) input[i] = maxval; + output[0] = 0; + ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16)); + EXPECT_EQ((maxval * kNumCoeffs) >> 1, output[0]); + + for (int i = 0; i < kNumCoeffs; ++i) input[i] = minval; + output[0] = 0; + ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16)); + EXPECT_EQ((minval * kNumCoeffs) >> 1, output[0]); +} + +TEST_P(PartialTrans16x16Test, Random) { +#if CONFIG_VP9_HIGHBITDEPTH + const int16_t maxval = + static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_)); +#else + const int16_t maxval = 255; +#endif + DECLARE_ALIGNED(16, int16_t, 
input[kNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]); + ACMRandom rnd(ACMRandom::DeterministicSeed()); + + int sum = 0; + for (int i = 0; i < kNumCoeffs; ++i) { + const int val = (i & 1) ? -rnd(maxval + 1) : rnd(maxval + 1); + input[i] = val; + sum += val; + } + output[0] = 0; + ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16)); + EXPECT_EQ(sum >> 1, output[0]); +} + using std::tr1::make_tuple; #if CONFIG_VP9_HIGHBITDEPTH @@ -824,6 +885,11 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8), make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8), make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8))); +INSTANTIATE_TEST_CASE_P( + C, PartialTrans16x16Test, + ::testing::Values(make_tuple(&vpx_highbd_fdct16x16_1_c, VPX_BITS_8), + make_tuple(&vpx_highbd_fdct16x16_1_c, VPX_BITS_10), + make_tuple(&vpx_highbd_fdct16x16_1_c, VPX_BITS_12))); #else INSTANTIATE_TEST_CASE_P( C, Trans16x16HT, @@ -832,6 +898,9 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8), make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8), make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8))); +INSTANTIATE_TEST_CASE_P(C, PartialTrans16x16Test, + ::testing::Values(make_tuple(&vpx_fdct16x16_1_c, + VPX_BITS_8))); #endif // CONFIG_VP9_HIGHBITDEPTH #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE @@ -859,6 +928,9 @@ INSTANTIATE_TEST_CASE_P( VPX_BITS_8), make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 3, VPX_BITS_8))); +INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans16x16Test, + ::testing::Values(make_tuple(&vpx_fdct16x16_1_sse2, + VPX_BITS_8))); #endif // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE @@ -896,6 +968,9 @@ INSTANTIATE_TEST_CASE_P( &idct16x16_10_add_12_sse2, 3167, VPX_BITS_12), make_tuple(&idct16x16_12, 
&idct16x16_256_add_12_sse2, 3167, VPX_BITS_12))); +INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans16x16Test, + ::testing::Values(make_tuple(&vpx_fdct16x16_1_sse2, + VPX_BITS_8))); #endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE #if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE @@ -912,5 +987,8 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 2, VPX_BITS_8), make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 3, VPX_BITS_8))); +INSTANTIATE_TEST_CASE_P(MSA, PartialTrans16x16Test, + ::testing::Values(make_tuple(&vpx_fdct16x16_1_msa, + VPX_BITS_8))); #endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE } // namespace diff --git a/libvpx/test/dct32x32_test.cc b/libvpx/test/dct32x32_test.cc index f7327b100..16d88255e 100644 --- a/libvpx/test/dct32x32_test.cc +++ b/libvpx/test/dct32x32_test.cc @@ -81,10 +81,6 @@ typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t> Trans32x32Param; #if CONFIG_VP9_HIGHBITDEPTH -void idct32x32_8(const tran_low_t *in, uint8_t *out, int stride) { - vpx_highbd_idct32x32_1024_add_c(in, out, stride, 8); -} - void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) { vpx_highbd_idct32x32_1024_add_c(in, out, stride, 10); } @@ -158,10 +154,10 @@ TEST_P(Trans32x32Test, AccuracyCheck) { for (int j = 0; j < kNumCoeffs; ++j) { #if CONFIG_VP9_HIGHBITDEPTH - const uint32_t diff = + const int32_t diff = bit_depth_ == VPX_BITS_8 ? 
dst[j] - src[j] : dst16[j] - src16[j]; #else - const uint32_t diff = dst[j] - src[j]; + const int32_t diff = dst[j] - src[j]; #endif const uint32_t error = diff * diff; if (max_error < error) @@ -309,6 +305,67 @@ TEST_P(Trans32x32Test, InverseAccuracy) { } } +class PartialTrans32x32Test + : public ::testing::TestWithParam< + std::tr1::tuple<FwdTxfmFunc, vpx_bit_depth_t> > { + public: + virtual ~PartialTrans32x32Test() {} + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + bit_depth_ = GET_PARAM(1); + } + + virtual void TearDown() { libvpx_test::ClearSystemState(); } + + protected: + vpx_bit_depth_t bit_depth_; + FwdTxfmFunc fwd_txfm_; +}; + +TEST_P(PartialTrans32x32Test, Extremes) { +#if CONFIG_VP9_HIGHBITDEPTH + const int16_t maxval = + static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_)); +#else + const int16_t maxval = 255; +#endif + const int minval = -maxval; + DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]); + + for (int i = 0; i < kNumCoeffs; ++i) input[i] = maxval; + output[0] = 0; + ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32)); + EXPECT_EQ((maxval * kNumCoeffs) >> 3, output[0]); + + for (int i = 0; i < kNumCoeffs; ++i) input[i] = minval; + output[0] = 0; + ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32)); + EXPECT_EQ((minval * kNumCoeffs) >> 3, output[0]); +} + +TEST_P(PartialTrans32x32Test, Random) { +#if CONFIG_VP9_HIGHBITDEPTH + const int16_t maxval = + static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_)); +#else + const int16_t maxval = 255; +#endif + DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]); + ACMRandom rnd(ACMRandom::DeterministicSeed()); + + int sum = 0; + for (int i = 0; i < kNumCoeffs; ++i) { + const int val = (i & 1) ? 
-rnd(maxval + 1) : rnd(maxval + 1); + input[i] = val; + sum += val; + } + output[0] = 0; + ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32)); + EXPECT_EQ(sum >> 3, output[0]); +} + using std::tr1::make_tuple; #if CONFIG_VP9_HIGHBITDEPTH @@ -327,6 +384,11 @@ INSTANTIATE_TEST_CASE_P( &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8), make_tuple(&vpx_fdct32x32_rd_c, &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8))); +INSTANTIATE_TEST_CASE_P( + C, PartialTrans32x32Test, + ::testing::Values(make_tuple(&vpx_highbd_fdct32x32_1_c, VPX_BITS_8), + make_tuple(&vpx_highbd_fdct32x32_1_c, VPX_BITS_10), + make_tuple(&vpx_highbd_fdct32x32_1_c, VPX_BITS_12))); #else INSTANTIATE_TEST_CASE_P( C, Trans32x32Test, @@ -335,9 +397,12 @@ INSTANTIATE_TEST_CASE_P( &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8), make_tuple(&vpx_fdct32x32_rd_c, &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8))); +INSTANTIATE_TEST_CASE_P(C, PartialTrans32x32Test, + ::testing::Values(make_tuple(&vpx_fdct32x32_1_c, + VPX_BITS_8))); #endif // CONFIG_VP9_HIGHBITDEPTH -#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE +#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE INSTANTIATE_TEST_CASE_P( NEON, Trans32x32Test, ::testing::Values( @@ -345,7 +410,7 @@ INSTANTIATE_TEST_CASE_P( &vpx_idct32x32_1024_add_neon, 0, VPX_BITS_8), make_tuple(&vpx_fdct32x32_rd_c, &vpx_idct32x32_1024_add_neon, 1, VPX_BITS_8))); -#endif // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE +#endif // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE INSTANTIATE_TEST_CASE_P( @@ -355,6 +420,9 @@ INSTANTIATE_TEST_CASE_P( &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8), make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8))); +INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test, + ::testing::Values(make_tuple(&vpx_fdct32x32_1_sse2, + VPX_BITS_8))); #endif // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && 
!CONFIG_EMULATE_HARDWARE #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE @@ -371,6 +439,9 @@ INSTANTIATE_TEST_CASE_P( VPX_BITS_8), make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8))); +INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test, + ::testing::Values(make_tuple(&vpx_fdct32x32_1_sse2, + VPX_BITS_8))); #endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE #if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE @@ -391,5 +462,8 @@ INSTANTIATE_TEST_CASE_P( &vpx_idct32x32_1024_add_msa, 0, VPX_BITS_8), make_tuple(&vpx_fdct32x32_rd_msa, &vpx_idct32x32_1024_add_msa, 1, VPX_BITS_8))); +INSTANTIATE_TEST_CASE_P(MSA, PartialTrans32x32Test, + ::testing::Values(make_tuple(&vpx_fdct32x32_1_msa, + VPX_BITS_8))); #endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE } // namespace diff --git a/libvpx/test/decode_api_test.cc b/libvpx/test/decode_api_test.cc index 318351b73..99b4db10f 100644 --- a/libvpx/test/decode_api_test.cc +++ b/libvpx/test/decode_api_test.cc @@ -27,9 +27,6 @@ TEST(DecodeAPI, InvalidParams) { #if CONFIG_VP9_DECODER &vpx_codec_vp9_dx_algo, #endif -#if CONFIG_VP10_DECODER - &vpx_codec_vp10_dx_algo, -#endif }; uint8_t buf[1] = {0}; vpx_codec_ctx_t dec; @@ -146,6 +143,40 @@ TEST(DecodeAPI, Vp9InvalidDecode) { TestVp9Controls(&dec); EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&dec)); } + +TEST(DecodeAPI, Vp9PeekSI) { + const vpx_codec_iface_t *const codec = &vpx_codec_vp9_dx_algo; + // The first 9 bytes are valid and the rest of the bytes are made up. Until + // size 10, this should return VPX_CODEC_UNSUP_BITSTREAM and after that it + // should return VPX_CODEC_CORRUPT_FRAME. 
+ const uint8_t data[32] = { + 0x85, 0xa4, 0xc1, 0xa1, 0x38, 0x81, 0xa3, 0x49, + 0x83, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + }; + + for (uint32_t data_sz = 1; data_sz <= 32; ++data_sz) { + // Verify behavior of vpx_codec_decode. vpx_codec_decode doesn't even get + // to decoder_peek_si_internal on frames of size < 8. + if (data_sz >= 8) { + vpx_codec_ctx_t dec; + EXPECT_EQ(VPX_CODEC_OK, vpx_codec_dec_init(&dec, codec, NULL, 0)); + EXPECT_EQ((data_sz < 10) ? + VPX_CODEC_UNSUP_BITSTREAM : VPX_CODEC_CORRUPT_FRAME, + vpx_codec_decode(&dec, data, data_sz, NULL, 0)); + vpx_codec_iter_t iter = NULL; + EXPECT_EQ(NULL, vpx_codec_get_frame(&dec, &iter)); + EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&dec)); + } + + // Verify behavior of vpx_codec_peek_stream_info. + vpx_codec_stream_info_t si; + si.sz = sizeof(si); + EXPECT_EQ((data_sz < 10) ? VPX_CODEC_UNSUP_BITSTREAM : VPX_CODEC_OK, + vpx_codec_peek_stream_info(codec, data, data_sz, &si)); + } +} #endif // CONFIG_VP9_DECODER } // namespace diff --git a/libvpx/test/encode_api_test.cc b/libvpx/test/encode_api_test.cc new file mode 100644 index 000000000..94afddeb6 --- /dev/null +++ b/libvpx/test/encode_api_test.cc @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2016 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "third_party/googletest/src/include/gtest/gtest.h" + +#include "./vpx_config.h" +#include "vpx/vp8cx.h" +#include "vpx/vpx_encoder.h" + +namespace { + +#define NELEMENTS(x) static_cast<int>(sizeof(x) / sizeof(x[0])) + +TEST(EncodeAPI, InvalidParams) { + static const vpx_codec_iface_t *kCodecs[] = { +#if CONFIG_VP8_ENCODER + &vpx_codec_vp8_cx_algo, +#endif +#if CONFIG_VP9_ENCODER + &vpx_codec_vp9_cx_algo, +#endif + }; + uint8_t buf[1] = {0}; + vpx_image_t img; + vpx_codec_ctx_t enc; + vpx_codec_enc_cfg_t cfg; + + EXPECT_EQ(&img, vpx_img_wrap(&img, VPX_IMG_FMT_I420, 1, 1, 1, buf)); + + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_enc_init(NULL, NULL, NULL, 0)); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_enc_init(&enc, NULL, NULL, 0)); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_encode(NULL, NULL, 0, 0, 0, 0)); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_encode(NULL, &img, 0, 0, 0, 0)); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_destroy(NULL)); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, + vpx_codec_enc_config_default(NULL, NULL, 0)); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, + vpx_codec_enc_config_default(NULL, &cfg, 0)); + EXPECT_TRUE(vpx_codec_error(NULL) != NULL); + + for (int i = 0; i < NELEMENTS(kCodecs); ++i) { + SCOPED_TRACE(vpx_codec_iface_name(kCodecs[i])); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, + vpx_codec_enc_init(NULL, kCodecs[i], NULL, 0)); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, + vpx_codec_enc_init(&enc, kCodecs[i], NULL, 0)); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, + vpx_codec_enc_config_default(kCodecs[i], &cfg, 1)); + + EXPECT_EQ(VPX_CODEC_OK, vpx_codec_enc_config_default(kCodecs[i], &cfg, 0)); + EXPECT_EQ(VPX_CODEC_OK, vpx_codec_enc_init(&enc, kCodecs[i], &cfg, 0)); + EXPECT_EQ(VPX_CODEC_OK, vpx_codec_encode(&enc, NULL, 0, 0, 0, 0)); + + EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&enc)); + } +} + +} // namespace diff --git a/libvpx/test/encode_test_driver.cc b/libvpx/test/encode_test_driver.cc index 128436ee9..b8c737187 100644 --- 
a/libvpx/test/encode_test_driver.cc +++ b/libvpx/test/encode_test_driver.cc @@ -43,15 +43,6 @@ void Encoder::InitEncoder(VideoSource *video) { ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); } else #endif -#if CONFIG_VP10_ENCODER - if (CodecInterface() == &vpx_codec_vp10_cx_algo) { - // Default to 1 tile column for VP10. - const int log2_tile_columns = 0; - res = vpx_codec_control_(&encoder_, VP9E_SET_TILE_COLUMNS, - log2_tile_columns); - ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); - } else -#endif { #if CONFIG_VP8_ENCODER ASSERT_EQ(&vpx_codec_vp8_cx_algo, CodecInterface()) diff --git a/libvpx/test/encode_test_driver.h b/libvpx/test/encode_test_driver.h index 6d0a72f98..d14ddc7d7 100644 --- a/libvpx/test/encode_test_driver.h +++ b/libvpx/test/encode_test_driver.h @@ -16,7 +16,7 @@ #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_config.h" -#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER +#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER #include "vpx/vp8cx.h" #endif #include "vpx/vpx_encoder.h" @@ -143,7 +143,7 @@ class Encoder { const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); } -#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER +#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER void Control(int ctrl_id, vpx_active_map_t *arg) { const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); diff --git a/libvpx/test/error_resilience_test.cc b/libvpx/test/error_resilience_test.cc index 9a2ad2f35..00a095ce8 100644 --- a/libvpx/test/error_resilience_test.cc +++ b/libvpx/test/error_resilience_test.cc @@ -100,7 +100,7 @@ class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest, } virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video, - ::libvpx_test::Encoder *encoder) { + ::libvpx_test::Encoder * /*encoder*/) { frame_flags_ &= ~(VP8_EFLAG_NO_UPD_LAST | 
VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF); @@ -596,7 +596,4 @@ VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLargeCodecControls, ONE_PASS_TEST_MODES); VP9_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES, ::testing::Values(true)); -// SVC-related tests don't run for VP10 since SVC is not supported. -VP10_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES, - ::testing::Values(false)); } // namespace diff --git a/libvpx/test/external_frame_buffer_test.cc b/libvpx/test/external_frame_buffer_test.cc index d02dca2be..2570f44eb 100644 --- a/libvpx/test/external_frame_buffer_test.cc +++ b/libvpx/test/external_frame_buffer_test.cc @@ -24,7 +24,6 @@ namespace { const int kVideoNameParam = 1; -const char kVP9TestFile[] = "vp90-2-02-size-lf-1920x1080.webm"; struct ExternalFrameBuffer { uint8_t *data; @@ -155,6 +154,8 @@ class ExternalFrameBufferList { ExternalFrameBuffer *ext_fb_list_; }; +#if CONFIG_WEBM_IO + // Callback used by libvpx to request the application to return a frame // buffer of at least |min_size| in bytes. int get_vp9_frame_buffer(void *user_priv, size_t min_size, @@ -197,6 +198,8 @@ int do_not_release_vp9_frame_buffer(void *user_priv, return 0; } +#endif // CONFIG_WEBM_IO + // Class for testing passing in external frame buffers to libvpx. class ExternalFrameBufferMD5Test : public ::libvpx_test::DecoderTest, @@ -278,6 +281,8 @@ class ExternalFrameBufferMD5Test }; #if CONFIG_WEBM_IO +const char kVP9TestFile[] = "vp90-2-02-size-lf-1920x1080.webm"; + // Class for testing passing in external frame buffers to libvpx. 
class ExternalFrameBufferTest : public ::testing::Test { protected: diff --git a/libvpx/test/fdct4x4_test.cc b/libvpx/test/fdct4x4_test.cc index 3f6b738e5..735cccf8d 100644 --- a/libvpx/test/fdct4x4_test.cc +++ b/libvpx/test/fdct4x4_test.cc @@ -40,7 +40,7 @@ typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct4x4Param; typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht4x4Param; void fdct4x4_ref(const int16_t *in, tran_low_t *out, int stride, - int tx_type) { + int /*tx_type*/) { vpx_fdct4x4_c(in, out, stride); } @@ -49,7 +49,7 @@ void fht4x4_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) { } void fwht4x4_ref(const int16_t *in, tran_low_t *out, int stride, - int tx_type) { + int /*tx_type*/) { vp9_fwht4x4_c(in, out, stride); } @@ -141,11 +141,11 @@ class Trans4x4TestBase { for (int j = 0; j < kNumCoeffs; ++j) { #if CONFIG_VP9_HIGHBITDEPTH - const uint32_t diff = + const int diff = bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j]; #else ASSERT_EQ(VPX_BITS_8, bit_depth_); - const uint32_t diff = dst[j] - src[j]; + const int diff = dst[j] - src[j]; #endif const uint32_t error = diff * diff; if (max_error < error) @@ -258,10 +258,10 @@ class Trans4x4TestBase { for (int j = 0; j < kNumCoeffs; ++j) { #if CONFIG_VP9_HIGHBITDEPTH - const uint32_t diff = + const int diff = bit_depth_ == VPX_BITS_8 ? 
dst[j] - src[j] : dst16[j] - src16[j]; #else - const uint32_t diff = dst[j] - src[j]; + const int diff = dst[j] - src[j]; #endif const uint32_t error = diff * diff; EXPECT_GE(static_cast<uint32_t>(limit), error) @@ -487,19 +487,11 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3, VPX_BITS_8))); #endif // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE -#if CONFIG_USE_X86INC && HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH && \ - !CONFIG_EMULATE_HARDWARE -INSTANTIATE_TEST_CASE_P( - MMX, Trans4x4WHT, - ::testing::Values( - make_tuple(&vp9_fwht4x4_mmx, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8))); -#endif - -#if CONFIG_USE_X86INC && HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && \ - !CONFIG_EMULATE_HARDWARE +#if CONFIG_USE_X86INC && HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE INSTANTIATE_TEST_CASE_P( SSE2, Trans4x4WHT, ::testing::Values( + make_tuple(&vp9_fwht4x4_sse2, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8), make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_sse2, 0, VPX_BITS_8))); #endif diff --git a/libvpx/test/fdct8x8_test.cc b/libvpx/test/fdct8x8_test.cc index c0deaf406..29f215817 100644 --- a/libvpx/test/fdct8x8_test.cc +++ b/libvpx/test/fdct8x8_test.cc @@ -47,7 +47,7 @@ typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param; typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param; typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param; -void reference_8x8_dct_1d(const double in[8], double out[8], int stride) { +void reference_8x8_dct_1d(const double in[8], double out[8]) { const double kInvSqrt2 = 0.707106781186547524400844362104; for (int k = 0; k < 8; k++) { out[k] = 0.0; @@ -65,7 +65,7 @@ void reference_8x8_dct_2d(const int16_t input[kNumCoeffs], double temp_in[8], temp_out[8]; for (int j = 0; j < 8; ++j) temp_in[j] = input[j*8 + i]; - reference_8x8_dct_1d(temp_in, temp_out, 1); + reference_8x8_dct_1d(temp_in, temp_out); for (int j = 0; j < 8; ++j) output[j * 8 + i] = 
temp_out[j]; } @@ -74,7 +74,7 @@ void reference_8x8_dct_2d(const int16_t input[kNumCoeffs], double temp_in[8], temp_out[8]; for (int j = 0; j < 8; ++j) temp_in[j] = output[j + i*8]; - reference_8x8_dct_1d(temp_in, temp_out, 1); + reference_8x8_dct_1d(temp_in, temp_out); // Scale by some magic number for (int j = 0; j < 8; ++j) output[j + i * 8] = temp_out[j] * 2; @@ -82,7 +82,8 @@ void reference_8x8_dct_2d(const int16_t input[kNumCoeffs], } -void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) { +void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride, + int /*tx_type*/) { vpx_fdct8x8_c(in, out, stride); } @@ -107,6 +108,8 @@ void iht8x8_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) { vp9_highbd_iht8x8_64_add_c(in, out, stride, tx_type, 12); } +#if HAVE_SSE2 + void idct8x8_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) { vpx_highbd_idct8x8_10_add_c(in, out, stride, 10); } @@ -115,7 +118,6 @@ void idct8x8_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) { vpx_highbd_idct8x8_10_add_c(in, out, stride, 12); } -#if HAVE_SSE2 void idct8x8_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) { vpx_highbd_idct8x8_10_add_sse2(in, out, stride, 10); } @@ -423,10 +425,10 @@ class FwdTrans8x8TestBase { for (int j = 0; j < kNumCoeffs; ++j) { #if CONFIG_VP9_HIGHBITDEPTH - const uint32_t diff = + const int diff = bit_depth_ == VPX_BITS_8 ? 
dst[j] - src[j] : dst16[j] - src16[j]; #else - const uint32_t diff = dst[j] - src[j]; + const int diff = dst[j] - src[j]; #endif const uint32_t error = diff * diff; EXPECT_GE(1u << 2 * (bit_depth_ - 8), error) @@ -456,7 +458,7 @@ class FwdTrans8x8TestBase { coeff_r[j] = static_cast<tran_low_t>(round(out_r[j])); for (int j = 0; j < kNumCoeffs; ++j) { - const uint32_t diff = coeff[j] - coeff_r[j]; + const int32_t diff = coeff[j] - coeff_r[j]; const uint32_t error = diff * diff; EXPECT_GE(9u << 2 * (bit_depth_ - 8), error) << "Error: 8x8 DCT has error " << error @@ -509,10 +511,10 @@ void CompareInvReference(IdctFunc ref_txfm, int thresh) { for (int j = 0; j < kNumCoeffs; ++j) { #if CONFIG_VP9_HIGHBITDEPTH - const uint32_t diff = + const int diff = bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j]; #else - const uint32_t diff = dst[j] - ref[j]; + const int diff = dst[j] - ref[j]; #endif const uint32_t error = diff * diff; EXPECT_EQ(0u, error) @@ -641,7 +643,7 @@ class InvTrans8x8DCT void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) { inv_txfm_(out, dst, stride); } - void RunFwdTxfm(int16_t *out, tran_low_t *dst, int stride) {} + void RunFwdTxfm(int16_t * /*out*/, tran_low_t * /*dst*/, int /*stride*/) {} IdctFunc ref_txfm_; IdctFunc inv_txfm_; diff --git a/libvpx/test/hadamard_test.cc b/libvpx/test/hadamard_test.cc new file mode 100644 index 000000000..7a5bd5b4c --- /dev/null +++ b/libvpx/test/hadamard_test.cc @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2016 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <algorithm> + +#include "third_party/googletest/src/include/gtest/gtest.h" + +#include "./vpx_dsp_rtcd.h" + +#include "test/acm_random.h" +#include "test/register_state_check.h" + +namespace { + +using ::libvpx_test::ACMRandom; + +typedef void (*HadamardFunc)(const int16_t *a, int a_stride, int16_t *b); + +void hadamard_loop(const int16_t *a, int a_stride, int16_t *out) { + int16_t b[8]; + for (int i = 0; i < 8; i += 2) { + b[i + 0] = a[i * a_stride] + a[(i + 1) * a_stride]; + b[i + 1] = a[i * a_stride] - a[(i + 1) * a_stride]; + } + int16_t c[8]; + for (int i = 0; i < 8; i += 4) { + c[i + 0] = b[i + 0] + b[i + 2]; + c[i + 1] = b[i + 1] + b[i + 3]; + c[i + 2] = b[i + 0] - b[i + 2]; + c[i + 3] = b[i + 1] - b[i + 3]; + } + out[0] = c[0] + c[4]; + out[7] = c[1] + c[5]; + out[3] = c[2] + c[6]; + out[4] = c[3] + c[7]; + out[2] = c[0] - c[4]; + out[6] = c[1] - c[5]; + out[1] = c[2] - c[6]; + out[5] = c[3] - c[7]; +} + +void reference_hadamard8x8(const int16_t *a, int a_stride, int16_t *b) { + int16_t buf[64]; + for (int i = 0; i < 8; ++i) { + hadamard_loop(a + i, a_stride, buf + i * 8); + } + + for (int i = 0; i < 8; ++i) { + hadamard_loop(buf + i, 8, b + i * 8); + } +} + +void reference_hadamard16x16(const int16_t *a, int a_stride, int16_t *b) { + /* The source is a 16x16 block. The destination is rearranged to 8x32. + * Input is 9 bit. */ + reference_hadamard8x8(a + 0 + 0 * a_stride, a_stride, b + 0); + reference_hadamard8x8(a + 8 + 0 * a_stride, a_stride, b + 64); + reference_hadamard8x8(a + 0 + 8 * a_stride, a_stride, b + 128); + reference_hadamard8x8(a + 8 + 8 * a_stride, a_stride, b + 192); + + /* Overlay the 8x8 blocks and combine. */ + for (int i = 0; i < 64; ++i) { + /* 8x8 steps the range up to 15 bits. */ + const int16_t a0 = b[0]; + const int16_t a1 = b[64]; + const int16_t a2 = b[128]; + const int16_t a3 = b[192]; + + /* Prevent the result from escaping int16_t. 
*/ + const int16_t b0 = (a0 + a1) >> 1; + const int16_t b1 = (a0 - a1) >> 1; + const int16_t b2 = (a2 + a3) >> 1; + const int16_t b3 = (a2 - a3) >> 1; + + /* Store a 16 bit value. */ + b[ 0] = b0 + b2; + b[ 64] = b1 + b3; + b[128] = b0 - b2; + b[192] = b1 - b3; + + ++b; + } +} + +class HadamardTestBase : public ::testing::TestWithParam<HadamardFunc> { + public: + virtual void SetUp() { + h_func_ = GetParam(); + rnd_.Reset(ACMRandom::DeterministicSeed()); + } + + protected: + HadamardFunc h_func_; + ACMRandom rnd_; +}; + +class Hadamard8x8Test : public HadamardTestBase {}; + +TEST_P(Hadamard8x8Test, CompareReferenceRandom) { + DECLARE_ALIGNED(16, int16_t, a[64]); + DECLARE_ALIGNED(16, int16_t, b[64]); + int16_t b_ref[64]; + for (int i = 0; i < 64; ++i) { + a[i] = rnd_.Rand9Signed(); + } + memset(b, 0, sizeof(b)); + memset(b_ref, 0, sizeof(b_ref)); + + reference_hadamard8x8(a, 8, b_ref); + ASM_REGISTER_STATE_CHECK(h_func_(a, 8, b)); + + // The order of the output is not important. Sort before checking. + std::sort(b, b + 64); + std::sort(b_ref, b_ref + 64); + EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b))); +} + +TEST_P(Hadamard8x8Test, VaryStride) { + DECLARE_ALIGNED(16, int16_t, a[64 * 8]); + DECLARE_ALIGNED(16, int16_t, b[64]); + int16_t b_ref[64]; + for (int i = 0; i < 64 * 8; ++i) { + a[i] = rnd_.Rand9Signed(); + } + + for (int i = 8; i < 64; i += 8) { + memset(b, 0, sizeof(b)); + memset(b_ref, 0, sizeof(b_ref)); + + reference_hadamard8x8(a, i, b_ref); + ASM_REGISTER_STATE_CHECK(h_func_(a, i, b)); + + // The order of the output is not important. Sort before checking. 
+ std::sort(b, b + 64); + std::sort(b_ref, b_ref + 64); + EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b))); + } +} + +INSTANTIATE_TEST_CASE_P(C, Hadamard8x8Test, + ::testing::Values(&vpx_hadamard_8x8_c)); + +#if HAVE_SSE2 +INSTANTIATE_TEST_CASE_P(SSE2, Hadamard8x8Test, + ::testing::Values(&vpx_hadamard_8x8_sse2)); +#endif // HAVE_SSE2 + +#if HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64 +INSTANTIATE_TEST_CASE_P(SSSE3, Hadamard8x8Test, + ::testing::Values(&vpx_hadamard_8x8_ssse3)); +#endif // HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64 + +#if HAVE_NEON +INSTANTIATE_TEST_CASE_P(NEON, Hadamard8x8Test, + ::testing::Values(&vpx_hadamard_8x8_neon)); +#endif // HAVE_NEON + +class Hadamard16x16Test : public HadamardTestBase {}; + +TEST_P(Hadamard16x16Test, CompareReferenceRandom) { + DECLARE_ALIGNED(16, int16_t, a[16 * 16]); + DECLARE_ALIGNED(16, int16_t, b[16 * 16]); + int16_t b_ref[16 * 16]; + for (int i = 0; i < 16 * 16; ++i) { + a[i] = rnd_.Rand9Signed(); + } + memset(b, 0, sizeof(b)); + memset(b_ref, 0, sizeof(b_ref)); + + reference_hadamard16x16(a, 16, b_ref); + ASM_REGISTER_STATE_CHECK(h_func_(a, 16, b)); + + // The order of the output is not important. Sort before checking. + std::sort(b, b + 16 * 16); + std::sort(b_ref, b_ref + 16 * 16); + EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b))); +} + +TEST_P(Hadamard16x16Test, VaryStride) { + DECLARE_ALIGNED(16, int16_t, a[16 * 16 * 8]); + DECLARE_ALIGNED(16, int16_t, b[16 * 16]); + int16_t b_ref[16 * 16]; + for (int i = 0; i < 16 * 16 * 8; ++i) { + a[i] = rnd_.Rand9Signed(); + } + + for (int i = 8; i < 64; i += 8) { + memset(b, 0, sizeof(b)); + memset(b_ref, 0, sizeof(b_ref)); + + reference_hadamard16x16(a, i, b_ref); + ASM_REGISTER_STATE_CHECK(h_func_(a, i, b)); + + // The order of the output is not important. Sort before checking. 
+ std::sort(b, b + 16 * 16); + std::sort(b_ref, b_ref + 16 * 16); + EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b))); + } +} + +INSTANTIATE_TEST_CASE_P(C, Hadamard16x16Test, + ::testing::Values(&vpx_hadamard_16x16_c)); + +#if HAVE_SSE2 +INSTANTIATE_TEST_CASE_P(SSE2, Hadamard16x16Test, + ::testing::Values(&vpx_hadamard_16x16_sse2)); +#endif // HAVE_SSE2 + +#if HAVE_NEON +INSTANTIATE_TEST_CASE_P(NEON, Hadamard16x16Test, + ::testing::Values(&vpx_hadamard_16x16_neon)); +#endif // HAVE_NEON +} // namespace diff --git a/libvpx/test/level_test.cc b/libvpx/test/level_test.cc new file mode 100644 index 000000000..62d0247d4 --- /dev/null +++ b/libvpx/test/level_test.cc @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2016 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" + +namespace { +class LevelTest + : public ::libvpx_test::EncoderTest, + public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> { + protected: + LevelTest() + : EncoderTest(GET_PARAM(0)), + encoding_mode_(GET_PARAM(1)), + cpu_used_(GET_PARAM(2)), + min_gf_internal_(24), + target_level_(0), + level_(0) {} + virtual ~LevelTest() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(encoding_mode_); + if (encoding_mode_ != ::libvpx_test::kRealTime) { + cfg_.g_lag_in_frames = 25; + cfg_.rc_end_usage = VPX_VBR; + } else { + cfg_.g_lag_in_frames = 0; + cfg_.rc_end_usage = VPX_CBR; + } + cfg_.rc_2pass_vbr_minsection_pct = 5; + cfg_.rc_2pass_vbr_maxsection_pct = 2000; + cfg_.rc_target_bitrate = 400; + cfg_.rc_max_quantizer = 63; + cfg_.rc_min_quantizer = 0; + } + + virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, + ::libvpx_test::Encoder *encoder) { + if (video->frame() == 0) { + encoder->Control(VP8E_SET_CPUUSED, cpu_used_); + encoder->Control(VP9E_SET_TARGET_LEVEL, target_level_); + encoder->Control(VP9E_SET_MIN_GF_INTERVAL, min_gf_internal_); + if (encoding_mode_ != ::libvpx_test::kRealTime) { + encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1); + encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7); + encoder->Control(VP8E_SET_ARNR_STRENGTH, 5); + encoder->Control(VP8E_SET_ARNR_TYPE, 3); + } + } + encoder->Control(VP9E_GET_LEVEL, &level_); + ASSERT_LE(level_, 51); + ASSERT_GE(level_, 0); + } + + ::libvpx_test::TestMode encoding_mode_; + int cpu_used_; + int min_gf_internal_; + int target_level_; + int level_; +}; + +// Test for keeping level stats only +TEST_P(LevelTest, TestTargetLevel0) { + ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, + 40); + target_level_ = 0; + min_gf_internal_ = 4; + 
ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_EQ(11, level_); + + cfg_.rc_target_bitrate = 1600; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_EQ(20, level_); +} + +// Test for level control being turned off +TEST_P(LevelTest, TestTargetLevel255) { + ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, + 30); + target_level_ = 255; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +TEST_P(LevelTest, TestTargetLevelApi) { + ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, 1); + static const vpx_codec_iface_t *codec = &vpx_codec_vp9_cx_algo; + vpx_codec_ctx_t enc; + vpx_codec_enc_cfg_t cfg; + EXPECT_EQ(VPX_CODEC_OK, vpx_codec_enc_config_default(codec, &cfg, 0)); + EXPECT_EQ(VPX_CODEC_OK, vpx_codec_enc_init(&enc, codec, &cfg, 0)); + for (int level = 0; level <= 256; ++level) { + if (level == 10 || level == 11 || level == 20 || level == 21 || + level == 30 || level == 31 || level == 40 || level == 41 || + level == 50 || level == 51 || level == 52 || level == 60 || + level == 61 || level == 62 || level == 0 || level == 255) + EXPECT_EQ(VPX_CODEC_OK, + vpx_codec_control(&enc, VP9E_SET_TARGET_LEVEL, level)); + else + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, + vpx_codec_control(&enc, VP9E_SET_TARGET_LEVEL, level)); + } + EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&enc)); +} + +VP9_INSTANTIATE_TEST_CASE(LevelTest, + ::testing::Values(::libvpx_test::kTwoPassGood, + ::libvpx_test::kOnePassGood), + ::testing::Range(0, 9)); +} // namespace diff --git a/libvpx/test/lpf_8_test.cc b/libvpx/test/lpf_8_test.cc index 0bf6b0c23..94646e4ff 100644 --- a/libvpx/test/lpf_8_test.cc +++ b/libvpx/test/lpf_8_test.cc @@ -37,120 +37,23 @@ const int number_of_iterations = 10000; #if CONFIG_VP9_HIGHBITDEPTH typedef void (*loop_op_t)(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, - int count, int bd); + int bd); typedef void (*dual_loop_op_t)(uint16_t *s, int p, const uint8_t *blimit0, const uint8_t 
*limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); #else typedef void (*loop_op_t)(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count); + const uint8_t *limit, const uint8_t *thresh); typedef void (*dual_loop_op_t)(uint8_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); #endif // CONFIG_VP9_HIGHBITDEPTH -typedef std::tr1::tuple<loop_op_t, loop_op_t, int, int> loop8_param_t; +typedef std::tr1::tuple<loop_op_t, loop_op_t, int> loop8_param_t; typedef std::tr1::tuple<dual_loop_op_t, dual_loop_op_t, int> dualloop8_param_t; -#if HAVE_SSE2 -#if CONFIG_VP9_HIGHBITDEPTH -void wrapper_vertical_16_sse2(uint16_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count, int bd) { - vpx_highbd_lpf_vertical_16_sse2(s, p, blimit, limit, thresh, bd); -} - -void wrapper_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count, int bd) { - vpx_highbd_lpf_vertical_16_c(s, p, blimit, limit, thresh, bd); -} - -void wrapper_vertical_16_dual_sse2(uint16_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count, int bd) { - vpx_highbd_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh, bd); -} - -void wrapper_vertical_16_dual_c(uint16_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count, int bd) { - vpx_highbd_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh, bd); -} -#else -void wrapper_vertical_16_sse2(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { - vpx_lpf_vertical_16_sse2(s, p, blimit, limit, thresh); -} - -void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { - 
vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh); -} - -void wrapper_vertical_16_dual_sse2(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { - vpx_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh); -} - -void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { - vpx_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh); -} -#endif // CONFIG_VP9_HIGHBITDEPTH -#endif // HAVE_SSE2 - -#if HAVE_NEON_ASM -#if CONFIG_VP9_HIGHBITDEPTH -// No neon high bitdepth functions. -#else -void wrapper_vertical_16_neon(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { - vpx_lpf_vertical_16_neon(s, p, blimit, limit, thresh); -} - -void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { - vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh); -} - -void wrapper_vertical_16_dual_neon(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { - vpx_lpf_vertical_16_dual_neon(s, p, blimit, limit, thresh); -} - -void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { - vpx_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh); -} -#endif // CONFIG_VP9_HIGHBITDEPTH -#endif // HAVE_NEON_ASM - -#if HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH) -void wrapper_vertical_16_msa(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { - vpx_lpf_vertical_16_msa(s, p, blimit, limit, thresh); -} - -void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { - vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh); -} -#endif // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH) - class Loop8Test6Param : public 
::testing::TestWithParam<loop8_param_t> { public: virtual ~Loop8Test6Param() {} @@ -158,7 +61,6 @@ class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> { loopfilter_op_ = GET_PARAM(0); ref_loopfilter_op_ = GET_PARAM(1); bit_depth_ = GET_PARAM(2); - count_ = GET_PARAM(3); mask_ = (1 << bit_depth_) - 1; } @@ -166,7 +68,6 @@ class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> { protected: int bit_depth_; - int count_; int mask_; loop_op_t loopfilter_op_; loop_op_t ref_loopfilter_op_; @@ -253,13 +154,13 @@ TEST_P(Loop8Test6Param, OperationCheck) { ref_s[j] = s[j]; } #if CONFIG_VP9_HIGHBITDEPTH - ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count_, bd); + ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, bd); ASM_REGISTER_STATE_CHECK( - loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_, bd)); + loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bd)); #else - ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh, count_); + ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh); ASM_REGISTER_STATE_CHECK( - loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_)); + loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh)); #endif // CONFIG_VP9_HIGHBITDEPTH for (int j = 0; j < kNumCoeffs; ++j) { @@ -325,13 +226,13 @@ TEST_P(Loop8Test6Param, ValueCheck) { ref_s[j] = s[j]; } #if CONFIG_VP9_HIGHBITDEPTH - ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count_, bd); + ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, bd); ASM_REGISTER_STATE_CHECK( - loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_, bd)); + loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bd)); #else - ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh, count_); + ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh); ASM_REGISTER_STATE_CHECK( - loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_)); + loopfilter_op_(s + 8 + p * 8, p, 
blimit, limit, thresh)); #endif // CONFIG_VP9_HIGHBITDEPTH for (int j = 0; j < kNumCoeffs; ++j) { err_count += ref_s[j] != s[j]; @@ -535,64 +436,73 @@ INSTANTIATE_TEST_CASE_P( SSE2, Loop8Test6Param, ::testing::Values( make_tuple(&vpx_highbd_lpf_horizontal_4_sse2, - &vpx_highbd_lpf_horizontal_4_c, 8, 1), + &vpx_highbd_lpf_horizontal_4_c, 8), make_tuple(&vpx_highbd_lpf_vertical_4_sse2, - &vpx_highbd_lpf_vertical_4_c, 8, 1), + &vpx_highbd_lpf_vertical_4_c, 8), make_tuple(&vpx_highbd_lpf_horizontal_8_sse2, - &vpx_highbd_lpf_horizontal_8_c, 8, 1), - make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, - &vpx_highbd_lpf_horizontal_16_c, 8, 1), - make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, - &vpx_highbd_lpf_horizontal_16_c, 8, 2), + &vpx_highbd_lpf_horizontal_8_c, 8), + make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2, + &vpx_highbd_lpf_horizontal_edge_8_c, 8), + make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2, + &vpx_highbd_lpf_horizontal_edge_16_c, 8), make_tuple(&vpx_highbd_lpf_vertical_8_sse2, - &vpx_highbd_lpf_vertical_8_c, 8, 1), - make_tuple(&wrapper_vertical_16_sse2, - &wrapper_vertical_16_c, 8, 1), + &vpx_highbd_lpf_vertical_8_c, 8), + make_tuple(&vpx_highbd_lpf_vertical_16_sse2, + &vpx_highbd_lpf_vertical_16_c, 8), make_tuple(&vpx_highbd_lpf_horizontal_4_sse2, - &vpx_highbd_lpf_horizontal_4_c, 10, 1), + &vpx_highbd_lpf_horizontal_4_c, 10), make_tuple(&vpx_highbd_lpf_vertical_4_sse2, - &vpx_highbd_lpf_vertical_4_c, 10, 1), + &vpx_highbd_lpf_vertical_4_c, 10), make_tuple(&vpx_highbd_lpf_horizontal_8_sse2, - &vpx_highbd_lpf_horizontal_8_c, 10, 1), - make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, - &vpx_highbd_lpf_horizontal_16_c, 10, 1), - make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, - &vpx_highbd_lpf_horizontal_16_c, 10, 2), + &vpx_highbd_lpf_horizontal_8_c, 10), + make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2, + &vpx_highbd_lpf_horizontal_edge_8_c, 10), + make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2, + &vpx_highbd_lpf_horizontal_edge_16_c, 10), 
make_tuple(&vpx_highbd_lpf_vertical_8_sse2, - &vpx_highbd_lpf_vertical_8_c, 10, 1), - make_tuple(&wrapper_vertical_16_sse2, - &wrapper_vertical_16_c, 10, 1), + &vpx_highbd_lpf_vertical_8_c, 10), + make_tuple(&vpx_highbd_lpf_vertical_16_sse2, + &vpx_highbd_lpf_vertical_16_c, 10), make_tuple(&vpx_highbd_lpf_horizontal_4_sse2, - &vpx_highbd_lpf_horizontal_4_c, 12, 1), + &vpx_highbd_lpf_horizontal_4_c, 12), make_tuple(&vpx_highbd_lpf_vertical_4_sse2, - &vpx_highbd_lpf_vertical_4_c, 12, 1), + &vpx_highbd_lpf_vertical_4_c, 12), make_tuple(&vpx_highbd_lpf_horizontal_8_sse2, - &vpx_highbd_lpf_horizontal_8_c, 12, 1), - make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, - &vpx_highbd_lpf_horizontal_16_c, 12, 1), - make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, - &vpx_highbd_lpf_horizontal_16_c, 12, 2), + &vpx_highbd_lpf_horizontal_8_c, 12), + make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2, + &vpx_highbd_lpf_horizontal_edge_8_c, 12), + make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2, + &vpx_highbd_lpf_horizontal_edge_16_c, 12), make_tuple(&vpx_highbd_lpf_vertical_8_sse2, - &vpx_highbd_lpf_vertical_8_c, 12, 1), - make_tuple(&wrapper_vertical_16_sse2, - &wrapper_vertical_16_c, 12, 1), - make_tuple(&wrapper_vertical_16_dual_sse2, - &wrapper_vertical_16_dual_c, 8, 1), - make_tuple(&wrapper_vertical_16_dual_sse2, - &wrapper_vertical_16_dual_c, 10, 1), - make_tuple(&wrapper_vertical_16_dual_sse2, - &wrapper_vertical_16_dual_c, 12, 1))); + &vpx_highbd_lpf_vertical_8_c, 12), + make_tuple(&vpx_highbd_lpf_vertical_16_sse2, + &vpx_highbd_lpf_vertical_16_c, 12), + make_tuple(&vpx_highbd_lpf_vertical_16_dual_sse2, + &vpx_highbd_lpf_vertical_16_dual_c, 8), + make_tuple(&vpx_highbd_lpf_vertical_16_dual_sse2, + &vpx_highbd_lpf_vertical_16_dual_c, 10), + make_tuple(&vpx_highbd_lpf_vertical_16_dual_sse2, + &vpx_highbd_lpf_vertical_16_dual_c, 12))); #else INSTANTIATE_TEST_CASE_P( SSE2, Loop8Test6Param, ::testing::Values( - make_tuple(&vpx_lpf_horizontal_8_sse2, &vpx_lpf_horizontal_8_c, 8, 1), 
- make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 1), - make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 2), - make_tuple(&vpx_lpf_vertical_8_sse2, &vpx_lpf_vertical_8_c, 8, 1), - make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8, 1), - make_tuple(&wrapper_vertical_16_dual_sse2, - &wrapper_vertical_16_dual_c, 8, 1))); + make_tuple(&vpx_lpf_horizontal_4_sse2, + &vpx_lpf_horizontal_4_c, 8), + make_tuple(&vpx_lpf_horizontal_8_sse2, + &vpx_lpf_horizontal_8_c, 8), + make_tuple(&vpx_lpf_horizontal_edge_8_sse2, + &vpx_lpf_horizontal_edge_8_c, 8), + make_tuple(&vpx_lpf_horizontal_edge_16_sse2, + &vpx_lpf_horizontal_edge_16_c, 8), + make_tuple(&vpx_lpf_vertical_4_sse2, + &vpx_lpf_vertical_4_c, 8), + make_tuple(&vpx_lpf_vertical_8_sse2, + &vpx_lpf_vertical_8_c, 8), + make_tuple(&vpx_lpf_vertical_16_sse2, + &vpx_lpf_vertical_16_c, 8), + make_tuple(&vpx_lpf_vertical_16_dual_sse2, + &vpx_lpf_vertical_16_dual_c, 8))); #endif // CONFIG_VP9_HIGHBITDEPTH #endif @@ -600,9 +510,10 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( AVX2, Loop8Test6Param, ::testing::Values( - make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8, 1), - make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8, - 2))); + make_tuple(&vpx_lpf_horizontal_edge_8_avx2, + &vpx_lpf_horizontal_edge_8_c, 8), + make_tuple(&vpx_lpf_horizontal_edge_16_avx2, + &vpx_lpf_horizontal_edge_16_c, 8))); #endif #if HAVE_SSE2 @@ -659,23 +570,23 @@ INSTANTIATE_TEST_CASE_P( #if HAVE_NEON_ASM // Using #if inside the macro is unsupported on MSVS but the tests are not // currently built for MSVS with ARM and NEON. 
- make_tuple(&vpx_lpf_horizontal_16_neon, - &vpx_lpf_horizontal_16_c, 8, 1), - make_tuple(&vpx_lpf_horizontal_16_neon, - &vpx_lpf_horizontal_16_c, 8, 2), - make_tuple(&wrapper_vertical_16_neon, - &wrapper_vertical_16_c, 8, 1), - make_tuple(&wrapper_vertical_16_dual_neon, - &wrapper_vertical_16_dual_c, 8, 1), + make_tuple(&vpx_lpf_horizontal_edge_8_neon, + &vpx_lpf_horizontal_edge_8_c, 8), + make_tuple(&vpx_lpf_horizontal_edge_16_neon, + &vpx_lpf_horizontal_edge_16_c, 8), + make_tuple(&vpx_lpf_vertical_16_neon, + &vpx_lpf_vertical_16_c, 8), + make_tuple(&vpx_lpf_vertical_16_dual_neon, + &vpx_lpf_vertical_16_dual_c, 8), #endif // HAVE_NEON_ASM make_tuple(&vpx_lpf_horizontal_8_neon, - &vpx_lpf_horizontal_8_c, 8, 1), + &vpx_lpf_horizontal_8_c, 8), make_tuple(&vpx_lpf_vertical_8_neon, - &vpx_lpf_vertical_8_c, 8, 1), + &vpx_lpf_vertical_8_c, 8), make_tuple(&vpx_lpf_horizontal_4_neon, - &vpx_lpf_horizontal_4_c, 8, 1), + &vpx_lpf_horizontal_4_c, 8), make_tuple(&vpx_lpf_vertical_4_neon, - &vpx_lpf_vertical_4_c, 8, 1))); + &vpx_lpf_vertical_4_c, 8))); INSTANTIATE_TEST_CASE_P( NEON, Loop8Test9Param, ::testing::Values( @@ -692,15 +603,58 @@ INSTANTIATE_TEST_CASE_P( #endif // CONFIG_VP9_HIGHBITDEPTH #endif // HAVE_NEON +#if HAVE_DSPR2 && !CONFIG_VP9_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + DSPR2, Loop8Test6Param, + ::testing::Values( + make_tuple(&vpx_lpf_horizontal_4_dspr2, + &vpx_lpf_horizontal_4_c, 8), + make_tuple(&vpx_lpf_horizontal_8_dspr2, + &vpx_lpf_horizontal_8_c, 8), + make_tuple(&vpx_lpf_horizontal_edge_8, + &vpx_lpf_horizontal_edge_8, 8), + make_tuple(&vpx_lpf_horizontal_edge_16, + &vpx_lpf_horizontal_edge_16, 8), + make_tuple(&vpx_lpf_vertical_4_dspr2, + &vpx_lpf_vertical_4_c, 8), + make_tuple(&vpx_lpf_vertical_8_dspr2, + &vpx_lpf_vertical_8_c, 8), + make_tuple(&vpx_lpf_vertical_16_dspr2, + &vpx_lpf_vertical_16_c, 8), + make_tuple(&vpx_lpf_vertical_16_dual_dspr2, + &vpx_lpf_vertical_16_dual_c, 8))); + +INSTANTIATE_TEST_CASE_P( + DSPR2, Loop8Test9Param, + 
::testing::Values( + make_tuple(&vpx_lpf_horizontal_4_dual_dspr2, + &vpx_lpf_horizontal_4_dual_c, 8), + make_tuple(&vpx_lpf_horizontal_8_dual_dspr2, + &vpx_lpf_horizontal_8_dual_c, 8), + make_tuple(&vpx_lpf_vertical_4_dual_dspr2, + &vpx_lpf_vertical_4_dual_c, 8), + make_tuple(&vpx_lpf_vertical_8_dual_dspr2, + &vpx_lpf_vertical_8_dual_c, 8))); +#endif // HAVE_DSPR2 && !CONFIG_VP9_HIGHBITDEPTH + #if HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH) INSTANTIATE_TEST_CASE_P( MSA, Loop8Test6Param, ::testing::Values( - make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8, 1), - make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1), - make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 2), - make_tuple(&vpx_lpf_vertical_8_msa, &vpx_lpf_vertical_8_c, 8, 1), - make_tuple(&wrapper_vertical_16_msa, &wrapper_vertical_16_c, 8, 1))); + make_tuple(&vpx_lpf_horizontal_4_msa, + &vpx_lpf_horizontal_4_c, 8), + make_tuple(&vpx_lpf_horizontal_8_msa, + &vpx_lpf_horizontal_8_c, 8), + make_tuple(&vpx_lpf_horizontal_edge_8_msa, + &vpx_lpf_horizontal_edge_8_c, 8), + make_tuple(&vpx_lpf_horizontal_edge_16_msa, + &vpx_lpf_horizontal_edge_16_c, 8), + make_tuple(&vpx_lpf_vertical_4_msa, + &vpx_lpf_vertical_4_c, 8), + make_tuple(&vpx_lpf_vertical_8_msa, + &vpx_lpf_vertical_8_c, 8), + make_tuple(&vpx_lpf_vertical_16_msa, + &vpx_lpf_vertical_16_c, 8))); INSTANTIATE_TEST_CASE_P( MSA, Loop8Test9Param, diff --git a/libvpx/test/minmax_test.cc b/libvpx/test/minmax_test.cc new file mode 100644 index 000000000..dbe4342dc --- /dev/null +++ b/libvpx/test/minmax_test.cc @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2016 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdlib.h> +#include <string.h> + +#include "third_party/googletest/src/include/gtest/gtest.h" + +#include "./vpx_dsp_rtcd.h" +#include "vpx/vpx_integer.h" + +#include "test/acm_random.h" +#include "test/register_state_check.h" + +namespace { + +using ::libvpx_test::ACMRandom; + +typedef void (*MinMaxFunc)(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + int *min, int *max); + +class MinMaxTest : public ::testing::TestWithParam<MinMaxFunc> { + public: + virtual void SetUp() { + mm_func_ = GetParam(); + rnd_.Reset(ACMRandom::DeterministicSeed()); + } + + protected: + MinMaxFunc mm_func_; + ACMRandom rnd_; +}; + +void reference_minmax(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + int *min_ret, int *max_ret) { + int min = 255; + int max = 0; + for (int i = 0; i < 8; i++) { + for (int j = 0; j < 8; j++) { + const int diff = abs(a[i * a_stride + j] - b[i * b_stride + j]); + if (min > diff) min = diff; + if (max < diff) max = diff; + } + } + + *min_ret = min; + *max_ret = max; +} + +TEST_P(MinMaxTest, MinValue) { + for (int i = 0; i < 64; i++) { + uint8_t a[64], b[64]; + memset(a, 0, sizeof(a)); + memset(b, 255, sizeof(b)); + b[i] = i; // Set a minimum difference of i. + + int min, max; + ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max)); + EXPECT_EQ(255, max); + EXPECT_EQ(i, min); + } +} + +TEST_P(MinMaxTest, MaxValue) { + for (int i = 0; i < 64; i++) { + uint8_t a[64], b[64]; + memset(a, 0, sizeof(a)); + memset(b, 0, sizeof(b)); + b[i] = i; // Set a maximum difference of i. 
+ + int min, max; + ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max)); + EXPECT_EQ(i, max); + EXPECT_EQ(0, min); + } +} + +TEST_P(MinMaxTest, CompareReference) { + uint8_t a[64], b[64]; + for (int j = 0; j < 64; j++) { + a[j] = rnd_.Rand8(); + b[j] = rnd_.Rand8(); + } + + int min_ref, max_ref, min, max; + reference_minmax(a, 8, b, 8, &min_ref, &max_ref); + ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max)); + EXPECT_EQ(max_ref, max); + EXPECT_EQ(min_ref, min); +} + +TEST_P(MinMaxTest, CompareReferenceAndVaryStride) { + uint8_t a[8 * 64], b[8 * 64]; + for (int i = 0; i < 8 * 64; i++) { + a[i] = rnd_.Rand8(); + b[i] = rnd_.Rand8(); + } + for (int a_stride = 8; a_stride <= 64; a_stride += 8) { + for (int b_stride = 8; b_stride <= 64; b_stride += 8) { + int min_ref, max_ref, min, max; + reference_minmax(a, a_stride, b, b_stride, &min_ref, &max_ref); + ASM_REGISTER_STATE_CHECK(mm_func_(a, a_stride, b, b_stride, &min, &max)); + EXPECT_EQ(max_ref, max) << "when a_stride = " << a_stride + << " and b_stride = " << b_stride;; + EXPECT_EQ(min_ref, min) << "when a_stride = " << a_stride + << " and b_stride = " << b_stride;; + } + } +} + +INSTANTIATE_TEST_CASE_P(C, MinMaxTest, ::testing::Values(&vpx_minmax_8x8_c)); + +#if HAVE_SSE2 +INSTANTIATE_TEST_CASE_P(SSE2, MinMaxTest, + ::testing::Values(&vpx_minmax_8x8_sse2)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_CASE_P(NEON, MinMaxTest, + ::testing::Values(&vpx_minmax_8x8_neon)); +#endif + +} // namespace diff --git a/libvpx/test/realtime_test.cc b/libvpx/test/realtime_test.cc new file mode 100644 index 000000000..24749e4ec --- /dev/null +++ b/libvpx/test/realtime_test.cc @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2016 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/util.h" +#include "test/video_source.h" +#include "third_party/googletest/src/include/gtest/gtest.h" + +namespace { + +const int kVideoSourceWidth = 320; +const int kVideoSourceHeight = 240; +const int kFramesToEncode = 2; + +class RealtimeTest + : public ::libvpx_test::EncoderTest, + public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> { + protected: + RealtimeTest() + : EncoderTest(GET_PARAM(0)), frame_packets_(0) {} + virtual ~RealtimeTest() {} + + virtual void SetUp() { + InitializeConfig(); + cfg_.g_lag_in_frames = 0; + SetMode(::libvpx_test::kRealTime); + } + + virtual void BeginPassHook(unsigned int /*pass*/) { + // TODO(tomfinegan): We're changing the pass value here to make sure + // we get frames when real time mode is combined with |g_pass| set to + // VPX_RC_FIRST_PASS. This is necessary because EncoderTest::RunLoop() sets + // the pass value based on the mode passed into EncoderTest::SetMode(), + // which overrides the one specified in SetUp() above. 
+ cfg_.g_pass = VPX_RC_FIRST_PASS; + } + virtual void FramePktHook(const vpx_codec_cx_pkt_t * /*pkt*/) { + frame_packets_++; + } + + int frame_packets_; +}; + +TEST_P(RealtimeTest, RealtimeFirstPassProducesFrames) { + ::libvpx_test::RandomVideoSource video; + video.SetSize(kVideoSourceWidth, kVideoSourceHeight); + video.set_limit(kFramesToEncode); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + EXPECT_EQ(kFramesToEncode, frame_packets_); +} + +VP8_INSTANTIATE_TEST_CASE(RealtimeTest, + ::testing::Values(::libvpx_test::kRealTime)); +VP9_INSTANTIATE_TEST_CASE(RealtimeTest, + ::testing::Values(::libvpx_test::kRealTime)); + +} // namespace diff --git a/libvpx/test/register_state_check.h b/libvpx/test/register_state_check.h index 489c41942..5336f2fbe 100644 --- a/libvpx/test/register_state_check.h +++ b/libvpx/test/register_state_check.h @@ -36,16 +36,10 @@ #include <windows.h> #include <winnt.h> -namespace testing { -namespace internal { - inline bool operator==(const M128A& lhs, const M128A& rhs) { return (lhs.Low == rhs.Low && lhs.High == rhs.High); } -} // namespace internal -} // namespace testing - namespace libvpx_test { // Compares the state of xmm[6-15] at construction with their state at diff --git a/libvpx/test/resize_test.cc b/libvpx/test/resize_test.cc index 98b6f87e1..90f5452e9 100644 --- a/libvpx/test/resize_test.cc +++ b/libvpx/test/resize_test.cc @@ -7,6 +7,8 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ +#include <stdio.h> + #include <climits> #include <vector> #include "third_party/googletest/src/include/gtest/gtest.h" @@ -90,34 +92,178 @@ struct FrameInfo { unsigned int h; }; -unsigned int ScaleForFrameNumber(unsigned int frame, unsigned int val) { - if (frame < 10) - return val; - if (frame < 20) - return val / 2; - if (frame < 30) - return val * 2 / 3; - if (frame < 40) - return val / 4; - if (frame < 50) - return val * 7 / 8; - return val; +void ScaleForFrameNumber(unsigned int frame, + unsigned int initial_w, + unsigned int initial_h, + unsigned int *w, + unsigned int *h, + int flag_codec) { + if (frame < 10) { + *w = initial_w; + *h = initial_h; + return; + } + if (frame < 20) { + *w = initial_w * 3 / 4; + *h = initial_h * 3 / 4; + return; + } + if (frame < 30) { + *w = initial_w / 2; + *h = initial_h / 2; + return; + } + if (frame < 40) { + *w = initial_w; + *h = initial_h; + return; + } + if (frame < 50) { + *w = initial_w * 3 / 4; + *h = initial_h * 3 / 4; + return; + } + if (frame < 60) { + *w = initial_w / 2; + *h = initial_h / 2; + return; + } + if (frame < 70) { + *w = initial_w; + *h = initial_h; + return; + } + if (frame < 80) { + *w = initial_w * 3 / 4; + *h = initial_h * 3 / 4; + return; + } + if (frame < 90) { + *w = initial_w / 2; + *h = initial_h / 2; + return; + } + if (frame < 100) { + *w = initial_w * 3 / 4; + *h = initial_h * 3 / 4; + return; + } + if (frame < 110) { + *w = initial_w; + *h = initial_h; + return; + } + if (frame < 120) { + *w = initial_w * 3 / 4; + *h = initial_h * 3 / 4; + return; + } + if (frame < 130) { + *w = initial_w / 2; + *h = initial_h / 2; + return; + } + if (frame < 140) { + *w = initial_w * 3 / 4; + *h = initial_h * 3 / 4; + return; + } + if (frame < 150) { + *w = initial_w; + *h = initial_h; + return; + } + if (frame < 160) { + *w = initial_w * 3 / 4; + *h = initial_h * 3 / 4; + return; + } + if (frame < 170) { + *w = initial_w / 2; + *h = initial_h / 2; + return; + } + if (frame < 180) { + *w = initial_w * 
3 / 4; + *h = initial_h * 3 / 4; + return; + } + if (frame < 190) { + *w = initial_w; + *h = initial_h; + return; + } + if (frame < 200) { + *w = initial_w * 3 / 4; + *h = initial_h * 3 / 4; + return; + } + if (frame < 210) { + *w = initial_w / 2; + *h = initial_h / 2; + return; + } + if (frame < 220) { + *w = initial_w * 3 / 4; + *h = initial_h * 3 / 4; + return; + } + if (frame < 230) { + *w = initial_w; + *h = initial_h; + return; + } + if (frame < 240) { + *w = initial_w * 3 / 4; + *h = initial_h * 3 / 4; + return; + } + if (frame < 250) { + *w = initial_w / 2; + *h = initial_h / 2; + return; + } + if (frame < 260) { + *w = initial_w; + *h = initial_h; + return; + } + // Go down very low. + if (frame < 270) { + *w = initial_w / 4; + *h = initial_h / 4; + return; + } + if (flag_codec == 1) { + // Cases that only works for VP9. + // For VP9: Swap width and height of original. + if (frame < 320) { + *w = initial_h; + *h = initial_w; + return; + } + } + *w = initial_w; + *h = initial_h; } class ResizingVideoSource : public ::libvpx_test::DummyVideoSource { public: ResizingVideoSource() { SetSize(kInitialWidth, kInitialHeight); - limit_ = 60; + limit_ = 350; } - + int flag_codec_; virtual ~ResizingVideoSource() {} protected: virtual void Next() { ++frame_; - SetSize(ScaleForFrameNumber(frame_, kInitialWidth), - ScaleForFrameNumber(frame_, kInitialHeight)); + unsigned int width; + unsigned int height; + ScaleForFrameNumber(frame_, kInitialWidth, kInitialHeight, &width, &height, + flag_codec_); + SetSize(width, height); FillFrame(); } }; @@ -144,15 +290,17 @@ class ResizeTest : public ::libvpx_test::EncoderTest, TEST_P(ResizeTest, TestExternalResizeWorks) { ResizingVideoSource video; + video.flag_codec_ = 0; cfg_.g_lag_in_frames = 0; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin(); info != frame_info_list_.end(); ++info) { const unsigned int frame = static_cast<unsigned>(info->pts); - const unsigned 
int expected_w = ScaleForFrameNumber(frame, kInitialWidth); - const unsigned int expected_h = ScaleForFrameNumber(frame, kInitialHeight); - + unsigned int expected_w; + unsigned int expected_h; + ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, + &expected_w, &expected_h, 0); EXPECT_EQ(expected_w, info->w) << "Frame " << frame << " had unexpected width"; EXPECT_EQ(expected_h, info->h) @@ -286,11 +434,11 @@ TEST_P(ResizeInternalTest, TestInternalResizeChangeConfig) { ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } -class ResizeInternalRealtimeTest : public ::libvpx_test::EncoderTest, +class ResizeRealtimeTest : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> { protected: - ResizeInternalRealtimeTest() : EncoderTest(GET_PARAM(0)) {} - virtual ~ResizeInternalRealtimeTest() {} + ResizeRealtimeTest() : EncoderTest(GET_PARAM(0)) {} + virtual ~ResizeRealtimeTest() {} virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video, libvpx_test::Encoder *encoder) { @@ -317,9 +465,18 @@ class ResizeInternalRealtimeTest : public ::libvpx_test::EncoderTest, frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h)); } + virtual void MismatchHook(const vpx_image_t *img1, + const vpx_image_t *img2) { + double mismatch_psnr = compute_psnr(img1, img2); + mismatch_psnr_ += mismatch_psnr; + ++mismatch_nframes_; + } + + unsigned int GetMismatchFrames() { + return mismatch_nframes_; + } + void DefaultConfig() { - cfg_.g_w = 352; - cfg_.g_h = 288; cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 600; cfg_.rc_buf_sz = 1000; @@ -344,16 +501,48 @@ class ResizeInternalRealtimeTest : public ::libvpx_test::EncoderTest, std::vector< FrameInfo > frame_info_list_; int set_cpu_used_; bool change_bitrate_; + double mismatch_psnr_; + int mismatch_nframes_; }; +TEST_P(ResizeRealtimeTest, TestExternalResizeWorks) { + ResizingVideoSource video; + video.flag_codec_ = 1; + DefaultConfig(); + // Disable internal resize for this test. 
+ cfg_.rc_resize_allowed = 0; + change_bitrate_ = false; + mismatch_psnr_ = 0.0; + mismatch_nframes_ = 0; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin(); + info != frame_info_list_.end(); ++info) { + const unsigned int frame = static_cast<unsigned>(info->pts); + unsigned int expected_w; + unsigned int expected_h; + ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, + &expected_w, &expected_h, 1); + EXPECT_EQ(expected_w, info->w) + << "Frame " << frame << " had unexpected width"; + EXPECT_EQ(expected_h, info->h) + << "Frame " << frame << " had unexpected height"; + EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames()); + } +} + // Verify the dynamic resizer behavior for real time, 1 pass CBR mode. // Run at low bitrate, with resize_allowed = 1, and verify that we get // one resize down event. -TEST_P(ResizeInternalRealtimeTest, TestInternalResizeDown) { +TEST_P(ResizeRealtimeTest, TestInternalResizeDown) { ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 299); DefaultConfig(); + cfg_.g_w = 352; + cfg_.g_h = 288; change_bitrate_ = false; + mismatch_psnr_ = 0.0; + mismatch_nframes_ = 0; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); unsigned int last_w = cfg_.g_w; @@ -371,22 +560,31 @@ TEST_P(ResizeInternalRealtimeTest, TestInternalResizeDown) { } } +#if CONFIG_VP9_DECODER // Verify that we get 1 resize down event in this test. ASSERT_EQ(1, resize_count) << "Resizing should occur."; + EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames()); +#else + printf("Warning: VP9 decoder unavailable, unable to check resize count!\n"); +#endif } // Verify the dynamic resizer behavior for real time, 1 pass CBR mode. // Start at low target bitrate, raise the bitrate in the middle of the clip, // scaling-up should occur after bitrate changed. 
-TEST_P(ResizeInternalRealtimeTest, TestInternalResizeDownUpChangeBitRate) { +TEST_P(ResizeRealtimeTest, TestInternalResizeDownUpChangeBitRate) { ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, 299); + 30, 1, 0, 359); DefaultConfig(); + cfg_.g_w = 352; + cfg_.g_h = 288; change_bitrate_ = true; + mismatch_psnr_ = 0.0; + mismatch_nframes_ = 0; // Disable dropped frames. cfg_.rc_dropframe_thresh = 0; // Starting bitrate low. - cfg_.rc_target_bitrate = 100; + cfg_.rc_target_bitrate = 80; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); unsigned int last_w = cfg_.g_w; @@ -410,8 +608,13 @@ TEST_P(ResizeInternalRealtimeTest, TestInternalResizeDownUpChangeBitRate) { } } +#if CONFIG_VP9_DECODER // Verify that we get 2 resize events in this test. - ASSERT_EQ(2, resize_count) << "Resizing should occur twice."; + ASSERT_EQ(resize_count, 2) << "Resizing should occur twice."; + EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames()); +#else + printf("Warning: VP9 decoder unavailable, unable to check resize count!\n"); +#endif } vpx_img_fmt_t CspForFrameNumber(int frame) { @@ -524,7 +727,7 @@ VP9_INSTANTIATE_TEST_CASE(ResizeTest, ::testing::Values(::libvpx_test::kRealTime)); VP9_INSTANTIATE_TEST_CASE(ResizeInternalTest, ::testing::Values(::libvpx_test::kOnePassBest)); -VP9_INSTANTIATE_TEST_CASE(ResizeInternalRealtimeTest, +VP9_INSTANTIATE_TEST_CASE(ResizeRealtimeTest, ::testing::Values(::libvpx_test::kRealTime), ::testing::Range(5, 9)); VP9_INSTANTIATE_TEST_CASE(ResizeCspTest, diff --git a/libvpx/test/sad_test.cc b/libvpx/test/sad_test.cc index e6a5e0ba6..e6bd0d793 100644 --- a/libvpx/test/sad_test.cc +++ b/libvpx/test/sad_test.cc @@ -484,260 +484,176 @@ using std::tr1::make_tuple; //------------------------------------------------------------------------------ // C functions -const SadMxNFunc sad64x64_c = vpx_sad64x64_c; -const SadMxNFunc sad64x32_c = vpx_sad64x32_c; -const SadMxNFunc sad32x64_c = vpx_sad32x64_c; -const SadMxNFunc 
sad32x32_c = vpx_sad32x32_c; -const SadMxNFunc sad32x16_c = vpx_sad32x16_c; -const SadMxNFunc sad16x32_c = vpx_sad16x32_c; -const SadMxNFunc sad16x16_c = vpx_sad16x16_c; -const SadMxNFunc sad16x8_c = vpx_sad16x8_c; -const SadMxNFunc sad8x16_c = vpx_sad8x16_c; -const SadMxNFunc sad8x8_c = vpx_sad8x8_c; -const SadMxNFunc sad8x4_c = vpx_sad8x4_c; -const SadMxNFunc sad4x8_c = vpx_sad4x8_c; -const SadMxNFunc sad4x4_c = vpx_sad4x4_c; -#if CONFIG_VP9_HIGHBITDEPTH -const SadMxNFunc highbd_sad64x64_c = vpx_highbd_sad64x64_c; -const SadMxNFunc highbd_sad64x32_c = vpx_highbd_sad64x32_c; -const SadMxNFunc highbd_sad32x64_c = vpx_highbd_sad32x64_c; -const SadMxNFunc highbd_sad32x32_c = vpx_highbd_sad32x32_c; -const SadMxNFunc highbd_sad32x16_c = vpx_highbd_sad32x16_c; -const SadMxNFunc highbd_sad16x32_c = vpx_highbd_sad16x32_c; -const SadMxNFunc highbd_sad16x16_c = vpx_highbd_sad16x16_c; -const SadMxNFunc highbd_sad16x8_c = vpx_highbd_sad16x8_c; -const SadMxNFunc highbd_sad8x16_c = vpx_highbd_sad8x16_c; -const SadMxNFunc highbd_sad8x8_c = vpx_highbd_sad8x8_c; -const SadMxNFunc highbd_sad8x4_c = vpx_highbd_sad8x4_c; -const SadMxNFunc highbd_sad4x8_c = vpx_highbd_sad4x8_c; -const SadMxNFunc highbd_sad4x4_c = vpx_highbd_sad4x4_c; -#endif // CONFIG_VP9_HIGHBITDEPTH const SadMxNParam c_tests[] = { - make_tuple(64, 64, sad64x64_c, -1), - make_tuple(64, 32, sad64x32_c, -1), - make_tuple(32, 64, sad32x64_c, -1), - make_tuple(32, 32, sad32x32_c, -1), - make_tuple(32, 16, sad32x16_c, -1), - make_tuple(16, 32, sad16x32_c, -1), - make_tuple(16, 16, sad16x16_c, -1), - make_tuple(16, 8, sad16x8_c, -1), - make_tuple(8, 16, sad8x16_c, -1), - make_tuple(8, 8, sad8x8_c, -1), - make_tuple(8, 4, sad8x4_c, -1), - make_tuple(4, 8, sad4x8_c, -1), - make_tuple(4, 4, sad4x4_c, -1), + make_tuple(64, 64, &vpx_sad64x64_c, -1), + make_tuple(64, 32, &vpx_sad64x32_c, -1), + make_tuple(32, 64, &vpx_sad32x64_c, -1), + make_tuple(32, 32, &vpx_sad32x32_c, -1), + make_tuple(32, 16, &vpx_sad32x16_c, -1), + 
make_tuple(16, 32, &vpx_sad16x32_c, -1), + make_tuple(16, 16, &vpx_sad16x16_c, -1), + make_tuple(16, 8, &vpx_sad16x8_c, -1), + make_tuple(8, 16, &vpx_sad8x16_c, -1), + make_tuple(8, 8, &vpx_sad8x8_c, -1), + make_tuple(8, 4, &vpx_sad8x4_c, -1), + make_tuple(4, 8, &vpx_sad4x8_c, -1), + make_tuple(4, 4, &vpx_sad4x4_c, -1), #if CONFIG_VP9_HIGHBITDEPTH - make_tuple(64, 64, highbd_sad64x64_c, 8), - make_tuple(64, 32, highbd_sad64x32_c, 8), - make_tuple(32, 64, highbd_sad32x64_c, 8), - make_tuple(32, 32, highbd_sad32x32_c, 8), - make_tuple(32, 16, highbd_sad32x16_c, 8), - make_tuple(16, 32, highbd_sad16x32_c, 8), - make_tuple(16, 16, highbd_sad16x16_c, 8), - make_tuple(16, 8, highbd_sad16x8_c, 8), - make_tuple(8, 16, highbd_sad8x16_c, 8), - make_tuple(8, 8, highbd_sad8x8_c, 8), - make_tuple(8, 4, highbd_sad8x4_c, 8), - make_tuple(4, 8, highbd_sad4x8_c, 8), - make_tuple(4, 4, highbd_sad4x4_c, 8), - make_tuple(64, 64, highbd_sad64x64_c, 10), - make_tuple(64, 32, highbd_sad64x32_c, 10), - make_tuple(32, 64, highbd_sad32x64_c, 10), - make_tuple(32, 32, highbd_sad32x32_c, 10), - make_tuple(32, 16, highbd_sad32x16_c, 10), - make_tuple(16, 32, highbd_sad16x32_c, 10), - make_tuple(16, 16, highbd_sad16x16_c, 10), - make_tuple(16, 8, highbd_sad16x8_c, 10), - make_tuple(8, 16, highbd_sad8x16_c, 10), - make_tuple(8, 8, highbd_sad8x8_c, 10), - make_tuple(8, 4, highbd_sad8x4_c, 10), - make_tuple(4, 8, highbd_sad4x8_c, 10), - make_tuple(4, 4, highbd_sad4x4_c, 10), - make_tuple(64, 64, highbd_sad64x64_c, 12), - make_tuple(64, 32, highbd_sad64x32_c, 12), - make_tuple(32, 64, highbd_sad32x64_c, 12), - make_tuple(32, 32, highbd_sad32x32_c, 12), - make_tuple(32, 16, highbd_sad32x16_c, 12), - make_tuple(16, 32, highbd_sad16x32_c, 12), - make_tuple(16, 16, highbd_sad16x16_c, 12), - make_tuple(16, 8, highbd_sad16x8_c, 12), - make_tuple(8, 16, highbd_sad8x16_c, 12), - make_tuple(8, 8, highbd_sad8x8_c, 12), - make_tuple(8, 4, highbd_sad8x4_c, 12), - make_tuple(4, 8, highbd_sad4x8_c, 12), - 
make_tuple(4, 4, highbd_sad4x4_c, 12), + make_tuple(64, 64, &vpx_highbd_sad64x64_c, 8), + make_tuple(64, 32, &vpx_highbd_sad64x32_c, 8), + make_tuple(32, 64, &vpx_highbd_sad32x64_c, 8), + make_tuple(32, 32, &vpx_highbd_sad32x32_c, 8), + make_tuple(32, 16, &vpx_highbd_sad32x16_c, 8), + make_tuple(16, 32, &vpx_highbd_sad16x32_c, 8), + make_tuple(16, 16, &vpx_highbd_sad16x16_c, 8), + make_tuple(16, 8, &vpx_highbd_sad16x8_c, 8), + make_tuple(8, 16, &vpx_highbd_sad8x16_c, 8), + make_tuple(8, 8, &vpx_highbd_sad8x8_c, 8), + make_tuple(8, 4, &vpx_highbd_sad8x4_c, 8), + make_tuple(4, 8, &vpx_highbd_sad4x8_c, 8), + make_tuple(4, 4, &vpx_highbd_sad4x4_c, 8), + make_tuple(64, 64, &vpx_highbd_sad64x64_c, 10), + make_tuple(64, 32, &vpx_highbd_sad64x32_c, 10), + make_tuple(32, 64, &vpx_highbd_sad32x64_c, 10), + make_tuple(32, 32, &vpx_highbd_sad32x32_c, 10), + make_tuple(32, 16, &vpx_highbd_sad32x16_c, 10), + make_tuple(16, 32, &vpx_highbd_sad16x32_c, 10), + make_tuple(16, 16, &vpx_highbd_sad16x16_c, 10), + make_tuple(16, 8, &vpx_highbd_sad16x8_c, 10), + make_tuple(8, 16, &vpx_highbd_sad8x16_c, 10), + make_tuple(8, 8, &vpx_highbd_sad8x8_c, 10), + make_tuple(8, 4, &vpx_highbd_sad8x4_c, 10), + make_tuple(4, 8, &vpx_highbd_sad4x8_c, 10), + make_tuple(4, 4, &vpx_highbd_sad4x4_c, 10), + make_tuple(64, 64, &vpx_highbd_sad64x64_c, 12), + make_tuple(64, 32, &vpx_highbd_sad64x32_c, 12), + make_tuple(32, 64, &vpx_highbd_sad32x64_c, 12), + make_tuple(32, 32, &vpx_highbd_sad32x32_c, 12), + make_tuple(32, 16, &vpx_highbd_sad32x16_c, 12), + make_tuple(16, 32, &vpx_highbd_sad16x32_c, 12), + make_tuple(16, 16, &vpx_highbd_sad16x16_c, 12), + make_tuple(16, 8, &vpx_highbd_sad16x8_c, 12), + make_tuple(8, 16, &vpx_highbd_sad8x16_c, 12), + make_tuple(8, 8, &vpx_highbd_sad8x8_c, 12), + make_tuple(8, 4, &vpx_highbd_sad8x4_c, 12), + make_tuple(4, 8, &vpx_highbd_sad4x8_c, 12), + make_tuple(4, 4, &vpx_highbd_sad4x4_c, 12), #endif // CONFIG_VP9_HIGHBITDEPTH }; INSTANTIATE_TEST_CASE_P(C, SADTest, 
::testing::ValuesIn(c_tests)); -const SadMxNAvgFunc sad64x64_avg_c = vpx_sad64x64_avg_c; -const SadMxNAvgFunc sad64x32_avg_c = vpx_sad64x32_avg_c; -const SadMxNAvgFunc sad32x64_avg_c = vpx_sad32x64_avg_c; -const SadMxNAvgFunc sad32x32_avg_c = vpx_sad32x32_avg_c; -const SadMxNAvgFunc sad32x16_avg_c = vpx_sad32x16_avg_c; -const SadMxNAvgFunc sad16x32_avg_c = vpx_sad16x32_avg_c; -const SadMxNAvgFunc sad16x16_avg_c = vpx_sad16x16_avg_c; -const SadMxNAvgFunc sad16x8_avg_c = vpx_sad16x8_avg_c; -const SadMxNAvgFunc sad8x16_avg_c = vpx_sad8x16_avg_c; -const SadMxNAvgFunc sad8x8_avg_c = vpx_sad8x8_avg_c; -const SadMxNAvgFunc sad8x4_avg_c = vpx_sad8x4_avg_c; -const SadMxNAvgFunc sad4x8_avg_c = vpx_sad4x8_avg_c; -const SadMxNAvgFunc sad4x4_avg_c = vpx_sad4x4_avg_c; -#if CONFIG_VP9_HIGHBITDEPTH -const SadMxNAvgFunc highbd_sad64x64_avg_c = vpx_highbd_sad64x64_avg_c; -const SadMxNAvgFunc highbd_sad64x32_avg_c = vpx_highbd_sad64x32_avg_c; -const SadMxNAvgFunc highbd_sad32x64_avg_c = vpx_highbd_sad32x64_avg_c; -const SadMxNAvgFunc highbd_sad32x32_avg_c = vpx_highbd_sad32x32_avg_c; -const SadMxNAvgFunc highbd_sad32x16_avg_c = vpx_highbd_sad32x16_avg_c; -const SadMxNAvgFunc highbd_sad16x32_avg_c = vpx_highbd_sad16x32_avg_c; -const SadMxNAvgFunc highbd_sad16x16_avg_c = vpx_highbd_sad16x16_avg_c; -const SadMxNAvgFunc highbd_sad16x8_avg_c = vpx_highbd_sad16x8_avg_c; -const SadMxNAvgFunc highbd_sad8x16_avg_c = vpx_highbd_sad8x16_avg_c; -const SadMxNAvgFunc highbd_sad8x8_avg_c = vpx_highbd_sad8x8_avg_c; -const SadMxNAvgFunc highbd_sad8x4_avg_c = vpx_highbd_sad8x4_avg_c; -const SadMxNAvgFunc highbd_sad4x8_avg_c = vpx_highbd_sad4x8_avg_c; -const SadMxNAvgFunc highbd_sad4x4_avg_c = vpx_highbd_sad4x4_avg_c; -#endif // CONFIG_VP9_HIGHBITDEPTH const SadMxNAvgParam avg_c_tests[] = { - make_tuple(64, 64, sad64x64_avg_c, -1), - make_tuple(64, 32, sad64x32_avg_c, -1), - make_tuple(32, 64, sad32x64_avg_c, -1), - make_tuple(32, 32, sad32x32_avg_c, -1), - make_tuple(32, 16, sad32x16_avg_c, -1), - 
make_tuple(16, 32, sad16x32_avg_c, -1), - make_tuple(16, 16, sad16x16_avg_c, -1), - make_tuple(16, 8, sad16x8_avg_c, -1), - make_tuple(8, 16, sad8x16_avg_c, -1), - make_tuple(8, 8, sad8x8_avg_c, -1), - make_tuple(8, 4, sad8x4_avg_c, -1), - make_tuple(4, 8, sad4x8_avg_c, -1), - make_tuple(4, 4, sad4x4_avg_c, -1), + make_tuple(64, 64, &vpx_sad64x64_avg_c, -1), + make_tuple(64, 32, &vpx_sad64x32_avg_c, -1), + make_tuple(32, 64, &vpx_sad32x64_avg_c, -1), + make_tuple(32, 32, &vpx_sad32x32_avg_c, -1), + make_tuple(32, 16, &vpx_sad32x16_avg_c, -1), + make_tuple(16, 32, &vpx_sad16x32_avg_c, -1), + make_tuple(16, 16, &vpx_sad16x16_avg_c, -1), + make_tuple(16, 8, &vpx_sad16x8_avg_c, -1), + make_tuple(8, 16, &vpx_sad8x16_avg_c, -1), + make_tuple(8, 8, &vpx_sad8x8_avg_c, -1), + make_tuple(8, 4, &vpx_sad8x4_avg_c, -1), + make_tuple(4, 8, &vpx_sad4x8_avg_c, -1), + make_tuple(4, 4, &vpx_sad4x4_avg_c, -1), #if CONFIG_VP9_HIGHBITDEPTH - make_tuple(64, 64, highbd_sad64x64_avg_c, 8), - make_tuple(64, 32, highbd_sad64x32_avg_c, 8), - make_tuple(32, 64, highbd_sad32x64_avg_c, 8), - make_tuple(32, 32, highbd_sad32x32_avg_c, 8), - make_tuple(32, 16, highbd_sad32x16_avg_c, 8), - make_tuple(16, 32, highbd_sad16x32_avg_c, 8), - make_tuple(16, 16, highbd_sad16x16_avg_c, 8), - make_tuple(16, 8, highbd_sad16x8_avg_c, 8), - make_tuple(8, 16, highbd_sad8x16_avg_c, 8), - make_tuple(8, 8, highbd_sad8x8_avg_c, 8), - make_tuple(8, 4, highbd_sad8x4_avg_c, 8), - make_tuple(4, 8, highbd_sad4x8_avg_c, 8), - make_tuple(4, 4, highbd_sad4x4_avg_c, 8), - make_tuple(64, 64, highbd_sad64x64_avg_c, 10), - make_tuple(64, 32, highbd_sad64x32_avg_c, 10), - make_tuple(32, 64, highbd_sad32x64_avg_c, 10), - make_tuple(32, 32, highbd_sad32x32_avg_c, 10), - make_tuple(32, 16, highbd_sad32x16_avg_c, 10), - make_tuple(16, 32, highbd_sad16x32_avg_c, 10), - make_tuple(16, 16, highbd_sad16x16_avg_c, 10), - make_tuple(16, 8, highbd_sad16x8_avg_c, 10), - make_tuple(8, 16, highbd_sad8x16_avg_c, 10), - make_tuple(8, 8, 
highbd_sad8x8_avg_c, 10), - make_tuple(8, 4, highbd_sad8x4_avg_c, 10), - make_tuple(4, 8, highbd_sad4x8_avg_c, 10), - make_tuple(4, 4, highbd_sad4x4_avg_c, 10), - make_tuple(64, 64, highbd_sad64x64_avg_c, 12), - make_tuple(64, 32, highbd_sad64x32_avg_c, 12), - make_tuple(32, 64, highbd_sad32x64_avg_c, 12), - make_tuple(32, 32, highbd_sad32x32_avg_c, 12), - make_tuple(32, 16, highbd_sad32x16_avg_c, 12), - make_tuple(16, 32, highbd_sad16x32_avg_c, 12), - make_tuple(16, 16, highbd_sad16x16_avg_c, 12), - make_tuple(16, 8, highbd_sad16x8_avg_c, 12), - make_tuple(8, 16, highbd_sad8x16_avg_c, 12), - make_tuple(8, 8, highbd_sad8x8_avg_c, 12), - make_tuple(8, 4, highbd_sad8x4_avg_c, 12), - make_tuple(4, 8, highbd_sad4x8_avg_c, 12), - make_tuple(4, 4, highbd_sad4x4_avg_c, 12), + make_tuple(64, 64, &vpx_highbd_sad64x64_avg_c, 8), + make_tuple(64, 32, &vpx_highbd_sad64x32_avg_c, 8), + make_tuple(32, 64, &vpx_highbd_sad32x64_avg_c, 8), + make_tuple(32, 32, &vpx_highbd_sad32x32_avg_c, 8), + make_tuple(32, 16, &vpx_highbd_sad32x16_avg_c, 8), + make_tuple(16, 32, &vpx_highbd_sad16x32_avg_c, 8), + make_tuple(16, 16, &vpx_highbd_sad16x16_avg_c, 8), + make_tuple(16, 8, &vpx_highbd_sad16x8_avg_c, 8), + make_tuple(8, 16, &vpx_highbd_sad8x16_avg_c, 8), + make_tuple(8, 8, &vpx_highbd_sad8x8_avg_c, 8), + make_tuple(8, 4, &vpx_highbd_sad8x4_avg_c, 8), + make_tuple(4, 8, &vpx_highbd_sad4x8_avg_c, 8), + make_tuple(4, 4, &vpx_highbd_sad4x4_avg_c, 8), + make_tuple(64, 64, &vpx_highbd_sad64x64_avg_c, 10), + make_tuple(64, 32, &vpx_highbd_sad64x32_avg_c, 10), + make_tuple(32, 64, &vpx_highbd_sad32x64_avg_c, 10), + make_tuple(32, 32, &vpx_highbd_sad32x32_avg_c, 10), + make_tuple(32, 16, &vpx_highbd_sad32x16_avg_c, 10), + make_tuple(16, 32, &vpx_highbd_sad16x32_avg_c, 10), + make_tuple(16, 16, &vpx_highbd_sad16x16_avg_c, 10), + make_tuple(16, 8, &vpx_highbd_sad16x8_avg_c, 10), + make_tuple(8, 16, &vpx_highbd_sad8x16_avg_c, 10), + make_tuple(8, 8, &vpx_highbd_sad8x8_avg_c, 10), + make_tuple(8, 4, 
&vpx_highbd_sad8x4_avg_c, 10), + make_tuple(4, 8, &vpx_highbd_sad4x8_avg_c, 10), + make_tuple(4, 4, &vpx_highbd_sad4x4_avg_c, 10), + make_tuple(64, 64, &vpx_highbd_sad64x64_avg_c, 12), + make_tuple(64, 32, &vpx_highbd_sad64x32_avg_c, 12), + make_tuple(32, 64, &vpx_highbd_sad32x64_avg_c, 12), + make_tuple(32, 32, &vpx_highbd_sad32x32_avg_c, 12), + make_tuple(32, 16, &vpx_highbd_sad32x16_avg_c, 12), + make_tuple(16, 32, &vpx_highbd_sad16x32_avg_c, 12), + make_tuple(16, 16, &vpx_highbd_sad16x16_avg_c, 12), + make_tuple(16, 8, &vpx_highbd_sad16x8_avg_c, 12), + make_tuple(8, 16, &vpx_highbd_sad8x16_avg_c, 12), + make_tuple(8, 8, &vpx_highbd_sad8x8_avg_c, 12), + make_tuple(8, 4, &vpx_highbd_sad8x4_avg_c, 12), + make_tuple(4, 8, &vpx_highbd_sad4x8_avg_c, 12), + make_tuple(4, 4, &vpx_highbd_sad4x4_avg_c, 12), #endif // CONFIG_VP9_HIGHBITDEPTH }; INSTANTIATE_TEST_CASE_P(C, SADavgTest, ::testing::ValuesIn(avg_c_tests)); -const SadMxNx4Func sad64x64x4d_c = vpx_sad64x64x4d_c; -const SadMxNx4Func sad64x32x4d_c = vpx_sad64x32x4d_c; -const SadMxNx4Func sad32x64x4d_c = vpx_sad32x64x4d_c; -const SadMxNx4Func sad32x32x4d_c = vpx_sad32x32x4d_c; -const SadMxNx4Func sad32x16x4d_c = vpx_sad32x16x4d_c; -const SadMxNx4Func sad16x32x4d_c = vpx_sad16x32x4d_c; -const SadMxNx4Func sad16x16x4d_c = vpx_sad16x16x4d_c; -const SadMxNx4Func sad16x8x4d_c = vpx_sad16x8x4d_c; -const SadMxNx4Func sad8x16x4d_c = vpx_sad8x16x4d_c; -const SadMxNx4Func sad8x8x4d_c = vpx_sad8x8x4d_c; -const SadMxNx4Func sad8x4x4d_c = vpx_sad8x4x4d_c; -const SadMxNx4Func sad4x8x4d_c = vpx_sad4x8x4d_c; -const SadMxNx4Func sad4x4x4d_c = vpx_sad4x4x4d_c; -#if CONFIG_VP9_HIGHBITDEPTH -const SadMxNx4Func highbd_sad64x64x4d_c = vpx_highbd_sad64x64x4d_c; -const SadMxNx4Func highbd_sad64x32x4d_c = vpx_highbd_sad64x32x4d_c; -const SadMxNx4Func highbd_sad32x64x4d_c = vpx_highbd_sad32x64x4d_c; -const SadMxNx4Func highbd_sad32x32x4d_c = vpx_highbd_sad32x32x4d_c; -const SadMxNx4Func highbd_sad32x16x4d_c = vpx_highbd_sad32x16x4d_c; -const 
SadMxNx4Func highbd_sad16x32x4d_c = vpx_highbd_sad16x32x4d_c; -const SadMxNx4Func highbd_sad16x16x4d_c = vpx_highbd_sad16x16x4d_c; -const SadMxNx4Func highbd_sad16x8x4d_c = vpx_highbd_sad16x8x4d_c; -const SadMxNx4Func highbd_sad8x16x4d_c = vpx_highbd_sad8x16x4d_c; -const SadMxNx4Func highbd_sad8x8x4d_c = vpx_highbd_sad8x8x4d_c; -const SadMxNx4Func highbd_sad8x4x4d_c = vpx_highbd_sad8x4x4d_c; -const SadMxNx4Func highbd_sad4x8x4d_c = vpx_highbd_sad4x8x4d_c; -const SadMxNx4Func highbd_sad4x4x4d_c = vpx_highbd_sad4x4x4d_c; -#endif // CONFIG_VP9_HIGHBITDEPTH const SadMxNx4Param x4d_c_tests[] = { - make_tuple(64, 64, sad64x64x4d_c, -1), - make_tuple(64, 32, sad64x32x4d_c, -1), - make_tuple(32, 64, sad32x64x4d_c, -1), - make_tuple(32, 32, sad32x32x4d_c, -1), - make_tuple(32, 16, sad32x16x4d_c, -1), - make_tuple(16, 32, sad16x32x4d_c, -1), - make_tuple(16, 16, sad16x16x4d_c, -1), - make_tuple(16, 8, sad16x8x4d_c, -1), - make_tuple(8, 16, sad8x16x4d_c, -1), - make_tuple(8, 8, sad8x8x4d_c, -1), - make_tuple(8, 4, sad8x4x4d_c, -1), - make_tuple(4, 8, sad4x8x4d_c, -1), - make_tuple(4, 4, sad4x4x4d_c, -1), + make_tuple(64, 64, &vpx_sad64x64x4d_c, -1), + make_tuple(64, 32, &vpx_sad64x32x4d_c, -1), + make_tuple(32, 64, &vpx_sad32x64x4d_c, -1), + make_tuple(32, 32, &vpx_sad32x32x4d_c, -1), + make_tuple(32, 16, &vpx_sad32x16x4d_c, -1), + make_tuple(16, 32, &vpx_sad16x32x4d_c, -1), + make_tuple(16, 16, &vpx_sad16x16x4d_c, -1), + make_tuple(16, 8, &vpx_sad16x8x4d_c, -1), + make_tuple(8, 16, &vpx_sad8x16x4d_c, -1), + make_tuple(8, 8, &vpx_sad8x8x4d_c, -1), + make_tuple(8, 4, &vpx_sad8x4x4d_c, -1), + make_tuple(4, 8, &vpx_sad4x8x4d_c, -1), + make_tuple(4, 4, &vpx_sad4x4x4d_c, -1), #if CONFIG_VP9_HIGHBITDEPTH - make_tuple(64, 64, highbd_sad64x64x4d_c, 8), - make_tuple(64, 32, highbd_sad64x32x4d_c, 8), - make_tuple(32, 64, highbd_sad32x64x4d_c, 8), - make_tuple(32, 32, highbd_sad32x32x4d_c, 8), - make_tuple(32, 16, highbd_sad32x16x4d_c, 8), - make_tuple(16, 32, highbd_sad16x32x4d_c, 8), 
- make_tuple(16, 16, highbd_sad16x16x4d_c, 8), - make_tuple(16, 8, highbd_sad16x8x4d_c, 8), - make_tuple(8, 16, highbd_sad8x16x4d_c, 8), - make_tuple(8, 8, highbd_sad8x8x4d_c, 8), - make_tuple(8, 4, highbd_sad8x4x4d_c, 8), - make_tuple(4, 8, highbd_sad4x8x4d_c, 8), - make_tuple(4, 4, highbd_sad4x4x4d_c, 8), - make_tuple(64, 64, highbd_sad64x64x4d_c, 10), - make_tuple(64, 32, highbd_sad64x32x4d_c, 10), - make_tuple(32, 64, highbd_sad32x64x4d_c, 10), - make_tuple(32, 32, highbd_sad32x32x4d_c, 10), - make_tuple(32, 16, highbd_sad32x16x4d_c, 10), - make_tuple(16, 32, highbd_sad16x32x4d_c, 10), - make_tuple(16, 16, highbd_sad16x16x4d_c, 10), - make_tuple(16, 8, highbd_sad16x8x4d_c, 10), - make_tuple(8, 16, highbd_sad8x16x4d_c, 10), - make_tuple(8, 8, highbd_sad8x8x4d_c, 10), - make_tuple(8, 4, highbd_sad8x4x4d_c, 10), - make_tuple(4, 8, highbd_sad4x8x4d_c, 10), - make_tuple(4, 4, highbd_sad4x4x4d_c, 10), - make_tuple(64, 64, highbd_sad64x64x4d_c, 12), - make_tuple(64, 32, highbd_sad64x32x4d_c, 12), - make_tuple(32, 64, highbd_sad32x64x4d_c, 12), - make_tuple(32, 32, highbd_sad32x32x4d_c, 12), - make_tuple(32, 16, highbd_sad32x16x4d_c, 12), - make_tuple(16, 32, highbd_sad16x32x4d_c, 12), - make_tuple(16, 16, highbd_sad16x16x4d_c, 12), - make_tuple(16, 8, highbd_sad16x8x4d_c, 12), - make_tuple(8, 16, highbd_sad8x16x4d_c, 12), - make_tuple(8, 8, highbd_sad8x8x4d_c, 12), - make_tuple(8, 4, highbd_sad8x4x4d_c, 12), - make_tuple(4, 8, highbd_sad4x8x4d_c, 12), - make_tuple(4, 4, highbd_sad4x4x4d_c, 12), + make_tuple(64, 64, &vpx_highbd_sad64x64x4d_c, 8), + make_tuple(64, 32, &vpx_highbd_sad64x32x4d_c, 8), + make_tuple(32, 64, &vpx_highbd_sad32x64x4d_c, 8), + make_tuple(32, 32, &vpx_highbd_sad32x32x4d_c, 8), + make_tuple(32, 16, &vpx_highbd_sad32x16x4d_c, 8), + make_tuple(16, 32, &vpx_highbd_sad16x32x4d_c, 8), + make_tuple(16, 16, &vpx_highbd_sad16x16x4d_c, 8), + make_tuple(16, 8, &vpx_highbd_sad16x8x4d_c, 8), + make_tuple(8, 16, &vpx_highbd_sad8x16x4d_c, 8), + make_tuple(8, 8, 
&vpx_highbd_sad8x8x4d_c, 8), + make_tuple(8, 4, &vpx_highbd_sad8x4x4d_c, 8), + make_tuple(4, 8, &vpx_highbd_sad4x8x4d_c, 8), + make_tuple(4, 4, &vpx_highbd_sad4x4x4d_c, 8), + make_tuple(64, 64, &vpx_highbd_sad64x64x4d_c, 10), + make_tuple(64, 32, &vpx_highbd_sad64x32x4d_c, 10), + make_tuple(32, 64, &vpx_highbd_sad32x64x4d_c, 10), + make_tuple(32, 32, &vpx_highbd_sad32x32x4d_c, 10), + make_tuple(32, 16, &vpx_highbd_sad32x16x4d_c, 10), + make_tuple(16, 32, &vpx_highbd_sad16x32x4d_c, 10), + make_tuple(16, 16, &vpx_highbd_sad16x16x4d_c, 10), + make_tuple(16, 8, &vpx_highbd_sad16x8x4d_c, 10), + make_tuple(8, 16, &vpx_highbd_sad8x16x4d_c, 10), + make_tuple(8, 8, &vpx_highbd_sad8x8x4d_c, 10), + make_tuple(8, 4, &vpx_highbd_sad8x4x4d_c, 10), + make_tuple(4, 8, &vpx_highbd_sad4x8x4d_c, 10), + make_tuple(4, 4, &vpx_highbd_sad4x4x4d_c, 10), + make_tuple(64, 64, &vpx_highbd_sad64x64x4d_c, 12), + make_tuple(64, 32, &vpx_highbd_sad64x32x4d_c, 12), + make_tuple(32, 64, &vpx_highbd_sad32x64x4d_c, 12), + make_tuple(32, 32, &vpx_highbd_sad32x32x4d_c, 12), + make_tuple(32, 16, &vpx_highbd_sad32x16x4d_c, 12), + make_tuple(16, 32, &vpx_highbd_sad16x32x4d_c, 12), + make_tuple(16, 16, &vpx_highbd_sad16x16x4d_c, 12), + make_tuple(16, 8, &vpx_highbd_sad16x8x4d_c, 12), + make_tuple(8, 16, &vpx_highbd_sad8x16x4d_c, 12), + make_tuple(8, 8, &vpx_highbd_sad8x8x4d_c, 12), + make_tuple(8, 4, &vpx_highbd_sad8x4x4d_c, 12), + make_tuple(4, 8, &vpx_highbd_sad4x8x4d_c, 12), + make_tuple(4, 4, &vpx_highbd_sad4x4x4d_c, 12), #endif // CONFIG_VP9_HIGHBITDEPTH }; INSTANTIATE_TEST_CASE_P(C, SADx4Test, ::testing::ValuesIn(x4d_c_tests)); @@ -745,318 +661,194 @@ INSTANTIATE_TEST_CASE_P(C, SADx4Test, ::testing::ValuesIn(x4d_c_tests)); //------------------------------------------------------------------------------ // ARM functions #if HAVE_MEDIA -const SadMxNFunc sad16x16_media = vpx_sad16x16_media; const SadMxNParam media_tests[] = { - make_tuple(16, 16, sad16x16_media, -1), + make_tuple(16, 16, 
&vpx_sad16x16_media, -1), }; INSTANTIATE_TEST_CASE_P(MEDIA, SADTest, ::testing::ValuesIn(media_tests)); #endif // HAVE_MEDIA #if HAVE_NEON -const SadMxNFunc sad64x64_neon = vpx_sad64x64_neon; -const SadMxNFunc sad32x32_neon = vpx_sad32x32_neon; -const SadMxNFunc sad16x16_neon = vpx_sad16x16_neon; -const SadMxNFunc sad16x8_neon = vpx_sad16x8_neon; -const SadMxNFunc sad8x16_neon = vpx_sad8x16_neon; -const SadMxNFunc sad8x8_neon = vpx_sad8x8_neon; -const SadMxNFunc sad4x4_neon = vpx_sad4x4_neon; - const SadMxNParam neon_tests[] = { - make_tuple(64, 64, sad64x64_neon, -1), - make_tuple(32, 32, sad32x32_neon, -1), - make_tuple(16, 16, sad16x16_neon, -1), - make_tuple(16, 8, sad16x8_neon, -1), - make_tuple(8, 16, sad8x16_neon, -1), - make_tuple(8, 8, sad8x8_neon, -1), - make_tuple(4, 4, sad4x4_neon, -1), + make_tuple(64, 64, &vpx_sad64x64_neon, -1), + make_tuple(32, 32, &vpx_sad32x32_neon, -1), + make_tuple(16, 16, &vpx_sad16x16_neon, -1), + make_tuple(16, 8, &vpx_sad16x8_neon, -1), + make_tuple(8, 16, &vpx_sad8x16_neon, -1), + make_tuple(8, 8, &vpx_sad8x8_neon, -1), + make_tuple(4, 4, &vpx_sad4x4_neon, -1), }; INSTANTIATE_TEST_CASE_P(NEON, SADTest, ::testing::ValuesIn(neon_tests)); -const SadMxNx4Func sad64x64x4d_neon = vpx_sad64x64x4d_neon; -const SadMxNx4Func sad32x32x4d_neon = vpx_sad32x32x4d_neon; -const SadMxNx4Func sad16x16x4d_neon = vpx_sad16x16x4d_neon; const SadMxNx4Param x4d_neon_tests[] = { - make_tuple(64, 64, sad64x64x4d_neon, -1), - make_tuple(32, 32, sad32x32x4d_neon, -1), - make_tuple(16, 16, sad16x16x4d_neon, -1), + make_tuple(64, 64, &vpx_sad64x64x4d_neon, -1), + make_tuple(32, 32, &vpx_sad32x32x4d_neon, -1), + make_tuple(16, 16, &vpx_sad16x16x4d_neon, -1), }; INSTANTIATE_TEST_CASE_P(NEON, SADx4Test, ::testing::ValuesIn(x4d_neon_tests)); #endif // HAVE_NEON //------------------------------------------------------------------------------ // x86 functions -#if HAVE_MMX -const SadMxNFunc sad16x16_mmx = vpx_sad16x16_mmx; -const SadMxNFunc sad16x8_mmx = 
vpx_sad16x8_mmx; -const SadMxNFunc sad8x16_mmx = vpx_sad8x16_mmx; -const SadMxNFunc sad8x8_mmx = vpx_sad8x8_mmx; -const SadMxNFunc sad4x4_mmx = vpx_sad4x4_mmx; -const SadMxNParam mmx_tests[] = { - make_tuple(16, 16, sad16x16_mmx, -1), - make_tuple(16, 8, sad16x8_mmx, -1), - make_tuple(8, 16, sad8x16_mmx, -1), - make_tuple(8, 8, sad8x8_mmx, -1), - make_tuple(4, 4, sad4x4_mmx, -1), -}; -INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::ValuesIn(mmx_tests)); -#endif // HAVE_MMX - -#if HAVE_SSE -#if CONFIG_USE_X86INC -const SadMxNFunc sad4x8_sse = vpx_sad4x8_sse; -const SadMxNFunc sad4x4_sse = vpx_sad4x4_sse; -const SadMxNParam sse_tests[] = { - make_tuple(4, 8, sad4x8_sse, -1), - make_tuple(4, 4, sad4x4_sse, -1), -}; -INSTANTIATE_TEST_CASE_P(SSE, SADTest, ::testing::ValuesIn(sse_tests)); - -const SadMxNAvgFunc sad4x8_avg_sse = vpx_sad4x8_avg_sse; -const SadMxNAvgFunc sad4x4_avg_sse = vpx_sad4x4_avg_sse; -const SadMxNAvgParam avg_sse_tests[] = { - make_tuple(4, 8, sad4x8_avg_sse, -1), - make_tuple(4, 4, sad4x4_avg_sse, -1), -}; -INSTANTIATE_TEST_CASE_P(SSE, SADavgTest, ::testing::ValuesIn(avg_sse_tests)); - -const SadMxNx4Func sad4x8x4d_sse = vpx_sad4x8x4d_sse; -const SadMxNx4Func sad4x4x4d_sse = vpx_sad4x4x4d_sse; -const SadMxNx4Param x4d_sse_tests[] = { - make_tuple(4, 8, sad4x8x4d_sse, -1), - make_tuple(4, 4, sad4x4x4d_sse, -1), -}; -INSTANTIATE_TEST_CASE_P(SSE, SADx4Test, ::testing::ValuesIn(x4d_sse_tests)); -#endif // CONFIG_USE_X86INC -#endif // HAVE_SSE - #if HAVE_SSE2 #if CONFIG_USE_X86INC -const SadMxNFunc sad64x64_sse2 = vpx_sad64x64_sse2; -const SadMxNFunc sad64x32_sse2 = vpx_sad64x32_sse2; -const SadMxNFunc sad32x64_sse2 = vpx_sad32x64_sse2; -const SadMxNFunc sad32x32_sse2 = vpx_sad32x32_sse2; -const SadMxNFunc sad32x16_sse2 = vpx_sad32x16_sse2; -const SadMxNFunc sad16x32_sse2 = vpx_sad16x32_sse2; -const SadMxNFunc sad16x16_sse2 = vpx_sad16x16_sse2; -const SadMxNFunc sad16x8_sse2 = vpx_sad16x8_sse2; -const SadMxNFunc sad8x16_sse2 = vpx_sad8x16_sse2; -const 
SadMxNFunc sad8x8_sse2 = vpx_sad8x8_sse2; -const SadMxNFunc sad8x4_sse2 = vpx_sad8x4_sse2; -#if CONFIG_VP9_HIGHBITDEPTH -const SadMxNFunc highbd_sad64x64_sse2 = vpx_highbd_sad64x64_sse2; -const SadMxNFunc highbd_sad64x32_sse2 = vpx_highbd_sad64x32_sse2; -const SadMxNFunc highbd_sad32x64_sse2 = vpx_highbd_sad32x64_sse2; -const SadMxNFunc highbd_sad32x32_sse2 = vpx_highbd_sad32x32_sse2; -const SadMxNFunc highbd_sad32x16_sse2 = vpx_highbd_sad32x16_sse2; -const SadMxNFunc highbd_sad16x32_sse2 = vpx_highbd_sad16x32_sse2; -const SadMxNFunc highbd_sad16x16_sse2 = vpx_highbd_sad16x16_sse2; -const SadMxNFunc highbd_sad16x8_sse2 = vpx_highbd_sad16x8_sse2; -const SadMxNFunc highbd_sad8x16_sse2 = vpx_highbd_sad8x16_sse2; -const SadMxNFunc highbd_sad8x8_sse2 = vpx_highbd_sad8x8_sse2; -const SadMxNFunc highbd_sad8x4_sse2 = vpx_highbd_sad8x4_sse2; -#endif // CONFIG_VP9_HIGHBITDEPTH const SadMxNParam sse2_tests[] = { - make_tuple(64, 64, sad64x64_sse2, -1), - make_tuple(64, 32, sad64x32_sse2, -1), - make_tuple(32, 64, sad32x64_sse2, -1), - make_tuple(32, 32, sad32x32_sse2, -1), - make_tuple(32, 16, sad32x16_sse2, -1), - make_tuple(16, 32, sad16x32_sse2, -1), - make_tuple(16, 16, sad16x16_sse2, -1), - make_tuple(16, 8, sad16x8_sse2, -1), - make_tuple(8, 16, sad8x16_sse2, -1), - make_tuple(8, 8, sad8x8_sse2, -1), - make_tuple(8, 4, sad8x4_sse2, -1), + make_tuple(64, 64, &vpx_sad64x64_sse2, -1), + make_tuple(64, 32, &vpx_sad64x32_sse2, -1), + make_tuple(32, 64, &vpx_sad32x64_sse2, -1), + make_tuple(32, 32, &vpx_sad32x32_sse2, -1), + make_tuple(32, 16, &vpx_sad32x16_sse2, -1), + make_tuple(16, 32, &vpx_sad16x32_sse2, -1), + make_tuple(16, 16, &vpx_sad16x16_sse2, -1), + make_tuple(16, 8, &vpx_sad16x8_sse2, -1), + make_tuple(8, 16, &vpx_sad8x16_sse2, -1), + make_tuple(8, 8, &vpx_sad8x8_sse2, -1), + make_tuple(8, 4, &vpx_sad8x4_sse2, -1), + make_tuple(4, 8, &vpx_sad4x8_sse2, -1), + make_tuple(4, 4, &vpx_sad4x4_sse2, -1), #if CONFIG_VP9_HIGHBITDEPTH - make_tuple(64, 64, 
highbd_sad64x64_sse2, 8), - make_tuple(64, 32, highbd_sad64x32_sse2, 8), - make_tuple(32, 64, highbd_sad32x64_sse2, 8), - make_tuple(32, 32, highbd_sad32x32_sse2, 8), - make_tuple(32, 16, highbd_sad32x16_sse2, 8), - make_tuple(16, 32, highbd_sad16x32_sse2, 8), - make_tuple(16, 16, highbd_sad16x16_sse2, 8), - make_tuple(16, 8, highbd_sad16x8_sse2, 8), - make_tuple(8, 16, highbd_sad8x16_sse2, 8), - make_tuple(8, 8, highbd_sad8x8_sse2, 8), - make_tuple(8, 4, highbd_sad8x4_sse2, 8), - make_tuple(64, 64, highbd_sad64x64_sse2, 10), - make_tuple(64, 32, highbd_sad64x32_sse2, 10), - make_tuple(32, 64, highbd_sad32x64_sse2, 10), - make_tuple(32, 32, highbd_sad32x32_sse2, 10), - make_tuple(32, 16, highbd_sad32x16_sse2, 10), - make_tuple(16, 32, highbd_sad16x32_sse2, 10), - make_tuple(16, 16, highbd_sad16x16_sse2, 10), - make_tuple(16, 8, highbd_sad16x8_sse2, 10), - make_tuple(8, 16, highbd_sad8x16_sse2, 10), - make_tuple(8, 8, highbd_sad8x8_sse2, 10), - make_tuple(8, 4, highbd_sad8x4_sse2, 10), - make_tuple(64, 64, highbd_sad64x64_sse2, 12), - make_tuple(64, 32, highbd_sad64x32_sse2, 12), - make_tuple(32, 64, highbd_sad32x64_sse2, 12), - make_tuple(32, 32, highbd_sad32x32_sse2, 12), - make_tuple(32, 16, highbd_sad32x16_sse2, 12), - make_tuple(16, 32, highbd_sad16x32_sse2, 12), - make_tuple(16, 16, highbd_sad16x16_sse2, 12), - make_tuple(16, 8, highbd_sad16x8_sse2, 12), - make_tuple(8, 16, highbd_sad8x16_sse2, 12), - make_tuple(8, 8, highbd_sad8x8_sse2, 12), - make_tuple(8, 4, highbd_sad8x4_sse2, 12), + make_tuple(64, 64, &vpx_highbd_sad64x64_sse2, 8), + make_tuple(64, 32, &vpx_highbd_sad64x32_sse2, 8), + make_tuple(32, 64, &vpx_highbd_sad32x64_sse2, 8), + make_tuple(32, 32, &vpx_highbd_sad32x32_sse2, 8), + make_tuple(32, 16, &vpx_highbd_sad32x16_sse2, 8), + make_tuple(16, 32, &vpx_highbd_sad16x32_sse2, 8), + make_tuple(16, 16, &vpx_highbd_sad16x16_sse2, 8), + make_tuple(16, 8, &vpx_highbd_sad16x8_sse2, 8), + make_tuple(8, 16, &vpx_highbd_sad8x16_sse2, 8), + make_tuple(8, 8, 
&vpx_highbd_sad8x8_sse2, 8), + make_tuple(8, 4, &vpx_highbd_sad8x4_sse2, 8), + make_tuple(64, 64, &vpx_highbd_sad64x64_sse2, 10), + make_tuple(64, 32, &vpx_highbd_sad64x32_sse2, 10), + make_tuple(32, 64, &vpx_highbd_sad32x64_sse2, 10), + make_tuple(32, 32, &vpx_highbd_sad32x32_sse2, 10), + make_tuple(32, 16, &vpx_highbd_sad32x16_sse2, 10), + make_tuple(16, 32, &vpx_highbd_sad16x32_sse2, 10), + make_tuple(16, 16, &vpx_highbd_sad16x16_sse2, 10), + make_tuple(16, 8, &vpx_highbd_sad16x8_sse2, 10), + make_tuple(8, 16, &vpx_highbd_sad8x16_sse2, 10), + make_tuple(8, 8, &vpx_highbd_sad8x8_sse2, 10), + make_tuple(8, 4, &vpx_highbd_sad8x4_sse2, 10), + make_tuple(64, 64, &vpx_highbd_sad64x64_sse2, 12), + make_tuple(64, 32, &vpx_highbd_sad64x32_sse2, 12), + make_tuple(32, 64, &vpx_highbd_sad32x64_sse2, 12), + make_tuple(32, 32, &vpx_highbd_sad32x32_sse2, 12), + make_tuple(32, 16, &vpx_highbd_sad32x16_sse2, 12), + make_tuple(16, 32, &vpx_highbd_sad16x32_sse2, 12), + make_tuple(16, 16, &vpx_highbd_sad16x16_sse2, 12), + make_tuple(16, 8, &vpx_highbd_sad16x8_sse2, 12), + make_tuple(8, 16, &vpx_highbd_sad8x16_sse2, 12), + make_tuple(8, 8, &vpx_highbd_sad8x8_sse2, 12), + make_tuple(8, 4, &vpx_highbd_sad8x4_sse2, 12), #endif // CONFIG_VP9_HIGHBITDEPTH }; INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests)); -const SadMxNAvgFunc sad64x64_avg_sse2 = vpx_sad64x64_avg_sse2; -const SadMxNAvgFunc sad64x32_avg_sse2 = vpx_sad64x32_avg_sse2; -const SadMxNAvgFunc sad32x64_avg_sse2 = vpx_sad32x64_avg_sse2; -const SadMxNAvgFunc sad32x32_avg_sse2 = vpx_sad32x32_avg_sse2; -const SadMxNAvgFunc sad32x16_avg_sse2 = vpx_sad32x16_avg_sse2; -const SadMxNAvgFunc sad16x32_avg_sse2 = vpx_sad16x32_avg_sse2; -const SadMxNAvgFunc sad16x16_avg_sse2 = vpx_sad16x16_avg_sse2; -const SadMxNAvgFunc sad16x8_avg_sse2 = vpx_sad16x8_avg_sse2; -const SadMxNAvgFunc sad8x16_avg_sse2 = vpx_sad8x16_avg_sse2; -const SadMxNAvgFunc sad8x8_avg_sse2 = vpx_sad8x8_avg_sse2; -const SadMxNAvgFunc sad8x4_avg_sse2 = 
vpx_sad8x4_avg_sse2; -#if CONFIG_VP9_HIGHBITDEPTH -const SadMxNAvgFunc highbd_sad64x64_avg_sse2 = vpx_highbd_sad64x64_avg_sse2; -const SadMxNAvgFunc highbd_sad64x32_avg_sse2 = vpx_highbd_sad64x32_avg_sse2; -const SadMxNAvgFunc highbd_sad32x64_avg_sse2 = vpx_highbd_sad32x64_avg_sse2; -const SadMxNAvgFunc highbd_sad32x32_avg_sse2 = vpx_highbd_sad32x32_avg_sse2; -const SadMxNAvgFunc highbd_sad32x16_avg_sse2 = vpx_highbd_sad32x16_avg_sse2; -const SadMxNAvgFunc highbd_sad16x32_avg_sse2 = vpx_highbd_sad16x32_avg_sse2; -const SadMxNAvgFunc highbd_sad16x16_avg_sse2 = vpx_highbd_sad16x16_avg_sse2; -const SadMxNAvgFunc highbd_sad16x8_avg_sse2 = vpx_highbd_sad16x8_avg_sse2; -const SadMxNAvgFunc highbd_sad8x16_avg_sse2 = vpx_highbd_sad8x16_avg_sse2; -const SadMxNAvgFunc highbd_sad8x8_avg_sse2 = vpx_highbd_sad8x8_avg_sse2; -const SadMxNAvgFunc highbd_sad8x4_avg_sse2 = vpx_highbd_sad8x4_avg_sse2; -#endif // CONFIG_VP9_HIGHBITDEPTH const SadMxNAvgParam avg_sse2_tests[] = { - make_tuple(64, 64, sad64x64_avg_sse2, -1), - make_tuple(64, 32, sad64x32_avg_sse2, -1), - make_tuple(32, 64, sad32x64_avg_sse2, -1), - make_tuple(32, 32, sad32x32_avg_sse2, -1), - make_tuple(32, 16, sad32x16_avg_sse2, -1), - make_tuple(16, 32, sad16x32_avg_sse2, -1), - make_tuple(16, 16, sad16x16_avg_sse2, -1), - make_tuple(16, 8, sad16x8_avg_sse2, -1), - make_tuple(8, 16, sad8x16_avg_sse2, -1), - make_tuple(8, 8, sad8x8_avg_sse2, -1), - make_tuple(8, 4, sad8x4_avg_sse2, -1), + make_tuple(64, 64, &vpx_sad64x64_avg_sse2, -1), + make_tuple(64, 32, &vpx_sad64x32_avg_sse2, -1), + make_tuple(32, 64, &vpx_sad32x64_avg_sse2, -1), + make_tuple(32, 32, &vpx_sad32x32_avg_sse2, -1), + make_tuple(32, 16, &vpx_sad32x16_avg_sse2, -1), + make_tuple(16, 32, &vpx_sad16x32_avg_sse2, -1), + make_tuple(16, 16, &vpx_sad16x16_avg_sse2, -1), + make_tuple(16, 8, &vpx_sad16x8_avg_sse2, -1), + make_tuple(8, 16, &vpx_sad8x16_avg_sse2, -1), + make_tuple(8, 8, &vpx_sad8x8_avg_sse2, -1), + make_tuple(8, 4, &vpx_sad8x4_avg_sse2, -1), + 
make_tuple(4, 8, &vpx_sad4x8_avg_sse2, -1), + make_tuple(4, 4, &vpx_sad4x4_avg_sse2, -1), #if CONFIG_VP9_HIGHBITDEPTH - make_tuple(64, 64, highbd_sad64x64_avg_sse2, 8), - make_tuple(64, 32, highbd_sad64x32_avg_sse2, 8), - make_tuple(32, 64, highbd_sad32x64_avg_sse2, 8), - make_tuple(32, 32, highbd_sad32x32_avg_sse2, 8), - make_tuple(32, 16, highbd_sad32x16_avg_sse2, 8), - make_tuple(16, 32, highbd_sad16x32_avg_sse2, 8), - make_tuple(16, 16, highbd_sad16x16_avg_sse2, 8), - make_tuple(16, 8, highbd_sad16x8_avg_sse2, 8), - make_tuple(8, 16, highbd_sad8x16_avg_sse2, 8), - make_tuple(8, 8, highbd_sad8x8_avg_sse2, 8), - make_tuple(8, 4, highbd_sad8x4_avg_sse2, 8), - make_tuple(64, 64, highbd_sad64x64_avg_sse2, 10), - make_tuple(64, 32, highbd_sad64x32_avg_sse2, 10), - make_tuple(32, 64, highbd_sad32x64_avg_sse2, 10), - make_tuple(32, 32, highbd_sad32x32_avg_sse2, 10), - make_tuple(32, 16, highbd_sad32x16_avg_sse2, 10), - make_tuple(16, 32, highbd_sad16x32_avg_sse2, 10), - make_tuple(16, 16, highbd_sad16x16_avg_sse2, 10), - make_tuple(16, 8, highbd_sad16x8_avg_sse2, 10), - make_tuple(8, 16, highbd_sad8x16_avg_sse2, 10), - make_tuple(8, 8, highbd_sad8x8_avg_sse2, 10), - make_tuple(8, 4, highbd_sad8x4_avg_sse2, 10), - make_tuple(64, 64, highbd_sad64x64_avg_sse2, 12), - make_tuple(64, 32, highbd_sad64x32_avg_sse2, 12), - make_tuple(32, 64, highbd_sad32x64_avg_sse2, 12), - make_tuple(32, 32, highbd_sad32x32_avg_sse2, 12), - make_tuple(32, 16, highbd_sad32x16_avg_sse2, 12), - make_tuple(16, 32, highbd_sad16x32_avg_sse2, 12), - make_tuple(16, 16, highbd_sad16x16_avg_sse2, 12), - make_tuple(16, 8, highbd_sad16x8_avg_sse2, 12), - make_tuple(8, 16, highbd_sad8x16_avg_sse2, 12), - make_tuple(8, 8, highbd_sad8x8_avg_sse2, 12), - make_tuple(8, 4, highbd_sad8x4_avg_sse2, 12), + make_tuple(64, 64, &vpx_highbd_sad64x64_avg_sse2, 8), + make_tuple(64, 32, &vpx_highbd_sad64x32_avg_sse2, 8), + make_tuple(32, 64, &vpx_highbd_sad32x64_avg_sse2, 8), + make_tuple(32, 32, 
&vpx_highbd_sad32x32_avg_sse2, 8), + make_tuple(32, 16, &vpx_highbd_sad32x16_avg_sse2, 8), + make_tuple(16, 32, &vpx_highbd_sad16x32_avg_sse2, 8), + make_tuple(16, 16, &vpx_highbd_sad16x16_avg_sse2, 8), + make_tuple(16, 8, &vpx_highbd_sad16x8_avg_sse2, 8), + make_tuple(8, 16, &vpx_highbd_sad8x16_avg_sse2, 8), + make_tuple(8, 8, &vpx_highbd_sad8x8_avg_sse2, 8), + make_tuple(8, 4, &vpx_highbd_sad8x4_avg_sse2, 8), + make_tuple(64, 64, &vpx_highbd_sad64x64_avg_sse2, 10), + make_tuple(64, 32, &vpx_highbd_sad64x32_avg_sse2, 10), + make_tuple(32, 64, &vpx_highbd_sad32x64_avg_sse2, 10), + make_tuple(32, 32, &vpx_highbd_sad32x32_avg_sse2, 10), + make_tuple(32, 16, &vpx_highbd_sad32x16_avg_sse2, 10), + make_tuple(16, 32, &vpx_highbd_sad16x32_avg_sse2, 10), + make_tuple(16, 16, &vpx_highbd_sad16x16_avg_sse2, 10), + make_tuple(16, 8, &vpx_highbd_sad16x8_avg_sse2, 10), + make_tuple(8, 16, &vpx_highbd_sad8x16_avg_sse2, 10), + make_tuple(8, 8, &vpx_highbd_sad8x8_avg_sse2, 10), + make_tuple(8, 4, &vpx_highbd_sad8x4_avg_sse2, 10), + make_tuple(64, 64, &vpx_highbd_sad64x64_avg_sse2, 12), + make_tuple(64, 32, &vpx_highbd_sad64x32_avg_sse2, 12), + make_tuple(32, 64, &vpx_highbd_sad32x64_avg_sse2, 12), + make_tuple(32, 32, &vpx_highbd_sad32x32_avg_sse2, 12), + make_tuple(32, 16, &vpx_highbd_sad32x16_avg_sse2, 12), + make_tuple(16, 32, &vpx_highbd_sad16x32_avg_sse2, 12), + make_tuple(16, 16, &vpx_highbd_sad16x16_avg_sse2, 12), + make_tuple(16, 8, &vpx_highbd_sad16x8_avg_sse2, 12), + make_tuple(8, 16, &vpx_highbd_sad8x16_avg_sse2, 12), + make_tuple(8, 8, &vpx_highbd_sad8x8_avg_sse2, 12), + make_tuple(8, 4, &vpx_highbd_sad8x4_avg_sse2, 12), #endif // CONFIG_VP9_HIGHBITDEPTH }; INSTANTIATE_TEST_CASE_P(SSE2, SADavgTest, ::testing::ValuesIn(avg_sse2_tests)); -const SadMxNx4Func sad64x64x4d_sse2 = vpx_sad64x64x4d_sse2; -const SadMxNx4Func sad64x32x4d_sse2 = vpx_sad64x32x4d_sse2; -const SadMxNx4Func sad32x64x4d_sse2 = vpx_sad32x64x4d_sse2; -const SadMxNx4Func sad32x32x4d_sse2 = 
vpx_sad32x32x4d_sse2; -const SadMxNx4Func sad32x16x4d_sse2 = vpx_sad32x16x4d_sse2; -const SadMxNx4Func sad16x32x4d_sse2 = vpx_sad16x32x4d_sse2; -const SadMxNx4Func sad16x16x4d_sse2 = vpx_sad16x16x4d_sse2; -const SadMxNx4Func sad16x8x4d_sse2 = vpx_sad16x8x4d_sse2; -const SadMxNx4Func sad8x16x4d_sse2 = vpx_sad8x16x4d_sse2; -const SadMxNx4Func sad8x8x4d_sse2 = vpx_sad8x8x4d_sse2; -const SadMxNx4Func sad8x4x4d_sse2 = vpx_sad8x4x4d_sse2; -#if CONFIG_VP9_HIGHBITDEPTH -const SadMxNx4Func highbd_sad64x64x4d_sse2 = vpx_highbd_sad64x64x4d_sse2; -const SadMxNx4Func highbd_sad64x32x4d_sse2 = vpx_highbd_sad64x32x4d_sse2; -const SadMxNx4Func highbd_sad32x64x4d_sse2 = vpx_highbd_sad32x64x4d_sse2; -const SadMxNx4Func highbd_sad32x32x4d_sse2 = vpx_highbd_sad32x32x4d_sse2; -const SadMxNx4Func highbd_sad32x16x4d_sse2 = vpx_highbd_sad32x16x4d_sse2; -const SadMxNx4Func highbd_sad16x32x4d_sse2 = vpx_highbd_sad16x32x4d_sse2; -const SadMxNx4Func highbd_sad16x16x4d_sse2 = vpx_highbd_sad16x16x4d_sse2; -const SadMxNx4Func highbd_sad16x8x4d_sse2 = vpx_highbd_sad16x8x4d_sse2; -const SadMxNx4Func highbd_sad8x16x4d_sse2 = vpx_highbd_sad8x16x4d_sse2; -const SadMxNx4Func highbd_sad8x8x4d_sse2 = vpx_highbd_sad8x8x4d_sse2; -const SadMxNx4Func highbd_sad8x4x4d_sse2 = vpx_highbd_sad8x4x4d_sse2; -const SadMxNx4Func highbd_sad4x8x4d_sse2 = vpx_highbd_sad4x8x4d_sse2; -const SadMxNx4Func highbd_sad4x4x4d_sse2 = vpx_highbd_sad4x4x4d_sse2; -#endif // CONFIG_VP9_HIGHBITDEPTH const SadMxNx4Param x4d_sse2_tests[] = { - make_tuple(64, 64, sad64x64x4d_sse2, -1), - make_tuple(64, 32, sad64x32x4d_sse2, -1), - make_tuple(32, 64, sad32x64x4d_sse2, -1), - make_tuple(32, 32, sad32x32x4d_sse2, -1), - make_tuple(32, 16, sad32x16x4d_sse2, -1), - make_tuple(16, 32, sad16x32x4d_sse2, -1), - make_tuple(16, 16, sad16x16x4d_sse2, -1), - make_tuple(16, 8, sad16x8x4d_sse2, -1), - make_tuple(8, 16, sad8x16x4d_sse2, -1), - make_tuple(8, 8, sad8x8x4d_sse2, -1), - make_tuple(8, 4, sad8x4x4d_sse2, -1), + make_tuple(64, 64, 
&vpx_sad64x64x4d_sse2, -1), + make_tuple(64, 32, &vpx_sad64x32x4d_sse2, -1), + make_tuple(32, 64, &vpx_sad32x64x4d_sse2, -1), + make_tuple(32, 32, &vpx_sad32x32x4d_sse2, -1), + make_tuple(32, 16, &vpx_sad32x16x4d_sse2, -1), + make_tuple(16, 32, &vpx_sad16x32x4d_sse2, -1), + make_tuple(16, 16, &vpx_sad16x16x4d_sse2, -1), + make_tuple(16, 8, &vpx_sad16x8x4d_sse2, -1), + make_tuple(8, 16, &vpx_sad8x16x4d_sse2, -1), + make_tuple(8, 8, &vpx_sad8x8x4d_sse2, -1), + make_tuple(8, 4, &vpx_sad8x4x4d_sse2, -1), + make_tuple(4, 8, &vpx_sad4x8x4d_sse2, -1), + make_tuple(4, 4, &vpx_sad4x4x4d_sse2, -1), #if CONFIG_VP9_HIGHBITDEPTH - make_tuple(64, 64, highbd_sad64x64x4d_sse2, 8), - make_tuple(64, 32, highbd_sad64x32x4d_sse2, 8), - make_tuple(32, 64, highbd_sad32x64x4d_sse2, 8), - make_tuple(32, 32, highbd_sad32x32x4d_sse2, 8), - make_tuple(32, 16, highbd_sad32x16x4d_sse2, 8), - make_tuple(16, 32, highbd_sad16x32x4d_sse2, 8), - make_tuple(16, 16, highbd_sad16x16x4d_sse2, 8), - make_tuple(16, 8, highbd_sad16x8x4d_sse2, 8), - make_tuple(8, 16, highbd_sad8x16x4d_sse2, 8), - make_tuple(8, 8, highbd_sad8x8x4d_sse2, 8), - make_tuple(8, 4, highbd_sad8x4x4d_sse2, 8), - make_tuple(4, 8, highbd_sad4x8x4d_sse2, 8), - make_tuple(4, 4, highbd_sad4x4x4d_sse2, 8), - make_tuple(64, 64, highbd_sad64x64x4d_sse2, 10), - make_tuple(64, 32, highbd_sad64x32x4d_sse2, 10), - make_tuple(32, 64, highbd_sad32x64x4d_sse2, 10), - make_tuple(32, 32, highbd_sad32x32x4d_sse2, 10), - make_tuple(32, 16, highbd_sad32x16x4d_sse2, 10), - make_tuple(16, 32, highbd_sad16x32x4d_sse2, 10), - make_tuple(16, 16, highbd_sad16x16x4d_sse2, 10), - make_tuple(16, 8, highbd_sad16x8x4d_sse2, 10), - make_tuple(8, 16, highbd_sad8x16x4d_sse2, 10), - make_tuple(8, 8, highbd_sad8x8x4d_sse2, 10), - make_tuple(8, 4, highbd_sad8x4x4d_sse2, 10), - make_tuple(4, 8, highbd_sad4x8x4d_sse2, 10), - make_tuple(4, 4, highbd_sad4x4x4d_sse2, 10), - make_tuple(64, 64, highbd_sad64x64x4d_sse2, 12), - make_tuple(64, 32, highbd_sad64x32x4d_sse2, 12), 
- make_tuple(32, 64, highbd_sad32x64x4d_sse2, 12), - make_tuple(32, 32, highbd_sad32x32x4d_sse2, 12), - make_tuple(32, 16, highbd_sad32x16x4d_sse2, 12), - make_tuple(16, 32, highbd_sad16x32x4d_sse2, 12), - make_tuple(16, 16, highbd_sad16x16x4d_sse2, 12), - make_tuple(16, 8, highbd_sad16x8x4d_sse2, 12), - make_tuple(8, 16, highbd_sad8x16x4d_sse2, 12), - make_tuple(8, 8, highbd_sad8x8x4d_sse2, 12), - make_tuple(8, 4, highbd_sad8x4x4d_sse2, 12), - make_tuple(4, 8, highbd_sad4x8x4d_sse2, 12), - make_tuple(4, 4, highbd_sad4x4x4d_sse2, 12), + make_tuple(64, 64, &vpx_highbd_sad64x64x4d_sse2, 8), + make_tuple(64, 32, &vpx_highbd_sad64x32x4d_sse2, 8), + make_tuple(32, 64, &vpx_highbd_sad32x64x4d_sse2, 8), + make_tuple(32, 32, &vpx_highbd_sad32x32x4d_sse2, 8), + make_tuple(32, 16, &vpx_highbd_sad32x16x4d_sse2, 8), + make_tuple(16, 32, &vpx_highbd_sad16x32x4d_sse2, 8), + make_tuple(16, 16, &vpx_highbd_sad16x16x4d_sse2, 8), + make_tuple(16, 8, &vpx_highbd_sad16x8x4d_sse2, 8), + make_tuple(8, 16, &vpx_highbd_sad8x16x4d_sse2, 8), + make_tuple(8, 8, &vpx_highbd_sad8x8x4d_sse2, 8), + make_tuple(8, 4, &vpx_highbd_sad8x4x4d_sse2, 8), + make_tuple(4, 8, &vpx_highbd_sad4x8x4d_sse2, 8), + make_tuple(4, 4, &vpx_highbd_sad4x4x4d_sse2, 8), + make_tuple(64, 64, &vpx_highbd_sad64x64x4d_sse2, 10), + make_tuple(64, 32, &vpx_highbd_sad64x32x4d_sse2, 10), + make_tuple(32, 64, &vpx_highbd_sad32x64x4d_sse2, 10), + make_tuple(32, 32, &vpx_highbd_sad32x32x4d_sse2, 10), + make_tuple(32, 16, &vpx_highbd_sad32x16x4d_sse2, 10), + make_tuple(16, 32, &vpx_highbd_sad16x32x4d_sse2, 10), + make_tuple(16, 16, &vpx_highbd_sad16x16x4d_sse2, 10), + make_tuple(16, 8, &vpx_highbd_sad16x8x4d_sse2, 10), + make_tuple(8, 16, &vpx_highbd_sad8x16x4d_sse2, 10), + make_tuple(8, 8, &vpx_highbd_sad8x8x4d_sse2, 10), + make_tuple(8, 4, &vpx_highbd_sad8x4x4d_sse2, 10), + make_tuple(4, 8, &vpx_highbd_sad4x8x4d_sse2, 10), + make_tuple(4, 4, &vpx_highbd_sad4x4x4d_sse2, 10), + make_tuple(64, 64, &vpx_highbd_sad64x64x4d_sse2, 12), 
+ make_tuple(64, 32, &vpx_highbd_sad64x32x4d_sse2, 12), + make_tuple(32, 64, &vpx_highbd_sad32x64x4d_sse2, 12), + make_tuple(32, 32, &vpx_highbd_sad32x32x4d_sse2, 12), + make_tuple(32, 16, &vpx_highbd_sad32x16x4d_sse2, 12), + make_tuple(16, 32, &vpx_highbd_sad16x32x4d_sse2, 12), + make_tuple(16, 16, &vpx_highbd_sad16x16x4d_sse2, 12), + make_tuple(16, 8, &vpx_highbd_sad16x8x4d_sse2, 12), + make_tuple(8, 16, &vpx_highbd_sad8x16x4d_sse2, 12), + make_tuple(8, 8, &vpx_highbd_sad8x8x4d_sse2, 12), + make_tuple(8, 4, &vpx_highbd_sad8x4x4d_sse2, 12), + make_tuple(4, 8, &vpx_highbd_sad4x8x4d_sse2, 12), + make_tuple(4, 4, &vpx_highbd_sad4x4x4d_sse2, 12), #endif // CONFIG_VP9_HIGHBITDEPTH }; INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::ValuesIn(x4d_sse2_tests)); @@ -1076,39 +868,27 @@ INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::ValuesIn(x4d_sse2_tests)); #endif // HAVE_SSE4_1 #if HAVE_AVX2 -const SadMxNFunc sad64x64_avx2 = vpx_sad64x64_avx2; -const SadMxNFunc sad64x32_avx2 = vpx_sad64x32_avx2; -const SadMxNFunc sad32x64_avx2 = vpx_sad32x64_avx2; -const SadMxNFunc sad32x32_avx2 = vpx_sad32x32_avx2; -const SadMxNFunc sad32x16_avx2 = vpx_sad32x16_avx2; const SadMxNParam avx2_tests[] = { - make_tuple(64, 64, sad64x64_avx2, -1), - make_tuple(64, 32, sad64x32_avx2, -1), - make_tuple(32, 64, sad32x64_avx2, -1), - make_tuple(32, 32, sad32x32_avx2, -1), - make_tuple(32, 16, sad32x16_avx2, -1), + make_tuple(64, 64, &vpx_sad64x64_avx2, -1), + make_tuple(64, 32, &vpx_sad64x32_avx2, -1), + make_tuple(32, 64, &vpx_sad32x64_avx2, -1), + make_tuple(32, 32, &vpx_sad32x32_avx2, -1), + make_tuple(32, 16, &vpx_sad32x16_avx2, -1), }; INSTANTIATE_TEST_CASE_P(AVX2, SADTest, ::testing::ValuesIn(avx2_tests)); -const SadMxNAvgFunc sad64x64_avg_avx2 = vpx_sad64x64_avg_avx2; -const SadMxNAvgFunc sad64x32_avg_avx2 = vpx_sad64x32_avg_avx2; -const SadMxNAvgFunc sad32x64_avg_avx2 = vpx_sad32x64_avg_avx2; -const SadMxNAvgFunc sad32x32_avg_avx2 = vpx_sad32x32_avg_avx2; -const SadMxNAvgFunc 
sad32x16_avg_avx2 = vpx_sad32x16_avg_avx2; const SadMxNAvgParam avg_avx2_tests[] = { - make_tuple(64, 64, sad64x64_avg_avx2, -1), - make_tuple(64, 32, sad64x32_avg_avx2, -1), - make_tuple(32, 64, sad32x64_avg_avx2, -1), - make_tuple(32, 32, sad32x32_avg_avx2, -1), - make_tuple(32, 16, sad32x16_avg_avx2, -1), + make_tuple(64, 64, &vpx_sad64x64_avg_avx2, -1), + make_tuple(64, 32, &vpx_sad64x32_avg_avx2, -1), + make_tuple(32, 64, &vpx_sad32x64_avg_avx2, -1), + make_tuple(32, 32, &vpx_sad32x32_avg_avx2, -1), + make_tuple(32, 16, &vpx_sad32x16_avg_avx2, -1), }; INSTANTIATE_TEST_CASE_P(AVX2, SADavgTest, ::testing::ValuesIn(avg_avx2_tests)); -const SadMxNx4Func sad64x64x4d_avx2 = vpx_sad64x64x4d_avx2; -const SadMxNx4Func sad32x32x4d_avx2 = vpx_sad32x32x4d_avx2; const SadMxNx4Param x4d_avx2_tests[] = { - make_tuple(64, 64, sad64x64x4d_avx2, -1), - make_tuple(32, 32, sad32x32x4d_avx2, -1), + make_tuple(64, 64, &vpx_sad64x64x4d_avx2, -1), + make_tuple(32, 32, &vpx_sad32x32x4d_avx2, -1), }; INSTANTIATE_TEST_CASE_P(AVX2, SADx4Test, ::testing::ValuesIn(x4d_avx2_tests)); #endif // HAVE_AVX2 @@ -1116,93 +896,54 @@ INSTANTIATE_TEST_CASE_P(AVX2, SADx4Test, ::testing::ValuesIn(x4d_avx2_tests)); //------------------------------------------------------------------------------ // MIPS functions #if HAVE_MSA -const SadMxNFunc sad64x64_msa = vpx_sad64x64_msa; -const SadMxNFunc sad64x32_msa = vpx_sad64x32_msa; -const SadMxNFunc sad32x64_msa = vpx_sad32x64_msa; -const SadMxNFunc sad32x32_msa = vpx_sad32x32_msa; -const SadMxNFunc sad32x16_msa = vpx_sad32x16_msa; -const SadMxNFunc sad16x32_msa = vpx_sad16x32_msa; -const SadMxNFunc sad16x16_msa = vpx_sad16x16_msa; -const SadMxNFunc sad16x8_msa = vpx_sad16x8_msa; -const SadMxNFunc sad8x16_msa = vpx_sad8x16_msa; -const SadMxNFunc sad8x8_msa = vpx_sad8x8_msa; -const SadMxNFunc sad8x4_msa = vpx_sad8x4_msa; -const SadMxNFunc sad4x8_msa = vpx_sad4x8_msa; -const SadMxNFunc sad4x4_msa = vpx_sad4x4_msa; const SadMxNParam msa_tests[] = { - 
make_tuple(64, 64, sad64x64_msa, -1), - make_tuple(64, 32, sad64x32_msa, -1), - make_tuple(32, 64, sad32x64_msa, -1), - make_tuple(32, 32, sad32x32_msa, -1), - make_tuple(32, 16, sad32x16_msa, -1), - make_tuple(16, 32, sad16x32_msa, -1), - make_tuple(16, 16, sad16x16_msa, -1), - make_tuple(16, 8, sad16x8_msa, -1), - make_tuple(8, 16, sad8x16_msa, -1), - make_tuple(8, 8, sad8x8_msa, -1), - make_tuple(8, 4, sad8x4_msa, -1), - make_tuple(4, 8, sad4x8_msa, -1), - make_tuple(4, 4, sad4x4_msa, -1), + make_tuple(64, 64, &vpx_sad64x64_msa, -1), + make_tuple(64, 32, &vpx_sad64x32_msa, -1), + make_tuple(32, 64, &vpx_sad32x64_msa, -1), + make_tuple(32, 32, &vpx_sad32x32_msa, -1), + make_tuple(32, 16, &vpx_sad32x16_msa, -1), + make_tuple(16, 32, &vpx_sad16x32_msa, -1), + make_tuple(16, 16, &vpx_sad16x16_msa, -1), + make_tuple(16, 8, &vpx_sad16x8_msa, -1), + make_tuple(8, 16, &vpx_sad8x16_msa, -1), + make_tuple(8, 8, &vpx_sad8x8_msa, -1), + make_tuple(8, 4, &vpx_sad8x4_msa, -1), + make_tuple(4, 8, &vpx_sad4x8_msa, -1), + make_tuple(4, 4, &vpx_sad4x4_msa, -1), }; INSTANTIATE_TEST_CASE_P(MSA, SADTest, ::testing::ValuesIn(msa_tests)); -const SadMxNAvgFunc sad64x64_avg_msa = vpx_sad64x64_avg_msa; -const SadMxNAvgFunc sad64x32_avg_msa = vpx_sad64x32_avg_msa; -const SadMxNAvgFunc sad32x64_avg_msa = vpx_sad32x64_avg_msa; -const SadMxNAvgFunc sad32x32_avg_msa = vpx_sad32x32_avg_msa; -const SadMxNAvgFunc sad32x16_avg_msa = vpx_sad32x16_avg_msa; -const SadMxNAvgFunc sad16x32_avg_msa = vpx_sad16x32_avg_msa; -const SadMxNAvgFunc sad16x16_avg_msa = vpx_sad16x16_avg_msa; -const SadMxNAvgFunc sad16x8_avg_msa = vpx_sad16x8_avg_msa; -const SadMxNAvgFunc sad8x16_avg_msa = vpx_sad8x16_avg_msa; -const SadMxNAvgFunc sad8x8_avg_msa = vpx_sad8x8_avg_msa; -const SadMxNAvgFunc sad8x4_avg_msa = vpx_sad8x4_avg_msa; -const SadMxNAvgFunc sad4x8_avg_msa = vpx_sad4x8_avg_msa; -const SadMxNAvgFunc sad4x4_avg_msa = vpx_sad4x4_avg_msa; const SadMxNAvgParam avg_msa_tests[] = { - make_tuple(64, 64, 
sad64x64_avg_msa, -1), - make_tuple(64, 32, sad64x32_avg_msa, -1), - make_tuple(32, 64, sad32x64_avg_msa, -1), - make_tuple(32, 32, sad32x32_avg_msa, -1), - make_tuple(32, 16, sad32x16_avg_msa, -1), - make_tuple(16, 32, sad16x32_avg_msa, -1), - make_tuple(16, 16, sad16x16_avg_msa, -1), - make_tuple(16, 8, sad16x8_avg_msa, -1), - make_tuple(8, 16, sad8x16_avg_msa, -1), - make_tuple(8, 8, sad8x8_avg_msa, -1), - make_tuple(8, 4, sad8x4_avg_msa, -1), - make_tuple(4, 8, sad4x8_avg_msa, -1), - make_tuple(4, 4, sad4x4_avg_msa, -1), + make_tuple(64, 64, &vpx_sad64x64_avg_msa, -1), + make_tuple(64, 32, &vpx_sad64x32_avg_msa, -1), + make_tuple(32, 64, &vpx_sad32x64_avg_msa, -1), + make_tuple(32, 32, &vpx_sad32x32_avg_msa, -1), + make_tuple(32, 16, &vpx_sad32x16_avg_msa, -1), + make_tuple(16, 32, &vpx_sad16x32_avg_msa, -1), + make_tuple(16, 16, &vpx_sad16x16_avg_msa, -1), + make_tuple(16, 8, &vpx_sad16x8_avg_msa, -1), + make_tuple(8, 16, &vpx_sad8x16_avg_msa, -1), + make_tuple(8, 8, &vpx_sad8x8_avg_msa, -1), + make_tuple(8, 4, &vpx_sad8x4_avg_msa, -1), + make_tuple(4, 8, &vpx_sad4x8_avg_msa, -1), + make_tuple(4, 4, &vpx_sad4x4_avg_msa, -1), }; INSTANTIATE_TEST_CASE_P(MSA, SADavgTest, ::testing::ValuesIn(avg_msa_tests)); -const SadMxNx4Func sad64x64x4d_msa = vpx_sad64x64x4d_msa; -const SadMxNx4Func sad64x32x4d_msa = vpx_sad64x32x4d_msa; -const SadMxNx4Func sad32x64x4d_msa = vpx_sad32x64x4d_msa; -const SadMxNx4Func sad32x32x4d_msa = vpx_sad32x32x4d_msa; -const SadMxNx4Func sad32x16x4d_msa = vpx_sad32x16x4d_msa; -const SadMxNx4Func sad16x32x4d_msa = vpx_sad16x32x4d_msa; -const SadMxNx4Func sad16x16x4d_msa = vpx_sad16x16x4d_msa; -const SadMxNx4Func sad16x8x4d_msa = vpx_sad16x8x4d_msa; -const SadMxNx4Func sad8x16x4d_msa = vpx_sad8x16x4d_msa; -const SadMxNx4Func sad8x8x4d_msa = vpx_sad8x8x4d_msa; -const SadMxNx4Func sad8x4x4d_msa = vpx_sad8x4x4d_msa; -const SadMxNx4Func sad4x8x4d_msa = vpx_sad4x8x4d_msa; -const SadMxNx4Func sad4x4x4d_msa = vpx_sad4x4x4d_msa; const SadMxNx4Param 
x4d_msa_tests[] = { - make_tuple(64, 64, sad64x64x4d_msa, -1), - make_tuple(64, 32, sad64x32x4d_msa, -1), - make_tuple(32, 64, sad32x64x4d_msa, -1), - make_tuple(32, 32, sad32x32x4d_msa, -1), - make_tuple(32, 16, sad32x16x4d_msa, -1), - make_tuple(16, 32, sad16x32x4d_msa, -1), - make_tuple(16, 16, sad16x16x4d_msa, -1), - make_tuple(16, 8, sad16x8x4d_msa, -1), - make_tuple(8, 16, sad8x16x4d_msa, -1), - make_tuple(8, 8, sad8x8x4d_msa, -1), - make_tuple(8, 4, sad8x4x4d_msa, -1), - make_tuple(4, 8, sad4x8x4d_msa, -1), - make_tuple(4, 4, sad4x4x4d_msa, -1), + make_tuple(64, 64, &vpx_sad64x64x4d_msa, -1), + make_tuple(64, 32, &vpx_sad64x32x4d_msa, -1), + make_tuple(32, 64, &vpx_sad32x64x4d_msa, -1), + make_tuple(32, 32, &vpx_sad32x32x4d_msa, -1), + make_tuple(32, 16, &vpx_sad32x16x4d_msa, -1), + make_tuple(16, 32, &vpx_sad16x32x4d_msa, -1), + make_tuple(16, 16, &vpx_sad16x16x4d_msa, -1), + make_tuple(16, 8, &vpx_sad16x8x4d_msa, -1), + make_tuple(8, 16, &vpx_sad8x16x4d_msa, -1), + make_tuple(8, 8, &vpx_sad8x8x4d_msa, -1), + make_tuple(8, 4, &vpx_sad8x4x4d_msa, -1), + make_tuple(4, 8, &vpx_sad4x8x4d_msa, -1), + make_tuple(4, 4, &vpx_sad4x4x4d_msa, -1), }; INSTANTIATE_TEST_CASE_P(MSA, SADx4Test, ::testing::ValuesIn(x4d_msa_tests)); #endif // HAVE_MSA diff --git a/libvpx/test/simple_encoder.sh b/libvpx/test/simple_encoder.sh index c4a628030..ee633ae99 100755 --- a/libvpx/test/simple_encoder.sh +++ b/libvpx/test/simple_encoder.sh @@ -23,7 +23,7 @@ simple_encoder_verify_environment() { fi } -# Runs simple_encoder using the codec specified by $1. +# Runs simple_encoder using the codec specified by $1 with a frame limit of 100. 
simple_encoder() { local encoder="${LIBVPX_BIN_PATH}/simple_encoder${VPX_TEST_EXE_SUFFIX}" local codec="$1" @@ -35,7 +35,7 @@ simple_encoder() { fi eval "${VPX_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \ - "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" 9999 \ + "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" 9999 0 100 \ ${devnull} [ -e "${output_file}" ] || return 1 @@ -47,16 +47,13 @@ simple_encoder_vp8() { fi } -# TODO(tomfinegan): Add a frame limit param to simple_encoder and enable this -# test. VP9 is just too slow right now: This test takes 4m30s+ on a fast -# machine. -DISABLED_simple_encoder_vp9() { +simple_encoder_vp9() { if [ "$(vp9_encode_available)" = "yes" ]; then simple_encoder vp9 || return 1 fi } simple_encoder_tests="simple_encoder_vp8 - DISABLED_simple_encoder_vp9" + simple_encoder_vp9" run_tests simple_encoder_verify_environment "${simple_encoder_tests}" diff --git a/libvpx/test/sixtap_predict_test.cc b/libvpx/test/sixtap_predict_test.cc index 1e682e7bd..304a1484a 100644 --- a/libvpx/test/sixtap_predict_test.cc +++ b/libvpx/test/sixtap_predict_test.cc @@ -186,70 +186,48 @@ TEST_P(SixtapPredictTest, TestWithRandomData) { using std::tr1::make_tuple; -const SixtapPredictFunc sixtap_16x16_c = vp8_sixtap_predict16x16_c; -const SixtapPredictFunc sixtap_8x8_c = vp8_sixtap_predict8x8_c; -const SixtapPredictFunc sixtap_8x4_c = vp8_sixtap_predict8x4_c; -const SixtapPredictFunc sixtap_4x4_c = vp8_sixtap_predict4x4_c; INSTANTIATE_TEST_CASE_P( C, SixtapPredictTest, ::testing::Values( - make_tuple(16, 16, sixtap_16x16_c), - make_tuple(8, 8, sixtap_8x8_c), - make_tuple(8, 4, sixtap_8x4_c), - make_tuple(4, 4, sixtap_4x4_c))); + make_tuple(16, 16, &vp8_sixtap_predict16x16_c), + make_tuple(8, 8, &vp8_sixtap_predict8x8_c), + make_tuple(8, 4, &vp8_sixtap_predict8x4_c), + make_tuple(4, 4, &vp8_sixtap_predict4x4_c))); #if HAVE_NEON -const SixtapPredictFunc sixtap_16x16_neon = vp8_sixtap_predict16x16_neon; -const 
SixtapPredictFunc sixtap_8x8_neon = vp8_sixtap_predict8x8_neon; -const SixtapPredictFunc sixtap_8x4_neon = vp8_sixtap_predict8x4_neon; INSTANTIATE_TEST_CASE_P( NEON, SixtapPredictTest, ::testing::Values( - make_tuple(16, 16, sixtap_16x16_neon), - make_tuple(8, 8, sixtap_8x8_neon), - make_tuple(8, 4, sixtap_8x4_neon))); + make_tuple(16, 16, &vp8_sixtap_predict16x16_neon), + make_tuple(8, 8, &vp8_sixtap_predict8x8_neon), + make_tuple(8, 4, &vp8_sixtap_predict8x4_neon))); #endif #if HAVE_MMX -const SixtapPredictFunc sixtap_16x16_mmx = vp8_sixtap_predict16x16_mmx; -const SixtapPredictFunc sixtap_8x8_mmx = vp8_sixtap_predict8x8_mmx; -const SixtapPredictFunc sixtap_8x4_mmx = vp8_sixtap_predict8x4_mmx; -const SixtapPredictFunc sixtap_4x4_mmx = vp8_sixtap_predict4x4_mmx; INSTANTIATE_TEST_CASE_P( MMX, SixtapPredictTest, ::testing::Values( - make_tuple(16, 16, sixtap_16x16_mmx), - make_tuple(8, 8, sixtap_8x8_mmx), - make_tuple(8, 4, sixtap_8x4_mmx), - make_tuple(4, 4, sixtap_4x4_mmx))); + make_tuple(16, 16, &vp8_sixtap_predict16x16_mmx), + make_tuple(8, 8, &vp8_sixtap_predict8x8_mmx), + make_tuple(8, 4, &vp8_sixtap_predict8x4_mmx), + make_tuple(4, 4, &vp8_sixtap_predict4x4_mmx))); #endif #if HAVE_SSE2 -const SixtapPredictFunc sixtap_16x16_sse2 = vp8_sixtap_predict16x16_sse2; -const SixtapPredictFunc sixtap_8x8_sse2 = vp8_sixtap_predict8x8_sse2; -const SixtapPredictFunc sixtap_8x4_sse2 = vp8_sixtap_predict8x4_sse2; INSTANTIATE_TEST_CASE_P( SSE2, SixtapPredictTest, ::testing::Values( - make_tuple(16, 16, sixtap_16x16_sse2), - make_tuple(8, 8, sixtap_8x8_sse2), - make_tuple(8, 4, sixtap_8x4_sse2))); + make_tuple(16, 16, &vp8_sixtap_predict16x16_sse2), + make_tuple(8, 8, &vp8_sixtap_predict8x8_sse2), + make_tuple(8, 4, &vp8_sixtap_predict8x4_sse2))); #endif #if HAVE_SSSE3 -const SixtapPredictFunc sixtap_16x16_ssse3 = vp8_sixtap_predict16x16_ssse3; -const SixtapPredictFunc sixtap_8x8_ssse3 = vp8_sixtap_predict8x8_ssse3; -const SixtapPredictFunc sixtap_8x4_ssse3 = 
vp8_sixtap_predict8x4_ssse3; -const SixtapPredictFunc sixtap_4x4_ssse3 = vp8_sixtap_predict4x4_ssse3; INSTANTIATE_TEST_CASE_P( SSSE3, SixtapPredictTest, ::testing::Values( - make_tuple(16, 16, sixtap_16x16_ssse3), - make_tuple(8, 8, sixtap_8x8_ssse3), - make_tuple(8, 4, sixtap_8x4_ssse3), - make_tuple(4, 4, sixtap_4x4_ssse3))); + make_tuple(16, 16, &vp8_sixtap_predict16x16_ssse3), + make_tuple(8, 8, &vp8_sixtap_predict8x8_ssse3), + make_tuple(8, 4, &vp8_sixtap_predict8x4_ssse3), + make_tuple(4, 4, &vp8_sixtap_predict4x4_ssse3))); #endif #if HAVE_MSA -const SixtapPredictFunc sixtap_16x16_msa = vp8_sixtap_predict16x16_msa; -const SixtapPredictFunc sixtap_8x8_msa = vp8_sixtap_predict8x8_msa; -const SixtapPredictFunc sixtap_8x4_msa = vp8_sixtap_predict8x4_msa; -const SixtapPredictFunc sixtap_4x4_msa = vp8_sixtap_predict4x4_msa; INSTANTIATE_TEST_CASE_P( MSA, SixtapPredictTest, ::testing::Values( - make_tuple(16, 16, sixtap_16x16_msa), - make_tuple(8, 8, sixtap_8x8_msa), - make_tuple(8, 4, sixtap_8x4_msa), - make_tuple(4, 4, sixtap_4x4_msa))); + make_tuple(16, 16, &vp8_sixtap_predict16x16_msa), + make_tuple(8, 8, &vp8_sixtap_predict8x8_msa), + make_tuple(8, 4, &vp8_sixtap_predict8x4_msa), + make_tuple(4, 4, &vp8_sixtap_predict4x4_msa))); #endif } // namespace diff --git a/libvpx/test/superframe_test.cc b/libvpx/test/superframe_test.cc index 90aa75b41..b07bcb284 100644 --- a/libvpx/test/superframe_test.cc +++ b/libvpx/test/superframe_test.cc @@ -17,7 +17,6 @@ namespace { const int kTestMode = 0; -const int kSuperframeSyntax = 1; typedef std::tr1::tuple<libvpx_test::TestMode,int> SuperframeTestParam; @@ -32,11 +31,9 @@ class SuperframeTest : public ::libvpx_test::EncoderTest, InitializeConfig(); const SuperframeTestParam input = GET_PARAM(1); const libvpx_test::TestMode mode = std::tr1::get<kTestMode>(input); - const int syntax = std::tr1::get<kSuperframeSyntax>(input); SetMode(mode); sf_count_ = 0; sf_count_max_ = INT_MAX; - is_vp10_style_superframe_ = syntax; } virtual 
void TearDown() { @@ -59,8 +56,7 @@ class SuperframeTest : public ::libvpx_test::EncoderTest, const uint8_t marker = buffer[pkt->data.frame.sz - 1]; const int frames = (marker & 0x7) + 1; const int mag = ((marker >> 3) & 3) + 1; - const unsigned int index_sz = - 2 + mag * (frames - is_vp10_style_superframe_); + const unsigned int index_sz = 2 + mag * frames; if ((marker & 0xe0) == 0xc0 && pkt->data.frame.sz >= index_sz && buffer[pkt->data.frame.sz - index_sz] == marker) { @@ -85,7 +81,6 @@ class SuperframeTest : public ::libvpx_test::EncoderTest, return pkt; } - int is_vp10_style_superframe_; int sf_count_; int sf_count_max_; vpx_codec_cx_pkt_t modified_pkt_; @@ -106,8 +101,4 @@ TEST_P(SuperframeTest, TestSuperframeIndexIsOptional) { VP9_INSTANTIATE_TEST_CASE(SuperframeTest, ::testing::Combine( ::testing::Values(::libvpx_test::kTwoPassGood), ::testing::Values(0))); - -VP10_INSTANTIATE_TEST_CASE(SuperframeTest, ::testing::Combine( - ::testing::Values(::libvpx_test::kTwoPassGood), - ::testing::Values(CONFIG_MISC_FIXES))); } // namespace diff --git a/libvpx/test/test-data.mk b/libvpx/test/test-data.mk index 4280b35f8..05a0885ed 100644 --- a/libvpx/test/test-data.mk +++ b/libvpx/test/test-data.mk @@ -418,6 +418,18 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x64.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x64.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x66.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x66.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-130x132.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-130x132.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-132x130.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-132x130.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-132x132.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-132x132.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += 
vp90-2-02-size-178x180.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-178x180.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-180x178.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-180x178.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-180x180.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-180x180.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-lf-1920x1080.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-lf-1920x1080.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-deltaq.webm @@ -550,6 +562,8 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x224.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x224.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x226.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x226.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-352x288.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-352x288.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-05-resize.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-05-resize.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-06-bilinear.webm @@ -642,6 +656,34 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-2.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-2.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-4.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-2-4-8.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-2-4-8.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-2.webm.md5 
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-8.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-8.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-2-1.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-2-1.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-2-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-2-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-2-8.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-2-8.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-4-1.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-4-1.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-4-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-4-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-4-8.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-4-8.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-1.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-1.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-4-2-1.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-4-2-1.webm.md5 
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-4.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey_adpq.webm @@ -769,3 +811,53 @@ endif # CONFIG_ENCODE_PERF_TESTS # sort and remove duplicates LIBVPX_TEST_DATA-yes := $(sort $(LIBVPX_TEST_DATA-yes)) + +# VP9 dynamic resizing test (decoder) +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_5_1-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_5_1-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_5_3-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_5_3-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_7_1-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_7_1-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_7_3-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_7_3-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_5_1-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_5_1-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_5_3-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_5_3-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_7_1-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_7_1-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_7_3-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_7_3-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += 
vp90-2-21-resize_inter_640x360_5_1-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_5_1-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_5_3-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_5_3-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_7_1-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_7_1-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_7_3-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_7_3-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_5_1-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_5_1-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_5_3-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_5_3-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_7_1-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_7_1-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_7_3-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_7_3-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_5_1-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_5_1-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_5_3-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_5_3-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_7_1-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_7_1-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_7_3-4.webm 
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_7_3-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_5_1-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_5_1-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_5_3-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_5_3-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_7_1-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_7_1-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_7_3-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_7_3-4.webm.md5 diff --git a/libvpx/test/test-data.sha1 b/libvpx/test/test-data.sha1 index 4e4ac6237..a4ed1742f 100644 --- a/libvpx/test/test-data.sha1 +++ b/libvpx/test/test-data.sha1 @@ -550,6 +550,8 @@ d17bc08eedfc60c4c23d576a6c964a21bf854d1f *vp90-2-03-size-226x202.webm 83c6d8f2969b759e10e5c6542baca1265c874c29 *vp90-2-03-size-226x224.webm.md5 fe0af2ee47b1e5f6a66db369e2d7e9d870b38dce *vp90-2-03-size-226x226.webm 94ad19b8b699cea105e2ff18f0df2afd7242bcf7 *vp90-2-03-size-226x226.webm.md5 +52bc1dfd3a97b24d922eb8a31d07527891561f2a *vp90-2-03-size-352x288.webm +3084d6d0a1eec22e85a394422fbc8faae58930a5 *vp90-2-03-size-352x288.webm.md5 b6524e4084d15b5d0caaa3d3d1368db30cbee69c *vp90-2-03-deltaq.webm 65f45ec9a55537aac76104818278e0978f94a678 *vp90-2-03-deltaq.webm.md5 4dbb87494c7f565ffc266c98d17d0d8c7a5c5aba *vp90-2-05-resize.ivf @@ -744,3 +746,91 @@ e60d859b0ef2b331b21740cf6cb83fabe469b079 *invalid-vp90-2-03-size-202x210.webm.iv 0ae808dca4d3c1152a9576e14830b6faa39f1b4a *invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf.res 9cfc855459e7549fd015c79e8eca512b2f2cb7e3 *niklas_1280_720_30.y4m 5b5763b388b1b52a81bb82b39f7ec25c4bd3d0e1 *desktop_credits.y4m +85771f6ab44e4a0226e206c0cde8351dd5918953 
*vp90-2-02-size-130x132.webm +512dad5eabbed37b4bbbc64ce153f1a5484427b8 *vp90-2-02-size-130x132.webm.md5 +01f7127d40360289db63b27f61cb9afcda350e95 *vp90-2-02-size-132x130.webm +4a94275328ae076cf60f966c097a8721010fbf5a *vp90-2-02-size-132x130.webm.md5 +f41c0400b5716b4b70552c40dd03d44be131e1cc *vp90-2-02-size-132x132.webm +1a69e989f697e424bfe3e3e8a77bb0c0992c8e47 *vp90-2-02-size-132x132.webm.md5 +94a5cbfacacba100e0c5f7861c72a1b417feca0f *vp90-2-02-size-178x180.webm +dedfecf1d784bcf70629592fa5e6f01d5441ccc9 *vp90-2-02-size-178x180.webm.md5 +4828b62478c04014bba3095a83106911a71cf387 *vp90-2-02-size-180x178.webm +423da2b861050c969d78ed8e8f8f14045d1d8199 *vp90-2-02-size-180x178.webm.md5 +338f7c9282f43e29940f5391118aadd17e4f9234 *vp90-2-02-size-180x180.webm +6c2ef013392310778dca5dd5351160eca66b0a60 *vp90-2-02-size-180x180.webm.md5 +679fa7d6807e936ff937d7b282e7dbd8ac76447e *vp90-2-14-resize-10frames-fp-tiles-1-2-4-8.webm +fc7267ab8fc2bf5d6c234e34ee6c078a967b4888 *vp90-2-14-resize-10frames-fp-tiles-1-2-4-8.webm.md5 +9d33a137c819792209c5ce4e4e1ee5da73d574fe *vp90-2-14-resize-10frames-fp-tiles-1-2.webm +0c78a154956a8605d050bdd75e0dcc4d39c040a6 *vp90-2-14-resize-10frames-fp-tiles-1-2.webm.md5 +d6a8d8c57f66a91d23e8e7df480f9ae841e56c37 *vp90-2-14-resize-10frames-fp-tiles-1-4.webm +e9b4e8c7b33b5fda745d340c3f47e6623ae40cf2 *vp90-2-14-resize-10frames-fp-tiles-1-4.webm.md5 +aa6fe043a0c4a42b49c87ebbe812d4afd9945bec *vp90-2-14-resize-10frames-fp-tiles-1-8.webm +028520578994c2d013d4c0129033d4f2ff31bbe0 *vp90-2-14-resize-10frames-fp-tiles-1-8.webm.md5 +d1d5463c9ea7b5cc5f609ddedccddf656f348d1a *vp90-2-14-resize-10frames-fp-tiles-2-1.webm +92d5872f5bdffbed721703b7e959b4f885e3d77a *vp90-2-14-resize-10frames-fp-tiles-2-1.webm.md5 +677cb29de1215d97346015af5807a9b1faad54cf *vp90-2-14-resize-10frames-fp-tiles-2-4.webm +a5db19f977094ec3fd60b4f7671b3e6740225e12 *vp90-2-14-resize-10frames-fp-tiles-2-4.webm.md5 +cdd3c52ba21067efdbb2de917fe2a965bf27332e *vp90-2-14-resize-10frames-fp-tiles-2-8.webm 
+db17ec5d894ea8b8d0b7f32206d0dd3d46dcfa6d *vp90-2-14-resize-10frames-fp-tiles-2-8.webm.md5 +0f6093c472125d05b764d7d1965c1d56771c0ea2 *vp90-2-14-resize-10frames-fp-tiles-4-1.webm +bc7c79e1bee07926dd970462ce6f64fc30eec3e1 *vp90-2-14-resize-10frames-fp-tiles-4-1.webm.md5 +c5142e2bff4091338196c8ea8bc9266e64f548bc *vp90-2-14-resize-10frames-fp-tiles-4-2.webm +22aa3dd430b69fd3d92f6561bac86deeed90486d *vp90-2-14-resize-10frames-fp-tiles-4-2.webm.md5 +ede8b1466d2f26e1b1bd9602addb9cd1017e1d8c *vp90-2-14-resize-10frames-fp-tiles-4-8.webm +508d5ebb9c0eac2a4100281a3ee052ec2fc19217 *vp90-2-14-resize-10frames-fp-tiles-4-8.webm.md5 +2b292e3392854cd1d76ae597a6f53656cf741cfa *vp90-2-14-resize-10frames-fp-tiles-8-1.webm +1c24e54fa19e94e1722f24676404444e941c3d31 *vp90-2-14-resize-10frames-fp-tiles-8-1.webm.md5 +61beda21064e09634564caa6697ab90bd53c9af7 *vp90-2-14-resize-10frames-fp-tiles-8-2.webm +9c0657b4d9e1d0e4c9d28a90e5a8630a65519124 *vp90-2-14-resize-10frames-fp-tiles-8-2.webm.md5 +1758c50a11a7c92522749b4a251664705f1f0d4b *vp90-2-14-resize-10frames-fp-tiles-8-4-2-1.webm +4f454a06750614314ae15a44087b79016fe2db97 *vp90-2-14-resize-10frames-fp-tiles-8-4-2-1.webm.md5 +3920c95ba94f1f048a731d9d9b416043b44aa4bd *vp90-2-14-resize-10frames-fp-tiles-8-4.webm +4eb347a0456d2c49a1e1d8de5aa1c51acc39887e *vp90-2-14-resize-10frames-fp-tiles-8-4.webm.md5 +4b95a74c032a473b6683d7ad5754db1b0ec378e9 *vp90-2-21-resize_inter_1280x720_5_1-2.webm +a7826dd386bedfe69d02736969bfb47fb6a40a5e *vp90-2-21-resize_inter_1280x720_5_1-2.webm.md5 +5cfff79e82c4d69964ccb8e75b4f0c53b9295167 *vp90-2-21-resize_inter_1280x720_5_3-4.webm +a18f57db4a25e1f543a99f2ceb182e00db0ee22f *vp90-2-21-resize_inter_1280x720_5_3-4.webm.md5 +d26db0811bf30eb4131d928669713e2485f8e833 *vp90-2-21-resize_inter_1280x720_7_1-2.webm +fd6f9f332cd5bea4c0f0d57be4297bea493cc5a1 *vp90-2-21-resize_inter_1280x720_7_1-2.webm.md5 +5c7d73d4d268e2ba9593b31cb091fd339505c7fd *vp90-2-21-resize_inter_1280x720_7_3-4.webm 
+7bbb949cabc1e70dadcc74582739f63b833034e0 *vp90-2-21-resize_inter_1280x720_7_3-4.webm.md5 +f2d2a41a60eb894aff0c5854afca15931f1445a8 *vp90-2-21-resize_inter_1920x1080_5_1-2.webm +66d7789992613ac9d678ff905ff1059daa1b89e4 *vp90-2-21-resize_inter_1920x1080_5_1-2.webm.md5 +764edb75fe7dd64e73a1b4f3b4b2b1bf237a4dea *vp90-2-21-resize_inter_1920x1080_5_3-4.webm +f78bea1075983fd990e7f25d4f31438f9b5efa34 *vp90-2-21-resize_inter_1920x1080_5_3-4.webm.md5 +96496f2ade764a5de9f0c27917c7df1f120fb2ef *vp90-2-21-resize_inter_1920x1080_7_1-2.webm +2632b635135ed5ecd67fd22dec7990d29c4f4cb5 *vp90-2-21-resize_inter_1920x1080_7_1-2.webm.md5 +74889ea42001bf41428cb742ca74e65129c886dc *vp90-2-21-resize_inter_1920x1080_7_3-4.webm +d2cf3b25956415bb579d368e7098097e482dd73a *vp90-2-21-resize_inter_1920x1080_7_3-4.webm.md5 +4658986a8ce36ebfcc80a1903e446eaab3985336 *vp90-2-21-resize_inter_320x180_5_1-2.webm +8a3d8cf325109ffa913cc9426c32eea8c202a09a *vp90-2-21-resize_inter_320x180_5_1-2.webm.md5 +16303aa45176520ee42c2c425247aadc1506b881 *vp90-2-21-resize_inter_320x180_5_3-4.webm +41cab1ddf7715b680a4dbce42faa9bcd72af4e5c *vp90-2-21-resize_inter_320x180_5_3-4.webm.md5 +56648adcee66dd0e5cb6ac947f5ee1b9cc8ba129 *vp90-2-21-resize_inter_320x180_7_1-2.webm +70047377787003cc03dda7b2394e6d7eaa666d9e *vp90-2-21-resize_inter_320x180_7_1-2.webm.md5 +d2ff99165488499cc55f75929f1ce5ca9c9e359b *vp90-2-21-resize_inter_320x180_7_3-4.webm +e69019e378114a4643db283b66d1a7e304761a56 *vp90-2-21-resize_inter_320x180_7_3-4.webm.md5 +4834d129bed0f4289d3a88f2ae3a1736f77621b0 *vp90-2-21-resize_inter_320x240_5_1-2.webm +a75653c53d22b623c1927fc0088da21dafef21f4 *vp90-2-21-resize_inter_320x240_5_1-2.webm.md5 +19818e1b7fd1c1e63d8873c31b0babe29dd33ba6 *vp90-2-21-resize_inter_320x240_5_3-4.webm +8d89814ff469a186312111651b16601dfbce4336 *vp90-2-21-resize_inter_320x240_5_3-4.webm.md5 +ac8057bae52498f324ce92a074d5f8207cc4a4a7 *vp90-2-21-resize_inter_320x240_7_1-2.webm +2643440898c83c08cc47bc744245af696b877c24 
*vp90-2-21-resize_inter_320x240_7_1-2.webm.md5 +cf4a4cd38ac8b18c42d8c25a3daafdb39132256b *vp90-2-21-resize_inter_320x240_7_3-4.webm +70ba8ec9120b26e9b0ffa2c79b432f16cbcb50ec *vp90-2-21-resize_inter_320x240_7_3-4.webm.md5 +669f10409fe1c4a054010162ca47773ea1fdbead *vp90-2-21-resize_inter_640x360_5_1-2.webm +6355a04249004a35fb386dd1024214234f044383 *vp90-2-21-resize_inter_640x360_5_1-2.webm.md5 +c23763b950b8247c1775d1f8158d93716197676c *vp90-2-21-resize_inter_640x360_5_3-4.webm +59e6fc381e3ec3b7bdaac586334e0bc944d18fb6 *vp90-2-21-resize_inter_640x360_5_3-4.webm.md5 +71b45cbfdd068baa1f679a69e5e6f421d256a85f *vp90-2-21-resize_inter_640x360_7_1-2.webm +1416fc761b690c54a955c4cf017fa078520e8c18 *vp90-2-21-resize_inter_640x360_7_1-2.webm.md5 +6c409903279448a697e4db63bab1061784bcd8d2 *vp90-2-21-resize_inter_640x360_7_3-4.webm +60de1299793433a630b71130cf76c9f5965758e2 *vp90-2-21-resize_inter_640x360_7_3-4.webm.md5 +852b597b8af096d90c80bf0ed6ed3b336b851f19 *vp90-2-21-resize_inter_640x480_5_1-2.webm +f6856f19236ee46ed462bd0a2e7e72b9c3b9cea6 *vp90-2-21-resize_inter_640x480_5_1-2.webm.md5 +792a16c6f60043bd8dceb515f0b95b8891647858 *vp90-2-21-resize_inter_640x480_5_3-4.webm +68ffe59877e9a7863805e1c0a3ce18ce037d7c9d *vp90-2-21-resize_inter_640x480_5_3-4.webm.md5 +61e044c4759972a35ea3db8c1478a988910a4ef4 *vp90-2-21-resize_inter_640x480_7_1-2.webm +7739bfca167b1b43fea72f807f01e097b7cb98d8 *vp90-2-21-resize_inter_640x480_7_1-2.webm.md5 +7291af354b4418917eee00e3a7e366086a0b7a10 *vp90-2-21-resize_inter_640x480_7_3-4.webm +4a18b09ccb36564193f0215f599d745d95bb558c *vp90-2-21-resize_inter_640x480_7_3-4.webm.md5 diff --git a/libvpx/test/test.mk b/libvpx/test/test.mk index 8d662448a..2d50ce813 100644 --- a/libvpx/test/test.mk +++ b/libvpx/test/test.mk @@ -18,15 +18,17 @@ LIBVPX_TEST_SRCS-yes += video_source.h LIBVPX_TEST_SRCS-yes += ../md5_utils.h ../md5_utils.c LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ivf_video_source.h LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += ../y4minput.h ../y4minput.c 
+LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += altref_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += aq_segment_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += datarate_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += encode_api_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += error_resilience_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += i420_video_source.h +LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += realtime_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += resize_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += y4m_video_source.h LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += yuv_video_source.h -LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += altref_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += config_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += cq_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc @@ -44,6 +46,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += frame_size_tests.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_lossless_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_end_to_end_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_ethread_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += level_test.cc LIBVPX_TEST_SRCS-yes += decode_test_driver.cc LIBVPX_TEST_SRCS-yes += decode_test_driver.h @@ -58,10 +61,10 @@ LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += y4m_test.cc ../y4menc.c ../y4menc.h ## WebM Parsing ifeq ($(CONFIG_WEBM_IO), yes) -LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser.cpp -LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvreader.cpp -LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser.hpp -LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvreader.hpp +LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvparser.cc +LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvreader.cc +LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvparser.h +LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvreader.h LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += $(LIBWEBM_PARSER_SRCS) LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += 
../tools_common.h LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../webmdec.cc @@ -92,10 +95,9 @@ endif ## shared library builds don't make these functions accessible. ## ifeq ($(CONFIG_SHARED),) -LIBVPX_TEST_SRCS-$(CONFIG_VP9) += lpf_8_test.cc ## VP8 -ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),) +ifeq ($(CONFIG_VP8),yes) # These tests require both the encoder and decoder to be built. ifeq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),yesyes) @@ -103,12 +105,13 @@ LIBVPX_TEST_SRCS-yes += vp8_boolcoder_test.cc LIBVPX_TEST_SRCS-yes += vp8_fragments_test.cc endif +LIBVPX_TEST_SRCS-$(CONFIG_POSTPROC) += add_noise_test.cc LIBVPX_TEST_SRCS-$(CONFIG_POSTPROC) += pp_filter_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += vp8_decrypt_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += quantize_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += set_roi.cc LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += variance_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_fdct4x4_test.cc -LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += quantize_test.cc LIBVPX_TEST_SRCS-yes += idct_test.cc LIBVPX_TEST_SRCS-yes += sixtap_predict_test.cc @@ -121,7 +124,7 @@ endif endif # VP8 ## VP9 -ifneq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),) +ifeq ($(CONFIG_VP9),yes) # These tests require both the encoder and decoder to be built. 
ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),yesyes) @@ -134,25 +137,27 @@ LIBVPX_TEST_SRCS-yes += vp9_boolcoder_test.cc LIBVPX_TEST_SRCS-yes += vp9_encoder_parms_get_to_decoder.cc endif -LIBVPX_TEST_SRCS-$(CONFIG_VP9) += convolve_test.cc -LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_thread_test.cc +LIBVPX_TEST_SRCS-yes += convolve_test.cc +LIBVPX_TEST_SRCS-yes += lpf_8_test.cc +LIBVPX_TEST_SRCS-yes += vp9_intrapred_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_decrypt_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_thread_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += avg_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct32x32_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += hadamard_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += minmax_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc -LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc -LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_avg_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_error_block_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc -LIBVPX_TEST_SRCS-$(CONFIG_VP9) += vp9_intrapred_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc ifeq ($(CONFIG_VP9_ENCODER),yes) LIBVPX_TEST_SRCS-$(CONFIG_SPATIAL_SVC) += svc_test.cc LIBVPX_TEST_SRCS-$(CONFIG_INTERNAL_STATS) += blockiness_test.cc LIBVPX_TEST_SRCS-$(CONFIG_INTERNAL_STATS) += consistency_test.cc - endif ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_TEMPORAL_DENOISING),yesyes) @@ -162,14 +167,12 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_arf_freq_test.cc endif # VP9 -LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += sad_test.cc +## Multi-codec / unconditional whitebox tests. 
-TEST_INTRA_PRED_SPEED_SRCS-$(CONFIG_VP9) := test_intra_pred_speed.cc -TEST_INTRA_PRED_SPEED_SRCS-$(CONFIG_VP9) += ../md5_utils.h ../md5_utils.c +LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += sad_test.cc -## VP10 -LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += vp10_dct_test.cc -LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_inv_txfm_test.cc +TEST_INTRA_PRED_SPEED_SRCS-yes := test_intra_pred_speed.cc +TEST_INTRA_PRED_SPEED_SRCS-yes += ../md5_utils.h ../md5_utils.c endif # CONFIG_SHARED diff --git a/libvpx/test/test_intra_pred_speed.cc b/libvpx/test/test_intra_pred_speed.cc index 5d59e83f7..2acf744d5 100644 --- a/libvpx/test/test_intra_pred_speed.cc +++ b/libvpx/test/test_intra_pred_speed.cc @@ -187,18 +187,20 @@ INTRA_PRED_TEST(C, TestIntraPred4, vpx_dc_predictor_4x4_c, vpx_d153_predictor_4x4_c, vpx_d207_predictor_4x4_c, vpx_d63_predictor_4x4_c, vpx_tm_predictor_4x4_c) -#if HAVE_SSE && CONFIG_USE_X86INC -INTRA_PRED_TEST(SSE, TestIntraPred4, vpx_dc_predictor_4x4_sse, - vpx_dc_left_predictor_4x4_sse, vpx_dc_top_predictor_4x4_sse, - vpx_dc_128_predictor_4x4_sse, vpx_v_predictor_4x4_sse, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, vpx_tm_predictor_4x4_sse) -#endif // HAVE_SSE && CONFIG_USE_X86INC +#if HAVE_SSE2 && CONFIG_USE_X86INC +INTRA_PRED_TEST(SSE2, TestIntraPred4, vpx_dc_predictor_4x4_sse2, + vpx_dc_left_predictor_4x4_sse2, vpx_dc_top_predictor_4x4_sse2, + vpx_dc_128_predictor_4x4_sse2, vpx_v_predictor_4x4_sse2, + vpx_h_predictor_4x4_sse2, vpx_d45_predictor_4x4_sse2, NULL, + NULL, NULL, vpx_d207_predictor_4x4_sse2, NULL, + vpx_tm_predictor_4x4_sse2) +#endif // HAVE_SSE2 && CONFIG_USE_X86INC #if HAVE_SSSE3 && CONFIG_USE_X86INC INTRA_PRED_TEST(SSSE3, TestIntraPred4, NULL, NULL, NULL, NULL, NULL, - vpx_h_predictor_4x4_ssse3, vpx_d45_predictor_4x4_ssse3, NULL, - NULL, vpx_d153_predictor_4x4_ssse3, - vpx_d207_predictor_4x4_ssse3, vpx_d63_predictor_4x4_ssse3, NULL) + NULL, NULL, NULL, NULL, + vpx_d153_predictor_4x4_ssse3, NULL, + vpx_d63_predictor_4x4_ssse3, NULL) #endif // HAVE_SSSE3 && 
CONFIG_USE_X86INC #if HAVE_DSPR2 @@ -235,23 +237,19 @@ INTRA_PRED_TEST(C, TestIntraPred8, vpx_dc_predictor_8x8_c, vpx_d153_predictor_8x8_c, vpx_d207_predictor_8x8_c, vpx_d63_predictor_8x8_c, vpx_tm_predictor_8x8_c) -#if HAVE_SSE && CONFIG_USE_X86INC -INTRA_PRED_TEST(SSE, TestIntraPred8, vpx_dc_predictor_8x8_sse, - vpx_dc_left_predictor_8x8_sse, vpx_dc_top_predictor_8x8_sse, - vpx_dc_128_predictor_8x8_sse, vpx_v_predictor_8x8_sse, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL) -#endif // HAVE_SSE && CONFIG_USE_X86INC - #if HAVE_SSE2 && CONFIG_USE_X86INC -INTRA_PRED_TEST(SSE2, TestIntraPred8, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, vpx_tm_predictor_8x8_sse2) +INTRA_PRED_TEST(SSE2, TestIntraPred8, vpx_dc_predictor_8x8_sse2, + vpx_dc_left_predictor_8x8_sse2, vpx_dc_top_predictor_8x8_sse2, + vpx_dc_128_predictor_8x8_sse2, vpx_v_predictor_8x8_sse2, + vpx_h_predictor_8x8_sse2, vpx_d45_predictor_8x8_sse2, NULL, + NULL, NULL, NULL, NULL, vpx_tm_predictor_8x8_sse2) #endif // HAVE_SSE2 && CONFIG_USE_X86INC #if HAVE_SSSE3 && CONFIG_USE_X86INC INTRA_PRED_TEST(SSSE3, TestIntraPred8, NULL, NULL, NULL, NULL, NULL, - vpx_h_predictor_8x8_ssse3, vpx_d45_predictor_8x8_ssse3, NULL, - NULL, vpx_d153_predictor_8x8_ssse3, - vpx_d207_predictor_8x8_ssse3, vpx_d63_predictor_8x8_ssse3, NULL) + NULL, NULL, NULL, NULL, + vpx_d153_predictor_8x8_ssse3, vpx_d207_predictor_8x8_ssse3, + vpx_d63_predictor_8x8_ssse3, NULL) #endif // HAVE_SSSE3 && CONFIG_USE_X86INC #if HAVE_DSPR2 @@ -293,13 +291,13 @@ INTRA_PRED_TEST(SSE2, TestIntraPred16, vpx_dc_predictor_16x16_sse2, vpx_dc_left_predictor_16x16_sse2, vpx_dc_top_predictor_16x16_sse2, vpx_dc_128_predictor_16x16_sse2, vpx_v_predictor_16x16_sse2, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, + vpx_h_predictor_16x16_sse2, NULL, NULL, NULL, NULL, NULL, NULL, vpx_tm_predictor_16x16_sse2) #endif // HAVE_SSE2 && CONFIG_USE_X86INC #if HAVE_SSSE3 && CONFIG_USE_X86INC INTRA_PRED_TEST(SSSE3, TestIntraPred16, NULL, NULL, NULL, 
NULL, NULL, - vpx_h_predictor_16x16_ssse3, vpx_d45_predictor_16x16_ssse3, + NULL, vpx_d45_predictor_16x16_ssse3, NULL, NULL, vpx_d153_predictor_16x16_ssse3, vpx_d207_predictor_16x16_ssse3, vpx_d63_predictor_16x16_ssse3, NULL) @@ -340,28 +338,19 @@ INTRA_PRED_TEST(C, TestIntraPred32, vpx_dc_predictor_32x32_c, vpx_d63_predictor_32x32_c, vpx_tm_predictor_32x32_c) #if HAVE_SSE2 && CONFIG_USE_X86INC -#if ARCH_X86_64 -INTRA_PRED_TEST(SSE2, TestIntraPred32, vpx_dc_predictor_32x32_sse2, - vpx_dc_left_predictor_32x32_sse2, - vpx_dc_top_predictor_32x32_sse2, - vpx_dc_128_predictor_32x32_sse2, vpx_v_predictor_32x32_sse2, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, - vpx_tm_predictor_32x32_sse2) -#else INTRA_PRED_TEST(SSE2, TestIntraPred32, vpx_dc_predictor_32x32_sse2, vpx_dc_left_predictor_32x32_sse2, vpx_dc_top_predictor_32x32_sse2, vpx_dc_128_predictor_32x32_sse2, vpx_v_predictor_32x32_sse2, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL) -#endif // ARCH_X86_64 + vpx_h_predictor_32x32_sse2, NULL, NULL, NULL, NULL, NULL, + NULL, vpx_tm_predictor_32x32_sse2) #endif // HAVE_SSE2 && CONFIG_USE_X86INC #if HAVE_SSSE3 && CONFIG_USE_X86INC INTRA_PRED_TEST(SSSE3, TestIntraPred32, NULL, NULL, NULL, NULL, NULL, - vpx_h_predictor_32x32_ssse3, vpx_d45_predictor_32x32_ssse3, - NULL, NULL, vpx_d153_predictor_32x32_ssse3, - vpx_d207_predictor_32x32_ssse3, vpx_d63_predictor_32x32_ssse3, - NULL) + NULL, vpx_d45_predictor_32x32_ssse3, NULL, NULL, + vpx_d153_predictor_32x32_ssse3, vpx_d207_predictor_32x32_ssse3, + vpx_d63_predictor_32x32_ssse3, NULL) #endif // HAVE_SSSE3 && CONFIG_USE_X86INC #if HAVE_NEON diff --git a/libvpx/test/test_vector_test.cc b/libvpx/test/test_vector_test.cc index 437ce44b6..f1aa4d7f7 100644 --- a/libvpx/test/test_vector_test.cc +++ b/libvpx/test/test_vector_test.cc @@ -10,6 +10,7 @@ #include <cstdio> #include <cstdlib> +#include <set> #include <string> #include "third_party/googletest/src/include/gtest/gtest.h" #include "../tools_common.h" @@ -44,6 +45,12 @@ class 
TestVectorTest : public ::libvpx_test::DecoderTest, TestVectorTest() : DecoderTest(GET_PARAM(0)), md5_file_(NULL) { +#if CONFIG_VP9_DECODER + resize_clips_.insert( + ::libvpx_test::kVP9TestVectorsResize, + ::libvpx_test::kVP9TestVectorsResize + + ::libvpx_test::kNumVP9TestVectorsResize); +#endif } virtual ~TestVectorTest() { @@ -77,6 +84,10 @@ class TestVectorTest : public ::libvpx_test::DecoderTest, << "Md5 checksums don't match: frame number = " << frame_number; } +#if CONFIG_VP9_DECODER + std::set<std::string> resize_clips_; +#endif + private: FILE *md5_file_; }; @@ -92,11 +103,19 @@ TEST_P(TestVectorTest, MD5Match) { const int mode = std::tr1::get<kDecodeMode>(input); libvpx_test::CompressedVideoSource *video = NULL; vpx_codec_flags_t flags = 0; - vpx_codec_dec_cfg_t cfg = {0}; + vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t(); char str[256]; if (mode == kFrameParallelMode) { flags |= VPX_CODEC_USE_FRAME_THREADING; +#if CONFIG_VP9_DECODER + // TODO(hkuang): Fix frame parallel decode bug. See issue 1086. 
+ if (resize_clips_.find(filename) != resize_clips_.end()) { + printf("Skipping the test file: %s, due to frame parallel decode bug.\n", + filename.c_str()); + return; + } +#endif } cfg.threads = threads; diff --git a/libvpx/test/test_vectors.cc b/libvpx/test/test_vectors.cc index 434a38251..c82247966 100644 --- a/libvpx/test/test_vectors.cc +++ b/libvpx/test/test_vectors.cc @@ -52,6 +52,31 @@ const char *const kVP8TestVectors[] = { const int kNumVP8TestVectors = NELEMENTS(kVP8TestVectors); #endif // CONFIG_VP8_DECODER #if CONFIG_VP9_DECODER +#define RESIZE_TEST_VECTORS "vp90-2-21-resize_inter_320x180_5_1-2.webm", \ + "vp90-2-21-resize_inter_320x180_5_3-4.webm", \ + "vp90-2-21-resize_inter_320x180_7_1-2.webm", \ + "vp90-2-21-resize_inter_320x180_7_3-4.webm", \ + "vp90-2-21-resize_inter_320x240_5_1-2.webm", \ + "vp90-2-21-resize_inter_320x240_5_3-4.webm", \ + "vp90-2-21-resize_inter_320x240_7_1-2.webm", \ + "vp90-2-21-resize_inter_320x240_7_3-4.webm", \ + "vp90-2-21-resize_inter_640x360_5_1-2.webm", \ + "vp90-2-21-resize_inter_640x360_5_3-4.webm", \ + "vp90-2-21-resize_inter_640x360_7_1-2.webm", \ + "vp90-2-21-resize_inter_640x360_7_3-4.webm", \ + "vp90-2-21-resize_inter_640x480_5_1-2.webm", \ + "vp90-2-21-resize_inter_640x480_5_3-4.webm", \ + "vp90-2-21-resize_inter_640x480_7_1-2.webm", \ + "vp90-2-21-resize_inter_640x480_7_3-4.webm", \ + "vp90-2-21-resize_inter_1280x720_5_1-2.webm", \ + "vp90-2-21-resize_inter_1280x720_5_3-4.webm", \ + "vp90-2-21-resize_inter_1280x720_7_1-2.webm", \ + "vp90-2-21-resize_inter_1280x720_7_3-4.webm", \ + "vp90-2-21-resize_inter_1920x1080_5_1-2.webm", \ + "vp90-2-21-resize_inter_1920x1080_5_3-4.webm", \ + "vp90-2-21-resize_inter_1920x1080_7_1-2.webm", \ + "vp90-2-21-resize_inter_1920x1080_7_3-4.webm", + const char *const kVP9TestVectors[] = { "vp90-2-00-quantizer-00.webm", "vp90-2-00-quantizer-01.webm", "vp90-2-00-quantizer-02.webm", "vp90-2-00-quantizer-03.webm", @@ -120,7 +145,10 @@ const char *const kVP9TestVectors[] = { 
"vp90-2-02-size-66x10.webm", "vp90-2-02-size-66x16.webm", "vp90-2-02-size-66x18.webm", "vp90-2-02-size-66x32.webm", "vp90-2-02-size-66x34.webm", "vp90-2-02-size-66x64.webm", - "vp90-2-02-size-66x66.webm", "vp90-2-03-size-196x196.webm", + "vp90-2-02-size-66x66.webm", "vp90-2-02-size-130x132.webm", + "vp90-2-02-size-132x130.webm", "vp90-2-02-size-132x132.webm", + "vp90-2-02-size-178x180.webm", "vp90-2-02-size-180x178.webm", + "vp90-2-02-size-180x180.webm", "vp90-2-03-size-196x196.webm", "vp90-2-03-size-196x198.webm", "vp90-2-03-size-196x200.webm", "vp90-2-03-size-196x202.webm", "vp90-2-03-size-196x208.webm", "vp90-2-03-size-196x210.webm", "vp90-2-03-size-196x224.webm", @@ -152,7 +180,8 @@ const char *const kVP9TestVectors[] = { "vp90-2-03-size-226x198.webm", "vp90-2-03-size-226x200.webm", "vp90-2-03-size-226x202.webm", "vp90-2-03-size-226x208.webm", "vp90-2-03-size-226x210.webm", "vp90-2-03-size-226x224.webm", - "vp90-2-03-size-226x226.webm", "vp90-2-03-deltaq.webm", + "vp90-2-03-size-226x226.webm", "vp90-2-03-size-352x288.webm", + "vp90-2-03-deltaq.webm", "vp90-2-05-resize.ivf", "vp90-2-06-bilinear.webm", "vp90-2-07-frame_parallel.webm", "vp90-2-08-tile_1x2_frame_parallel.webm", "vp90-2-08-tile_1x2.webm", "vp90-2-08-tile_1x4_frame_parallel.webm", @@ -182,6 +211,20 @@ const char *const kVP9TestVectors[] = { "vp90-2-14-resize-fp-tiles-4-2.webm", "vp90-2-14-resize-fp-tiles-4-8.webm", "vp90-2-14-resize-fp-tiles-8-16.webm", "vp90-2-14-resize-fp-tiles-8-1.webm", "vp90-2-14-resize-fp-tiles-8-2.webm", "vp90-2-14-resize-fp-tiles-8-4.webm", + "vp90-2-14-resize-10frames-fp-tiles-1-2-4-8.webm", + "vp90-2-14-resize-10frames-fp-tiles-1-2.webm", + "vp90-2-14-resize-10frames-fp-tiles-1-4.webm", + "vp90-2-14-resize-10frames-fp-tiles-1-8.webm", + "vp90-2-14-resize-10frames-fp-tiles-2-1.webm", + "vp90-2-14-resize-10frames-fp-tiles-2-4.webm", + "vp90-2-14-resize-10frames-fp-tiles-2-8.webm", + "vp90-2-14-resize-10frames-fp-tiles-4-1.webm", + 
"vp90-2-14-resize-10frames-fp-tiles-4-2.webm", + "vp90-2-14-resize-10frames-fp-tiles-4-8.webm", + "vp90-2-14-resize-10frames-fp-tiles-8-1.webm", + "vp90-2-14-resize-10frames-fp-tiles-8-2.webm", + "vp90-2-14-resize-10frames-fp-tiles-8-4-2-1.webm", + "vp90-2-14-resize-10frames-fp-tiles-8-4.webm", "vp90-2-15-segkey.webm", "vp90-2-15-segkey_adpq.webm", "vp90-2-16-intra-only.webm", "vp90-2-17-show-existing-frame.webm", "vp90-2-18-resize.ivf", "vp90-2-19-skip.webm", @@ -193,10 +236,16 @@ const char *const kVP9TestVectors[] = { "vp93-2-20-10bit-yuv422.webm", "vp93-2-20-12bit-yuv422.webm", "vp93-2-20-10bit-yuv440.webm", "vp93-2-20-12bit-yuv440.webm", "vp93-2-20-10bit-yuv444.webm", "vp93-2-20-12bit-yuv444.webm", -#endif // CONFIG_VP9_HIGHBITDEPTH` +#endif // CONFIG_VP9_HIGHBITDEPTH "vp90-2-20-big_superframe-01.webm", "vp90-2-20-big_superframe-02.webm", + RESIZE_TEST_VECTORS }; const int kNumVP9TestVectors = NELEMENTS(kVP9TestVectors); +const char *const kVP9TestVectorsResize[] = { + RESIZE_TEST_VECTORS +}; +const int kNumVP9TestVectorsResize = NELEMENTS(kVP9TestVectorsResize); +#undef RESIZE_TEST_VECTORS #endif // CONFIG_VP9_DECODER } // namespace libvpx_test diff --git a/libvpx/test/test_vectors.h b/libvpx/test/test_vectors.h index 8e1aabb32..2c6918abd 100644 --- a/libvpx/test/test_vectors.h +++ b/libvpx/test/test_vectors.h @@ -23,6 +23,8 @@ extern const char *const kVP8TestVectors[]; #if CONFIG_VP9_DECODER extern const int kNumVP9TestVectors; extern const char *const kVP9TestVectors[]; +extern const int kNumVP9TestVectorsResize; +extern const char *const kVP9TestVectorsResize[]; #endif // CONFIG_VP9_DECODER } // namespace libvpx_test diff --git a/libvpx/test/tile_independence_test.cc b/libvpx/test/tile_independence_test.cc index 193bd4598..f15d94a0a 100644 --- a/libvpx/test/tile_independence_test.cc +++ b/libvpx/test/tile_independence_test.cc @@ -103,6 +103,4 @@ TEST_P(TileIndependenceTest, MD5Match) { } VP9_INSTANTIATE_TEST_CASE(TileIndependenceTest, ::testing::Range(0, 
2, 1)); - -VP10_INSTANTIATE_TEST_CASE(TileIndependenceTest, ::testing::Range(0, 2, 1)); } // namespace diff --git a/libvpx/test/twopass_encoder.sh b/libvpx/test/twopass_encoder.sh index 1189e5131..7a223f2af 100755 --- a/libvpx/test/twopass_encoder.sh +++ b/libvpx/test/twopass_encoder.sh @@ -23,7 +23,8 @@ twopass_encoder_verify_environment() { fi } -# Runs twopass_encoder using the codec specified by $1. +# Runs twopass_encoder using the codec specified by $1 with a frame limit of +# 100. twopass_encoder() { local encoder="${LIBVPX_BIN_PATH}/twopass_encoder${VPX_TEST_EXE_SUFFIX}" local codec="$1" @@ -35,7 +36,7 @@ twopass_encoder() { fi eval "${VPX_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \ - "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" \ + "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" 100 \ ${devnull} [ -e "${output_file}" ] || return 1 @@ -47,16 +48,13 @@ twopass_encoder_vp8() { fi } -# TODO(tomfinegan): Add a frame limit param to twopass_encoder and enable this -# test. VP9 is just too slow right now: This test takes 31m16s+ on a fast -# machine. -DISABLED_twopass_encoder_vp9() { +twopass_encoder_vp9() { if [ "$(vp9_encode_available)" = "yes" ]; then twopass_encoder vp9 || return 1 fi } twopass_encoder_tests="twopass_encoder_vp8 - DISABLED_twopass_encoder_vp9" + twopass_encoder_vp9" run_tests twopass_encoder_verify_environment "${twopass_encoder_tests}" diff --git a/libvpx/test/variance_test.cc b/libvpx/test/variance_test.cc index 7a34db6b3..cb6339041 100644 --- a/libvpx/test/variance_test.cc +++ b/libvpx/test/variance_test.cc @@ -74,6 +74,10 @@ static unsigned int mb_ss_ref(const int16_t *src) { return res; } +/* Note: + * Our codebase calculates the "diff" value in the variance algorithm by + * (src - ref). 
+ */ static uint32_t variance_ref(const uint8_t *src, const uint8_t *ref, int l2w, int l2h, int src_stride_coeff, int ref_stride_coeff, uint32_t *sse_ptr, @@ -87,14 +91,14 @@ static uint32_t variance_ref(const uint8_t *src, const uint8_t *ref, for (int x = 0; x < w; x++) { int diff; if (!use_high_bit_depth_) { - diff = ref[w * y * ref_stride_coeff + x] - - src[w * y * src_stride_coeff + x]; + diff = src[w * y * src_stride_coeff + x] - + ref[w * y * ref_stride_coeff + x]; se += diff; sse += diff * diff; #if CONFIG_VP9_HIGHBITDEPTH } else { - diff = CONVERT_TO_SHORTPTR(ref)[w * y * ref_stride_coeff + x] - - CONVERT_TO_SHORTPTR(src)[w * y * src_stride_coeff + x]; + diff = CONVERT_TO_SHORTPTR(src)[w * y * src_stride_coeff + x] - + CONVERT_TO_SHORTPTR(ref)[w * y * ref_stride_coeff + x]; se += diff; sse += diff * diff; #endif // CONFIG_VP9_HIGHBITDEPTH @@ -309,15 +313,15 @@ template<typename VarianceFunctionType> void VarianceTest<VarianceFunctionType>::RefTest() { for (int i = 0; i < 10; ++i) { for (int j = 0; j < block_size_; j++) { - if (!use_high_bit_depth_) { - src_[j] = rnd_.Rand8(); - ref_[j] = rnd_.Rand8(); + if (!use_high_bit_depth_) { + src_[j] = rnd_.Rand8(); + ref_[j] = rnd_.Rand8(); #if CONFIG_VP9_HIGHBITDEPTH - } else { - CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() && mask_; - CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() && mask_; + } else { + CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask_; + CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask_; #endif // CONFIG_VP9_HIGHBITDEPTH - } + } } unsigned int sse1, sse2; unsigned int var1; @@ -328,8 +332,10 @@ void VarianceTest<VarianceFunctionType>::RefTest() { log2height_, stride_coeff, stride_coeff, &sse2, use_high_bit_depth_, bit_depth_); - EXPECT_EQ(sse1, sse2); - EXPECT_EQ(var1, var2); + EXPECT_EQ(sse1, sse2) + << "Error at test index: " << i; + EXPECT_EQ(var1, var2) + << "Error at test index: " << i; } } @@ -346,8 +352,8 @@ void VarianceTest<VarianceFunctionType>::RefStrideTest() { ref_[ref_ind] = 
rnd_.Rand8(); #if CONFIG_VP9_HIGHBITDEPTH } else { - CONVERT_TO_SHORTPTR(src_)[src_ind] = rnd_.Rand16() && mask_; - CONVERT_TO_SHORTPTR(ref_)[ref_ind] = rnd_.Rand16() && mask_; + CONVERT_TO_SHORTPTR(src_)[src_ind] = rnd_.Rand16() & mask_; + CONVERT_TO_SHORTPTR(ref_)[ref_ind] = rnd_.Rand16() & mask_; #endif // CONFIG_VP9_HIGHBITDEPTH } } @@ -361,8 +367,10 @@ void VarianceTest<VarianceFunctionType>::RefStrideTest() { log2height_, src_stride_coeff, ref_stride_coeff, &sse2, use_high_bit_depth_, bit_depth_); - EXPECT_EQ(sse1, sse2); - EXPECT_EQ(var1, var2); + EXPECT_EQ(sse1, sse2) + << "Error at test index: " << i; + EXPECT_EQ(var1, var2) + << "Error at test index: " << i; } } @@ -747,115 +755,63 @@ TEST_P(VpxSubpelAvgVarianceTest, Ref) { RefTest(); } INSTANTIATE_TEST_CASE_P(C, SumOfSquaresTest, ::testing::Values(vpx_get_mb_ss_c)); -const Get4x4SseFunc get4x4sse_cs_c = vpx_get4x4sse_cs_c; INSTANTIATE_TEST_CASE_P(C, VpxSseTest, - ::testing::Values(make_tuple(2, 2, get4x4sse_cs_c))); + ::testing::Values(make_tuple(2, 2, + &vpx_get4x4sse_cs_c))); -const VarianceMxNFunc mse16x16_c = vpx_mse16x16_c; -const VarianceMxNFunc mse16x8_c = vpx_mse16x8_c; -const VarianceMxNFunc mse8x16_c = vpx_mse8x16_c; -const VarianceMxNFunc mse8x8_c = vpx_mse8x8_c; INSTANTIATE_TEST_CASE_P(C, VpxMseTest, - ::testing::Values(make_tuple(4, 4, mse16x16_c), - make_tuple(4, 3, mse16x8_c), - make_tuple(3, 4, mse8x16_c), - make_tuple(3, 3, mse8x8_c))); - -const VarianceMxNFunc variance64x64_c = vpx_variance64x64_c; -const VarianceMxNFunc variance64x32_c = vpx_variance64x32_c; -const VarianceMxNFunc variance32x64_c = vpx_variance32x64_c; -const VarianceMxNFunc variance32x32_c = vpx_variance32x32_c; -const VarianceMxNFunc variance32x16_c = vpx_variance32x16_c; -const VarianceMxNFunc variance16x32_c = vpx_variance16x32_c; -const VarianceMxNFunc variance16x16_c = vpx_variance16x16_c; -const VarianceMxNFunc variance16x8_c = vpx_variance16x8_c; -const VarianceMxNFunc variance8x16_c = vpx_variance8x16_c; 
-const VarianceMxNFunc variance8x8_c = vpx_variance8x8_c; -const VarianceMxNFunc variance8x4_c = vpx_variance8x4_c; -const VarianceMxNFunc variance4x8_c = vpx_variance4x8_c; -const VarianceMxNFunc variance4x4_c = vpx_variance4x4_c; + ::testing::Values(make_tuple(4, 4, &vpx_mse16x16_c), + make_tuple(4, 3, &vpx_mse16x8_c), + make_tuple(3, 4, &vpx_mse8x16_c), + make_tuple(3, 3, &vpx_mse8x8_c))); + INSTANTIATE_TEST_CASE_P( C, VpxVarianceTest, - ::testing::Values(make_tuple(6, 6, variance64x64_c, 0), - make_tuple(6, 5, variance64x32_c, 0), - make_tuple(5, 6, variance32x64_c, 0), - make_tuple(5, 5, variance32x32_c, 0), - make_tuple(5, 4, variance32x16_c, 0), - make_tuple(4, 5, variance16x32_c, 0), - make_tuple(4, 4, variance16x16_c, 0), - make_tuple(4, 3, variance16x8_c, 0), - make_tuple(3, 4, variance8x16_c, 0), - make_tuple(3, 3, variance8x8_c, 0), - make_tuple(3, 2, variance8x4_c, 0), - make_tuple(2, 3, variance4x8_c, 0), - make_tuple(2, 2, variance4x4_c, 0))); - -const SubpixVarMxNFunc subpel_var64x64_c = vpx_sub_pixel_variance64x64_c; -const SubpixVarMxNFunc subpel_var64x32_c = vpx_sub_pixel_variance64x32_c; -const SubpixVarMxNFunc subpel_var32x64_c = vpx_sub_pixel_variance32x64_c; -const SubpixVarMxNFunc subpel_var32x32_c = vpx_sub_pixel_variance32x32_c; -const SubpixVarMxNFunc subpel_var32x16_c = vpx_sub_pixel_variance32x16_c; -const SubpixVarMxNFunc subpel_var16x32_c = vpx_sub_pixel_variance16x32_c; -const SubpixVarMxNFunc subpel_var16x16_c = vpx_sub_pixel_variance16x16_c; -const SubpixVarMxNFunc subpel_var16x8_c = vpx_sub_pixel_variance16x8_c; -const SubpixVarMxNFunc subpel_var8x16_c = vpx_sub_pixel_variance8x16_c; -const SubpixVarMxNFunc subpel_var8x8_c = vpx_sub_pixel_variance8x8_c; -const SubpixVarMxNFunc subpel_var8x4_c = vpx_sub_pixel_variance8x4_c; -const SubpixVarMxNFunc subpel_var4x8_c = vpx_sub_pixel_variance4x8_c; -const SubpixVarMxNFunc subpel_var4x4_c = vpx_sub_pixel_variance4x4_c; + ::testing::Values(make_tuple(6, 6, &vpx_variance64x64_c, 0), + 
make_tuple(6, 5, &vpx_variance64x32_c, 0), + make_tuple(5, 6, &vpx_variance32x64_c, 0), + make_tuple(5, 5, &vpx_variance32x32_c, 0), + make_tuple(5, 4, &vpx_variance32x16_c, 0), + make_tuple(4, 5, &vpx_variance16x32_c, 0), + make_tuple(4, 4, &vpx_variance16x16_c, 0), + make_tuple(4, 3, &vpx_variance16x8_c, 0), + make_tuple(3, 4, &vpx_variance8x16_c, 0), + make_tuple(3, 3, &vpx_variance8x8_c, 0), + make_tuple(3, 2, &vpx_variance8x4_c, 0), + make_tuple(2, 3, &vpx_variance4x8_c, 0), + make_tuple(2, 2, &vpx_variance4x4_c, 0))); + INSTANTIATE_TEST_CASE_P( C, VpxSubpelVarianceTest, - ::testing::Values(make_tuple(6, 6, subpel_var64x64_c, 0), - make_tuple(6, 5, subpel_var64x32_c, 0), - make_tuple(5, 6, subpel_var32x64_c, 0), - make_tuple(5, 5, subpel_var32x32_c, 0), - make_tuple(5, 4, subpel_var32x16_c, 0), - make_tuple(4, 5, subpel_var16x32_c, 0), - make_tuple(4, 4, subpel_var16x16_c, 0), - make_tuple(4, 3, subpel_var16x8_c, 0), - make_tuple(3, 4, subpel_var8x16_c, 0), - make_tuple(3, 3, subpel_var8x8_c, 0), - make_tuple(3, 2, subpel_var8x4_c, 0), - make_tuple(2, 3, subpel_var4x8_c, 0), - make_tuple(2, 2, subpel_var4x4_c, 0))); - -const SubpixAvgVarMxNFunc subpel_avg_var64x64_c = - vpx_sub_pixel_avg_variance64x64_c; -const SubpixAvgVarMxNFunc subpel_avg_var64x32_c = - vpx_sub_pixel_avg_variance64x32_c; -const SubpixAvgVarMxNFunc subpel_avg_var32x64_c = - vpx_sub_pixel_avg_variance32x64_c; -const SubpixAvgVarMxNFunc subpel_avg_var32x32_c = - vpx_sub_pixel_avg_variance32x32_c; -const SubpixAvgVarMxNFunc subpel_avg_var32x16_c = - vpx_sub_pixel_avg_variance32x16_c; -const SubpixAvgVarMxNFunc subpel_avg_var16x32_c = - vpx_sub_pixel_avg_variance16x32_c; -const SubpixAvgVarMxNFunc subpel_avg_var16x16_c = - vpx_sub_pixel_avg_variance16x16_c; -const SubpixAvgVarMxNFunc subpel_avg_var16x8_c = - vpx_sub_pixel_avg_variance16x8_c; -const SubpixAvgVarMxNFunc subpel_avg_var8x16_c = - vpx_sub_pixel_avg_variance8x16_c; -const SubpixAvgVarMxNFunc subpel_avg_var8x8_c = 
vpx_sub_pixel_avg_variance8x8_c; -const SubpixAvgVarMxNFunc subpel_avg_var8x4_c = vpx_sub_pixel_avg_variance8x4_c; -const SubpixAvgVarMxNFunc subpel_avg_var4x8_c = vpx_sub_pixel_avg_variance4x8_c; -const SubpixAvgVarMxNFunc subpel_avg_var4x4_c = vpx_sub_pixel_avg_variance4x4_c; + ::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_variance64x64_c, 0), + make_tuple(6, 5, &vpx_sub_pixel_variance64x32_c, 0), + make_tuple(5, 6, &vpx_sub_pixel_variance32x64_c, 0), + make_tuple(5, 5, &vpx_sub_pixel_variance32x32_c, 0), + make_tuple(5, 4, &vpx_sub_pixel_variance32x16_c, 0), + make_tuple(4, 5, &vpx_sub_pixel_variance16x32_c, 0), + make_tuple(4, 4, &vpx_sub_pixel_variance16x16_c, 0), + make_tuple(4, 3, &vpx_sub_pixel_variance16x8_c, 0), + make_tuple(3, 4, &vpx_sub_pixel_variance8x16_c, 0), + make_tuple(3, 3, &vpx_sub_pixel_variance8x8_c, 0), + make_tuple(3, 2, &vpx_sub_pixel_variance8x4_c, 0), + make_tuple(2, 3, &vpx_sub_pixel_variance4x8_c, 0), + make_tuple(2, 2, &vpx_sub_pixel_variance4x4_c, 0))); + INSTANTIATE_TEST_CASE_P( C, VpxSubpelAvgVarianceTest, - ::testing::Values(make_tuple(6, 6, subpel_avg_var64x64_c, 0), - make_tuple(6, 5, subpel_avg_var64x32_c, 0), - make_tuple(5, 6, subpel_avg_var32x64_c, 0), - make_tuple(5, 5, subpel_avg_var32x32_c, 0), - make_tuple(5, 4, subpel_avg_var32x16_c, 0), - make_tuple(4, 5, subpel_avg_var16x32_c, 0), - make_tuple(4, 4, subpel_avg_var16x16_c, 0), - make_tuple(4, 3, subpel_avg_var16x8_c, 0), - make_tuple(3, 4, subpel_avg_var8x16_c, 0), - make_tuple(3, 3, subpel_avg_var8x8_c, 0), - make_tuple(3, 2, subpel_avg_var8x4_c, 0), - make_tuple(2, 3, subpel_avg_var4x8_c, 0), - make_tuple(2, 2, subpel_avg_var4x4_c, 0))); + ::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_avg_variance64x64_c, 0), + make_tuple(6, 5, &vpx_sub_pixel_avg_variance64x32_c, 0), + make_tuple(5, 6, &vpx_sub_pixel_avg_variance32x64_c, 0), + make_tuple(5, 5, &vpx_sub_pixel_avg_variance32x32_c, 0), + make_tuple(5, 4, &vpx_sub_pixel_avg_variance32x16_c, 0), + make_tuple(4, 5, 
&vpx_sub_pixel_avg_variance16x32_c, 0), + make_tuple(4, 4, &vpx_sub_pixel_avg_variance16x16_c, 0), + make_tuple(4, 3, &vpx_sub_pixel_avg_variance16x8_c, 0), + make_tuple(3, 4, &vpx_sub_pixel_avg_variance8x16_c, 0), + make_tuple(3, 3, &vpx_sub_pixel_avg_variance8x8_c, 0), + make_tuple(3, 2, &vpx_sub_pixel_avg_variance8x4_c, 0), + make_tuple(2, 3, &vpx_sub_pixel_avg_variance4x8_c, 0), + make_tuple(2, 2, &vpx_sub_pixel_avg_variance4x4_c, 0))); #if CONFIG_VP9_HIGHBITDEPTH typedef MseTest<VarianceMxNFunc> VpxHBDMseTest; @@ -875,1166 +831,507 @@ TEST_P(VpxHBDSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); } TEST_P(VpxHBDSubpelAvgVarianceTest, Ref) { RefTest(); } /* TODO(debargha): This test does not support the highbd version -const VarianceMxNFunc highbd_12_mse16x16_c = vpx_highbd_12_mse16x16_c; -const VarianceMxNFunc highbd_12_mse16x8_c = vpx_highbd_12_mse16x8_c; -const VarianceMxNFunc highbd_12_mse8x16_c = vpx_highbd_12_mse8x16_c; -const VarianceMxNFunc highbd_12_mse8x8_c = vpx_highbd_12_mse8x8_c; - -const VarianceMxNFunc highbd_10_mse16x16_c = vpx_highbd_10_mse16x16_c; -const VarianceMxNFunc highbd_10_mse16x8_c = vpx_highbd_10_mse16x8_c; -const VarianceMxNFunc highbd_10_mse8x16_c = vpx_highbd_10_mse8x16_c; -const VarianceMxNFunc highbd_10_mse8x8_c = vpx_highbd_10_mse8x8_c; - -const VarianceMxNFunc highbd_8_mse16x16_c = vpx_highbd_8_mse16x16_c; -const VarianceMxNFunc highbd_8_mse16x8_c = vpx_highbd_8_mse16x8_c; -const VarianceMxNFunc highbd_8_mse8x16_c = vpx_highbd_8_mse8x16_c; -const VarianceMxNFunc highbd_8_mse8x8_c = vpx_highbd_8_mse8x8_c; INSTANTIATE_TEST_CASE_P( - C, VpxHBDMseTest, ::testing::Values(make_tuple(4, 4, highbd_12_mse16x16_c), - make_tuple(4, 4, highbd_12_mse16x8_c), - make_tuple(4, 4, highbd_12_mse8x16_c), - make_tuple(4, 4, highbd_12_mse8x8_c), - make_tuple(4, 4, highbd_10_mse16x16_c), - make_tuple(4, 4, highbd_10_mse16x8_c), - make_tuple(4, 4, highbd_10_mse8x16_c), - make_tuple(4, 4, highbd_10_mse8x8_c), - make_tuple(4, 4, highbd_8_mse16x16_c), 
- make_tuple(4, 4, highbd_8_mse16x8_c), - make_tuple(4, 4, highbd_8_mse8x16_c), - make_tuple(4, 4, highbd_8_mse8x8_c))); + C, VpxHBDMseTest, + ::testing::Values(make_tuple(4, 4, &vpx_highbd_12_mse16x16_c), + make_tuple(4, 4, &vpx_highbd_12_mse16x8_c), + make_tuple(4, 4, &vpx_highbd_12_mse8x16_c), + make_tuple(4, 4, &vpx_highbd_12_mse8x8_c), + make_tuple(4, 4, &vpx_highbd_10_mse16x16_c), + make_tuple(4, 4, &vpx_highbd_10_mse16x8_c), + make_tuple(4, 4, &vpx_highbd_10_mse8x16_c), + make_tuple(4, 4, &vpx_highbd_10_mse8x8_c), + make_tuple(4, 4, &vpx_highbd_8_mse16x16_c), + make_tuple(4, 4, &vpx_highbd_8_mse16x8_c), + make_tuple(4, 4, &vpx_highbd_8_mse8x16_c), + make_tuple(4, 4, &vpx_highbd_8_mse8x8_c))); */ -const VarianceMxNFunc highbd_12_variance64x64_c = vpx_highbd_12_variance64x64_c; -const VarianceMxNFunc highbd_12_variance64x32_c = vpx_highbd_12_variance64x32_c; -const VarianceMxNFunc highbd_12_variance32x64_c = vpx_highbd_12_variance32x64_c; -const VarianceMxNFunc highbd_12_variance32x32_c = vpx_highbd_12_variance32x32_c; -const VarianceMxNFunc highbd_12_variance32x16_c = vpx_highbd_12_variance32x16_c; -const VarianceMxNFunc highbd_12_variance16x32_c = vpx_highbd_12_variance16x32_c; -const VarianceMxNFunc highbd_12_variance16x16_c = vpx_highbd_12_variance16x16_c; -const VarianceMxNFunc highbd_12_variance16x8_c = vpx_highbd_12_variance16x8_c; -const VarianceMxNFunc highbd_12_variance8x16_c = vpx_highbd_12_variance8x16_c; -const VarianceMxNFunc highbd_12_variance8x8_c = vpx_highbd_12_variance8x8_c; -const VarianceMxNFunc highbd_12_variance8x4_c = vpx_highbd_12_variance8x4_c; -const VarianceMxNFunc highbd_12_variance4x8_c = vpx_highbd_12_variance4x8_c; -const VarianceMxNFunc highbd_12_variance4x4_c = vpx_highbd_12_variance4x4_c; -const VarianceMxNFunc highbd_10_variance64x64_c = vpx_highbd_10_variance64x64_c; -const VarianceMxNFunc highbd_10_variance64x32_c = vpx_highbd_10_variance64x32_c; -const VarianceMxNFunc highbd_10_variance32x64_c = 
vpx_highbd_10_variance32x64_c; -const VarianceMxNFunc highbd_10_variance32x32_c = vpx_highbd_10_variance32x32_c; -const VarianceMxNFunc highbd_10_variance32x16_c = vpx_highbd_10_variance32x16_c; -const VarianceMxNFunc highbd_10_variance16x32_c = vpx_highbd_10_variance16x32_c; -const VarianceMxNFunc highbd_10_variance16x16_c = vpx_highbd_10_variance16x16_c; -const VarianceMxNFunc highbd_10_variance16x8_c = vpx_highbd_10_variance16x8_c; -const VarianceMxNFunc highbd_10_variance8x16_c = vpx_highbd_10_variance8x16_c; -const VarianceMxNFunc highbd_10_variance8x8_c = vpx_highbd_10_variance8x8_c; -const VarianceMxNFunc highbd_10_variance8x4_c = vpx_highbd_10_variance8x4_c; -const VarianceMxNFunc highbd_10_variance4x8_c = vpx_highbd_10_variance4x8_c; -const VarianceMxNFunc highbd_10_variance4x4_c = vpx_highbd_10_variance4x4_c; -const VarianceMxNFunc highbd_8_variance64x64_c = vpx_highbd_8_variance64x64_c; -const VarianceMxNFunc highbd_8_variance64x32_c = vpx_highbd_8_variance64x32_c; -const VarianceMxNFunc highbd_8_variance32x64_c = vpx_highbd_8_variance32x64_c; -const VarianceMxNFunc highbd_8_variance32x32_c = vpx_highbd_8_variance32x32_c; -const VarianceMxNFunc highbd_8_variance32x16_c = vpx_highbd_8_variance32x16_c; -const VarianceMxNFunc highbd_8_variance16x32_c = vpx_highbd_8_variance16x32_c; -const VarianceMxNFunc highbd_8_variance16x16_c = vpx_highbd_8_variance16x16_c; -const VarianceMxNFunc highbd_8_variance16x8_c = vpx_highbd_8_variance16x8_c; -const VarianceMxNFunc highbd_8_variance8x16_c = vpx_highbd_8_variance8x16_c; -const VarianceMxNFunc highbd_8_variance8x8_c = vpx_highbd_8_variance8x8_c; -const VarianceMxNFunc highbd_8_variance8x4_c = vpx_highbd_8_variance8x4_c; -const VarianceMxNFunc highbd_8_variance4x8_c = vpx_highbd_8_variance4x8_c; -const VarianceMxNFunc highbd_8_variance4x4_c = vpx_highbd_8_variance4x4_c; INSTANTIATE_TEST_CASE_P( C, VpxHBDVarianceTest, - ::testing::Values(make_tuple(6, 6, highbd_12_variance64x64_c, 12), - make_tuple(6, 5, 
highbd_12_variance64x32_c, 12), - make_tuple(5, 6, highbd_12_variance32x64_c, 12), - make_tuple(5, 5, highbd_12_variance32x32_c, 12), - make_tuple(5, 4, highbd_12_variance32x16_c, 12), - make_tuple(4, 5, highbd_12_variance16x32_c, 12), - make_tuple(4, 4, highbd_12_variance16x16_c, 12), - make_tuple(4, 3, highbd_12_variance16x8_c, 12), - make_tuple(3, 4, highbd_12_variance8x16_c, 12), - make_tuple(3, 3, highbd_12_variance8x8_c, 12), - make_tuple(3, 2, highbd_12_variance8x4_c, 12), - make_tuple(2, 3, highbd_12_variance4x8_c, 12), - make_tuple(2, 2, highbd_12_variance4x4_c, 12), - make_tuple(6, 6, highbd_10_variance64x64_c, 10), - make_tuple(6, 5, highbd_10_variance64x32_c, 10), - make_tuple(5, 6, highbd_10_variance32x64_c, 10), - make_tuple(5, 5, highbd_10_variance32x32_c, 10), - make_tuple(5, 4, highbd_10_variance32x16_c, 10), - make_tuple(4, 5, highbd_10_variance16x32_c, 10), - make_tuple(4, 4, highbd_10_variance16x16_c, 10), - make_tuple(4, 3, highbd_10_variance16x8_c, 10), - make_tuple(3, 4, highbd_10_variance8x16_c, 10), - make_tuple(3, 3, highbd_10_variance8x8_c, 10), - make_tuple(3, 2, highbd_10_variance8x4_c, 10), - make_tuple(2, 3, highbd_10_variance4x8_c, 10), - make_tuple(2, 2, highbd_10_variance4x4_c, 10), - make_tuple(6, 6, highbd_8_variance64x64_c, 8), - make_tuple(6, 5, highbd_8_variance64x32_c, 8), - make_tuple(5, 6, highbd_8_variance32x64_c, 8), - make_tuple(5, 5, highbd_8_variance32x32_c, 8), - make_tuple(5, 4, highbd_8_variance32x16_c, 8), - make_tuple(4, 5, highbd_8_variance16x32_c, 8), - make_tuple(4, 4, highbd_8_variance16x16_c, 8), - make_tuple(4, 3, highbd_8_variance16x8_c, 8), - make_tuple(3, 4, highbd_8_variance8x16_c, 8), - make_tuple(3, 3, highbd_8_variance8x8_c, 8), - make_tuple(3, 2, highbd_8_variance8x4_c, 8), - make_tuple(2, 3, highbd_8_variance4x8_c, 8), - make_tuple(2, 2, highbd_8_variance4x4_c, 8))); - -const SubpixVarMxNFunc highbd_8_subpel_var64x64_c = - vpx_highbd_8_sub_pixel_variance64x64_c; -const SubpixVarMxNFunc 
highbd_8_subpel_var64x32_c = - vpx_highbd_8_sub_pixel_variance64x32_c; -const SubpixVarMxNFunc highbd_8_subpel_var32x64_c = - vpx_highbd_8_sub_pixel_variance32x64_c; -const SubpixVarMxNFunc highbd_8_subpel_var32x32_c = - vpx_highbd_8_sub_pixel_variance32x32_c; -const SubpixVarMxNFunc highbd_8_subpel_var32x16_c = - vpx_highbd_8_sub_pixel_variance32x16_c; -const SubpixVarMxNFunc highbd_8_subpel_var16x32_c = - vpx_highbd_8_sub_pixel_variance16x32_c; -const SubpixVarMxNFunc highbd_8_subpel_var16x16_c = - vpx_highbd_8_sub_pixel_variance16x16_c; -const SubpixVarMxNFunc highbd_8_subpel_var16x8_c = - vpx_highbd_8_sub_pixel_variance16x8_c; -const SubpixVarMxNFunc highbd_8_subpel_var8x16_c = - vpx_highbd_8_sub_pixel_variance8x16_c; -const SubpixVarMxNFunc highbd_8_subpel_var8x8_c = - vpx_highbd_8_sub_pixel_variance8x8_c; -const SubpixVarMxNFunc highbd_8_subpel_var8x4_c = - vpx_highbd_8_sub_pixel_variance8x4_c; -const SubpixVarMxNFunc highbd_8_subpel_var4x8_c = - vpx_highbd_8_sub_pixel_variance4x8_c; -const SubpixVarMxNFunc highbd_8_subpel_var4x4_c = - vpx_highbd_8_sub_pixel_variance4x4_c; -const SubpixVarMxNFunc highbd_10_subpel_var64x64_c = - vpx_highbd_10_sub_pixel_variance64x64_c; -const SubpixVarMxNFunc highbd_10_subpel_var64x32_c = - vpx_highbd_10_sub_pixel_variance64x32_c; -const SubpixVarMxNFunc highbd_10_subpel_var32x64_c = - vpx_highbd_10_sub_pixel_variance32x64_c; -const SubpixVarMxNFunc highbd_10_subpel_var32x32_c = - vpx_highbd_10_sub_pixel_variance32x32_c; -const SubpixVarMxNFunc highbd_10_subpel_var32x16_c = - vpx_highbd_10_sub_pixel_variance32x16_c; -const SubpixVarMxNFunc highbd_10_subpel_var16x32_c = - vpx_highbd_10_sub_pixel_variance16x32_c; -const SubpixVarMxNFunc highbd_10_subpel_var16x16_c = - vpx_highbd_10_sub_pixel_variance16x16_c; -const SubpixVarMxNFunc highbd_10_subpel_var16x8_c = - vpx_highbd_10_sub_pixel_variance16x8_c; -const SubpixVarMxNFunc highbd_10_subpel_var8x16_c = - vpx_highbd_10_sub_pixel_variance8x16_c; -const SubpixVarMxNFunc 
highbd_10_subpel_var8x8_c = - vpx_highbd_10_sub_pixel_variance8x8_c; -const SubpixVarMxNFunc highbd_10_subpel_var8x4_c = - vpx_highbd_10_sub_pixel_variance8x4_c; -const SubpixVarMxNFunc highbd_10_subpel_var4x8_c = - vpx_highbd_10_sub_pixel_variance4x8_c; -const SubpixVarMxNFunc highbd_10_subpel_var4x4_c = - vpx_highbd_10_sub_pixel_variance4x4_c; -const SubpixVarMxNFunc highbd_12_subpel_var64x64_c = - vpx_highbd_12_sub_pixel_variance64x64_c; -const SubpixVarMxNFunc highbd_12_subpel_var64x32_c = - vpx_highbd_12_sub_pixel_variance64x32_c; -const SubpixVarMxNFunc highbd_12_subpel_var32x64_c = - vpx_highbd_12_sub_pixel_variance32x64_c; -const SubpixVarMxNFunc highbd_12_subpel_var32x32_c = - vpx_highbd_12_sub_pixel_variance32x32_c; -const SubpixVarMxNFunc highbd_12_subpel_var32x16_c = - vpx_highbd_12_sub_pixel_variance32x16_c; -const SubpixVarMxNFunc highbd_12_subpel_var16x32_c = - vpx_highbd_12_sub_pixel_variance16x32_c; -const SubpixVarMxNFunc highbd_12_subpel_var16x16_c = - vpx_highbd_12_sub_pixel_variance16x16_c; -const SubpixVarMxNFunc highbd_12_subpel_var16x8_c = - vpx_highbd_12_sub_pixel_variance16x8_c; -const SubpixVarMxNFunc highbd_12_subpel_var8x16_c = - vpx_highbd_12_sub_pixel_variance8x16_c; -const SubpixVarMxNFunc highbd_12_subpel_var8x8_c = - vpx_highbd_12_sub_pixel_variance8x8_c; -const SubpixVarMxNFunc highbd_12_subpel_var8x4_c = - vpx_highbd_12_sub_pixel_variance8x4_c; -const SubpixVarMxNFunc highbd_12_subpel_var4x8_c = - vpx_highbd_12_sub_pixel_variance4x8_c; -const SubpixVarMxNFunc highbd_12_subpel_var4x4_c = - vpx_highbd_12_sub_pixel_variance4x4_c; + ::testing::Values(make_tuple(6, 6, &vpx_highbd_12_variance64x64_c, 12), + make_tuple(6, 5, &vpx_highbd_12_variance64x32_c, 12), + make_tuple(5, 6, &vpx_highbd_12_variance32x64_c, 12), + make_tuple(5, 5, &vpx_highbd_12_variance32x32_c, 12), + make_tuple(5, 4, &vpx_highbd_12_variance32x16_c, 12), + make_tuple(4, 5, &vpx_highbd_12_variance16x32_c, 12), + make_tuple(4, 4, &vpx_highbd_12_variance16x16_c, 12), 
+ make_tuple(4, 3, &vpx_highbd_12_variance16x8_c, 12), + make_tuple(3, 4, &vpx_highbd_12_variance8x16_c, 12), + make_tuple(3, 3, &vpx_highbd_12_variance8x8_c, 12), + make_tuple(3, 2, &vpx_highbd_12_variance8x4_c, 12), + make_tuple(2, 3, &vpx_highbd_12_variance4x8_c, 12), + make_tuple(2, 2, &vpx_highbd_12_variance4x4_c, 12), + make_tuple(6, 6, &vpx_highbd_10_variance64x64_c, 10), + make_tuple(6, 5, &vpx_highbd_10_variance64x32_c, 10), + make_tuple(5, 6, &vpx_highbd_10_variance32x64_c, 10), + make_tuple(5, 5, &vpx_highbd_10_variance32x32_c, 10), + make_tuple(5, 4, &vpx_highbd_10_variance32x16_c, 10), + make_tuple(4, 5, &vpx_highbd_10_variance16x32_c, 10), + make_tuple(4, 4, &vpx_highbd_10_variance16x16_c, 10), + make_tuple(4, 3, &vpx_highbd_10_variance16x8_c, 10), + make_tuple(3, 4, &vpx_highbd_10_variance8x16_c, 10), + make_tuple(3, 3, &vpx_highbd_10_variance8x8_c, 10), + make_tuple(3, 2, &vpx_highbd_10_variance8x4_c, 10), + make_tuple(2, 3, &vpx_highbd_10_variance4x8_c, 10), + make_tuple(2, 2, &vpx_highbd_10_variance4x4_c, 10), + make_tuple(6, 6, &vpx_highbd_8_variance64x64_c, 8), + make_tuple(6, 5, &vpx_highbd_8_variance64x32_c, 8), + make_tuple(5, 6, &vpx_highbd_8_variance32x64_c, 8), + make_tuple(5, 5, &vpx_highbd_8_variance32x32_c, 8), + make_tuple(5, 4, &vpx_highbd_8_variance32x16_c, 8), + make_tuple(4, 5, &vpx_highbd_8_variance16x32_c, 8), + make_tuple(4, 4, &vpx_highbd_8_variance16x16_c, 8), + make_tuple(4, 3, &vpx_highbd_8_variance16x8_c, 8), + make_tuple(3, 4, &vpx_highbd_8_variance8x16_c, 8), + make_tuple(3, 3, &vpx_highbd_8_variance8x8_c, 8), + make_tuple(3, 2, &vpx_highbd_8_variance8x4_c, 8), + make_tuple(2, 3, &vpx_highbd_8_variance4x8_c, 8), + make_tuple(2, 2, &vpx_highbd_8_variance4x4_c, 8))); + INSTANTIATE_TEST_CASE_P( C, VpxHBDSubpelVarianceTest, - ::testing::Values(make_tuple(6, 6, highbd_8_subpel_var64x64_c, 8), - make_tuple(6, 5, highbd_8_subpel_var64x32_c, 8), - make_tuple(5, 6, highbd_8_subpel_var32x64_c, 8), - make_tuple(5, 5, 
highbd_8_subpel_var32x32_c, 8), - make_tuple(5, 4, highbd_8_subpel_var32x16_c, 8), - make_tuple(4, 5, highbd_8_subpel_var16x32_c, 8), - make_tuple(4, 4, highbd_8_subpel_var16x16_c, 8), - make_tuple(4, 3, highbd_8_subpel_var16x8_c, 8), - make_tuple(3, 4, highbd_8_subpel_var8x16_c, 8), - make_tuple(3, 3, highbd_8_subpel_var8x8_c, 8), - make_tuple(3, 2, highbd_8_subpel_var8x4_c, 8), - make_tuple(2, 3, highbd_8_subpel_var4x8_c, 8), - make_tuple(2, 2, highbd_8_subpel_var4x4_c, 8), - make_tuple(6, 6, highbd_10_subpel_var64x64_c, 10), - make_tuple(6, 5, highbd_10_subpel_var64x32_c, 10), - make_tuple(5, 6, highbd_10_subpel_var32x64_c, 10), - make_tuple(5, 5, highbd_10_subpel_var32x32_c, 10), - make_tuple(5, 4, highbd_10_subpel_var32x16_c, 10), - make_tuple(4, 5, highbd_10_subpel_var16x32_c, 10), - make_tuple(4, 4, highbd_10_subpel_var16x16_c, 10), - make_tuple(4, 3, highbd_10_subpel_var16x8_c, 10), - make_tuple(3, 4, highbd_10_subpel_var8x16_c, 10), - make_tuple(3, 3, highbd_10_subpel_var8x8_c, 10), - make_tuple(3, 2, highbd_10_subpel_var8x4_c, 10), - make_tuple(2, 3, highbd_10_subpel_var4x8_c, 10), - make_tuple(2, 2, highbd_10_subpel_var4x4_c, 10), - make_tuple(6, 6, highbd_12_subpel_var64x64_c, 12), - make_tuple(6, 5, highbd_12_subpel_var64x32_c, 12), - make_tuple(5, 6, highbd_12_subpel_var32x64_c, 12), - make_tuple(5, 5, highbd_12_subpel_var32x32_c, 12), - make_tuple(5, 4, highbd_12_subpel_var32x16_c, 12), - make_tuple(4, 5, highbd_12_subpel_var16x32_c, 12), - make_tuple(4, 4, highbd_12_subpel_var16x16_c, 12), - make_tuple(4, 3, highbd_12_subpel_var16x8_c, 12), - make_tuple(3, 4, highbd_12_subpel_var8x16_c, 12), - make_tuple(3, 3, highbd_12_subpel_var8x8_c, 12), - make_tuple(3, 2, highbd_12_subpel_var8x4_c, 12), - make_tuple(2, 3, highbd_12_subpel_var4x8_c, 12), - make_tuple(2, 2, highbd_12_subpel_var4x4_c, 12))); - -const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var64x64_c = - vpx_highbd_8_sub_pixel_avg_variance64x64_c; -const SubpixAvgVarMxNFunc 
highbd_8_subpel_avg_var64x32_c = - vpx_highbd_8_sub_pixel_avg_variance64x32_c; -const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var32x64_c = - vpx_highbd_8_sub_pixel_avg_variance32x64_c; -const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var32x32_c = - vpx_highbd_8_sub_pixel_avg_variance32x32_c; -const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var32x16_c = - vpx_highbd_8_sub_pixel_avg_variance32x16_c; -const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var16x32_c = - vpx_highbd_8_sub_pixel_avg_variance16x32_c; -const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var16x16_c = - vpx_highbd_8_sub_pixel_avg_variance16x16_c; -const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var16x8_c = - vpx_highbd_8_sub_pixel_avg_variance16x8_c; -const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var8x16_c = - vpx_highbd_8_sub_pixel_avg_variance8x16_c; -const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var8x8_c = - vpx_highbd_8_sub_pixel_avg_variance8x8_c; -const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var8x4_c = - vpx_highbd_8_sub_pixel_avg_variance8x4_c; -const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var4x8_c = - vpx_highbd_8_sub_pixel_avg_variance4x8_c; -const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var4x4_c = - vpx_highbd_8_sub_pixel_avg_variance4x4_c; -const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var64x64_c = - vpx_highbd_10_sub_pixel_avg_variance64x64_c; -const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var64x32_c = - vpx_highbd_10_sub_pixel_avg_variance64x32_c; -const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var32x64_c = - vpx_highbd_10_sub_pixel_avg_variance32x64_c; -const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var32x32_c = - vpx_highbd_10_sub_pixel_avg_variance32x32_c; -const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var32x16_c = - vpx_highbd_10_sub_pixel_avg_variance32x16_c; -const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var16x32_c = - vpx_highbd_10_sub_pixel_avg_variance16x32_c; -const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var16x16_c = - vpx_highbd_10_sub_pixel_avg_variance16x16_c; -const 
SubpixAvgVarMxNFunc highbd_10_subpel_avg_var16x8_c = - vpx_highbd_10_sub_pixel_avg_variance16x8_c; -const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var8x16_c = - vpx_highbd_10_sub_pixel_avg_variance8x16_c; -const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var8x8_c = - vpx_highbd_10_sub_pixel_avg_variance8x8_c; -const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var8x4_c = - vpx_highbd_10_sub_pixel_avg_variance8x4_c; -const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var4x8_c = - vpx_highbd_10_sub_pixel_avg_variance4x8_c; -const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var4x4_c = - vpx_highbd_10_sub_pixel_avg_variance4x4_c; -const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var64x64_c = - vpx_highbd_12_sub_pixel_avg_variance64x64_c; -const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var64x32_c = - vpx_highbd_12_sub_pixel_avg_variance64x32_c; -const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var32x64_c = - vpx_highbd_12_sub_pixel_avg_variance32x64_c; -const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var32x32_c = - vpx_highbd_12_sub_pixel_avg_variance32x32_c; -const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var32x16_c = - vpx_highbd_12_sub_pixel_avg_variance32x16_c; -const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var16x32_c = - vpx_highbd_12_sub_pixel_avg_variance16x32_c; -const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var16x16_c = - vpx_highbd_12_sub_pixel_avg_variance16x16_c; -const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var16x8_c = - vpx_highbd_12_sub_pixel_avg_variance16x8_c; -const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var8x16_c = - vpx_highbd_12_sub_pixel_avg_variance8x16_c; -const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var8x8_c = - vpx_highbd_12_sub_pixel_avg_variance8x8_c; -const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var8x4_c = - vpx_highbd_12_sub_pixel_avg_variance8x4_c; -const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var4x8_c = - vpx_highbd_12_sub_pixel_avg_variance4x8_c; -const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var4x4_c = - 
vpx_highbd_12_sub_pixel_avg_variance4x4_c; + ::testing::Values( + make_tuple(6, 6, &vpx_highbd_8_sub_pixel_variance64x64_c, 8), + make_tuple(6, 5, &vpx_highbd_8_sub_pixel_variance64x32_c, 8), + make_tuple(5, 6, &vpx_highbd_8_sub_pixel_variance32x64_c, 8), + make_tuple(5, 5, &vpx_highbd_8_sub_pixel_variance32x32_c, 8), + make_tuple(5, 4, &vpx_highbd_8_sub_pixel_variance32x16_c, 8), + make_tuple(4, 5, &vpx_highbd_8_sub_pixel_variance16x32_c, 8), + make_tuple(4, 4, &vpx_highbd_8_sub_pixel_variance16x16_c, 8), + make_tuple(4, 3, &vpx_highbd_8_sub_pixel_variance16x8_c, 8), + make_tuple(3, 4, &vpx_highbd_8_sub_pixel_variance8x16_c, 8), + make_tuple(3, 3, &vpx_highbd_8_sub_pixel_variance8x8_c, 8), + make_tuple(3, 2, &vpx_highbd_8_sub_pixel_variance8x4_c, 8), + make_tuple(2, 3, &vpx_highbd_8_sub_pixel_variance4x8_c, 8), + make_tuple(2, 2, &vpx_highbd_8_sub_pixel_variance4x4_c, 8), + make_tuple(6, 6, &vpx_highbd_10_sub_pixel_variance64x64_c, 10), + make_tuple(6, 5, &vpx_highbd_10_sub_pixel_variance64x32_c, 10), + make_tuple(5, 6, &vpx_highbd_10_sub_pixel_variance32x64_c, 10), + make_tuple(5, 5, &vpx_highbd_10_sub_pixel_variance32x32_c, 10), + make_tuple(5, 4, &vpx_highbd_10_sub_pixel_variance32x16_c, 10), + make_tuple(4, 5, &vpx_highbd_10_sub_pixel_variance16x32_c, 10), + make_tuple(4, 4, &vpx_highbd_10_sub_pixel_variance16x16_c, 10), + make_tuple(4, 3, &vpx_highbd_10_sub_pixel_variance16x8_c, 10), + make_tuple(3, 4, &vpx_highbd_10_sub_pixel_variance8x16_c, 10), + make_tuple(3, 3, &vpx_highbd_10_sub_pixel_variance8x8_c, 10), + make_tuple(3, 2, &vpx_highbd_10_sub_pixel_variance8x4_c, 10), + make_tuple(2, 3, &vpx_highbd_10_sub_pixel_variance4x8_c, 10), + make_tuple(2, 2, &vpx_highbd_10_sub_pixel_variance4x4_c, 10), + make_tuple(6, 6, &vpx_highbd_12_sub_pixel_variance64x64_c, 12), + make_tuple(6, 5, &vpx_highbd_12_sub_pixel_variance64x32_c, 12), + make_tuple(5, 6, &vpx_highbd_12_sub_pixel_variance32x64_c, 12), + make_tuple(5, 5, &vpx_highbd_12_sub_pixel_variance32x32_c, 12), + 
make_tuple(5, 4, &vpx_highbd_12_sub_pixel_variance32x16_c, 12), + make_tuple(4, 5, &vpx_highbd_12_sub_pixel_variance16x32_c, 12), + make_tuple(4, 4, &vpx_highbd_12_sub_pixel_variance16x16_c, 12), + make_tuple(4, 3, &vpx_highbd_12_sub_pixel_variance16x8_c, 12), + make_tuple(3, 4, &vpx_highbd_12_sub_pixel_variance8x16_c, 12), + make_tuple(3, 3, &vpx_highbd_12_sub_pixel_variance8x8_c, 12), + make_tuple(3, 2, &vpx_highbd_12_sub_pixel_variance8x4_c, 12), + make_tuple(2, 3, &vpx_highbd_12_sub_pixel_variance4x8_c, 12), + make_tuple(2, 2, &vpx_highbd_12_sub_pixel_variance4x4_c, 12))); + INSTANTIATE_TEST_CASE_P( C, VpxHBDSubpelAvgVarianceTest, ::testing::Values( - make_tuple(6, 6, highbd_8_subpel_avg_var64x64_c, 8), - make_tuple(6, 5, highbd_8_subpel_avg_var64x32_c, 8), - make_tuple(5, 6, highbd_8_subpel_avg_var32x64_c, 8), - make_tuple(5, 5, highbd_8_subpel_avg_var32x32_c, 8), - make_tuple(5, 4, highbd_8_subpel_avg_var32x16_c, 8), - make_tuple(4, 5, highbd_8_subpel_avg_var16x32_c, 8), - make_tuple(4, 4, highbd_8_subpel_avg_var16x16_c, 8), - make_tuple(4, 3, highbd_8_subpel_avg_var16x8_c, 8), - make_tuple(3, 4, highbd_8_subpel_avg_var8x16_c, 8), - make_tuple(3, 3, highbd_8_subpel_avg_var8x8_c, 8), - make_tuple(3, 2, highbd_8_subpel_avg_var8x4_c, 8), - make_tuple(2, 3, highbd_8_subpel_avg_var4x8_c, 8), - make_tuple(2, 2, highbd_8_subpel_avg_var4x4_c, 8), - make_tuple(6, 6, highbd_10_subpel_avg_var64x64_c, 10), - make_tuple(6, 5, highbd_10_subpel_avg_var64x32_c, 10), - make_tuple(5, 6, highbd_10_subpel_avg_var32x64_c, 10), - make_tuple(5, 5, highbd_10_subpel_avg_var32x32_c, 10), - make_tuple(5, 4, highbd_10_subpel_avg_var32x16_c, 10), - make_tuple(4, 5, highbd_10_subpel_avg_var16x32_c, 10), - make_tuple(4, 4, highbd_10_subpel_avg_var16x16_c, 10), - make_tuple(4, 3, highbd_10_subpel_avg_var16x8_c, 10), - make_tuple(3, 4, highbd_10_subpel_avg_var8x16_c, 10), - make_tuple(3, 3, highbd_10_subpel_avg_var8x8_c, 10), - make_tuple(3, 2, highbd_10_subpel_avg_var8x4_c, 10), - 
make_tuple(2, 3, highbd_10_subpel_avg_var4x8_c, 10), - make_tuple(2, 2, highbd_10_subpel_avg_var4x4_c, 10), - make_tuple(6, 6, highbd_12_subpel_avg_var64x64_c, 12), - make_tuple(6, 5, highbd_12_subpel_avg_var64x32_c, 12), - make_tuple(5, 6, highbd_12_subpel_avg_var32x64_c, 12), - make_tuple(5, 5, highbd_12_subpel_avg_var32x32_c, 12), - make_tuple(5, 4, highbd_12_subpel_avg_var32x16_c, 12), - make_tuple(4, 5, highbd_12_subpel_avg_var16x32_c, 12), - make_tuple(4, 4, highbd_12_subpel_avg_var16x16_c, 12), - make_tuple(4, 3, highbd_12_subpel_avg_var16x8_c, 12), - make_tuple(3, 4, highbd_12_subpel_avg_var8x16_c, 12), - make_tuple(3, 3, highbd_12_subpel_avg_var8x8_c, 12), - make_tuple(3, 2, highbd_12_subpel_avg_var8x4_c, 12), - make_tuple(2, 3, highbd_12_subpel_avg_var4x8_c, 12), - make_tuple(2, 2, highbd_12_subpel_avg_var4x4_c, 12))); + make_tuple(6, 6, &vpx_highbd_8_sub_pixel_avg_variance64x64_c, 8), + make_tuple(6, 5, &vpx_highbd_8_sub_pixel_avg_variance64x32_c, 8), + make_tuple(5, 6, &vpx_highbd_8_sub_pixel_avg_variance32x64_c, 8), + make_tuple(5, 5, &vpx_highbd_8_sub_pixel_avg_variance32x32_c, 8), + make_tuple(5, 4, &vpx_highbd_8_sub_pixel_avg_variance32x16_c, 8), + make_tuple(4, 5, &vpx_highbd_8_sub_pixel_avg_variance16x32_c, 8), + make_tuple(4, 4, &vpx_highbd_8_sub_pixel_avg_variance16x16_c, 8), + make_tuple(4, 3, &vpx_highbd_8_sub_pixel_avg_variance16x8_c, 8), + make_tuple(3, 4, &vpx_highbd_8_sub_pixel_avg_variance8x16_c, 8), + make_tuple(3, 3, &vpx_highbd_8_sub_pixel_avg_variance8x8_c, 8), + make_tuple(3, 2, &vpx_highbd_8_sub_pixel_avg_variance8x4_c, 8), + make_tuple(2, 3, &vpx_highbd_8_sub_pixel_avg_variance4x8_c, 8), + make_tuple(2, 2, &vpx_highbd_8_sub_pixel_avg_variance4x4_c, 8), + make_tuple(6, 6, &vpx_highbd_10_sub_pixel_avg_variance64x64_c, 10), + make_tuple(6, 5, &vpx_highbd_10_sub_pixel_avg_variance64x32_c, 10), + make_tuple(5, 6, &vpx_highbd_10_sub_pixel_avg_variance32x64_c, 10), + make_tuple(5, 5, &vpx_highbd_10_sub_pixel_avg_variance32x32_c, 10), + 
make_tuple(5, 4, &vpx_highbd_10_sub_pixel_avg_variance32x16_c, 10), + make_tuple(4, 5, &vpx_highbd_10_sub_pixel_avg_variance16x32_c, 10), + make_tuple(4, 4, &vpx_highbd_10_sub_pixel_avg_variance16x16_c, 10), + make_tuple(4, 3, &vpx_highbd_10_sub_pixel_avg_variance16x8_c, 10), + make_tuple(3, 4, &vpx_highbd_10_sub_pixel_avg_variance8x16_c, 10), + make_tuple(3, 3, &vpx_highbd_10_sub_pixel_avg_variance8x8_c, 10), + make_tuple(3, 2, &vpx_highbd_10_sub_pixel_avg_variance8x4_c, 10), + make_tuple(2, 3, &vpx_highbd_10_sub_pixel_avg_variance4x8_c, 10), + make_tuple(2, 2, &vpx_highbd_10_sub_pixel_avg_variance4x4_c, 10), + make_tuple(6, 6, &vpx_highbd_12_sub_pixel_avg_variance64x64_c, 12), + make_tuple(6, 5, &vpx_highbd_12_sub_pixel_avg_variance64x32_c, 12), + make_tuple(5, 6, &vpx_highbd_12_sub_pixel_avg_variance32x64_c, 12), + make_tuple(5, 5, &vpx_highbd_12_sub_pixel_avg_variance32x32_c, 12), + make_tuple(5, 4, &vpx_highbd_12_sub_pixel_avg_variance32x16_c, 12), + make_tuple(4, 5, &vpx_highbd_12_sub_pixel_avg_variance16x32_c, 12), + make_tuple(4, 4, &vpx_highbd_12_sub_pixel_avg_variance16x16_c, 12), + make_tuple(4, 3, &vpx_highbd_12_sub_pixel_avg_variance16x8_c, 12), + make_tuple(3, 4, &vpx_highbd_12_sub_pixel_avg_variance8x16_c, 12), + make_tuple(3, 3, &vpx_highbd_12_sub_pixel_avg_variance8x8_c, 12), + make_tuple(3, 2, &vpx_highbd_12_sub_pixel_avg_variance8x4_c, 12), + make_tuple(2, 3, &vpx_highbd_12_sub_pixel_avg_variance4x8_c, 12), + make_tuple(2, 2, &vpx_highbd_12_sub_pixel_avg_variance4x4_c, 12))); #endif // CONFIG_VP9_HIGHBITDEPTH -#if HAVE_MMX -const VarianceMxNFunc mse16x16_mmx = vpx_mse16x16_mmx; -INSTANTIATE_TEST_CASE_P(MMX, VpxMseTest, - ::testing::Values(make_tuple(4, 4, mse16x16_mmx))); - -INSTANTIATE_TEST_CASE_P(MMX, SumOfSquaresTest, - ::testing::Values(vpx_get_mb_ss_mmx)); - -const VarianceMxNFunc variance16x16_mmx = vpx_variance16x16_mmx; -const VarianceMxNFunc variance16x8_mmx = vpx_variance16x8_mmx; -const VarianceMxNFunc variance8x16_mmx = 
vpx_variance8x16_mmx; -const VarianceMxNFunc variance8x8_mmx = vpx_variance8x8_mmx; -const VarianceMxNFunc variance4x4_mmx = vpx_variance4x4_mmx; -INSTANTIATE_TEST_CASE_P( - MMX, VpxVarianceTest, - ::testing::Values(make_tuple(4, 4, variance16x16_mmx, 0), - make_tuple(4, 3, variance16x8_mmx, 0), - make_tuple(3, 4, variance8x16_mmx, 0), - make_tuple(3, 3, variance8x8_mmx, 0), - make_tuple(2, 2, variance4x4_mmx, 0))); - -const SubpixVarMxNFunc subpel_var16x16_mmx = vpx_sub_pixel_variance16x16_mmx; -const SubpixVarMxNFunc subpel_var16x8_mmx = vpx_sub_pixel_variance16x8_mmx; -const SubpixVarMxNFunc subpel_var8x16_mmx = vpx_sub_pixel_variance8x16_mmx; -const SubpixVarMxNFunc subpel_var8x8_mmx = vpx_sub_pixel_variance8x8_mmx; -const SubpixVarMxNFunc subpel_var4x4_mmx = vpx_sub_pixel_variance4x4_mmx; -INSTANTIATE_TEST_CASE_P( - MMX, VpxSubpelVarianceTest, - ::testing::Values(make_tuple(4, 4, subpel_var16x16_mmx, 0), - make_tuple(4, 3, subpel_var16x8_mmx, 0), - make_tuple(3, 4, subpel_var8x16_mmx, 0), - make_tuple(3, 3, subpel_var8x8_mmx, 0), - make_tuple(2, 2, subpel_var4x4_mmx, 0))); -#endif // HAVE_MMX - #if HAVE_SSE2 INSTANTIATE_TEST_CASE_P(SSE2, SumOfSquaresTest, ::testing::Values(vpx_get_mb_ss_sse2)); -const VarianceMxNFunc mse16x16_sse2 = vpx_mse16x16_sse2; -const VarianceMxNFunc mse16x8_sse2 = vpx_mse16x8_sse2; -const VarianceMxNFunc mse8x16_sse2 = vpx_mse8x16_sse2; -const VarianceMxNFunc mse8x8_sse2 = vpx_mse8x8_sse2; INSTANTIATE_TEST_CASE_P(SSE2, VpxMseTest, - ::testing::Values(make_tuple(4, 4, mse16x16_sse2), - make_tuple(4, 3, mse16x8_sse2), - make_tuple(3, 4, mse8x16_sse2), - make_tuple(3, 3, mse8x8_sse2))); - -const VarianceMxNFunc variance64x64_sse2 = vpx_variance64x64_sse2; -const VarianceMxNFunc variance64x32_sse2 = vpx_variance64x32_sse2; -const VarianceMxNFunc variance32x64_sse2 = vpx_variance32x64_sse2; -const VarianceMxNFunc variance32x32_sse2 = vpx_variance32x32_sse2; -const VarianceMxNFunc variance32x16_sse2 = vpx_variance32x16_sse2; -const 
VarianceMxNFunc variance16x32_sse2 = vpx_variance16x32_sse2; -const VarianceMxNFunc variance16x16_sse2 = vpx_variance16x16_sse2; -const VarianceMxNFunc variance16x8_sse2 = vpx_variance16x8_sse2; -const VarianceMxNFunc variance8x16_sse2 = vpx_variance8x16_sse2; -const VarianceMxNFunc variance8x8_sse2 = vpx_variance8x8_sse2; -const VarianceMxNFunc variance8x4_sse2 = vpx_variance8x4_sse2; -const VarianceMxNFunc variance4x8_sse2 = vpx_variance4x8_sse2; -const VarianceMxNFunc variance4x4_sse2 = vpx_variance4x4_sse2; + ::testing::Values(make_tuple(4, 4, &vpx_mse16x16_sse2), + make_tuple(4, 3, &vpx_mse16x8_sse2), + make_tuple(3, 4, &vpx_mse8x16_sse2), + make_tuple(3, 3, &vpx_mse8x8_sse2))); + INSTANTIATE_TEST_CASE_P( SSE2, VpxVarianceTest, - ::testing::Values(make_tuple(6, 6, variance64x64_sse2, 0), - make_tuple(6, 5, variance64x32_sse2, 0), - make_tuple(5, 6, variance32x64_sse2, 0), - make_tuple(5, 5, variance32x32_sse2, 0), - make_tuple(5, 4, variance32x16_sse2, 0), - make_tuple(4, 5, variance16x32_sse2, 0), - make_tuple(4, 4, variance16x16_sse2, 0), - make_tuple(4, 3, variance16x8_sse2, 0), - make_tuple(3, 4, variance8x16_sse2, 0), - make_tuple(3, 3, variance8x8_sse2, 0), - make_tuple(3, 2, variance8x4_sse2, 0), - make_tuple(2, 3, variance4x8_sse2, 0), - make_tuple(2, 2, variance4x4_sse2, 0))); + ::testing::Values(make_tuple(6, 6, &vpx_variance64x64_sse2, 0), + make_tuple(6, 5, &vpx_variance64x32_sse2, 0), + make_tuple(5, 6, &vpx_variance32x64_sse2, 0), + make_tuple(5, 5, &vpx_variance32x32_sse2, 0), + make_tuple(5, 4, &vpx_variance32x16_sse2, 0), + make_tuple(4, 5, &vpx_variance16x32_sse2, 0), + make_tuple(4, 4, &vpx_variance16x16_sse2, 0), + make_tuple(4, 3, &vpx_variance16x8_sse2, 0), + make_tuple(3, 4, &vpx_variance8x16_sse2, 0), + make_tuple(3, 3, &vpx_variance8x8_sse2, 0), + make_tuple(3, 2, &vpx_variance8x4_sse2, 0), + make_tuple(2, 3, &vpx_variance4x8_sse2, 0), + make_tuple(2, 2, &vpx_variance4x4_sse2, 0))); #if CONFIG_USE_X86INC -const SubpixVarMxNFunc 
subpel_variance64x64_sse2 = - vpx_sub_pixel_variance64x64_sse2; -const SubpixVarMxNFunc subpel_variance64x32_sse2 = - vpx_sub_pixel_variance64x32_sse2; -const SubpixVarMxNFunc subpel_variance32x64_sse2 = - vpx_sub_pixel_variance32x64_sse2; -const SubpixVarMxNFunc subpel_variance32x32_sse2 = - vpx_sub_pixel_variance32x32_sse2; -const SubpixVarMxNFunc subpel_variance32x16_sse2 = - vpx_sub_pixel_variance32x16_sse2; -const SubpixVarMxNFunc subpel_variance16x32_sse2 = - vpx_sub_pixel_variance16x32_sse2; -const SubpixVarMxNFunc subpel_variance16x16_sse2 = - vpx_sub_pixel_variance16x16_sse2; -const SubpixVarMxNFunc subpel_variance16x8_sse2 = - vpx_sub_pixel_variance16x8_sse2; -const SubpixVarMxNFunc subpel_variance8x16_sse2 = - vpx_sub_pixel_variance8x16_sse2; -const SubpixVarMxNFunc subpel_variance8x8_sse2 = vpx_sub_pixel_variance8x8_sse2; -const SubpixVarMxNFunc subpel_variance8x4_sse2 = vpx_sub_pixel_variance8x4_sse2; -const SubpixVarMxNFunc subpel_variance4x8_sse = vpx_sub_pixel_variance4x8_sse; -const SubpixVarMxNFunc subpel_variance4x4_sse = vpx_sub_pixel_variance4x4_sse; INSTANTIATE_TEST_CASE_P( SSE2, VpxSubpelVarianceTest, - ::testing::Values(make_tuple(6, 6, subpel_variance64x64_sse2, 0), - make_tuple(6, 5, subpel_variance64x32_sse2, 0), - make_tuple(5, 6, subpel_variance32x64_sse2, 0), - make_tuple(5, 5, subpel_variance32x32_sse2, 0), - make_tuple(5, 4, subpel_variance32x16_sse2, 0), - make_tuple(4, 5, subpel_variance16x32_sse2, 0), - make_tuple(4, 4, subpel_variance16x16_sse2, 0), - make_tuple(4, 3, subpel_variance16x8_sse2, 0), - make_tuple(3, 4, subpel_variance8x16_sse2, 0), - make_tuple(3, 3, subpel_variance8x8_sse2, 0), - make_tuple(3, 2, subpel_variance8x4_sse2, 0), - make_tuple(2, 3, subpel_variance4x8_sse, 0), - make_tuple(2, 2, subpel_variance4x4_sse, 0))); - -const SubpixAvgVarMxNFunc subpel_avg_variance64x64_sse2 = - vpx_sub_pixel_avg_variance64x64_sse2; -const SubpixAvgVarMxNFunc subpel_avg_variance64x32_sse2 = - vpx_sub_pixel_avg_variance64x32_sse2; 
-const SubpixAvgVarMxNFunc subpel_avg_variance32x64_sse2 = - vpx_sub_pixel_avg_variance32x64_sse2; -const SubpixAvgVarMxNFunc subpel_avg_variance32x32_sse2 = - vpx_sub_pixel_avg_variance32x32_sse2; -const SubpixAvgVarMxNFunc subpel_avg_variance32x16_sse2 = - vpx_sub_pixel_avg_variance32x16_sse2; -const SubpixAvgVarMxNFunc subpel_avg_variance16x32_sse2 = - vpx_sub_pixel_avg_variance16x32_sse2; -const SubpixAvgVarMxNFunc subpel_avg_variance16x16_sse2 = - vpx_sub_pixel_avg_variance16x16_sse2; -const SubpixAvgVarMxNFunc subpel_avg_variance16x8_sse2 = - vpx_sub_pixel_avg_variance16x8_sse2; -const SubpixAvgVarMxNFunc subpel_avg_variance8x16_sse2 = - vpx_sub_pixel_avg_variance8x16_sse2; -const SubpixAvgVarMxNFunc subpel_avg_variance8x8_sse2 = - vpx_sub_pixel_avg_variance8x8_sse2; -const SubpixAvgVarMxNFunc subpel_avg_variance8x4_sse2 = - vpx_sub_pixel_avg_variance8x4_sse2; -const SubpixAvgVarMxNFunc subpel_avg_variance4x8_sse = - vpx_sub_pixel_avg_variance4x8_sse; -const SubpixAvgVarMxNFunc subpel_avg_variance4x4_sse = - vpx_sub_pixel_avg_variance4x4_sse; + ::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_variance64x64_sse2, 0), + make_tuple(6, 5, &vpx_sub_pixel_variance64x32_sse2, 0), + make_tuple(5, 6, &vpx_sub_pixel_variance32x64_sse2, 0), + make_tuple(5, 5, &vpx_sub_pixel_variance32x32_sse2, 0), + make_tuple(5, 4, &vpx_sub_pixel_variance32x16_sse2, 0), + make_tuple(4, 5, &vpx_sub_pixel_variance16x32_sse2, 0), + make_tuple(4, 4, &vpx_sub_pixel_variance16x16_sse2, 0), + make_tuple(4, 3, &vpx_sub_pixel_variance16x8_sse2, 0), + make_tuple(3, 4, &vpx_sub_pixel_variance8x16_sse2, 0), + make_tuple(3, 3, &vpx_sub_pixel_variance8x8_sse2, 0), + make_tuple(3, 2, &vpx_sub_pixel_variance8x4_sse2, 0), + make_tuple(2, 3, &vpx_sub_pixel_variance4x8_sse2, 0), + make_tuple(2, 2, &vpx_sub_pixel_variance4x4_sse2, 0))); + INSTANTIATE_TEST_CASE_P( SSE2, VpxSubpelAvgVarianceTest, ::testing::Values( - make_tuple(6, 6, subpel_avg_variance64x64_sse2, 0), - make_tuple(6, 5, 
subpel_avg_variance64x32_sse2, 0), - make_tuple(5, 6, subpel_avg_variance32x64_sse2, 0), - make_tuple(5, 5, subpel_avg_variance32x32_sse2, 0), - make_tuple(5, 4, subpel_avg_variance32x16_sse2, 0), - make_tuple(4, 5, subpel_avg_variance16x32_sse2, 0), - make_tuple(4, 4, subpel_avg_variance16x16_sse2, 0), - make_tuple(4, 3, subpel_avg_variance16x8_sse2, 0), - make_tuple(3, 4, subpel_avg_variance8x16_sse2, 0), - make_tuple(3, 3, subpel_avg_variance8x8_sse2, 0), - make_tuple(3, 2, subpel_avg_variance8x4_sse2, 0), - make_tuple(2, 3, subpel_avg_variance4x8_sse, 0), - make_tuple(2, 2, subpel_avg_variance4x4_sse, 0))); + make_tuple(6, 6, &vpx_sub_pixel_avg_variance64x64_sse2, 0), + make_tuple(6, 5, &vpx_sub_pixel_avg_variance64x32_sse2, 0), + make_tuple(5, 6, &vpx_sub_pixel_avg_variance32x64_sse2, 0), + make_tuple(5, 5, &vpx_sub_pixel_avg_variance32x32_sse2, 0), + make_tuple(5, 4, &vpx_sub_pixel_avg_variance32x16_sse2, 0), + make_tuple(4, 5, &vpx_sub_pixel_avg_variance16x32_sse2, 0), + make_tuple(4, 4, &vpx_sub_pixel_avg_variance16x16_sse2, 0), + make_tuple(4, 3, &vpx_sub_pixel_avg_variance16x8_sse2, 0), + make_tuple(3, 4, &vpx_sub_pixel_avg_variance8x16_sse2, 0), + make_tuple(3, 3, &vpx_sub_pixel_avg_variance8x8_sse2, 0), + make_tuple(3, 2, &vpx_sub_pixel_avg_variance8x4_sse2, 0), + make_tuple(2, 3, &vpx_sub_pixel_avg_variance4x8_sse2, 0), + make_tuple(2, 2, &vpx_sub_pixel_avg_variance4x4_sse2, 0))); #endif // CONFIG_USE_X86INC #if CONFIG_VP9_HIGHBITDEPTH /* TODO(debargha): This test does not support the highbd version -const VarianceMxNFunc highbd_12_mse16x16_sse2 = vpx_highbd_12_mse16x16_sse2; -const VarianceMxNFunc highbd_12_mse16x8_sse2 = vpx_highbd_12_mse16x8_sse2; -const VarianceMxNFunc highbd_12_mse8x16_sse2 = vpx_highbd_12_mse8x16_sse2; -const VarianceMxNFunc highbd_12_mse8x8_sse2 = vpx_highbd_12_mse8x8_sse2; - -const VarianceMxNFunc highbd_10_mse16x16_sse2 = vpx_highbd_10_mse16x16_sse2; -const VarianceMxNFunc highbd_10_mse16x8_sse2 = vpx_highbd_10_mse16x8_sse2; 
-const VarianceMxNFunc highbd_10_mse8x16_sse2 = vpx_highbd_10_mse8x16_sse2; -const VarianceMxNFunc highbd_10_mse8x8_sse2 = vpx_highbd_10_mse8x8_sse2; - -const VarianceMxNFunc highbd_8_mse16x16_sse2 = vpx_highbd_8_mse16x16_sse2; -const VarianceMxNFunc highbd_8_mse16x8_sse2 = vpx_highbd_8_mse16x8_sse2; -const VarianceMxNFunc highbd_8_mse8x16_sse2 = vpx_highbd_8_mse8x16_sse2; -const VarianceMxNFunc highbd_8_mse8x8_sse2 = vpx_highbd_8_mse8x8_sse2; INSTANTIATE_TEST_CASE_P( - SSE2, VpxHBDMseTest, ::testing::Values(make_tuple(4, 4, highbd_12_mse16x16_sse2), - make_tuple(4, 3, highbd_12_mse16x8_sse2), - make_tuple(3, 4, highbd_12_mse8x16_sse2), - make_tuple(3, 3, highbd_12_mse8x8_sse2), - make_tuple(4, 4, highbd_10_mse16x16_sse2), - make_tuple(4, 3, highbd_10_mse16x8_sse2), - make_tuple(3, 4, highbd_10_mse8x16_sse2), - make_tuple(3, 3, highbd_10_mse8x8_sse2), - make_tuple(4, 4, highbd_8_mse16x16_sse2), - make_tuple(4, 3, highbd_8_mse16x8_sse2), - make_tuple(3, 4, highbd_8_mse8x16_sse2), - make_tuple(3, 3, highbd_8_mse8x8_sse2))); + SSE2, VpxHBDMseTest, + ::testing::Values(make_tuple(4, 4, &vpx_highbd_12_mse16x16_sse2), + make_tuple(4, 3, &vpx_highbd_12_mse16x8_sse2), + make_tuple(3, 4, &vpx_highbd_12_mse8x16_sse2), + make_tuple(3, 3, &vpx_highbd_12_mse8x8_sse2), + make_tuple(4, 4, &vpx_highbd_10_mse16x16_sse2), + make_tuple(4, 3, &vpx_highbd_10_mse16x8_sse2), + make_tuple(3, 4, &vpx_highbd_10_mse8x16_sse2), + make_tuple(3, 3, &vpx_highbd_10_mse8x8_sse2), + make_tuple(4, 4, &vpx_highbd_8_mse16x16_sse2), + make_tuple(4, 3, &vpx_highbd_8_mse16x8_sse2), + make_tuple(3, 4, &vpx_highbd_8_mse8x16_sse2), + make_tuple(3, 3, &vpx_highbd_8_mse8x8_sse2))); */ -const VarianceMxNFunc highbd_12_variance64x64_sse2 = - vpx_highbd_12_variance64x64_sse2; -const VarianceMxNFunc highbd_12_variance64x32_sse2 = - vpx_highbd_12_variance64x32_sse2; -const VarianceMxNFunc highbd_12_variance32x64_sse2 = - vpx_highbd_12_variance32x64_sse2; -const VarianceMxNFunc highbd_12_variance32x32_sse2 = - 
vpx_highbd_12_variance32x32_sse2; -const VarianceMxNFunc highbd_12_variance32x16_sse2 = - vpx_highbd_12_variance32x16_sse2; -const VarianceMxNFunc highbd_12_variance16x32_sse2 = - vpx_highbd_12_variance16x32_sse2; -const VarianceMxNFunc highbd_12_variance16x16_sse2 = - vpx_highbd_12_variance16x16_sse2; -const VarianceMxNFunc highbd_12_variance16x8_sse2 = - vpx_highbd_12_variance16x8_sse2; -const VarianceMxNFunc highbd_12_variance8x16_sse2 = - vpx_highbd_12_variance8x16_sse2; -const VarianceMxNFunc highbd_12_variance8x8_sse2 = - vpx_highbd_12_variance8x8_sse2; -const VarianceMxNFunc highbd_10_variance64x64_sse2 = - vpx_highbd_10_variance64x64_sse2; -const VarianceMxNFunc highbd_10_variance64x32_sse2 = - vpx_highbd_10_variance64x32_sse2; -const VarianceMxNFunc highbd_10_variance32x64_sse2 = - vpx_highbd_10_variance32x64_sse2; -const VarianceMxNFunc highbd_10_variance32x32_sse2 = - vpx_highbd_10_variance32x32_sse2; -const VarianceMxNFunc highbd_10_variance32x16_sse2 = - vpx_highbd_10_variance32x16_sse2; -const VarianceMxNFunc highbd_10_variance16x32_sse2 = - vpx_highbd_10_variance16x32_sse2; -const VarianceMxNFunc highbd_10_variance16x16_sse2 = - vpx_highbd_10_variance16x16_sse2; -const VarianceMxNFunc highbd_10_variance16x8_sse2 = - vpx_highbd_10_variance16x8_sse2; -const VarianceMxNFunc highbd_10_variance8x16_sse2 = - vpx_highbd_10_variance8x16_sse2; -const VarianceMxNFunc highbd_10_variance8x8_sse2 = - vpx_highbd_10_variance8x8_sse2; -const VarianceMxNFunc highbd_8_variance64x64_sse2 = - vpx_highbd_8_variance64x64_sse2; -const VarianceMxNFunc highbd_8_variance64x32_sse2 = - vpx_highbd_8_variance64x32_sse2; -const VarianceMxNFunc highbd_8_variance32x64_sse2 = - vpx_highbd_8_variance32x64_sse2; -const VarianceMxNFunc highbd_8_variance32x32_sse2 = - vpx_highbd_8_variance32x32_sse2; -const VarianceMxNFunc highbd_8_variance32x16_sse2 = - vpx_highbd_8_variance32x16_sse2; -const VarianceMxNFunc highbd_8_variance16x32_sse2 = - vpx_highbd_8_variance16x32_sse2; -const 
VarianceMxNFunc highbd_8_variance16x16_sse2 = - vpx_highbd_8_variance16x16_sse2; -const VarianceMxNFunc highbd_8_variance16x8_sse2 = - vpx_highbd_8_variance16x8_sse2; -const VarianceMxNFunc highbd_8_variance8x16_sse2 = - vpx_highbd_8_variance8x16_sse2; -const VarianceMxNFunc highbd_8_variance8x8_sse2 = - vpx_highbd_8_variance8x8_sse2; - INSTANTIATE_TEST_CASE_P( SSE2, VpxHBDVarianceTest, - ::testing::Values(make_tuple(6, 6, highbd_12_variance64x64_sse2, 12), - make_tuple(6, 5, highbd_12_variance64x32_sse2, 12), - make_tuple(5, 6, highbd_12_variance32x64_sse2, 12), - make_tuple(5, 5, highbd_12_variance32x32_sse2, 12), - make_tuple(5, 4, highbd_12_variance32x16_sse2, 12), - make_tuple(4, 5, highbd_12_variance16x32_sse2, 12), - make_tuple(4, 4, highbd_12_variance16x16_sse2, 12), - make_tuple(4, 3, highbd_12_variance16x8_sse2, 12), - make_tuple(3, 4, highbd_12_variance8x16_sse2, 12), - make_tuple(3, 3, highbd_12_variance8x8_sse2, 12), - make_tuple(6, 6, highbd_10_variance64x64_sse2, 10), - make_tuple(6, 5, highbd_10_variance64x32_sse2, 10), - make_tuple(5, 6, highbd_10_variance32x64_sse2, 10), - make_tuple(5, 5, highbd_10_variance32x32_sse2, 10), - make_tuple(5, 4, highbd_10_variance32x16_sse2, 10), - make_tuple(4, 5, highbd_10_variance16x32_sse2, 10), - make_tuple(4, 4, highbd_10_variance16x16_sse2, 10), - make_tuple(4, 3, highbd_10_variance16x8_sse2, 10), - make_tuple(3, 4, highbd_10_variance8x16_sse2, 10), - make_tuple(3, 3, highbd_10_variance8x8_sse2, 10), - make_tuple(6, 6, highbd_8_variance64x64_sse2, 8), - make_tuple(6, 5, highbd_8_variance64x32_sse2, 8), - make_tuple(5, 6, highbd_8_variance32x64_sse2, 8), - make_tuple(5, 5, highbd_8_variance32x32_sse2, 8), - make_tuple(5, 4, highbd_8_variance32x16_sse2, 8), - make_tuple(4, 5, highbd_8_variance16x32_sse2, 8), - make_tuple(4, 4, highbd_8_variance16x16_sse2, 8), - make_tuple(4, 3, highbd_8_variance16x8_sse2, 8), - make_tuple(3, 4, highbd_8_variance8x16_sse2, 8), - make_tuple(3, 3, highbd_8_variance8x8_sse2, 8))); + 
::testing::Values(make_tuple(6, 6, &vpx_highbd_12_variance64x64_sse2, 12), + make_tuple(6, 5, &vpx_highbd_12_variance64x32_sse2, 12), + make_tuple(5, 6, &vpx_highbd_12_variance32x64_sse2, 12), + make_tuple(5, 5, &vpx_highbd_12_variance32x32_sse2, 12), + make_tuple(5, 4, &vpx_highbd_12_variance32x16_sse2, 12), + make_tuple(4, 5, &vpx_highbd_12_variance16x32_sse2, 12), + make_tuple(4, 4, &vpx_highbd_12_variance16x16_sse2, 12), + make_tuple(4, 3, &vpx_highbd_12_variance16x8_sse2, 12), + make_tuple(3, 4, &vpx_highbd_12_variance8x16_sse2, 12), + make_tuple(3, 3, &vpx_highbd_12_variance8x8_sse2, 12), + make_tuple(6, 6, &vpx_highbd_10_variance64x64_sse2, 10), + make_tuple(6, 5, &vpx_highbd_10_variance64x32_sse2, 10), + make_tuple(5, 6, &vpx_highbd_10_variance32x64_sse2, 10), + make_tuple(5, 5, &vpx_highbd_10_variance32x32_sse2, 10), + make_tuple(5, 4, &vpx_highbd_10_variance32x16_sse2, 10), + make_tuple(4, 5, &vpx_highbd_10_variance16x32_sse2, 10), + make_tuple(4, 4, &vpx_highbd_10_variance16x16_sse2, 10), + make_tuple(4, 3, &vpx_highbd_10_variance16x8_sse2, 10), + make_tuple(3, 4, &vpx_highbd_10_variance8x16_sse2, 10), + make_tuple(3, 3, &vpx_highbd_10_variance8x8_sse2, 10), + make_tuple(6, 6, &vpx_highbd_8_variance64x64_sse2, 8), + make_tuple(6, 5, &vpx_highbd_8_variance64x32_sse2, 8), + make_tuple(5, 6, &vpx_highbd_8_variance32x64_sse2, 8), + make_tuple(5, 5, &vpx_highbd_8_variance32x32_sse2, 8), + make_tuple(5, 4, &vpx_highbd_8_variance32x16_sse2, 8), + make_tuple(4, 5, &vpx_highbd_8_variance16x32_sse2, 8), + make_tuple(4, 4, &vpx_highbd_8_variance16x16_sse2, 8), + make_tuple(4, 3, &vpx_highbd_8_variance16x8_sse2, 8), + make_tuple(3, 4, &vpx_highbd_8_variance8x16_sse2, 8), + make_tuple(3, 3, &vpx_highbd_8_variance8x8_sse2, 8))); #if CONFIG_USE_X86INC -const SubpixVarMxNFunc highbd_12_subpel_variance64x64_sse2 = - vpx_highbd_12_sub_pixel_variance64x64_sse2; -const SubpixVarMxNFunc highbd_12_subpel_variance64x32_sse2 = - vpx_highbd_12_sub_pixel_variance64x32_sse2; 
-const SubpixVarMxNFunc highbd_12_subpel_variance32x64_sse2 = - vpx_highbd_12_sub_pixel_variance32x64_sse2; -const SubpixVarMxNFunc highbd_12_subpel_variance32x32_sse2 = - vpx_highbd_12_sub_pixel_variance32x32_sse2; -const SubpixVarMxNFunc highbd_12_subpel_variance32x16_sse2 = - vpx_highbd_12_sub_pixel_variance32x16_sse2; -const SubpixVarMxNFunc highbd_12_subpel_variance16x32_sse2 = - vpx_highbd_12_sub_pixel_variance16x32_sse2; -const SubpixVarMxNFunc highbd_12_subpel_variance16x16_sse2 = - vpx_highbd_12_sub_pixel_variance16x16_sse2; -const SubpixVarMxNFunc highbd_12_subpel_variance16x8_sse2 = - vpx_highbd_12_sub_pixel_variance16x8_sse2; -const SubpixVarMxNFunc highbd_12_subpel_variance8x16_sse2 = - vpx_highbd_12_sub_pixel_variance8x16_sse2; -const SubpixVarMxNFunc highbd_12_subpel_variance8x8_sse2 = - vpx_highbd_12_sub_pixel_variance8x8_sse2; -const SubpixVarMxNFunc highbd_12_subpel_variance8x4_sse2 = - vpx_highbd_12_sub_pixel_variance8x4_sse2; -const SubpixVarMxNFunc highbd_10_subpel_variance64x64_sse2 = - vpx_highbd_10_sub_pixel_variance64x64_sse2; -const SubpixVarMxNFunc highbd_10_subpel_variance64x32_sse2 = - vpx_highbd_10_sub_pixel_variance64x32_sse2; -const SubpixVarMxNFunc highbd_10_subpel_variance32x64_sse2 = - vpx_highbd_10_sub_pixel_variance32x64_sse2; -const SubpixVarMxNFunc highbd_10_subpel_variance32x32_sse2 = - vpx_highbd_10_sub_pixel_variance32x32_sse2; -const SubpixVarMxNFunc highbd_10_subpel_variance32x16_sse2 = - vpx_highbd_10_sub_pixel_variance32x16_sse2; -const SubpixVarMxNFunc highbd_10_subpel_variance16x32_sse2 = - vpx_highbd_10_sub_pixel_variance16x32_sse2; -const SubpixVarMxNFunc highbd_10_subpel_variance16x16_sse2 = - vpx_highbd_10_sub_pixel_variance16x16_sse2; -const SubpixVarMxNFunc highbd_10_subpel_variance16x8_sse2 = - vpx_highbd_10_sub_pixel_variance16x8_sse2; -const SubpixVarMxNFunc highbd_10_subpel_variance8x16_sse2 = - vpx_highbd_10_sub_pixel_variance8x16_sse2; -const SubpixVarMxNFunc highbd_10_subpel_variance8x8_sse2 = - 
vpx_highbd_10_sub_pixel_variance8x8_sse2; -const SubpixVarMxNFunc highbd_10_subpel_variance8x4_sse2 = - vpx_highbd_10_sub_pixel_variance8x4_sse2; -const SubpixVarMxNFunc highbd_8_subpel_variance64x64_sse2 = - vpx_highbd_8_sub_pixel_variance64x64_sse2; -const SubpixVarMxNFunc highbd_8_subpel_variance64x32_sse2 = - vpx_highbd_8_sub_pixel_variance64x32_sse2; -const SubpixVarMxNFunc highbd_8_subpel_variance32x64_sse2 = - vpx_highbd_8_sub_pixel_variance32x64_sse2; -const SubpixVarMxNFunc highbd_8_subpel_variance32x32_sse2 = - vpx_highbd_8_sub_pixel_variance32x32_sse2; -const SubpixVarMxNFunc highbd_8_subpel_variance32x16_sse2 = - vpx_highbd_8_sub_pixel_variance32x16_sse2; -const SubpixVarMxNFunc highbd_8_subpel_variance16x32_sse2 = - vpx_highbd_8_sub_pixel_variance16x32_sse2; -const SubpixVarMxNFunc highbd_8_subpel_variance16x16_sse2 = - vpx_highbd_8_sub_pixel_variance16x16_sse2; -const SubpixVarMxNFunc highbd_8_subpel_variance16x8_sse2 = - vpx_highbd_8_sub_pixel_variance16x8_sse2; -const SubpixVarMxNFunc highbd_8_subpel_variance8x16_sse2 = - vpx_highbd_8_sub_pixel_variance8x16_sse2; -const SubpixVarMxNFunc highbd_8_subpel_variance8x8_sse2 = - vpx_highbd_8_sub_pixel_variance8x8_sse2; -const SubpixVarMxNFunc highbd_8_subpel_variance8x4_sse2 = - vpx_highbd_8_sub_pixel_variance8x4_sse2; INSTANTIATE_TEST_CASE_P( SSE2, VpxHBDSubpelVarianceTest, - ::testing::Values(make_tuple(6, 6, highbd_12_subpel_variance64x64_sse2, 12), - make_tuple(6, 5, highbd_12_subpel_variance64x32_sse2, 12), - make_tuple(5, 6, highbd_12_subpel_variance32x64_sse2, 12), - make_tuple(5, 5, highbd_12_subpel_variance32x32_sse2, 12), - make_tuple(5, 4, highbd_12_subpel_variance32x16_sse2, 12), - make_tuple(4, 5, highbd_12_subpel_variance16x32_sse2, 12), - make_tuple(4, 4, highbd_12_subpel_variance16x16_sse2, 12), - make_tuple(4, 3, highbd_12_subpel_variance16x8_sse2, 12), - make_tuple(3, 4, highbd_12_subpel_variance8x16_sse2, 12), - make_tuple(3, 3, highbd_12_subpel_variance8x8_sse2, 12), - make_tuple(3, 2, 
highbd_12_subpel_variance8x4_sse2, 12), - make_tuple(6, 6, highbd_10_subpel_variance64x64_sse2, 10), - make_tuple(6, 5, highbd_10_subpel_variance64x32_sse2, 10), - make_tuple(5, 6, highbd_10_subpel_variance32x64_sse2, 10), - make_tuple(5, 5, highbd_10_subpel_variance32x32_sse2, 10), - make_tuple(5, 4, highbd_10_subpel_variance32x16_sse2, 10), - make_tuple(4, 5, highbd_10_subpel_variance16x32_sse2, 10), - make_tuple(4, 4, highbd_10_subpel_variance16x16_sse2, 10), - make_tuple(4, 3, highbd_10_subpel_variance16x8_sse2, 10), - make_tuple(3, 4, highbd_10_subpel_variance8x16_sse2, 10), - make_tuple(3, 3, highbd_10_subpel_variance8x8_sse2, 10), - make_tuple(3, 2, highbd_10_subpel_variance8x4_sse2, 10), - make_tuple(6, 6, highbd_8_subpel_variance64x64_sse2, 8), - make_tuple(6, 5, highbd_8_subpel_variance64x32_sse2, 8), - make_tuple(5, 6, highbd_8_subpel_variance32x64_sse2, 8), - make_tuple(5, 5, highbd_8_subpel_variance32x32_sse2, 8), - make_tuple(5, 4, highbd_8_subpel_variance32x16_sse2, 8), - make_tuple(4, 5, highbd_8_subpel_variance16x32_sse2, 8), - make_tuple(4, 4, highbd_8_subpel_variance16x16_sse2, 8), - make_tuple(4, 3, highbd_8_subpel_variance16x8_sse2, 8), - make_tuple(3, 4, highbd_8_subpel_variance8x16_sse2, 8), - make_tuple(3, 3, highbd_8_subpel_variance8x8_sse2, 8), - make_tuple(3, 2, highbd_8_subpel_variance8x4_sse2, 8))); - -const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance64x64_sse2 = - vpx_highbd_12_sub_pixel_avg_variance64x64_sse2; -const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance64x32_sse2 = - vpx_highbd_12_sub_pixel_avg_variance64x32_sse2; -const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance32x64_sse2 = - vpx_highbd_12_sub_pixel_avg_variance32x64_sse2; -const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance32x32_sse2 = - vpx_highbd_12_sub_pixel_avg_variance32x32_sse2; -const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance32x16_sse2 = - vpx_highbd_12_sub_pixel_avg_variance32x16_sse2; -const SubpixAvgVarMxNFunc 
highbd_12_subpel_avg_variance16x32_sse2 = - vpx_highbd_12_sub_pixel_avg_variance16x32_sse2; -const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance16x16_sse2 = - vpx_highbd_12_sub_pixel_avg_variance16x16_sse2; -const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance16x8_sse2 = - vpx_highbd_12_sub_pixel_avg_variance16x8_sse2; -const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance8x16_sse2 = - vpx_highbd_12_sub_pixel_avg_variance8x16_sse2; -const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance8x8_sse2 = - vpx_highbd_12_sub_pixel_avg_variance8x8_sse2; -const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance8x4_sse2 = - vpx_highbd_12_sub_pixel_avg_variance8x4_sse2; -const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance64x64_sse2 = - vpx_highbd_10_sub_pixel_avg_variance64x64_sse2; -const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance64x32_sse2 = - vpx_highbd_10_sub_pixel_avg_variance64x32_sse2; -const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance32x64_sse2 = - vpx_highbd_10_sub_pixel_avg_variance32x64_sse2; -const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance32x32_sse2 = - vpx_highbd_10_sub_pixel_avg_variance32x32_sse2; -const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance32x16_sse2 = - vpx_highbd_10_sub_pixel_avg_variance32x16_sse2; -const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance16x32_sse2 = - vpx_highbd_10_sub_pixel_avg_variance16x32_sse2; -const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance16x16_sse2 = - vpx_highbd_10_sub_pixel_avg_variance16x16_sse2; -const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance16x8_sse2 = - vpx_highbd_10_sub_pixel_avg_variance16x8_sse2; -const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance8x16_sse2 = - vpx_highbd_10_sub_pixel_avg_variance8x16_sse2; -const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance8x8_sse2 = - vpx_highbd_10_sub_pixel_avg_variance8x8_sse2; -const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance8x4_sse2 = - vpx_highbd_10_sub_pixel_avg_variance8x4_sse2; -const SubpixAvgVarMxNFunc 
highbd_8_subpel_avg_variance64x64_sse2 = - vpx_highbd_8_sub_pixel_avg_variance64x64_sse2; -const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance64x32_sse2 = - vpx_highbd_8_sub_pixel_avg_variance64x32_sse2; -const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance32x64_sse2 = - vpx_highbd_8_sub_pixel_avg_variance32x64_sse2; -const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance32x32_sse2 = - vpx_highbd_8_sub_pixel_avg_variance32x32_sse2; -const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance32x16_sse2 = - vpx_highbd_8_sub_pixel_avg_variance32x16_sse2; -const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance16x32_sse2 = - vpx_highbd_8_sub_pixel_avg_variance16x32_sse2; -const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance16x16_sse2 = - vpx_highbd_8_sub_pixel_avg_variance16x16_sse2; -const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance16x8_sse2 = - vpx_highbd_8_sub_pixel_avg_variance16x8_sse2; -const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance8x16_sse2 = - vpx_highbd_8_sub_pixel_avg_variance8x16_sse2; -const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance8x8_sse2 = - vpx_highbd_8_sub_pixel_avg_variance8x8_sse2; -const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance8x4_sse2 = - vpx_highbd_8_sub_pixel_avg_variance8x4_sse2; + ::testing::Values( + make_tuple(6, 6, &vpx_highbd_12_sub_pixel_variance64x64_sse2, 12), + make_tuple(6, 5, &vpx_highbd_12_sub_pixel_variance64x32_sse2, 12), + make_tuple(5, 6, &vpx_highbd_12_sub_pixel_variance32x64_sse2, 12), + make_tuple(5, 5, &vpx_highbd_12_sub_pixel_variance32x32_sse2, 12), + make_tuple(5, 4, &vpx_highbd_12_sub_pixel_variance32x16_sse2, 12), + make_tuple(4, 5, &vpx_highbd_12_sub_pixel_variance16x32_sse2, 12), + make_tuple(4, 4, &vpx_highbd_12_sub_pixel_variance16x16_sse2, 12), + make_tuple(4, 3, &vpx_highbd_12_sub_pixel_variance16x8_sse2, 12), + make_tuple(3, 4, &vpx_highbd_12_sub_pixel_variance8x16_sse2, 12), + make_tuple(3, 3, &vpx_highbd_12_sub_pixel_variance8x8_sse2, 12), + make_tuple(3, 2, 
&vpx_highbd_12_sub_pixel_variance8x4_sse2, 12), + make_tuple(6, 6, &vpx_highbd_10_sub_pixel_variance64x64_sse2, 10), + make_tuple(6, 5, &vpx_highbd_10_sub_pixel_variance64x32_sse2, 10), + make_tuple(5, 6, &vpx_highbd_10_sub_pixel_variance32x64_sse2, 10), + make_tuple(5, 5, &vpx_highbd_10_sub_pixel_variance32x32_sse2, 10), + make_tuple(5, 4, &vpx_highbd_10_sub_pixel_variance32x16_sse2, 10), + make_tuple(4, 5, &vpx_highbd_10_sub_pixel_variance16x32_sse2, 10), + make_tuple(4, 4, &vpx_highbd_10_sub_pixel_variance16x16_sse2, 10), + make_tuple(4, 3, &vpx_highbd_10_sub_pixel_variance16x8_sse2, 10), + make_tuple(3, 4, &vpx_highbd_10_sub_pixel_variance8x16_sse2, 10), + make_tuple(3, 3, &vpx_highbd_10_sub_pixel_variance8x8_sse2, 10), + make_tuple(3, 2, &vpx_highbd_10_sub_pixel_variance8x4_sse2, 10), + make_tuple(6, 6, &vpx_highbd_8_sub_pixel_variance64x64_sse2, 8), + make_tuple(6, 5, &vpx_highbd_8_sub_pixel_variance64x32_sse2, 8), + make_tuple(5, 6, &vpx_highbd_8_sub_pixel_variance32x64_sse2, 8), + make_tuple(5, 5, &vpx_highbd_8_sub_pixel_variance32x32_sse2, 8), + make_tuple(5, 4, &vpx_highbd_8_sub_pixel_variance32x16_sse2, 8), + make_tuple(4, 5, &vpx_highbd_8_sub_pixel_variance16x32_sse2, 8), + make_tuple(4, 4, &vpx_highbd_8_sub_pixel_variance16x16_sse2, 8), + make_tuple(4, 3, &vpx_highbd_8_sub_pixel_variance16x8_sse2, 8), + make_tuple(3, 4, &vpx_highbd_8_sub_pixel_variance8x16_sse2, 8), + make_tuple(3, 3, &vpx_highbd_8_sub_pixel_variance8x8_sse2, 8), + make_tuple(3, 2, &vpx_highbd_8_sub_pixel_variance8x4_sse2, 8))); + INSTANTIATE_TEST_CASE_P( SSE2, VpxHBDSubpelAvgVarianceTest, ::testing::Values( - make_tuple(6, 6, highbd_12_subpel_avg_variance64x64_sse2, 12), - make_tuple(6, 5, highbd_12_subpel_avg_variance64x32_sse2, 12), - make_tuple(5, 6, highbd_12_subpel_avg_variance32x64_sse2, 12), - make_tuple(5, 5, highbd_12_subpel_avg_variance32x32_sse2, 12), - make_tuple(5, 4, highbd_12_subpel_avg_variance32x16_sse2, 12), - make_tuple(4, 5, highbd_12_subpel_avg_variance16x32_sse2, 
12), - make_tuple(4, 4, highbd_12_subpel_avg_variance16x16_sse2, 12), - make_tuple(4, 3, highbd_12_subpel_avg_variance16x8_sse2, 12), - make_tuple(3, 4, highbd_12_subpel_avg_variance8x16_sse2, 12), - make_tuple(3, 3, highbd_12_subpel_avg_variance8x8_sse2, 12), - make_tuple(3, 2, highbd_12_subpel_avg_variance8x4_sse2, 12), - make_tuple(6, 6, highbd_10_subpel_avg_variance64x64_sse2, 10), - make_tuple(6, 5, highbd_10_subpel_avg_variance64x32_sse2, 10), - make_tuple(5, 6, highbd_10_subpel_avg_variance32x64_sse2, 10), - make_tuple(5, 5, highbd_10_subpel_avg_variance32x32_sse2, 10), - make_tuple(5, 4, highbd_10_subpel_avg_variance32x16_sse2, 10), - make_tuple(4, 5, highbd_10_subpel_avg_variance16x32_sse2, 10), - make_tuple(4, 4, highbd_10_subpel_avg_variance16x16_sse2, 10), - make_tuple(4, 3, highbd_10_subpel_avg_variance16x8_sse2, 10), - make_tuple(3, 4, highbd_10_subpel_avg_variance8x16_sse2, 10), - make_tuple(3, 3, highbd_10_subpel_avg_variance8x8_sse2, 10), - make_tuple(3, 2, highbd_10_subpel_avg_variance8x4_sse2, 10), - make_tuple(6, 6, highbd_8_subpel_avg_variance64x64_sse2, 8), - make_tuple(6, 5, highbd_8_subpel_avg_variance64x32_sse2, 8), - make_tuple(5, 6, highbd_8_subpel_avg_variance32x64_sse2, 8), - make_tuple(5, 5, highbd_8_subpel_avg_variance32x32_sse2, 8), - make_tuple(5, 4, highbd_8_subpel_avg_variance32x16_sse2, 8), - make_tuple(4, 5, highbd_8_subpel_avg_variance16x32_sse2, 8), - make_tuple(4, 4, highbd_8_subpel_avg_variance16x16_sse2, 8), - make_tuple(4, 3, highbd_8_subpel_avg_variance16x8_sse2, 8), - make_tuple(3, 4, highbd_8_subpel_avg_variance8x16_sse2, 8), - make_tuple(3, 3, highbd_8_subpel_avg_variance8x8_sse2, 8), - make_tuple(3, 2, highbd_8_subpel_avg_variance8x4_sse2, 8))); + make_tuple(6, 6, &vpx_highbd_12_sub_pixel_avg_variance64x64_sse2, 12), + make_tuple(6, 5, &vpx_highbd_12_sub_pixel_avg_variance64x32_sse2, 12), + make_tuple(5, 6, &vpx_highbd_12_sub_pixel_avg_variance32x64_sse2, 12), + make_tuple(5, 5, 
&vpx_highbd_12_sub_pixel_avg_variance32x32_sse2, 12), + make_tuple(5, 4, &vpx_highbd_12_sub_pixel_avg_variance32x16_sse2, 12), + make_tuple(4, 5, &vpx_highbd_12_sub_pixel_avg_variance16x32_sse2, 12), + make_tuple(4, 4, &vpx_highbd_12_sub_pixel_avg_variance16x16_sse2, 12), + make_tuple(4, 3, &vpx_highbd_12_sub_pixel_avg_variance16x8_sse2, 12), + make_tuple(3, 4, &vpx_highbd_12_sub_pixel_avg_variance8x16_sse2, 12), + make_tuple(3, 3, &vpx_highbd_12_sub_pixel_avg_variance8x8_sse2, 12), + make_tuple(3, 2, &vpx_highbd_12_sub_pixel_avg_variance8x4_sse2, 12), + make_tuple(6, 6, &vpx_highbd_10_sub_pixel_avg_variance64x64_sse2, 10), + make_tuple(6, 5, &vpx_highbd_10_sub_pixel_avg_variance64x32_sse2, 10), + make_tuple(5, 6, &vpx_highbd_10_sub_pixel_avg_variance32x64_sse2, 10), + make_tuple(5, 5, &vpx_highbd_10_sub_pixel_avg_variance32x32_sse2, 10), + make_tuple(5, 4, &vpx_highbd_10_sub_pixel_avg_variance32x16_sse2, 10), + make_tuple(4, 5, &vpx_highbd_10_sub_pixel_avg_variance16x32_sse2, 10), + make_tuple(4, 4, &vpx_highbd_10_sub_pixel_avg_variance16x16_sse2, 10), + make_tuple(4, 3, &vpx_highbd_10_sub_pixel_avg_variance16x8_sse2, 10), + make_tuple(3, 4, &vpx_highbd_10_sub_pixel_avg_variance8x16_sse2, 10), + make_tuple(3, 3, &vpx_highbd_10_sub_pixel_avg_variance8x8_sse2, 10), + make_tuple(3, 2, &vpx_highbd_10_sub_pixel_avg_variance8x4_sse2, 10), + make_tuple(6, 6, &vpx_highbd_8_sub_pixel_avg_variance64x64_sse2, 8), + make_tuple(6, 5, &vpx_highbd_8_sub_pixel_avg_variance64x32_sse2, 8), + make_tuple(5, 6, &vpx_highbd_8_sub_pixel_avg_variance32x64_sse2, 8), + make_tuple(5, 5, &vpx_highbd_8_sub_pixel_avg_variance32x32_sse2, 8), + make_tuple(5, 4, &vpx_highbd_8_sub_pixel_avg_variance32x16_sse2, 8), + make_tuple(4, 5, &vpx_highbd_8_sub_pixel_avg_variance16x32_sse2, 8), + make_tuple(4, 4, &vpx_highbd_8_sub_pixel_avg_variance16x16_sse2, 8), + make_tuple(4, 3, &vpx_highbd_8_sub_pixel_avg_variance16x8_sse2, 8), + make_tuple(3, 4, &vpx_highbd_8_sub_pixel_avg_variance8x16_sse2, 8), + 
make_tuple(3, 3, &vpx_highbd_8_sub_pixel_avg_variance8x8_sse2, 8), + make_tuple(3, 2, &vpx_highbd_8_sub_pixel_avg_variance8x4_sse2, 8))); #endif // CONFIG_USE_X86INC #endif // CONFIG_VP9_HIGHBITDEPTH #endif // HAVE_SSE2 #if HAVE_SSSE3 #if CONFIG_USE_X86INC -const SubpixVarMxNFunc subpel_variance64x64_ssse3 = - vpx_sub_pixel_variance64x64_ssse3; -const SubpixVarMxNFunc subpel_variance64x32_ssse3 = - vpx_sub_pixel_variance64x32_ssse3; -const SubpixVarMxNFunc subpel_variance32x64_ssse3 = - vpx_sub_pixel_variance32x64_ssse3; -const SubpixVarMxNFunc subpel_variance32x32_ssse3 = - vpx_sub_pixel_variance32x32_ssse3; -const SubpixVarMxNFunc subpel_variance32x16_ssse3 = - vpx_sub_pixel_variance32x16_ssse3; -const SubpixVarMxNFunc subpel_variance16x32_ssse3 = - vpx_sub_pixel_variance16x32_ssse3; -const SubpixVarMxNFunc subpel_variance16x16_ssse3 = - vpx_sub_pixel_variance16x16_ssse3; -const SubpixVarMxNFunc subpel_variance16x8_ssse3 = - vpx_sub_pixel_variance16x8_ssse3; -const SubpixVarMxNFunc subpel_variance8x16_ssse3 = - vpx_sub_pixel_variance8x16_ssse3; -const SubpixVarMxNFunc subpel_variance8x8_ssse3 = - vpx_sub_pixel_variance8x8_ssse3; -const SubpixVarMxNFunc subpel_variance8x4_ssse3 = - vpx_sub_pixel_variance8x4_ssse3; -const SubpixVarMxNFunc subpel_variance4x8_ssse3 = - vpx_sub_pixel_variance4x8_ssse3; -const SubpixVarMxNFunc subpel_variance4x4_ssse3 = - vpx_sub_pixel_variance4x4_ssse3; INSTANTIATE_TEST_CASE_P( SSSE3, VpxSubpelVarianceTest, - ::testing::Values(make_tuple(6, 6, subpel_variance64x64_ssse3, 0), - make_tuple(6, 5, subpel_variance64x32_ssse3, 0), - make_tuple(5, 6, subpel_variance32x64_ssse3, 0), - make_tuple(5, 5, subpel_variance32x32_ssse3, 0), - make_tuple(5, 4, subpel_variance32x16_ssse3, 0), - make_tuple(4, 5, subpel_variance16x32_ssse3, 0), - make_tuple(4, 4, subpel_variance16x16_ssse3, 0), - make_tuple(4, 3, subpel_variance16x8_ssse3, 0), - make_tuple(3, 4, subpel_variance8x16_ssse3, 0), - make_tuple(3, 3, subpel_variance8x8_ssse3, 0), - 
make_tuple(3, 2, subpel_variance8x4_ssse3, 0), - make_tuple(2, 3, subpel_variance4x8_ssse3, 0), - make_tuple(2, 2, subpel_variance4x4_ssse3, 0))); - -const SubpixAvgVarMxNFunc subpel_avg_variance64x64_ssse3 = - vpx_sub_pixel_avg_variance64x64_ssse3; -const SubpixAvgVarMxNFunc subpel_avg_variance64x32_ssse3 = - vpx_sub_pixel_avg_variance64x32_ssse3; -const SubpixAvgVarMxNFunc subpel_avg_variance32x64_ssse3 = - vpx_sub_pixel_avg_variance32x64_ssse3; -const SubpixAvgVarMxNFunc subpel_avg_variance32x32_ssse3 = - vpx_sub_pixel_avg_variance32x32_ssse3; -const SubpixAvgVarMxNFunc subpel_avg_variance32x16_ssse3 = - vpx_sub_pixel_avg_variance32x16_ssse3; -const SubpixAvgVarMxNFunc subpel_avg_variance16x32_ssse3 = - vpx_sub_pixel_avg_variance16x32_ssse3; -const SubpixAvgVarMxNFunc subpel_avg_variance16x16_ssse3 = - vpx_sub_pixel_avg_variance16x16_ssse3; -const SubpixAvgVarMxNFunc subpel_avg_variance16x8_ssse3 = - vpx_sub_pixel_avg_variance16x8_ssse3; -const SubpixAvgVarMxNFunc subpel_avg_variance8x16_ssse3 = - vpx_sub_pixel_avg_variance8x16_ssse3; -const SubpixAvgVarMxNFunc subpel_avg_variance8x8_ssse3 = - vpx_sub_pixel_avg_variance8x8_ssse3; -const SubpixAvgVarMxNFunc subpel_avg_variance8x4_ssse3 = - vpx_sub_pixel_avg_variance8x4_ssse3; -const SubpixAvgVarMxNFunc subpel_avg_variance4x8_ssse3 = - vpx_sub_pixel_avg_variance4x8_ssse3; -const SubpixAvgVarMxNFunc subpel_avg_variance4x4_ssse3 = - vpx_sub_pixel_avg_variance4x4_ssse3; + ::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_variance64x64_ssse3, 0), + make_tuple(6, 5, &vpx_sub_pixel_variance64x32_ssse3, 0), + make_tuple(5, 6, &vpx_sub_pixel_variance32x64_ssse3, 0), + make_tuple(5, 5, &vpx_sub_pixel_variance32x32_ssse3, 0), + make_tuple(5, 4, &vpx_sub_pixel_variance32x16_ssse3, 0), + make_tuple(4, 5, &vpx_sub_pixel_variance16x32_ssse3, 0), + make_tuple(4, 4, &vpx_sub_pixel_variance16x16_ssse3, 0), + make_tuple(4, 3, &vpx_sub_pixel_variance16x8_ssse3, 0), + make_tuple(3, 4, &vpx_sub_pixel_variance8x16_ssse3, 0), + 
make_tuple(3, 3, &vpx_sub_pixel_variance8x8_ssse3, 0), + make_tuple(3, 2, &vpx_sub_pixel_variance8x4_ssse3, 0), + make_tuple(2, 3, &vpx_sub_pixel_variance4x8_ssse3, 0), + make_tuple(2, 2, &vpx_sub_pixel_variance4x4_ssse3, 0))); + INSTANTIATE_TEST_CASE_P( SSSE3, VpxSubpelAvgVarianceTest, - ::testing::Values(make_tuple(6, 6, subpel_avg_variance64x64_ssse3, 0), - make_tuple(6, 5, subpel_avg_variance64x32_ssse3, 0), - make_tuple(5, 6, subpel_avg_variance32x64_ssse3, 0), - make_tuple(5, 5, subpel_avg_variance32x32_ssse3, 0), - make_tuple(5, 4, subpel_avg_variance32x16_ssse3, 0), - make_tuple(4, 5, subpel_avg_variance16x32_ssse3, 0), - make_tuple(4, 4, subpel_avg_variance16x16_ssse3, 0), - make_tuple(4, 3, subpel_avg_variance16x8_ssse3, 0), - make_tuple(3, 4, subpel_avg_variance8x16_ssse3, 0), - make_tuple(3, 3, subpel_avg_variance8x8_ssse3, 0), - make_tuple(3, 2, subpel_avg_variance8x4_ssse3, 0), - make_tuple(2, 3, subpel_avg_variance4x8_ssse3, 0), - make_tuple(2, 2, subpel_avg_variance4x4_ssse3, 0))); + ::testing::Values( + make_tuple(6, 6, &vpx_sub_pixel_avg_variance64x64_ssse3, 0), + make_tuple(6, 5, &vpx_sub_pixel_avg_variance64x32_ssse3, 0), + make_tuple(5, 6, &vpx_sub_pixel_avg_variance32x64_ssse3, 0), + make_tuple(5, 5, &vpx_sub_pixel_avg_variance32x32_ssse3, 0), + make_tuple(5, 4, &vpx_sub_pixel_avg_variance32x16_ssse3, 0), + make_tuple(4, 5, &vpx_sub_pixel_avg_variance16x32_ssse3, 0), + make_tuple(4, 4, &vpx_sub_pixel_avg_variance16x16_ssse3, 0), + make_tuple(4, 3, &vpx_sub_pixel_avg_variance16x8_ssse3, 0), + make_tuple(3, 4, &vpx_sub_pixel_avg_variance8x16_ssse3, 0), + make_tuple(3, 3, &vpx_sub_pixel_avg_variance8x8_ssse3, 0), + make_tuple(3, 2, &vpx_sub_pixel_avg_variance8x4_ssse3, 0), + make_tuple(2, 3, &vpx_sub_pixel_avg_variance4x8_ssse3, 0), + make_tuple(2, 2, &vpx_sub_pixel_avg_variance4x4_ssse3, 0))); #endif // CONFIG_USE_X86INC #endif // HAVE_SSSE3 #if HAVE_AVX2 -const VarianceMxNFunc mse16x16_avx2 = vpx_mse16x16_avx2; INSTANTIATE_TEST_CASE_P(AVX2, 
VpxMseTest, - ::testing::Values(make_tuple(4, 4, mse16x16_avx2))); + ::testing::Values(make_tuple(4, 4, + &vpx_mse16x16_avx2))); -const VarianceMxNFunc variance64x64_avx2 = vpx_variance64x64_avx2; -const VarianceMxNFunc variance64x32_avx2 = vpx_variance64x32_avx2; -const VarianceMxNFunc variance32x32_avx2 = vpx_variance32x32_avx2; -const VarianceMxNFunc variance32x16_avx2 = vpx_variance32x16_avx2; -const VarianceMxNFunc variance16x16_avx2 = vpx_variance16x16_avx2; INSTANTIATE_TEST_CASE_P( AVX2, VpxVarianceTest, - ::testing::Values(make_tuple(6, 6, variance64x64_avx2, 0), - make_tuple(6, 5, variance64x32_avx2, 0), - make_tuple(5, 5, variance32x32_avx2, 0), - make_tuple(5, 4, variance32x16_avx2, 0), - make_tuple(4, 4, variance16x16_avx2, 0))); - -const SubpixVarMxNFunc subpel_variance64x64_avx2 = - vpx_sub_pixel_variance64x64_avx2; -const SubpixVarMxNFunc subpel_variance32x32_avx2 = - vpx_sub_pixel_variance32x32_avx2; + ::testing::Values(make_tuple(6, 6, &vpx_variance64x64_avx2, 0), + make_tuple(6, 5, &vpx_variance64x32_avx2, 0), + make_tuple(5, 5, &vpx_variance32x32_avx2, 0), + make_tuple(5, 4, &vpx_variance32x16_avx2, 0), + make_tuple(4, 4, &vpx_variance16x16_avx2, 0))); + INSTANTIATE_TEST_CASE_P( AVX2, VpxSubpelVarianceTest, - ::testing::Values(make_tuple(6, 6, subpel_variance64x64_avx2, 0), - make_tuple(5, 5, subpel_variance32x32_avx2, 0))); + ::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_variance64x64_avx2, 0), + make_tuple(5, 5, &vpx_sub_pixel_variance32x32_avx2, 0))); -const SubpixAvgVarMxNFunc subpel_avg_variance64x64_avx2 = - vpx_sub_pixel_avg_variance64x64_avx2; -const SubpixAvgVarMxNFunc subpel_avg_variance32x32_avx2 = - vpx_sub_pixel_avg_variance32x32_avx2; INSTANTIATE_TEST_CASE_P( AVX2, VpxSubpelAvgVarianceTest, - ::testing::Values(make_tuple(6, 6, subpel_avg_variance64x64_avx2, 0), - make_tuple(5, 5, subpel_avg_variance32x32_avx2, 0))); + ::testing::Values( + make_tuple(6, 6, &vpx_sub_pixel_avg_variance64x64_avx2, 0), + make_tuple(5, 5, 
&vpx_sub_pixel_avg_variance32x32_avx2, 0))); #endif // HAVE_AVX2 #if HAVE_MEDIA -const VarianceMxNFunc mse16x16_media = vpx_mse16x16_media; INSTANTIATE_TEST_CASE_P(MEDIA, VpxMseTest, - ::testing::Values(make_tuple(4, 4, mse16x16_media))); + ::testing::Values(make_tuple(4, 4, + &vpx_mse16x16_media))); -const VarianceMxNFunc variance16x16_media = vpx_variance16x16_media; -const VarianceMxNFunc variance8x8_media = vpx_variance8x8_media; INSTANTIATE_TEST_CASE_P( MEDIA, VpxVarianceTest, - ::testing::Values(make_tuple(4, 4, variance16x16_media, 0), - make_tuple(3, 3, variance8x8_media, 0))); + ::testing::Values(make_tuple(4, 4, &vpx_variance16x16_media, 0), + make_tuple(3, 3, &vpx_variance8x8_media, 0))); -const SubpixVarMxNFunc subpel_variance16x16_media = - vpx_sub_pixel_variance16x16_media; -const SubpixVarMxNFunc subpel_variance8x8_media = - vpx_sub_pixel_variance8x8_media; INSTANTIATE_TEST_CASE_P( MEDIA, VpxSubpelVarianceTest, - ::testing::Values(make_tuple(4, 4, subpel_variance16x16_media, 0), - make_tuple(3, 3, subpel_variance8x8_media, 0))); + ::testing::Values(make_tuple(4, 4, &vpx_sub_pixel_variance16x16_media, 0), + make_tuple(3, 3, &vpx_sub_pixel_variance8x8_media, 0))); #endif // HAVE_MEDIA #if HAVE_NEON -const Get4x4SseFunc get4x4sse_cs_neon = vpx_get4x4sse_cs_neon; INSTANTIATE_TEST_CASE_P(NEON, VpxSseTest, - ::testing::Values(make_tuple(2, 2, get4x4sse_cs_neon))); + ::testing::Values(make_tuple(2, 2, + &vpx_get4x4sse_cs_neon))); -const VarianceMxNFunc mse16x16_neon = vpx_mse16x16_neon; INSTANTIATE_TEST_CASE_P(NEON, VpxMseTest, - ::testing::Values(make_tuple(4, 4, mse16x16_neon))); - -const VarianceMxNFunc variance64x64_neon = vpx_variance64x64_neon; -const VarianceMxNFunc variance64x32_neon = vpx_variance64x32_neon; -const VarianceMxNFunc variance32x64_neon = vpx_variance32x64_neon; -const VarianceMxNFunc variance32x32_neon = vpx_variance32x32_neon; -const VarianceMxNFunc variance16x16_neon = vpx_variance16x16_neon; -const VarianceMxNFunc variance16x8_neon 
= vpx_variance16x8_neon; -const VarianceMxNFunc variance8x16_neon = vpx_variance8x16_neon; -const VarianceMxNFunc variance8x8_neon = vpx_variance8x8_neon; + ::testing::Values(make_tuple(4, 4, + &vpx_mse16x16_neon))); + INSTANTIATE_TEST_CASE_P( NEON, VpxVarianceTest, - ::testing::Values(make_tuple(6, 6, variance64x64_neon, 0), - make_tuple(6, 5, variance64x32_neon, 0), - make_tuple(5, 6, variance32x64_neon, 0), - make_tuple(5, 5, variance32x32_neon, 0), - make_tuple(4, 4, variance16x16_neon, 0), - make_tuple(4, 3, variance16x8_neon, 0), - make_tuple(3, 4, variance8x16_neon, 0), - make_tuple(3, 3, variance8x8_neon, 0))); - -const SubpixVarMxNFunc subpel_variance64x64_neon = - vpx_sub_pixel_variance64x64_neon; -const SubpixVarMxNFunc subpel_variance32x32_neon = - vpx_sub_pixel_variance32x32_neon; -const SubpixVarMxNFunc subpel_variance16x16_neon = - vpx_sub_pixel_variance16x16_neon; -const SubpixVarMxNFunc subpel_variance8x8_neon = vpx_sub_pixel_variance8x8_neon; + ::testing::Values(make_tuple(6, 6, &vpx_variance64x64_neon, 0), + make_tuple(6, 5, &vpx_variance64x32_neon, 0), + make_tuple(5, 6, &vpx_variance32x64_neon, 0), + make_tuple(5, 5, &vpx_variance32x32_neon, 0), + make_tuple(4, 4, &vpx_variance16x16_neon, 0), + make_tuple(4, 3, &vpx_variance16x8_neon, 0), + make_tuple(3, 4, &vpx_variance8x16_neon, 0), + make_tuple(3, 3, &vpx_variance8x8_neon, 0))); + INSTANTIATE_TEST_CASE_P( NEON, VpxSubpelVarianceTest, - ::testing::Values(make_tuple(6, 6, subpel_variance64x64_neon, 0), - make_tuple(5, 5, subpel_variance32x32_neon, 0), - make_tuple(4, 4, subpel_variance16x16_neon, 0), - make_tuple(3, 3, subpel_variance8x8_neon, 0))); + ::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_variance64x64_neon, 0), + make_tuple(5, 5, &vpx_sub_pixel_variance32x32_neon, 0), + make_tuple(4, 4, &vpx_sub_pixel_variance16x16_neon, 0), + make_tuple(3, 3, &vpx_sub_pixel_variance8x8_neon, 0))); #endif // HAVE_NEON #if HAVE_MSA INSTANTIATE_TEST_CASE_P(MSA, SumOfSquaresTest, 
::testing::Values(vpx_get_mb_ss_msa)); -const Get4x4SseFunc get4x4sse_cs_msa = vpx_get4x4sse_cs_msa; INSTANTIATE_TEST_CASE_P(MSA, VpxSseTest, - ::testing::Values(make_tuple(2, 2, get4x4sse_cs_msa))); + ::testing::Values(make_tuple(2, 2, + &vpx_get4x4sse_cs_msa))); -const VarianceMxNFunc mse16x16_msa = vpx_mse16x16_msa; -const VarianceMxNFunc mse16x8_msa = vpx_mse16x8_msa; -const VarianceMxNFunc mse8x16_msa = vpx_mse8x16_msa; -const VarianceMxNFunc mse8x8_msa = vpx_mse8x8_msa; INSTANTIATE_TEST_CASE_P(MSA, VpxMseTest, - ::testing::Values(make_tuple(4, 4, mse16x16_msa), - make_tuple(4, 3, mse16x8_msa), - make_tuple(3, 4, mse8x16_msa), - make_tuple(3, 3, mse8x8_msa))); - -const VarianceMxNFunc variance64x64_msa = vpx_variance64x64_msa; -const VarianceMxNFunc variance64x32_msa = vpx_variance64x32_msa; -const VarianceMxNFunc variance32x64_msa = vpx_variance32x64_msa; -const VarianceMxNFunc variance32x32_msa = vpx_variance32x32_msa; -const VarianceMxNFunc variance32x16_msa = vpx_variance32x16_msa; -const VarianceMxNFunc variance16x32_msa = vpx_variance16x32_msa; -const VarianceMxNFunc variance16x16_msa = vpx_variance16x16_msa; -const VarianceMxNFunc variance16x8_msa = vpx_variance16x8_msa; -const VarianceMxNFunc variance8x16_msa = vpx_variance8x16_msa; -const VarianceMxNFunc variance8x8_msa = vpx_variance8x8_msa; -const VarianceMxNFunc variance8x4_msa = vpx_variance8x4_msa; -const VarianceMxNFunc variance4x8_msa = vpx_variance4x8_msa; -const VarianceMxNFunc variance4x4_msa = vpx_variance4x4_msa; + ::testing::Values(make_tuple(4, 4, &vpx_mse16x16_msa), + make_tuple(4, 3, &vpx_mse16x8_msa), + make_tuple(3, 4, &vpx_mse8x16_msa), + make_tuple(3, 3, &vpx_mse8x8_msa))); + INSTANTIATE_TEST_CASE_P( MSA, VpxVarianceTest, - ::testing::Values(make_tuple(6, 6, variance64x64_msa, 0), - make_tuple(6, 5, variance64x32_msa, 0), - make_tuple(5, 6, variance32x64_msa, 0), - make_tuple(5, 5, variance32x32_msa, 0), - make_tuple(5, 4, variance32x16_msa, 0), - make_tuple(4, 5, 
variance16x32_msa, 0), - make_tuple(4, 4, variance16x16_msa, 0), - make_tuple(4, 3, variance16x8_msa, 0), - make_tuple(3, 4, variance8x16_msa, 0), - make_tuple(3, 3, variance8x8_msa, 0), - make_tuple(3, 2, variance8x4_msa, 0), - make_tuple(2, 3, variance4x8_msa, 0), - make_tuple(2, 2, variance4x4_msa, 0))); - -const SubpixVarMxNFunc subpel_variance4x4_msa = vpx_sub_pixel_variance4x4_msa; -const SubpixVarMxNFunc subpel_variance4x8_msa = vpx_sub_pixel_variance4x8_msa; -const SubpixVarMxNFunc subpel_variance8x4_msa = vpx_sub_pixel_variance8x4_msa; -const SubpixVarMxNFunc subpel_variance8x8_msa = vpx_sub_pixel_variance8x8_msa; -const SubpixVarMxNFunc subpel_variance8x16_msa = vpx_sub_pixel_variance8x16_msa; -const SubpixVarMxNFunc subpel_variance16x8_msa = vpx_sub_pixel_variance16x8_msa; -const SubpixVarMxNFunc subpel_variance16x16_msa = - vpx_sub_pixel_variance16x16_msa; -const SubpixVarMxNFunc subpel_variance16x32_msa = - vpx_sub_pixel_variance16x32_msa; -const SubpixVarMxNFunc subpel_variance32x16_msa = - vpx_sub_pixel_variance32x16_msa; -const SubpixVarMxNFunc subpel_variance32x32_msa = - vpx_sub_pixel_variance32x32_msa; -const SubpixVarMxNFunc subpel_variance32x64_msa = - vpx_sub_pixel_variance32x64_msa; -const SubpixVarMxNFunc subpel_variance64x32_msa = - vpx_sub_pixel_variance64x32_msa; -const SubpixVarMxNFunc subpel_variance64x64_msa = - vpx_sub_pixel_variance64x64_msa; + ::testing::Values(make_tuple(6, 6, &vpx_variance64x64_msa, 0), + make_tuple(6, 5, &vpx_variance64x32_msa, 0), + make_tuple(5, 6, &vpx_variance32x64_msa, 0), + make_tuple(5, 5, &vpx_variance32x32_msa, 0), + make_tuple(5, 4, &vpx_variance32x16_msa, 0), + make_tuple(4, 5, &vpx_variance16x32_msa, 0), + make_tuple(4, 4, &vpx_variance16x16_msa, 0), + make_tuple(4, 3, &vpx_variance16x8_msa, 0), + make_tuple(3, 4, &vpx_variance8x16_msa, 0), + make_tuple(3, 3, &vpx_variance8x8_msa, 0), + make_tuple(3, 2, &vpx_variance8x4_msa, 0), + make_tuple(2, 3, &vpx_variance4x8_msa, 0), + make_tuple(2, 2, 
&vpx_variance4x4_msa, 0))); + INSTANTIATE_TEST_CASE_P( MSA, VpxSubpelVarianceTest, - ::testing::Values(make_tuple(2, 2, subpel_variance4x4_msa, 0), - make_tuple(2, 3, subpel_variance4x8_msa, 0), - make_tuple(3, 2, subpel_variance8x4_msa, 0), - make_tuple(3, 3, subpel_variance8x8_msa, 0), - make_tuple(3, 4, subpel_variance8x16_msa, 0), - make_tuple(4, 3, subpel_variance16x8_msa, 0), - make_tuple(4, 4, subpel_variance16x16_msa, 0), - make_tuple(4, 5, subpel_variance16x32_msa, 0), - make_tuple(5, 4, subpel_variance32x16_msa, 0), - make_tuple(5, 5, subpel_variance32x32_msa, 0), - make_tuple(5, 6, subpel_variance32x64_msa, 0), - make_tuple(6, 5, subpel_variance64x32_msa, 0), - make_tuple(6, 6, subpel_variance64x64_msa, 0))); - -const SubpixAvgVarMxNFunc subpel_avg_variance64x64_msa = - vpx_sub_pixel_avg_variance64x64_msa; -const SubpixAvgVarMxNFunc subpel_avg_variance64x32_msa = - vpx_sub_pixel_avg_variance64x32_msa; -const SubpixAvgVarMxNFunc subpel_avg_variance32x64_msa = - vpx_sub_pixel_avg_variance32x64_msa; -const SubpixAvgVarMxNFunc subpel_avg_variance32x32_msa = - vpx_sub_pixel_avg_variance32x32_msa; -const SubpixAvgVarMxNFunc subpel_avg_variance32x16_msa = - vpx_sub_pixel_avg_variance32x16_msa; -const SubpixAvgVarMxNFunc subpel_avg_variance16x32_msa = - vpx_sub_pixel_avg_variance16x32_msa; -const SubpixAvgVarMxNFunc subpel_avg_variance16x16_msa = - vpx_sub_pixel_avg_variance16x16_msa; -const SubpixAvgVarMxNFunc subpel_avg_variance16x8_msa = - vpx_sub_pixel_avg_variance16x8_msa; -const SubpixAvgVarMxNFunc subpel_avg_variance8x16_msa = - vpx_sub_pixel_avg_variance8x16_msa; -const SubpixAvgVarMxNFunc subpel_avg_variance8x8_msa = - vpx_sub_pixel_avg_variance8x8_msa; -const SubpixAvgVarMxNFunc subpel_avg_variance8x4_msa = - vpx_sub_pixel_avg_variance8x4_msa; -const SubpixAvgVarMxNFunc subpel_avg_variance4x8_msa = - vpx_sub_pixel_avg_variance4x8_msa; -const SubpixAvgVarMxNFunc subpel_avg_variance4x4_msa = - vpx_sub_pixel_avg_variance4x4_msa; + 
::testing::Values(make_tuple(2, 2, &vpx_sub_pixel_variance4x4_msa, 0), + make_tuple(2, 3, &vpx_sub_pixel_variance4x8_msa, 0), + make_tuple(3, 2, &vpx_sub_pixel_variance8x4_msa, 0), + make_tuple(3, 3, &vpx_sub_pixel_variance8x8_msa, 0), + make_tuple(3, 4, &vpx_sub_pixel_variance8x16_msa, 0), + make_tuple(4, 3, &vpx_sub_pixel_variance16x8_msa, 0), + make_tuple(4, 4, &vpx_sub_pixel_variance16x16_msa, 0), + make_tuple(4, 5, &vpx_sub_pixel_variance16x32_msa, 0), + make_tuple(5, 4, &vpx_sub_pixel_variance32x16_msa, 0), + make_tuple(5, 5, &vpx_sub_pixel_variance32x32_msa, 0), + make_tuple(5, 6, &vpx_sub_pixel_variance32x64_msa, 0), + make_tuple(6, 5, &vpx_sub_pixel_variance64x32_msa, 0), + make_tuple(6, 6, &vpx_sub_pixel_variance64x64_msa, 0))); + INSTANTIATE_TEST_CASE_P( MSA, VpxSubpelAvgVarianceTest, - ::testing::Values(make_tuple(6, 6, subpel_avg_variance64x64_msa, 0), - make_tuple(6, 5, subpel_avg_variance64x32_msa, 0), - make_tuple(5, 6, subpel_avg_variance32x64_msa, 0), - make_tuple(5, 5, subpel_avg_variance32x32_msa, 0), - make_tuple(5, 4, subpel_avg_variance32x16_msa, 0), - make_tuple(4, 5, subpel_avg_variance16x32_msa, 0), - make_tuple(4, 4, subpel_avg_variance16x16_msa, 0), - make_tuple(4, 3, subpel_avg_variance16x8_msa, 0), - make_tuple(3, 4, subpel_avg_variance8x16_msa, 0), - make_tuple(3, 3, subpel_avg_variance8x8_msa, 0), - make_tuple(3, 2, subpel_avg_variance8x4_msa, 0), - make_tuple(2, 3, subpel_avg_variance4x8_msa, 0), - make_tuple(2, 2, subpel_avg_variance4x4_msa, 0))); + ::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_avg_variance64x64_msa, 0), + make_tuple(6, 5, &vpx_sub_pixel_avg_variance64x32_msa, 0), + make_tuple(5, 6, &vpx_sub_pixel_avg_variance32x64_msa, 0), + make_tuple(5, 5, &vpx_sub_pixel_avg_variance32x32_msa, 0), + make_tuple(5, 4, &vpx_sub_pixel_avg_variance32x16_msa, 0), + make_tuple(4, 5, &vpx_sub_pixel_avg_variance16x32_msa, 0), + make_tuple(4, 4, &vpx_sub_pixel_avg_variance16x16_msa, 0), + make_tuple(4, 3, 
&vpx_sub_pixel_avg_variance16x8_msa, 0), + make_tuple(3, 4, &vpx_sub_pixel_avg_variance8x16_msa, 0), + make_tuple(3, 3, &vpx_sub_pixel_avg_variance8x8_msa, 0), + make_tuple(3, 2, &vpx_sub_pixel_avg_variance8x4_msa, 0), + make_tuple(2, 3, &vpx_sub_pixel_avg_variance4x8_msa, 0), + make_tuple(2, 2, &vpx_sub_pixel_avg_variance4x4_msa, 0))); #endif // HAVE_MSA } // namespace diff --git a/libvpx/test/vp10_dct_test.cc b/libvpx/test/vp10_dct_test.cc deleted file mode 100644 index b2c301ae3..000000000 --- a/libvpx/test/vp10_dct_test.cc +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <math.h> -#include <stdlib.h> -#include <new> - -#include "third_party/googletest/src/include/gtest/gtest.h" -#include "test/acm_random.h" -#include "test/util.h" -#include "./vpx_config.h" -#include "vpx_ports/msvc.h" - -#undef CONFIG_COEFFICIENT_RANGE_CHECKING -#define CONFIG_COEFFICIENT_RANGE_CHECKING 1 -#include "vp10/encoder/dct.c" - -using libvpx_test::ACMRandom; - -namespace { -void reference_dct_1d(const double *in, double *out, int size) { - const double PI = 3.141592653589793238462643383279502884; - const double kInvSqrt2 = 0.707106781186547524400844362104; - for (int k = 0; k < size; ++k) { - out[k] = 0; - for (int n = 0; n < size; ++n) { - out[k] += in[n] * cos(PI * (2 * n + 1) * k / (2 * size)); - } - if (k == 0) - out[k] = out[k] * kInvSqrt2; - } -} - -typedef void (*FdctFuncRef)(const double *in, double *out, int size); -typedef void (*IdctFuncRef)(const double *in, double *out, int size); -typedef void (*FdctFunc)(const tran_low_t *in, tran_low_t *out); -typedef void 
(*IdctFunc)(const tran_low_t *in, tran_low_t *out); - -class TransTestBase { - public: - virtual ~TransTestBase() {} - - protected: - void RunFwdAccuracyCheck() { - tran_low_t *input = new tran_low_t[txfm_size_]; - tran_low_t *output = new tran_low_t[txfm_size_]; - double *ref_input = new double[txfm_size_]; - double *ref_output = new double[txfm_size_]; - - ACMRandom rnd(ACMRandom::DeterministicSeed()); - const int count_test_block = 5000; - for (int ti = 0; ti < count_test_block; ++ti) { - for (int ni = 0; ni < txfm_size_; ++ni) { - input[ni] = rnd.Rand8() - rnd.Rand8(); - ref_input[ni] = static_cast<double>(input[ni]); - } - - fwd_txfm_(input, output); - fwd_txfm_ref_(ref_input, ref_output, txfm_size_); - - for (int ni = 0; ni < txfm_size_; ++ni) { - EXPECT_LE( - abs(output[ni] - static_cast<tran_low_t>(round(ref_output[ni]))), - max_error_); - } - } - - delete[] input; - delete[] output; - delete[] ref_input; - delete[] ref_output; - } - - double max_error_; - int txfm_size_; - FdctFunc fwd_txfm_; - FdctFuncRef fwd_txfm_ref_; -}; - -typedef std::tr1::tuple<FdctFunc, FdctFuncRef, int, int> FdctParam; -class Vp10FwdTxfm - : public TransTestBase, - public ::testing::TestWithParam<FdctParam> { - public: - virtual void SetUp() { - fwd_txfm_ = GET_PARAM(0); - fwd_txfm_ref_ = GET_PARAM(1); - txfm_size_ = GET_PARAM(2); - max_error_ = GET_PARAM(3); - } - virtual void TearDown() {} -}; - -TEST_P(Vp10FwdTxfm, RunFwdAccuracyCheck) { - RunFwdAccuracyCheck(); -} - -INSTANTIATE_TEST_CASE_P( - C, Vp10FwdTxfm, - ::testing::Values( - FdctParam(&fdct4, &reference_dct_1d, 4, 1), - FdctParam(&fdct8, &reference_dct_1d, 8, 1), - FdctParam(&fdct16, &reference_dct_1d, 16, 2))); -} // namespace diff --git a/libvpx/test/vp10_inv_txfm_test.cc b/libvpx/test/vp10_inv_txfm_test.cc deleted file mode 100644 index c49081ef8..000000000 --- a/libvpx/test/vp10_inv_txfm_test.cc +++ /dev/null @@ -1,321 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <math.h> -#include <stdlib.h> -#include <string.h> - -#include "third_party/googletest/src/include/gtest/gtest.h" - -#include "./vp10_rtcd.h" -#include "./vpx_dsp_rtcd.h" -#include "test/acm_random.h" -#include "test/clear_system_state.h" -#include "test/register_state_check.h" -#include "test/util.h" -#include "vp10/common/blockd.h" -#include "vp10/common/scan.h" -#include "vpx/vpx_integer.h" -#include "vp10/common/vp10_inv_txfm.h" - -using libvpx_test::ACMRandom; - -namespace { -const double PI = 3.141592653589793238462643383279502884; -const double kInvSqrt2 = 0.707106781186547524400844362104; - -void reference_idct_1d(const double *in, double *out, int size) { - for (int n = 0; n < size; ++n) { - out[n] = 0; - for (int k = 0; k < size; ++k) { - if (k == 0) - out[n] += kInvSqrt2 * in[k] * cos(PI * (2 * n + 1) * k / (2 * size)); - else - out[n] += in[k] * cos(PI * (2 * n + 1) * k / (2 * size)); - } - } -} - -typedef void (*IdctFuncRef)(const double *in, double *out, int size); -typedef void (*IdctFunc)(const tran_low_t *in, tran_low_t *out); - -class TransTestBase { - public: - virtual ~TransTestBase() {} - - protected: - void RunInvAccuracyCheck() { - tran_low_t *input = new tran_low_t[txfm_size_]; - tran_low_t *output = new tran_low_t[txfm_size_]; - double *ref_input = new double[txfm_size_]; - double *ref_output = new double[txfm_size_]; - - ACMRandom rnd(ACMRandom::DeterministicSeed()); - const int count_test_block = 5000; - for (int ti = 0; ti < count_test_block; ++ti) { - for (int ni = 0; ni < txfm_size_; ++ni) { - input[ni] = rnd.Rand8() - rnd.Rand8(); - ref_input[ni] = static_cast<double>(input[ni]); - } - - 
fwd_txfm_(input, output); - fwd_txfm_ref_(ref_input, ref_output, txfm_size_); - - for (int ni = 0; ni < txfm_size_; ++ni) { - EXPECT_LE( - abs(output[ni] - static_cast<tran_low_t>(round(ref_output[ni]))), - max_error_); - } - } - - delete[] input; - delete[] output; - delete[] ref_input; - delete[] ref_output; - } - - double max_error_; - int txfm_size_; - IdctFunc fwd_txfm_; - IdctFuncRef fwd_txfm_ref_; -}; - -typedef std::tr1::tuple<IdctFunc, IdctFuncRef, int, int> IdctParam; -class Vp10InvTxfm - : public TransTestBase, - public ::testing::TestWithParam<IdctParam> { - public: - virtual void SetUp() { - fwd_txfm_ = GET_PARAM(0); - fwd_txfm_ref_ = GET_PARAM(1); - txfm_size_ = GET_PARAM(2); - max_error_ = GET_PARAM(3); - } - virtual void TearDown() {} -}; - -TEST_P(Vp10InvTxfm, RunInvAccuracyCheck) { - RunInvAccuracyCheck(); -} - -INSTANTIATE_TEST_CASE_P( - C, Vp10InvTxfm, - ::testing::Values( - IdctParam(&vp10_idct4_c, &reference_idct_1d, 4, 1), - IdctParam(&vp10_idct8_c, &reference_idct_1d, 8, 2), - IdctParam(&vp10_idct16_c, &reference_idct_1d, 16, 4), - IdctParam(&vp10_idct32_c, &reference_idct_1d, 32, 6)) -); - -typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride); -typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride); -typedef std::tr1::tuple<FwdTxfmFunc, - InvTxfmFunc, - InvTxfmFunc, - TX_SIZE, int> PartialInvTxfmParam; -const int kMaxNumCoeffs = 1024; -class Vp10PartialIDctTest - : public ::testing::TestWithParam<PartialInvTxfmParam> { - public: - virtual ~Vp10PartialIDctTest() {} - virtual void SetUp() { - ftxfm_ = GET_PARAM(0); - full_itxfm_ = GET_PARAM(1); - partial_itxfm_ = GET_PARAM(2); - tx_size_ = GET_PARAM(3); - last_nonzero_ = GET_PARAM(4); - } - - virtual void TearDown() { libvpx_test::ClearSystemState(); } - - protected: - int last_nonzero_; - TX_SIZE tx_size_; - FwdTxfmFunc ftxfm_; - InvTxfmFunc full_itxfm_; - InvTxfmFunc partial_itxfm_; -}; - -TEST_P(Vp10PartialIDctTest, RunQuantCheck) { - ACMRandom 
rnd(ACMRandom::DeterministicSeed()); - int size; - switch (tx_size_) { - case TX_4X4: - size = 4; - break; - case TX_8X8: - size = 8; - break; - case TX_16X16: - size = 16; - break; - case TX_32X32: - size = 32; - break; - default: - FAIL() << "Wrong Size!"; - break; - } - DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]); - DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]); - DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]); - DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]); - - const int count_test_block = 1000; - const int block_size = size * size; - - DECLARE_ALIGNED(16, int16_t, input_extreme_block[kMaxNumCoeffs]); - DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kMaxNumCoeffs]); - - int max_error = 0; - for (int i = 0; i < count_test_block; ++i) { - // clear out destination buffer - memset(dst1, 0, sizeof(*dst1) * block_size); - memset(dst2, 0, sizeof(*dst2) * block_size); - memset(test_coef_block1, 0, sizeof(*test_coef_block1) * block_size); - memset(test_coef_block2, 0, sizeof(*test_coef_block2) * block_size); - - ACMRandom rnd(ACMRandom::DeterministicSeed()); - - for (int i = 0; i < count_test_block; ++i) { - // Initialize a test block with input range [-255, 255]. - if (i == 0) { - for (int j = 0; j < block_size; ++j) - input_extreme_block[j] = 255; - } else if (i == 1) { - for (int j = 0; j < block_size; ++j) - input_extreme_block[j] = -255; - } else { - for (int j = 0; j < block_size; ++j) { - input_extreme_block[j] = rnd.Rand8() % 2 ? 
255 : -255; - } - } - - ftxfm_(input_extreme_block, output_ref_block, size); - - // quantization with maximum allowed step sizes - test_coef_block1[0] = (output_ref_block[0] / 1336) * 1336; - for (int j = 1; j < last_nonzero_; ++j) - test_coef_block1[vp10_default_scan_orders[tx_size_].scan[j]] - = (output_ref_block[j] / 1828) * 1828; - } - - ASM_REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size)); - ASM_REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block1, dst2, size)); - - for (int j = 0; j < block_size; ++j) { - const int diff = dst1[j] - dst2[j]; - const int error = diff * diff; - if (max_error < error) - max_error = error; - } - } - - EXPECT_EQ(0, max_error) - << "Error: partial inverse transform produces different results"; -} - -TEST_P(Vp10PartialIDctTest, ResultsMatch) { - ACMRandom rnd(ACMRandom::DeterministicSeed()); - int size; - switch (tx_size_) { - case TX_4X4: - size = 4; - break; - case TX_8X8: - size = 8; - break; - case TX_16X16: - size = 16; - break; - case TX_32X32: - size = 32; - break; - default: - FAIL() << "Wrong Size!"; - break; - } - DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]); - DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]); - DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]); - DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]); - const int count_test_block = 1000; - const int max_coeff = 32766 / 4; - const int block_size = size * size; - int max_error = 0; - for (int i = 0; i < count_test_block; ++i) { - // clear out destination buffer - memset(dst1, 0, sizeof(*dst1) * block_size); - memset(dst2, 0, sizeof(*dst2) * block_size); - memset(test_coef_block1, 0, sizeof(*test_coef_block1) * block_size); - memset(test_coef_block2, 0, sizeof(*test_coef_block2) * block_size); - int max_energy_leftover = max_coeff * max_coeff; - for (int j = 0; j < last_nonzero_; ++j) { - int16_t coef = static_cast<int16_t>(sqrt(1.0 * max_energy_leftover) * - (rnd.Rand16() - 32768) / 65536); - max_energy_leftover 
-= coef * coef; - if (max_energy_leftover < 0) { - max_energy_leftover = 0; - coef = 0; - } - test_coef_block1[vp10_default_scan_orders[tx_size_].scan[j]] = coef; - } - - memcpy(test_coef_block2, test_coef_block1, - sizeof(*test_coef_block2) * block_size); - - ASM_REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size)); - ASM_REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block2, dst2, size)); - - for (int j = 0; j < block_size; ++j) { - const int diff = dst1[j] - dst2[j]; - const int error = diff * diff; - if (max_error < error) - max_error = error; - } - } - - EXPECT_EQ(0, max_error) - << "Error: partial inverse transform produces different results"; -} -using std::tr1::make_tuple; - -INSTANTIATE_TEST_CASE_P( - C, Vp10PartialIDctTest, - ::testing::Values( - make_tuple(&vpx_fdct32x32_c, - &vp10_idct32x32_1024_add_c, - &vp10_idct32x32_34_add_c, - TX_32X32, 34), - make_tuple(&vpx_fdct32x32_c, - &vp10_idct32x32_1024_add_c, - &vp10_idct32x32_1_add_c, - TX_32X32, 1), - make_tuple(&vpx_fdct16x16_c, - &vp10_idct16x16_256_add_c, - &vp10_idct16x16_10_add_c, - TX_16X16, 10), - make_tuple(&vpx_fdct16x16_c, - &vp10_idct16x16_256_add_c, - &vp10_idct16x16_1_add_c, - TX_16X16, 1), - make_tuple(&vpx_fdct8x8_c, - &vp10_idct8x8_64_add_c, - &vp10_idct8x8_12_add_c, - TX_8X8, 12), - make_tuple(&vpx_fdct8x8_c, - &vp10_idct8x8_64_add_c, - &vp10_idct8x8_1_add_c, - TX_8X8, 1), - make_tuple(&vpx_fdct4x4_c, - &vp10_idct4x4_16_add_c, - &vp10_idct4x4_1_add_c, - TX_4X4, 1))); -} // namespace diff --git a/libvpx/test/vp9_arf_freq_test.cc b/libvpx/test/vp9_arf_freq_test.cc index 89200d408..aa3e34d62 100644 --- a/libvpx/test/vp9_arf_freq_test.cc +++ b/libvpx/test/vp9_arf_freq_test.cc @@ -229,24 +229,4 @@ VP9_INSTANTIATE_TEST_CASE( ::testing::ValuesIn(kTestVectors), ::testing::ValuesIn(kEncodeVectors), ::testing::ValuesIn(kMinArfVectors)); - -#if CONFIG_VP9_HIGHBITDEPTH -# if CONFIG_VP10_ENCODER -// TODO(angiebird): 25-29 fail in high bitdepth mode. 
-INSTANTIATE_TEST_CASE_P( - DISABLED_VP10, ArfFreqTest, - ::testing::Combine( - ::testing::Values(static_cast<const libvpx_test::CodecFactory *>( - &libvpx_test::kVP10)), - ::testing::ValuesIn(kTestVectors), - ::testing::ValuesIn(kEncodeVectors), - ::testing::ValuesIn(kMinArfVectors))); -# endif // CONFIG_VP10_ENCODER -#else -VP10_INSTANTIATE_TEST_CASE( - ArfFreqTest, - ::testing::ValuesIn(kTestVectors), - ::testing::ValuesIn(kEncodeVectors), - ::testing::ValuesIn(kMinArfVectors)); -#endif // CONFIG_VP9_HIGHBITDEPTH } // namespace diff --git a/libvpx/test/vp9_denoiser_sse2_test.cc b/libvpx/test/vp9_denoiser_sse2_test.cc index 17c799dff..c84d7ff01 100644 --- a/libvpx/test/vp9_denoiser_sse2_test.cc +++ b/libvpx/test/vp9_denoiser_sse2_test.cc @@ -94,8 +94,7 @@ TEST_P(VP9DenoiserTest, BitexactCheck) { // Test for all block size. INSTANTIATE_TEST_CASE_P( SSE2, VP9DenoiserTest, - ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, BLOCK_8X8, - BLOCK_8X16, BLOCK_16X8, BLOCK_16X16, BLOCK_16X32, - BLOCK_32X16, BLOCK_32X32, BLOCK_32X64, BLOCK_64X32, - BLOCK_64X64)); + ::testing::Values(BLOCK_8X8, BLOCK_8X16, BLOCK_16X8, BLOCK_16X16, + BLOCK_16X32, BLOCK_32X16, BLOCK_32X32, BLOCK_32X64, + BLOCK_64X32, BLOCK_64X64)); } // namespace diff --git a/libvpx/test/vp9_encoder_parms_get_to_decoder.cc b/libvpx/test/vp9_encoder_parms_get_to_decoder.cc index 3ef6022ad..bd8409879 100644 --- a/libvpx/test/vp9_encoder_parms_get_to_decoder.cc +++ b/libvpx/test/vp9_encoder_parms_get_to_decoder.cc @@ -45,9 +45,9 @@ struct EncodeParameters { }; const EncodeParameters kVP9EncodeParameterSet[] = { - {0, 0, 0, 1, 0, VPX_CR_STUDIO_RANGE, VPX_CS_BT_601}, - {0, 0, 0, 0, 0, VPX_CR_FULL_RANGE, VPX_CS_BT_709}, - {0, 0, 1, 0, 0, VPX_CR_FULL_RANGE, VPX_CS_BT_2020}, + {0, 0, 0, 1, 0, VPX_CR_STUDIO_RANGE, VPX_CS_BT_601, { 0, 0 }}, + {0, 0, 0, 0, 0, VPX_CR_FULL_RANGE, VPX_CS_BT_709, { 0, 0 }}, + {0, 0, 1, 0, 0, VPX_CR_FULL_RANGE, VPX_CS_BT_2020, { 0, 0 }}, {0, 2, 0, 0, 1, VPX_CR_STUDIO_RANGE, VPX_CS_UNKNOWN, { 
640, 480 }}, // TODO(JBB): Test profiles (requires more work). }; @@ -93,7 +93,7 @@ class VpxEncoderParmsGetToDecoder } virtual bool HandleDecodeResult(const vpx_codec_err_t res_dec, - const libvpx_test::VideoSource &video, + const libvpx_test::VideoSource & /*video*/, libvpx_test::Decoder *decoder) { vpx_codec_ctx_t *const vp9_decoder = decoder->GetDecoder(); vpx_codec_alg_priv_t *const priv = diff --git a/libvpx/test/vp9_end_to_end_test.cc b/libvpx/test/vp9_end_to_end_test.cc index be1fa68c0..666919f4a 100644 --- a/libvpx/test/vp9_end_to_end_test.cc +++ b/libvpx/test/vp9_end_to_end_test.cc @@ -186,24 +186,4 @@ VP9_INSTANTIATE_TEST_CASE( ::testing::ValuesIn(kEncodingModeVectors), ::testing::ValuesIn(kTestVectors), ::testing::ValuesIn(kCpuUsedVectors)); - -#if CONFIG_VP9_HIGHBITDEPTH -# if CONFIG_VP10_ENCODER -// TODO(angiebird): many fail in high bitdepth mode. -INSTANTIATE_TEST_CASE_P( - DISABLED_VP10, EndToEndTestLarge, - ::testing::Combine( - ::testing::Values(static_cast<const libvpx_test::CodecFactory *>( - &libvpx_test::kVP10)), - ::testing::ValuesIn(kEncodingModeVectors), - ::testing::ValuesIn(kTestVectors), - ::testing::ValuesIn(kCpuUsedVectors))); -# endif // CONFIG_VP10_ENCODER -#else -VP10_INSTANTIATE_TEST_CASE( - EndToEndTestLarge, - ::testing::ValuesIn(kEncodingModeVectors), - ::testing::ValuesIn(kTestVectors), - ::testing::ValuesIn(kCpuUsedVectors)); -#endif // CONFIG_VP9_HIGHBITDEPTH } // namespace diff --git a/libvpx/test/vp9_error_block_test.cc b/libvpx/test/vp9_error_block_test.cc index 77b12ea8d..23a249e2b 100644 --- a/libvpx/test/vp9_error_block_test.cc +++ b/libvpx/test/vp9_error_block_test.cc @@ -164,7 +164,7 @@ int64_t wrap_vp9_highbd_block_error_8bit_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bps) { - assert(bps == 8); + EXPECT_EQ(8, bps); return vp9_highbd_block_error_8bit_c(coeff, dqcoeff, block_size, ssz); } @@ -173,7 +173,7 @@ int64_t wrap_vp9_highbd_block_error_8bit_sse2(const 
tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bps) { - assert(bps == 8); + EXPECT_EQ(8, bps); return vp9_highbd_block_error_8bit_sse2(coeff, dqcoeff, block_size, ssz); } @@ -195,7 +195,7 @@ int64_t wrap_vp9_highbd_block_error_8bit_avx(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bps) { - assert(bps == 8); + EXPECT_EQ(8, bps); return vp9_highbd_block_error_8bit_avx(coeff, dqcoeff, block_size, ssz); } diff --git a/libvpx/test/vp9_ethread_test.cc b/libvpx/test/vp9_ethread_test.cc index 63f6dfea7..62b91094f 100644 --- a/libvpx/test/vp9_ethread_test.cc +++ b/libvpx/test/vp9_ethread_test.cc @@ -29,16 +29,9 @@ class VPxEncoderThreadTest encoding_mode_(GET_PARAM(1)), set_cpu_used_(GET_PARAM(2)) { init_flags_ = VPX_CODEC_USE_PSNR; - vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t(); - cfg.w = 1280; - cfg.h = 720; - decoder_ = codec_->CreateDecoder(cfg, 0); - md5_.clear(); } - virtual ~VPxEncoderThreadTest() { - delete decoder_; - } + virtual ~VPxEncoderThreadTest() {} virtual void SetUp() { InitializeConfig(); @@ -48,7 +41,7 @@ class VPxEncoderThreadTest cfg_.g_lag_in_frames = 3; cfg_.rc_end_usage = VPX_VBR; cfg_.rc_2pass_vbr_minsection_pct = 5; - cfg_.rc_2pass_vbr_minsection_pct = 2000; + cfg_.rc_2pass_vbr_maxsection_pct = 2000; } else { cfg_.g_lag_in_frames = 0; cfg_.rc_end_usage = VPX_CBR; @@ -62,7 +55,7 @@ class VPxEncoderThreadTest encoder_initialized_ = false; } - virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, + virtual void PreEncodeFrameHook(::libvpx_test::VideoSource * /*video*/, ::libvpx_test::Encoder *encoder) { if (!encoder_initialized_) { // Encode 4 column tiles. 
@@ -81,27 +74,28 @@ class VPxEncoderThreadTest } } - virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { - const vpx_codec_err_t res = decoder_->DecodeFrame( - reinterpret_cast<uint8_t*>(pkt->data.frame.buf), pkt->data.frame.sz); + virtual void DecompressedFrameHook(const vpx_image_t &img, + vpx_codec_pts_t /*pts*/) { + ::libvpx_test::MD5 md5_res; + md5_res.Add(&img); + md5_.push_back(md5_res.Get()); + } + + virtual bool HandleDecodeResult(const vpx_codec_err_t res, + const libvpx_test::VideoSource& /*video*/, + libvpx_test::Decoder * /*decoder*/) { if (res != VPX_CODEC_OK) { - abort_ = true; - ASSERT_EQ(VPX_CODEC_OK, res); + EXPECT_EQ(VPX_CODEC_OK, res); + return false; } - const vpx_image_t *img = decoder_->GetDxData().Next(); - if (img) { - ::libvpx_test::MD5 md5_res; - md5_res.Add(img); - md5_.push_back(md5_res.Get()); - } + return true; } bool encoder_initialized_; int tiles_; ::libvpx_test::TestMode encoding_mode_; int set_cpu_used_; - ::libvpx_test::Decoder *decoder_; std::vector<std::string> md5_; }; @@ -134,9 +128,4 @@ VP9_INSTANTIATE_TEST_CASE( ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood, ::libvpx_test::kRealTime), ::testing::Range(1, 9)); - -VP10_INSTANTIATE_TEST_CASE( - VPxEncoderThreadTest, - ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood), - ::testing::Range(1, 3)); } // namespace diff --git a/libvpx/test/vp9_intrapred_test.cc b/libvpx/test/vp9_intrapred_test.cc index ad3327e2d..416f3c322 100644 --- a/libvpx/test/vp9_intrapred_test.cc +++ b/libvpx/test/vp9_intrapred_test.cc @@ -34,7 +34,7 @@ class VP9IntraPredBase { virtual ~VP9IntraPredBase() { libvpx_test::ClearSystemState(); } protected: - virtual void Predict(PREDICTION_MODE mode) = 0; + virtual void Predict() = 0; void CheckPrediction(int test_case_number, int *error_count) const { // For each pixel ensure that the calculated value is the same as reference. 
@@ -73,7 +73,7 @@ class VP9IntraPredBase { left_col_[y] = rnd.Rand16() & mask_; } } - Predict(DC_PRED); + Predict(); CheckPrediction(i, &error_count); } ASSERT_EQ(0, error_count); @@ -106,7 +106,7 @@ class VP9IntraPredTest mask_ = (1 << bit_depth_) - 1; } - virtual void Predict(PREDICTION_MODE mode) { + virtual void Predict() { const uint16_t *const_above_row = above_row_; const uint16_t *const_left_col = left_col_; ref_fn_(ref_dst_, stride_, const_above_row, const_left_col, bit_depth_); @@ -132,7 +132,6 @@ using std::tr1::make_tuple; #if HAVE_SSE2 #if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_USE_X86INC -#if ARCH_X86_64 INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest, ::testing::Values( make_tuple(&vpx_highbd_dc_predictor_32x32_sse2, @@ -141,13 +140,13 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest, &vpx_highbd_tm_predictor_16x16_c, 16, 8), make_tuple(&vpx_highbd_tm_predictor_32x32_sse2, &vpx_highbd_tm_predictor_32x32_c, 32, 8), - make_tuple(&vpx_highbd_dc_predictor_4x4_sse, + make_tuple(&vpx_highbd_dc_predictor_4x4_sse2, &vpx_highbd_dc_predictor_4x4_c, 4, 8), make_tuple(&vpx_highbd_dc_predictor_8x8_sse2, &vpx_highbd_dc_predictor_8x8_c, 8, 8), make_tuple(&vpx_highbd_dc_predictor_16x16_sse2, &vpx_highbd_dc_predictor_16x16_c, 16, 8), - make_tuple(&vpx_highbd_v_predictor_4x4_sse, + make_tuple(&vpx_highbd_v_predictor_4x4_sse2, &vpx_highbd_v_predictor_4x4_c, 4, 8), make_tuple(&vpx_highbd_v_predictor_8x8_sse2, &vpx_highbd_v_predictor_8x8_c, 8, 8), @@ -155,34 +154,11 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest, &vpx_highbd_v_predictor_16x16_c, 16, 8), make_tuple(&vpx_highbd_v_predictor_32x32_sse2, &vpx_highbd_v_predictor_32x32_c, 32, 8), - make_tuple(&vpx_highbd_tm_predictor_4x4_sse, + make_tuple(&vpx_highbd_tm_predictor_4x4_sse2, &vpx_highbd_tm_predictor_4x4_c, 4, 8), make_tuple(&vpx_highbd_tm_predictor_8x8_sse2, &vpx_highbd_tm_predictor_8x8_c, 8, 8))); -#else -INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest, - ::testing::Values( - 
make_tuple(&vpx_highbd_dc_predictor_4x4_sse, - &vpx_highbd_dc_predictor_4x4_c, 4, 8), - make_tuple(&vpx_highbd_dc_predictor_8x8_sse2, - &vpx_highbd_dc_predictor_8x8_c, 8, 8), - make_tuple(&vpx_highbd_dc_predictor_16x16_sse2, - &vpx_highbd_dc_predictor_16x16_c, 16, 8), - make_tuple(&vpx_highbd_v_predictor_4x4_sse, - &vpx_highbd_v_predictor_4x4_c, 4, 8), - make_tuple(&vpx_highbd_v_predictor_8x8_sse2, - &vpx_highbd_v_predictor_8x8_c, 8, 8), - make_tuple(&vpx_highbd_v_predictor_16x16_sse2, - &vpx_highbd_v_predictor_16x16_c, 16, 8), - make_tuple(&vpx_highbd_v_predictor_32x32_sse2, - &vpx_highbd_v_predictor_32x32_c, 32, 8), - make_tuple(&vpx_highbd_tm_predictor_4x4_sse, - &vpx_highbd_tm_predictor_4x4_c, 4, 8), - make_tuple(&vpx_highbd_tm_predictor_8x8_sse2, - &vpx_highbd_tm_predictor_8x8_c, 8, 8))); -#endif // !ARCH_X86_64 -#if ARCH_X86_64 INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest, ::testing::Values( make_tuple(&vpx_highbd_dc_predictor_32x32_sse2, @@ -194,14 +170,14 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest, make_tuple(&vpx_highbd_tm_predictor_32x32_sse2, &vpx_highbd_tm_predictor_32x32_c, 32, 10), - make_tuple(&vpx_highbd_dc_predictor_4x4_sse, + make_tuple(&vpx_highbd_dc_predictor_4x4_sse2, &vpx_highbd_dc_predictor_4x4_c, 4, 10), make_tuple(&vpx_highbd_dc_predictor_8x8_sse2, &vpx_highbd_dc_predictor_8x8_c, 8, 10), make_tuple(&vpx_highbd_dc_predictor_16x16_sse2, &vpx_highbd_dc_predictor_16x16_c, 16, 10), - make_tuple(&vpx_highbd_v_predictor_4x4_sse, + make_tuple(&vpx_highbd_v_predictor_4x4_sse2, &vpx_highbd_v_predictor_4x4_c, 4, 10), make_tuple(&vpx_highbd_v_predictor_8x8_sse2, &vpx_highbd_v_predictor_8x8_c, 8, 10), @@ -211,35 +187,11 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest, make_tuple(&vpx_highbd_v_predictor_32x32_sse2, &vpx_highbd_v_predictor_32x32_c, 32, 10), - make_tuple(&vpx_highbd_tm_predictor_4x4_sse, - &vpx_highbd_tm_predictor_4x4_c, 4, 10), - make_tuple(&vpx_highbd_tm_predictor_8x8_sse2, - 
&vpx_highbd_tm_predictor_8x8_c, 8, 10))); -#else -INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest, - ::testing::Values( - make_tuple(&vpx_highbd_dc_predictor_4x4_sse, - &vpx_highbd_dc_predictor_4x4_c, 4, 10), - make_tuple(&vpx_highbd_dc_predictor_8x8_sse2, - &vpx_highbd_dc_predictor_8x8_c, 8, 10), - make_tuple(&vpx_highbd_dc_predictor_16x16_sse2, - &vpx_highbd_dc_predictor_16x16_c, 16, - 10), - make_tuple(&vpx_highbd_v_predictor_4x4_sse, - &vpx_highbd_v_predictor_4x4_c, 4, 10), - make_tuple(&vpx_highbd_v_predictor_8x8_sse2, - &vpx_highbd_v_predictor_8x8_c, 8, 10), - make_tuple(&vpx_highbd_v_predictor_16x16_sse2, - &vpx_highbd_v_predictor_16x16_c, 16, 10), - make_tuple(&vpx_highbd_v_predictor_32x32_sse2, - &vpx_highbd_v_predictor_32x32_c, 32, 10), - make_tuple(&vpx_highbd_tm_predictor_4x4_sse, + make_tuple(&vpx_highbd_tm_predictor_4x4_sse2, &vpx_highbd_tm_predictor_4x4_c, 4, 10), make_tuple(&vpx_highbd_tm_predictor_8x8_sse2, &vpx_highbd_tm_predictor_8x8_c, 8, 10))); -#endif // !ARCH_X86_64 -#if ARCH_X86_64 INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest, ::testing::Values( make_tuple(&vpx_highbd_dc_predictor_32x32_sse2, @@ -251,14 +203,14 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest, make_tuple(&vpx_highbd_tm_predictor_32x32_sse2, &vpx_highbd_tm_predictor_32x32_c, 32, 12), - make_tuple(&vpx_highbd_dc_predictor_4x4_sse, + make_tuple(&vpx_highbd_dc_predictor_4x4_sse2, &vpx_highbd_dc_predictor_4x4_c, 4, 12), make_tuple(&vpx_highbd_dc_predictor_8x8_sse2, &vpx_highbd_dc_predictor_8x8_c, 8, 12), make_tuple(&vpx_highbd_dc_predictor_16x16_sse2, &vpx_highbd_dc_predictor_16x16_c, 16, 12), - make_tuple(&vpx_highbd_v_predictor_4x4_sse, + make_tuple(&vpx_highbd_v_predictor_4x4_sse2, &vpx_highbd_v_predictor_4x4_c, 4, 12), make_tuple(&vpx_highbd_v_predictor_8x8_sse2, &vpx_highbd_v_predictor_8x8_c, 8, 12), @@ -268,33 +220,11 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest, make_tuple(&vpx_highbd_v_predictor_32x32_sse2, 
&vpx_highbd_v_predictor_32x32_c, 32, 12), - make_tuple(&vpx_highbd_tm_predictor_4x4_sse, + make_tuple(&vpx_highbd_tm_predictor_4x4_sse2, &vpx_highbd_tm_predictor_4x4_c, 4, 12), make_tuple(&vpx_highbd_tm_predictor_8x8_sse2, &vpx_highbd_tm_predictor_8x8_c, 8, 12))); -#else -INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest, - ::testing::Values( - make_tuple(&vpx_highbd_dc_predictor_4x4_sse, - &vpx_highbd_dc_predictor_4x4_c, 4, 12), - make_tuple(&vpx_highbd_dc_predictor_8x8_sse2, - &vpx_highbd_dc_predictor_8x8_c, 8, 12), - make_tuple(&vpx_highbd_dc_predictor_16x16_sse2, - &vpx_highbd_dc_predictor_16x16_c, 16, - 12), - make_tuple(&vpx_highbd_v_predictor_4x4_sse, - &vpx_highbd_v_predictor_4x4_c, 4, 12), - make_tuple(&vpx_highbd_v_predictor_8x8_sse2, - &vpx_highbd_v_predictor_8x8_c, 8, 12), - make_tuple(&vpx_highbd_v_predictor_16x16_sse2, - &vpx_highbd_v_predictor_16x16_c, 16, 12), - make_tuple(&vpx_highbd_v_predictor_32x32_sse2, - &vpx_highbd_v_predictor_32x32_c, 32, 12), - make_tuple(&vpx_highbd_tm_predictor_4x4_sse, - &vpx_highbd_tm_predictor_4x4_c, 4, 12), - make_tuple(&vpx_highbd_tm_predictor_8x8_sse2, - &vpx_highbd_tm_predictor_8x8_c, 8, 12))); -#endif // !ARCH_X86_64 + #endif // CONFIG_USE_X86INC #endif // CONFIG_VP9_HIGHBITDEPTH #endif // HAVE_SSE2 diff --git a/libvpx/test/vp9_lossless_test.cc b/libvpx/test/vp9_lossless_test.cc index 09c1070c6..417739315 100644 --- a/libvpx/test/vp9_lossless_test.cc +++ b/libvpx/test/vp9_lossless_test.cc @@ -127,8 +127,4 @@ VP9_INSTANTIATE_TEST_CASE(LosslessTest, ::testing::Values(::libvpx_test::kRealTime, ::libvpx_test::kOnePassGood, ::libvpx_test::kTwoPassGood)); - -VP10_INSTANTIATE_TEST_CASE(LosslessTest, - ::testing::Values(::libvpx_test::kOnePassGood, - ::libvpx_test::kTwoPassGood)); } // namespace diff --git a/libvpx/test/vp9_spatial_svc_encoder.sh b/libvpx/test/vp9_spatial_svc_encoder.sh index 6dd5f171b..65031073f 100755 --- a/libvpx/test/vp9_spatial_svc_encoder.sh +++ b/libvpx/test/vp9_spatial_svc_encoder.sh @@ -54,7 
+54,7 @@ vp9_spatial_svc() { if [ "$(vp9_encode_available)" = "yes" ]; then local readonly test_name="vp9_spatial_svc" for layers in $(seq 1 ${vp9_ssvc_test_layers}); do - vp9_spatial_svc_encoder "${test_name}" -l ${layers} + vp9_spatial_svc_encoder "${test_name}" -sl ${layers} done fi } diff --git a/libvpx/test/webm_video_source.h b/libvpx/test/webm_video_source.h index 650bc52dc..825875687 100644 --- a/libvpx/test/webm_video_source.h +++ b/libvpx/test/webm_video_source.h @@ -62,7 +62,7 @@ class WebMVideoSource : public CompressedVideoSource { void FillFrame() { ASSERT_TRUE(vpx_ctx_->file != NULL); - const int status = webm_read_frame(webm_ctx_, &buf_, &buf_sz_, &buf_sz_); + const int status = webm_read_frame(webm_ctx_, &buf_, &buf_sz_); ASSERT_GE(status, 0) << "webm_read_frame failed"; if (status == 1) { end_of_file_ = true; @@ -72,7 +72,7 @@ class WebMVideoSource : public CompressedVideoSource { void SeekToNextKeyFrame() { ASSERT_TRUE(vpx_ctx_->file != NULL); do { - const int status = webm_read_frame(webm_ctx_, &buf_, &buf_sz_, &buf_sz_); + const int status = webm_read_frame(webm_ctx_, &buf_, &buf_sz_); ASSERT_GE(status, 0) << "webm_read_frame failed"; ++frame_; if (status == 1) { diff --git a/libvpx/third_party/googletest/README.libvpx b/libvpx/third_party/googletest/README.libvpx index 7201a67d3..1eca78dd9 100644 --- a/libvpx/third_party/googletest/README.libvpx +++ b/libvpx/third_party/googletest/README.libvpx @@ -12,4 +12,8 @@ failures, various options for running the tests, and XML test report generation. Local Modifications: -Removed unused declarations of kPathSeparatorString to have warning free build.
\ No newline at end of file +- Removed unused declarations of kPathSeparatorString to have warning + free build. +- Added GTEST_ATTRIBUTE_UNUSED_ to test registering dummies in TEST_P + and INSTANTIATE_TEST_CASE_P to remove warnings about unused variables + under GCC 5.
\ No newline at end of file diff --git a/libvpx/third_party/googletest/src/include/gtest/gtest.h b/libvpx/third_party/googletest/src/include/gtest/gtest.h index 4f3804f70..581a44e95 100644 --- a/libvpx/third_party/googletest/src/include/gtest/gtest.h +++ b/libvpx/third_party/googletest/src/include/gtest/gtest.h @@ -16960,7 +16960,7 @@ internal::CartesianProductHolder10<Generator1, Generator2, Generator3, GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>()); \ return 0; \ } \ - static int gtest_registering_dummy_; \ + static int gtest_registering_dummy_ GTEST_ATTRIBUTE_UNUSED_; \ GTEST_DISALLOW_COPY_AND_ASSIGN_(\ GTEST_TEST_CLASS_NAME_(test_case_name, test_name)); \ }; \ @@ -16972,7 +16972,7 @@ internal::CartesianProductHolder10<Generator1, Generator2, Generator3, # define INSTANTIATE_TEST_CASE_P(prefix, test_case_name, generator) \ ::testing::internal::ParamGenerator<test_case_name::ParamType> \ gtest_##prefix##test_case_name##_EvalGenerator_() { return generator; } \ - int gtest_##prefix##test_case_name##_dummy_ = \ + int gtest_##prefix##test_case_name##_dummy_ GTEST_ATTRIBUTE_UNUSED_ = \ ::testing::UnitTest::GetInstance()->parameterized_test_registry(). 
\ GetTestCasePatternHolder<test_case_name>(\ #test_case_name, __FILE__, __LINE__)->AddTestCaseInstantiation(\ diff --git a/libvpx/third_party/libwebm/Android.mk b/libvpx/third_party/libwebm/Android.mk index be9d77dee..8149a083f 100644 --- a/libvpx/third_party/libwebm/Android.mk +++ b/libvpx/third_party/libwebm/Android.mk @@ -2,9 +2,16 @@ LOCAL_PATH:= $(call my-dir) include $(CLEAR_VARS) LOCAL_MODULE:= libwebm -LOCAL_SRC_FILES:= mkvparser.cpp \ - mkvreader.cpp \ - mkvmuxer.cpp \ - mkvmuxerutil.cpp \ - mkvwriter.cpp +LOCAL_CPPFLAGS:=-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS +LOCAL_CPPFLAGS+=-D__STDC_LIMIT_MACROS -Wno-extern-c-compat +LOCAL_C_INCLUDES:= $(LOCAL_PATH) +LOCAL_EXPORT_C_INCLUDES:= $(LOCAL_PATH) + +LOCAL_SRC_FILES:= common/file_util.cc \ + common/hdr_util.cc \ + mkvparser/mkvparser.cc \ + mkvparser/mkvreader.cc \ + mkvmuxer/mkvmuxer.cc \ + mkvmuxer/mkvmuxerutil.cc \ + mkvmuxer/mkvwriter.cc include $(BUILD_STATIC_LIBRARY) diff --git a/libvpx/third_party/libwebm/README.libvpx b/libvpx/third_party/libwebm/README.libvpx index 2989d3d89..73f830322 100644 --- a/libvpx/third_party/libwebm/README.libvpx +++ b/libvpx/third_party/libwebm/README.libvpx @@ -1,5 +1,5 @@ URL: https://chromium.googlesource.com/webm/libwebm -Version: 476366249e1fda7710a389cd41c57db42305e0d4 +Version: 32d5ac49414a8914ec1e1f285f3f927c6e8ec29d License: BSD License File: LICENSE.txt diff --git a/libvpx/third_party/libwebm/RELEASE.TXT b/libvpx/third_party/libwebm/RELEASE.TXT deleted file mode 100644 index a7e9f032c..000000000 --- a/libvpx/third_party/libwebm/RELEASE.TXT +++ /dev/null @@ -1,34 +0,0 @@ -1.0.0.5
- * Handled case when no duration
- * Handled empty clusters
- * Handled empty clusters when seeking
- * Implemented check lacing bits
-
-1.0.0.4
- * Made Cues member variables mutables
- * Defined against badly-formatted cue points
- * Segment::GetCluster returns CuePoint too
- * Separated cue-based searches
-
-1.0.0.3
- * Added Block::GetOffset() to get a frame's offset in a block
- * Changed cluster count type from size_t to long
- * Parsed SeekHead to find cues
- * Allowed seeking beyond end of cluster cache
- * Added not to attempt to reparse cues element
- * Restructured Segment::LoadCluster
- * Marked position of cues without parsing cues element
- * Allowed cue points to be loaded incrementally
- * Implemented to load lazily cue points as they're searched
- * Merged Cues::LoadCuePoint into Cues::Find
- * Lazy init cues
- * Loaded cue point during find
-
-1.0.0.2
- * added support for Cues element
- * seeking was improved
-
-1.0.0.1
- * fixed item 141
- * added item 142
- * added this file, RELEASE.TXT, to repository
diff --git a/libvpx/third_party/libwebm/common/file_util.cc b/libvpx/third_party/libwebm/common/file_util.cc new file mode 100644 index 000000000..4f91318f3 --- /dev/null +++ b/libvpx/third_party/libwebm/common/file_util.cc @@ -0,0 +1,67 @@ +// Copyright (c) 2016 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +#include "common/file_util.h" + +#include <sys/stat.h> +#ifndef _MSC_VER +#include <unistd.h> // close() +#endif + +#include <cstdio> +#include <cstdlib> +#include <fstream> +#include <ios> + +namespace libwebm { + +std::string GetTempFileName() { +#if !defined _MSC_VER && !defined __MINGW32__ + char temp_file_name_template[] = "libwebm_temp.XXXXXX"; + int fd = mkstemp(temp_file_name_template); + if (fd != -1) { + close(fd); + return std::string(temp_file_name_template); + } + return std::string(); +#else + char tmp_file_name[_MAX_PATH]; + errno_t err = tmpnam_s(tmp_file_name); + if (err == 0) { + return std::string(tmp_file_name); + } + return std::string(); +#endif +} + +uint64_t GetFileSize(const std::string& file_name) { + uint64_t file_size = 0; +#ifndef _MSC_VER + struct stat st; + st.st_size = 0; + if (stat(file_name.c_str(), &st) == 0) { +#else + struct _stat st; + st.st_size = 0; + if (_stat(file_name.c_str(), &st) == 0) { +#endif + file_size = st.st_size; + } + return file_size; +} + +TempFileDeleter::TempFileDeleter() { file_name_ = GetTempFileName(); } + +TempFileDeleter::~TempFileDeleter() { + std::ifstream file(file_name_.c_str()); + if (file.good()) { + file.close(); + std::remove(file_name_.c_str()); + } +} + +} // namespace libwebm diff --git a/libvpx/third_party/libwebm/common/file_util.h 
b/libvpx/third_party/libwebm/common/file_util.h new file mode 100644 index 000000000..0e71eac11 --- /dev/null +++ b/libvpx/third_party/libwebm/common/file_util.h @@ -0,0 +1,41 @@ +// Copyright (c) 2016 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +#ifndef LIBWEBM_COMMON_FILE_UTIL_H_ +#define LIBWEBM_COMMON_FILE_UTIL_H_ + +#include <stdint.h> + +#include <string> + +#include "mkvmuxer/mkvmuxertypes.h" // LIBWEBM_DISALLOW_COPY_AND_ASSIGN() + +namespace libwebm { + +// Returns a temporary file name. +std::string GetTempFileName(); + +// Returns size of file specified by |file_name|, or 0 upon failure. +uint64_t GetFileSize(const std::string& file_name); + +// Manages life of temporary file specified at time of construction. Deletes +// file upon destruction. +class TempFileDeleter { + public: + TempFileDeleter(); + explicit TempFileDeleter(std::string file_name) : file_name_(file_name) {} + ~TempFileDeleter(); + const std::string& name() const { return file_name_; } + + private: + std::string file_name_; + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(TempFileDeleter); +}; + +} // namespace libwebm + +#endif // LIBWEBM_COMMON_FILE_UTIL_H_
\ No newline at end of file diff --git a/libvpx/third_party/libwebm/common/hdr_util.cc b/libvpx/third_party/libwebm/common/hdr_util.cc new file mode 100644 index 000000000..e1a9842fb --- /dev/null +++ b/libvpx/third_party/libwebm/common/hdr_util.cc @@ -0,0 +1,182 @@ +// Copyright (c) 2016 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +#include "hdr_util.h" + +#include <cstddef> +#include <new> + +#include "mkvparser/mkvparser.h" + +namespace libwebm { +bool CopyPrimaryChromaticity(const mkvparser::PrimaryChromaticity& parser_pc, + PrimaryChromaticityPtr* muxer_pc) { + muxer_pc->reset(new (std::nothrow) + mkvmuxer::PrimaryChromaticity(parser_pc.x, parser_pc.y)); + if (!muxer_pc->get()) + return false; + return true; +} + +bool MasteringMetadataValuePresent(double value) { + return value != mkvparser::MasteringMetadata::kValueNotPresent; +} + +bool CopyMasteringMetadata(const mkvparser::MasteringMetadata& parser_mm, + mkvmuxer::MasteringMetadata* muxer_mm) { + if (MasteringMetadataValuePresent(parser_mm.luminance_max)) + muxer_mm->luminance_max = parser_mm.luminance_max; + if (MasteringMetadataValuePresent(parser_mm.luminance_min)) + muxer_mm->luminance_min = parser_mm.luminance_min; + + PrimaryChromaticityPtr r_ptr(NULL); + PrimaryChromaticityPtr g_ptr(NULL); + PrimaryChromaticityPtr b_ptr(NULL); + PrimaryChromaticityPtr wp_ptr(NULL); + + if (parser_mm.r) { + if (!CopyPrimaryChromaticity(*parser_mm.r, &r_ptr)) + return false; + } + if (parser_mm.g) { + if (!CopyPrimaryChromaticity(*parser_mm.g, &g_ptr)) + return false; + } + if (parser_mm.b) { + if (!CopyPrimaryChromaticity(*parser_mm.b, &b_ptr)) + return false; + } + if 
(parser_mm.white_point) { + if (!CopyPrimaryChromaticity(*parser_mm.white_point, &wp_ptr)) + return false; + } + + if (!muxer_mm->SetChromaticity(r_ptr.get(), g_ptr.get(), b_ptr.get(), + wp_ptr.get())) { + return false; + } + + return true; +} + +bool ColourValuePresent(long long value) { + return value != mkvparser::Colour::kValueNotPresent; +} + +bool CopyColour(const mkvparser::Colour& parser_colour, + mkvmuxer::Colour* muxer_colour) { + if (!muxer_colour) + return false; + + if (ColourValuePresent(parser_colour.matrix_coefficients)) + muxer_colour->matrix_coefficients = parser_colour.matrix_coefficients; + if (ColourValuePresent(parser_colour.bits_per_channel)) + muxer_colour->bits_per_channel = parser_colour.bits_per_channel; + if (ColourValuePresent(parser_colour.chroma_subsampling_horz)) + muxer_colour->chroma_subsampling_horz = + parser_colour.chroma_subsampling_horz; + if (ColourValuePresent(parser_colour.chroma_subsampling_vert)) + muxer_colour->chroma_subsampling_vert = + parser_colour.chroma_subsampling_vert; + if (ColourValuePresent(parser_colour.cb_subsampling_horz)) + muxer_colour->cb_subsampling_horz = parser_colour.cb_subsampling_horz; + if (ColourValuePresent(parser_colour.cb_subsampling_vert)) + muxer_colour->cb_subsampling_vert = parser_colour.cb_subsampling_vert; + if (ColourValuePresent(parser_colour.chroma_siting_horz)) + muxer_colour->chroma_siting_horz = parser_colour.chroma_siting_horz; + if (ColourValuePresent(parser_colour.chroma_siting_vert)) + muxer_colour->chroma_siting_vert = parser_colour.chroma_siting_vert; + if (ColourValuePresent(parser_colour.range)) + muxer_colour->range = parser_colour.range; + if (ColourValuePresent(parser_colour.transfer_characteristics)) + muxer_colour->transfer_characteristics = + parser_colour.transfer_characteristics; + if (ColourValuePresent(parser_colour.primaries)) + muxer_colour->primaries = parser_colour.primaries; + if (ColourValuePresent(parser_colour.max_cll)) + muxer_colour->max_cll = 
parser_colour.max_cll; + if (ColourValuePresent(parser_colour.max_fall)) + muxer_colour->max_fall = parser_colour.max_fall; + + if (parser_colour.mastering_metadata) { + mkvmuxer::MasteringMetadata muxer_mm; + if (!CopyMasteringMetadata(*parser_colour.mastering_metadata, &muxer_mm)) + return false; + if (!muxer_colour->SetMasteringMetadata(muxer_mm)) + return false; + } + return true; +} + +// Format of VPx private data: +// +// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// | ID Byte | Length | | +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +// | | +// : Bytes 1..Length of Codec Feature : +// | | +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// +// ID Byte Format +// ID byte is an unsigned byte. +// 0 1 2 3 4 5 6 7 +// +-+-+-+-+-+-+-+-+ +// |X| ID | +// +-+-+-+-+-+-+-+-+ +// +// The X bit is reserved. +// +// Currently only profile level is supported. ID byte must be set to 1, and +// length must be 1. 
Supported values are: +// +// 10: Level 1 +// 11: Level 1.1 +// 20: Level 2 +// 21: Level 2.1 +// 30: Level 3 +// 31: Level 3.1 +// 40: Level 4 +// 41: Level 4.1 +// 50: Level 5 +// 51: Level 5.1 +// 52: Level 5.2 +// 60: Level 6 +// 61: Level 6.1 +// 62: Level 6.2 +// +// See the following link for more information: +// http://www.webmproject.org/vp9/profiles/ +int ParseVpxCodecPrivate(const uint8_t* private_data, int32_t length) { + const int kVpxCodecPrivateLength = 3; + if (!private_data || length != kVpxCodecPrivateLength) + return 0; + + const uint8_t id_byte = *private_data; + if (id_byte != 1) + return 0; + + const int kVpxProfileLength = 1; + const uint8_t length_byte = private_data[1]; + if (length_byte != kVpxProfileLength) + return 0; + + const int level = static_cast<int>(private_data[2]); + + const int kNumLevels = 14; + const int levels[kNumLevels] = {10, 11, 20, 21, 30, 31, 40, + 41, 50, 51, 52, 60, 61, 62}; + + for (int i = 0; i < kNumLevels; ++i) { + if (level == levels[i]) + return level; + } + + return 0; +} +} // namespace libwebm diff --git a/libvpx/third_party/libwebm/common/hdr_util.h b/libvpx/third_party/libwebm/common/hdr_util.h new file mode 100644 index 000000000..d30c2b9f2 --- /dev/null +++ b/libvpx/third_party/libwebm/common/hdr_util.h @@ -0,0 +1,51 @@ +// Copyright (c) 2016 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. 
+#ifndef LIBWEBM_COMMON_HDR_UTIL_H_ +#define LIBWEBM_COMMON_HDR_UTIL_H_ + +#include <stdint.h> + +#include <memory> + +#include "mkvmuxer/mkvmuxer.h" + +namespace mkvparser { +struct Colour; +struct MasteringMetadata; +struct PrimaryChromaticity; +} // namespace mkvparser + +namespace libwebm { +// Utility types and functions for working with the Colour element and its +// children. Copiers return true upon success. Presence functions return true +// when the specified element is present. + +// TODO(tomfinegan): These should be moved to libwebm_utils once c++11 is +// required by libwebm. + +typedef std::auto_ptr<mkvmuxer::PrimaryChromaticity> PrimaryChromaticityPtr; + +bool CopyPrimaryChromaticity(const mkvparser::PrimaryChromaticity& parser_pc, + PrimaryChromaticityPtr* muxer_pc); + +bool MasteringMetadataValuePresent(double value); + +bool CopyMasteringMetadata(const mkvparser::MasteringMetadata& parser_mm, + mkvmuxer::MasteringMetadata* muxer_mm); + +bool ColourValuePresent(long long value); + +bool CopyColour(const mkvparser::Colour& parser_colour, + mkvmuxer::Colour* muxer_colour); + +// Returns VP9 profile upon success or 0 upon failure. +int ParseVpxCodecPrivate(const uint8_t* private_data, int32_t length); + +} // namespace libwebm + +#endif // LIBWEBM_COMMON_HDR_UTIL_H_ diff --git a/libvpx/third_party/libwebm/webmids.hpp b/libvpx/third_party/libwebm/common/webmids.h index ad4ab5738..32a0c5fb9 100644 --- a/libvpx/third_party/libwebm/webmids.hpp +++ b/libvpx/third_party/libwebm/common/webmids.h @@ -6,10 +6,10 @@ // in the file PATENTS. All contributing project authors may // be found in the AUTHORS file in the root of the source tree. 
-#ifndef WEBMIDS_HPP -#define WEBMIDS_HPP +#ifndef COMMON_WEBMIDS_H_ +#define COMMON_WEBMIDS_H_ -namespace mkvmuxer { +namespace libwebm { enum MkvId { kMkvEBML = 0x1A45DFA3, @@ -95,6 +95,35 @@ enum MkvId { kMkvAspectRatioType = 0x54B3, kMkvFrameRate = 0x2383E3, // end video + // colour + kMkvColour = 0x55B0, + kMkvMatrixCoefficients = 0x55B1, + kMkvBitsPerChannel = 0x55B2, + kMkvChromaSubsamplingHorz = 0x55B3, + kMkvChromaSubsamplingVert = 0x55B4, + kMkvCbSubsamplingHorz = 0x55B5, + kMkvCbSubsamplingVert = 0x55B6, + kMkvChromaSitingHorz = 0x55B7, + kMkvChromaSitingVert = 0x55B8, + kMkvRange = 0x55B9, + kMkvTransferCharacteristics = 0x55BA, + kMkvPrimaries = 0x55BB, + kMkvMaxCLL = 0x55BC, + kMkvMaxFALL = 0x55BD, + // mastering metadata + kMkvMasteringMetadata = 0x55D0, + kMkvPrimaryRChromaticityX = 0x55D1, + kMkvPrimaryRChromaticityY = 0x55D2, + kMkvPrimaryGChromaticityX = 0x55D3, + kMkvPrimaryGChromaticityY = 0x55D4, + kMkvPrimaryBChromaticityX = 0x55D5, + kMkvPrimaryBChromaticityY = 0x55D6, + kMkvWhitePointChromaticityX = 0x55D7, + kMkvWhitePointChromaticityY = 0x55D8, + kMkvLuminanceMax = 0x55D9, + kMkvLuminanceMin = 0x55DA, + // end mastering metadata + // end colour // audio kMkvAudio = 0xE1, kMkvSamplingFrequency = 0xB5, @@ -150,6 +179,6 @@ enum MkvId { kMkvTagString = 0x4487 }; -} // end namespace mkvmuxer +} // namespace libwebm -#endif // WEBMIDS_HPP +#endif // COMMON_WEBMIDS_H_ diff --git a/libvpx/third_party/libwebm/mkvmuxer.cpp b/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.cc index 9be3119a4..c79ce24ed 100644 --- a/libvpx/third_party/libwebm/mkvmuxer.cpp +++ b/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.cc @@ -6,27 +6,28 @@ // in the file PATENTS. All contributing project authors may // be found in the AUTHORS file in the root of the source tree. 
-#include "mkvmuxer.hpp" +#include "mkvmuxer/mkvmuxer.h" +#include <cfloat> #include <climits> #include <cstdio> #include <cstdlib> #include <cstring> #include <ctime> +#include <memory> #include <new> +#include <vector> -#include "mkvmuxerutil.hpp" -#include "mkvparser.hpp" -#include "mkvwriter.hpp" -#include "webmids.hpp" - -#ifdef _MSC_VER -// Disable MSVC warnings that suggest making code non-portable. -#pragma warning(disable : 4996) -#endif +#include "common/webmids.h" +#include "mkvmuxer/mkvmuxerutil.h" +#include "mkvmuxer/mkvwriter.h" +#include "mkvparser/mkvparser.h" namespace mkvmuxer { +const float MasteringMetadata::kValueNotPresent = FLT_MAX; +const uint64_t Colour::kValueNotPresent = UINT64_MAX; + namespace { // Deallocate the string designated by |dst|, and then copy the |src| // string to |dst|. The caller owns both the |src| string and the @@ -55,6 +56,20 @@ bool StrCpy(const char* src, char** dst_ptr) { strcpy(dst, src); // NOLINT return true; } + +typedef std::auto_ptr<PrimaryChromaticity> PrimaryChromaticityPtr; +bool CopyChromaticity(const PrimaryChromaticity* src, + PrimaryChromaticityPtr* dst) { + if (!dst) + return false; + + dst->reset(new (std::nothrow) PrimaryChromaticity(src->x, src->y)); + if (!dst->get()) + return false; + + return true; +} + } // namespace /////////////////////////////////////////////////////////////// @@ -65,31 +80,31 @@ IMkvWriter::IMkvWriter() {} IMkvWriter::~IMkvWriter() {} -bool WriteEbmlHeader(IMkvWriter* writer, uint64 doc_type_version) { +bool WriteEbmlHeader(IMkvWriter* writer, uint64_t doc_type_version) { // Level 0 - uint64 size = EbmlElementSize(kMkvEBMLVersion, 1ULL); - size += EbmlElementSize(kMkvEBMLReadVersion, 1ULL); - size += EbmlElementSize(kMkvEBMLMaxIDLength, 4ULL); - size += EbmlElementSize(kMkvEBMLMaxSizeLength, 8ULL); - size += EbmlElementSize(kMkvDocType, "webm"); - size += EbmlElementSize(kMkvDocTypeVersion, doc_type_version); - size += EbmlElementSize(kMkvDocTypeReadVersion, 2ULL); + 
uint64_t size = EbmlElementSize(libwebm::kMkvEBMLVersion, UINT64_C(1)); + size += EbmlElementSize(libwebm::kMkvEBMLReadVersion, UINT64_C(1)); + size += EbmlElementSize(libwebm::kMkvEBMLMaxIDLength, UINT64_C(4)); + size += EbmlElementSize(libwebm::kMkvEBMLMaxSizeLength, UINT64_C(8)); + size += EbmlElementSize(libwebm::kMkvDocType, "webm"); + size += EbmlElementSize(libwebm::kMkvDocTypeVersion, doc_type_version); + size += EbmlElementSize(libwebm::kMkvDocTypeReadVersion, UINT64_C(2)); - if (!WriteEbmlMasterElement(writer, kMkvEBML, size)) + if (!WriteEbmlMasterElement(writer, libwebm::kMkvEBML, size)) return false; - if (!WriteEbmlElement(writer, kMkvEBMLVersion, 1ULL)) + if (!WriteEbmlElement(writer, libwebm::kMkvEBMLVersion, UINT64_C(1))) return false; - if (!WriteEbmlElement(writer, kMkvEBMLReadVersion, 1ULL)) + if (!WriteEbmlElement(writer, libwebm::kMkvEBMLReadVersion, UINT64_C(1))) return false; - if (!WriteEbmlElement(writer, kMkvEBMLMaxIDLength, 4ULL)) + if (!WriteEbmlElement(writer, libwebm::kMkvEBMLMaxIDLength, UINT64_C(4))) return false; - if (!WriteEbmlElement(writer, kMkvEBMLMaxSizeLength, 8ULL)) + if (!WriteEbmlElement(writer, libwebm::kMkvEBMLMaxSizeLength, UINT64_C(8))) return false; - if (!WriteEbmlElement(writer, kMkvDocType, "webm")) + if (!WriteEbmlElement(writer, libwebm::kMkvDocType, "webm")) return false; - if (!WriteEbmlElement(writer, kMkvDocTypeVersion, doc_type_version)) + if (!WriteEbmlElement(writer, libwebm::kMkvDocTypeVersion, doc_type_version)) return false; - if (!WriteEbmlElement(writer, kMkvDocTypeReadVersion, 2ULL)) + if (!WriteEbmlElement(writer, libwebm::kMkvDocTypeReadVersion, UINT64_C(2))) return false; return true; @@ -100,16 +115,16 @@ bool WriteEbmlHeader(IMkvWriter* writer) { } bool ChunkedCopy(mkvparser::IMkvReader* source, mkvmuxer::IMkvWriter* dst, - mkvmuxer::int64 start, int64 size) { + int64_t start, int64_t size) { // TODO(vigneshv): Check if this is a reasonable value. 
- const uint32 kBufSize = 2048; - uint8* buf = new uint8[kBufSize]; - int64 offset = start; + const uint32_t kBufSize = 2048; + uint8_t* buf = new uint8_t[kBufSize]; + int64_t offset = start; while (size > 0) { - const int64 read_len = (size > kBufSize) ? kBufSize : size; + const int64_t read_len = (size > kBufSize) ? kBufSize : size; if (source->Read(offset, static_cast<long>(read_len), buf)) return false; - dst->Write(buf, static_cast<uint32>(read_len)); + dst->Write(buf, static_cast<uint32_t>(read_len)); offset += read_len; size -= read_len; } @@ -126,6 +141,7 @@ Frame::Frame() additional_(NULL), additional_length_(0), duration_(0), + duration_set_(false), frame_(NULL), is_key_(false), length_(0), @@ -158,16 +174,19 @@ bool Frame::CopyFrom(const Frame& frame) { return false; } duration_ = frame.duration(); + duration_set_ = frame.duration_set(); is_key_ = frame.is_key(); track_number_ = frame.track_number(); timestamp_ = frame.timestamp(); discard_padding_ = frame.discard_padding(); + reference_block_timestamp_ = frame.reference_block_timestamp(); + reference_block_timestamp_set_ = frame.reference_block_timestamp_set(); return true; } -bool Frame::Init(const uint8* frame, uint64 length) { - uint8* const data = - new (std::nothrow) uint8[static_cast<size_t>(length)]; // NOLINT +bool Frame::Init(const uint8_t* frame, uint64_t length) { + uint8_t* const data = + new (std::nothrow) uint8_t[static_cast<size_t>(length)]; // NOLINT if (!data) return false; @@ -179,10 +198,10 @@ bool Frame::Init(const uint8* frame, uint64 length) { return true; } -bool Frame::AddAdditionalData(const uint8* additional, uint64 length, - uint64 add_id) { - uint8* const data = - new (std::nothrow) uint8[static_cast<size_t>(length)]; // NOLINT +bool Frame::AddAdditionalData(const uint8_t* additional, uint64_t length, + uint64_t add_id) { + uint8_t* const data = + new (std::nothrow) uint8_t[static_cast<size_t>(length)]; // NOLINT if (!data) return false; @@ -216,7 +235,12 @@ bool 
Frame::CanBeSimpleBlock() const { return additional_ == NULL && discard_padding_ == 0 && duration_ == 0; } -void Frame::set_reference_block_timestamp(int64 reference_block_timestamp) { +void Frame::set_duration(uint64_t duration) { + duration_ = duration; + duration_set_ = true; +} + +void Frame::set_reference_block_timestamp(int64_t reference_block_timestamp) { reference_block_timestamp_ = reference_block_timestamp; reference_block_timestamp_set_ = true; } @@ -238,61 +262,64 @@ bool CuePoint::Write(IMkvWriter* writer) const { if (!writer || track_ < 1 || cluster_pos_ < 1) return false; - uint64 size = EbmlElementSize(kMkvCueClusterPosition, cluster_pos_); - size += EbmlElementSize(kMkvCueTrack, track_); + uint64_t size = + EbmlElementSize(libwebm::kMkvCueClusterPosition, cluster_pos_); + size += EbmlElementSize(libwebm::kMkvCueTrack, track_); if (output_block_number_ && block_number_ > 1) - size += EbmlElementSize(kMkvCueBlockNumber, block_number_); - const uint64 track_pos_size = - EbmlMasterElementSize(kMkvCueTrackPositions, size) + size; - const uint64 payload_size = - EbmlElementSize(kMkvCueTime, time_) + track_pos_size; + size += EbmlElementSize(libwebm::kMkvCueBlockNumber, block_number_); + const uint64_t track_pos_size = + EbmlMasterElementSize(libwebm::kMkvCueTrackPositions, size) + size; + const uint64_t payload_size = + EbmlElementSize(libwebm::kMkvCueTime, time_) + track_pos_size; - if (!WriteEbmlMasterElement(writer, kMkvCuePoint, payload_size)) + if (!WriteEbmlMasterElement(writer, libwebm::kMkvCuePoint, payload_size)) return false; - const int64 payload_position = writer->Position(); + const int64_t payload_position = writer->Position(); if (payload_position < 0) return false; - if (!WriteEbmlElement(writer, kMkvCueTime, time_)) + if (!WriteEbmlElement(writer, libwebm::kMkvCueTime, time_)) return false; - if (!WriteEbmlMasterElement(writer, kMkvCueTrackPositions, size)) + if (!WriteEbmlMasterElement(writer, libwebm::kMkvCueTrackPositions, size)) 
return false; - if (!WriteEbmlElement(writer, kMkvCueTrack, track_)) + if (!WriteEbmlElement(writer, libwebm::kMkvCueTrack, track_)) return false; - if (!WriteEbmlElement(writer, kMkvCueClusterPosition, cluster_pos_)) + if (!WriteEbmlElement(writer, libwebm::kMkvCueClusterPosition, cluster_pos_)) return false; if (output_block_number_ && block_number_ > 1) - if (!WriteEbmlElement(writer, kMkvCueBlockNumber, block_number_)) + if (!WriteEbmlElement(writer, libwebm::kMkvCueBlockNumber, block_number_)) return false; - const int64 stop_position = writer->Position(); + const int64_t stop_position = writer->Position(); if (stop_position < 0) return false; - if (stop_position - payload_position != static_cast<int64>(payload_size)) + if (stop_position - payload_position != static_cast<int64_t>(payload_size)) return false; return true; } -uint64 CuePoint::PayloadSize() const { - uint64 size = EbmlElementSize(kMkvCueClusterPosition, cluster_pos_); - size += EbmlElementSize(kMkvCueTrack, track_); +uint64_t CuePoint::PayloadSize() const { + uint64_t size = + EbmlElementSize(libwebm::kMkvCueClusterPosition, cluster_pos_); + size += EbmlElementSize(libwebm::kMkvCueTrack, track_); if (output_block_number_ && block_number_ > 1) - size += EbmlElementSize(kMkvCueBlockNumber, block_number_); - const uint64 track_pos_size = - EbmlMasterElementSize(kMkvCueTrackPositions, size) + size; - const uint64 payload_size = - EbmlElementSize(kMkvCueTime, time_) + track_pos_size; + size += EbmlElementSize(libwebm::kMkvCueBlockNumber, block_number_); + const uint64_t track_pos_size = + EbmlMasterElementSize(libwebm::kMkvCueTrackPositions, size) + size; + const uint64_t payload_size = + EbmlElementSize(libwebm::kMkvCueTime, time_) + track_pos_size; return payload_size; } -uint64 CuePoint::Size() const { - const uint64 payload_size = PayloadSize(); - return EbmlMasterElementSize(kMkvCuePoint, payload_size) + payload_size; +uint64_t CuePoint::Size() const { + const uint64_t payload_size = 
PayloadSize(); + return EbmlMasterElementSize(libwebm::kMkvCuePoint, payload_size) + + payload_size; } /////////////////////////////////////////////////////////////// @@ -307,7 +334,7 @@ Cues::Cues() Cues::~Cues() { if (cue_entries_) { - for (int32 i = 0; i < cue_entries_size_; ++i) { + for (int32_t i = 0; i < cue_entries_size_; ++i) { CuePoint* const cue = cue_entries_[i]; delete cue; } @@ -321,7 +348,7 @@ bool Cues::AddCue(CuePoint* cue) { if ((cue_entries_size_ + 1) > cue_entries_capacity_) { // Add more CuePoints. - const int32 new_capacity = + const int32_t new_capacity = (!cue_entries_capacity_) ? 2 : cue_entries_capacity_ * 2; if (new_capacity < 1) @@ -332,7 +359,7 @@ bool Cues::AddCue(CuePoint* cue) { if (!cues) return false; - for (int32 i = 0; i < cue_entries_size_; ++i) { + for (int32_t i = 0; i < cue_entries_size_; ++i) { cues[i] = cue_entries_[i]; } @@ -347,7 +374,7 @@ bool Cues::AddCue(CuePoint* cue) { return true; } -CuePoint* Cues::GetCueByIndex(int32 index) const { +CuePoint* Cues::GetCueByIndex(int32_t index) const { if (cue_entries_ == NULL) return NULL; @@ -357,11 +384,11 @@ CuePoint* Cues::GetCueByIndex(int32 index) const { return cue_entries_[index]; } -uint64 Cues::Size() { - uint64 size = 0; - for (int32 i = 0; i < cue_entries_size_; ++i) +uint64_t Cues::Size() { + uint64_t size = 0; + for (int32_t i = 0; i < cue_entries_size_; ++i) size += GetCueByIndex(i)->Size(); - size += EbmlMasterElementSize(kMkvCues, size); + size += EbmlMasterElementSize(libwebm::kMkvCues, size); return size; } @@ -369,8 +396,8 @@ bool Cues::Write(IMkvWriter* writer) const { if (!writer) return false; - uint64 size = 0; - for (int32 i = 0; i < cue_entries_size_; ++i) { + uint64_t size = 0; + for (int32_t i = 0; i < cue_entries_size_; ++i) { const CuePoint* const cue = GetCueByIndex(i); if (!cue) @@ -379,25 +406,25 @@ bool Cues::Write(IMkvWriter* writer) const { size += cue->Size(); } - if (!WriteEbmlMasterElement(writer, kMkvCues, size)) + if 
(!WriteEbmlMasterElement(writer, libwebm::kMkvCues, size)) return false; - const int64 payload_position = writer->Position(); + const int64_t payload_position = writer->Position(); if (payload_position < 0) return false; - for (int32 i = 0; i < cue_entries_size_; ++i) { + for (int32_t i = 0; i < cue_entries_size_; ++i) { const CuePoint* const cue = GetCueByIndex(i); if (!cue->Write(writer)) return false; } - const int64 stop_position = writer->Position(); + const int64_t stop_position = writer->Position(); if (stop_position < 0) return false; - if (stop_position - payload_position != static_cast<int64>(size)) + if (stop_position - payload_position != static_cast<int64_t>(size)) return false; return true; @@ -409,36 +436,40 @@ bool Cues::Write(IMkvWriter* writer) const { ContentEncAESSettings::ContentEncAESSettings() : cipher_mode_(kCTR) {} -uint64 ContentEncAESSettings::Size() const { - const uint64 payload = PayloadSize(); - const uint64 size = - EbmlMasterElementSize(kMkvContentEncAESSettings, payload) + payload; +uint64_t ContentEncAESSettings::Size() const { + const uint64_t payload = PayloadSize(); + const uint64_t size = + EbmlMasterElementSize(libwebm::kMkvContentEncAESSettings, payload) + + payload; return size; } bool ContentEncAESSettings::Write(IMkvWriter* writer) const { - const uint64 payload = PayloadSize(); + const uint64_t payload = PayloadSize(); - if (!WriteEbmlMasterElement(writer, kMkvContentEncAESSettings, payload)) + if (!WriteEbmlMasterElement(writer, libwebm::kMkvContentEncAESSettings, + payload)) return false; - const int64 payload_position = writer->Position(); + const int64_t payload_position = writer->Position(); if (payload_position < 0) return false; - if (!WriteEbmlElement(writer, kMkvAESSettingsCipherMode, cipher_mode_)) + if (!WriteEbmlElement(writer, libwebm::kMkvAESSettingsCipherMode, + cipher_mode_)) return false; - const int64 stop_position = writer->Position(); + const int64_t stop_position = writer->Position(); if 
(stop_position < 0 || - stop_position - payload_position != static_cast<int64>(payload)) + stop_position - payload_position != static_cast<int64_t>(payload)) return false; return true; } -uint64 ContentEncAESSettings::PayloadSize() const { - uint64 size = EbmlElementSize(kMkvAESSettingsCipherMode, cipher_mode_); +uint64_t ContentEncAESSettings::PayloadSize() const { + uint64_t size = + EbmlElementSize(libwebm::kMkvAESSettingsCipherMode, cipher_mode_); return size; } @@ -456,14 +487,14 @@ ContentEncoding::ContentEncoding() ContentEncoding::~ContentEncoding() { delete[] enc_key_id_; } -bool ContentEncoding::SetEncryptionID(const uint8* id, uint64 length) { +bool ContentEncoding::SetEncryptionID(const uint8_t* id, uint64_t length) { if (!id || length < 1) return false; delete[] enc_key_id_; enc_key_id_ = - new (std::nothrow) uint8[static_cast<size_t>(length)]; // NOLINT + new (std::nothrow) uint8_t[static_cast<size_t>(length)]; // NOLINT if (!enc_key_id_) return false; @@ -473,79 +504,89 @@ bool ContentEncoding::SetEncryptionID(const uint8* id, uint64 length) { return true; } -uint64 ContentEncoding::Size() const { - const uint64 encryption_size = EncryptionSize(); - const uint64 encoding_size = EncodingSize(0, encryption_size); - const uint64 encodings_size = - EbmlMasterElementSize(kMkvContentEncoding, encoding_size) + encoding_size; +uint64_t ContentEncoding::Size() const { + const uint64_t encryption_size = EncryptionSize(); + const uint64_t encoding_size = EncodingSize(0, encryption_size); + const uint64_t encodings_size = + EbmlMasterElementSize(libwebm::kMkvContentEncoding, encoding_size) + + encoding_size; return encodings_size; } bool ContentEncoding::Write(IMkvWriter* writer) const { - const uint64 encryption_size = EncryptionSize(); - const uint64 encoding_size = EncodingSize(0, encryption_size); - const uint64 size = - EbmlMasterElementSize(kMkvContentEncoding, encoding_size) + encoding_size; + const uint64_t encryption_size = EncryptionSize(); + const 
uint64_t encoding_size = EncodingSize(0, encryption_size); + const uint64_t size = + EbmlMasterElementSize(libwebm::kMkvContentEncoding, encoding_size) + + encoding_size; - const int64 payload_position = writer->Position(); + const int64_t payload_position = writer->Position(); if (payload_position < 0) return false; - if (!WriteEbmlMasterElement(writer, kMkvContentEncoding, encoding_size)) + if (!WriteEbmlMasterElement(writer, libwebm::kMkvContentEncoding, + encoding_size)) return false; - if (!WriteEbmlElement(writer, kMkvContentEncodingOrder, encoding_order_)) + if (!WriteEbmlElement(writer, libwebm::kMkvContentEncodingOrder, + encoding_order_)) return false; - if (!WriteEbmlElement(writer, kMkvContentEncodingScope, encoding_scope_)) + if (!WriteEbmlElement(writer, libwebm::kMkvContentEncodingScope, + encoding_scope_)) return false; - if (!WriteEbmlElement(writer, kMkvContentEncodingType, encoding_type_)) + if (!WriteEbmlElement(writer, libwebm::kMkvContentEncodingType, + encoding_type_)) return false; - if (!WriteEbmlMasterElement(writer, kMkvContentEncryption, encryption_size)) + if (!WriteEbmlMasterElement(writer, libwebm::kMkvContentEncryption, + encryption_size)) return false; - if (!WriteEbmlElement(writer, kMkvContentEncAlgo, enc_algo_)) + if (!WriteEbmlElement(writer, libwebm::kMkvContentEncAlgo, enc_algo_)) return false; - if (!WriteEbmlElement(writer, kMkvContentEncKeyID, enc_key_id_, + if (!WriteEbmlElement(writer, libwebm::kMkvContentEncKeyID, enc_key_id_, enc_key_id_length_)) return false; if (!enc_aes_settings_.Write(writer)) return false; - const int64 stop_position = writer->Position(); + const int64_t stop_position = writer->Position(); if (stop_position < 0 || - stop_position - payload_position != static_cast<int64>(size)) + stop_position - payload_position != static_cast<int64_t>(size)) return false; return true; } -uint64 ContentEncoding::EncodingSize(uint64 compresion_size, - uint64 encryption_size) const { +uint64_t 
ContentEncoding::EncodingSize(uint64_t compresion_size, + uint64_t encryption_size) const { // TODO(fgalligan): Add support for compression settings. if (compresion_size != 0) return 0; - uint64 encoding_size = 0; + uint64_t encoding_size = 0; if (encryption_size > 0) { encoding_size += - EbmlMasterElementSize(kMkvContentEncryption, encryption_size) + + EbmlMasterElementSize(libwebm::kMkvContentEncryption, encryption_size) + encryption_size; } - encoding_size += EbmlElementSize(kMkvContentEncodingType, encoding_type_); - encoding_size += EbmlElementSize(kMkvContentEncodingScope, encoding_scope_); - encoding_size += EbmlElementSize(kMkvContentEncodingOrder, encoding_order_); + encoding_size += + EbmlElementSize(libwebm::kMkvContentEncodingType, encoding_type_); + encoding_size += + EbmlElementSize(libwebm::kMkvContentEncodingScope, encoding_scope_); + encoding_size += + EbmlElementSize(libwebm::kMkvContentEncodingOrder, encoding_order_); return encoding_size; } -uint64 ContentEncoding::EncryptionSize() const { - const uint64 aes_size = enc_aes_settings_.Size(); +uint64_t ContentEncoding::EncryptionSize() const { + const uint64_t aes_size = enc_aes_settings_.Size(); - uint64 encryption_size = - EbmlElementSize(kMkvContentEncKeyID, enc_key_id_, enc_key_id_length_); - encryption_size += EbmlElementSize(kMkvContentEncAlgo, enc_algo_); + uint64_t encryption_size = EbmlElementSize(libwebm::kMkvContentEncKeyID, + enc_key_id_, enc_key_id_length_); + encryption_size += EbmlElementSize(libwebm::kMkvContentEncAlgo, enc_algo_); return encryption_size + aes_size; } @@ -577,7 +618,7 @@ Track::~Track() { delete[] name_; if (content_encoding_entries_) { - for (uint32 i = 0; i < content_encoding_entries_size_; ++i) { + for (uint32_t i = 0; i < content_encoding_entries_size_; ++i) { ContentEncoding* const encoding = content_encoding_entries_[i]; delete encoding; } @@ -586,7 +627,7 @@ Track::~Track() { } bool Track::AddContentEncoding() { - const uint32 count = 
content_encoding_entries_size_ + 1; + const uint32_t count = content_encoding_entries_size_ + 1; ContentEncoding** const content_encoding_entries = new (std::nothrow) ContentEncoding*[count]; // NOLINT @@ -600,7 +641,7 @@ bool Track::AddContentEncoding() { return false; } - for (uint32 i = 0; i < content_encoding_entries_size_; ++i) { + for (uint32_t i = 0; i < content_encoding_entries_size_; ++i) { content_encoding_entries[i] = content_encoding_entries_[i]; } @@ -612,7 +653,7 @@ bool Track::AddContentEncoding() { return true; } -ContentEncoding* Track::GetContentEncodingByIndex(uint32 index) const { +ContentEncoding* Track::GetContentEncodingByIndex(uint32_t index) const { if (content_encoding_entries_ == NULL) return NULL; @@ -622,46 +663,47 @@ ContentEncoding* Track::GetContentEncodingByIndex(uint32 index) const { return content_encoding_entries_[index]; } -uint64 Track::PayloadSize() const { - uint64 size = EbmlElementSize(kMkvTrackNumber, number_); - size += EbmlElementSize(kMkvTrackUID, uid_); - size += EbmlElementSize(kMkvTrackType, type_); +uint64_t Track::PayloadSize() const { + uint64_t size = EbmlElementSize(libwebm::kMkvTrackNumber, number_); + size += EbmlElementSize(libwebm::kMkvTrackUID, uid_); + size += EbmlElementSize(libwebm::kMkvTrackType, type_); if (codec_id_) - size += EbmlElementSize(kMkvCodecID, codec_id_); + size += EbmlElementSize(libwebm::kMkvCodecID, codec_id_); if (codec_private_) - size += EbmlElementSize(kMkvCodecPrivate, codec_private_, + size += EbmlElementSize(libwebm::kMkvCodecPrivate, codec_private_, codec_private_length_); if (language_) - size += EbmlElementSize(kMkvLanguage, language_); + size += EbmlElementSize(libwebm::kMkvLanguage, language_); if (name_) - size += EbmlElementSize(kMkvName, name_); + size += EbmlElementSize(libwebm::kMkvName, name_); if (max_block_additional_id_) - size += EbmlElementSize(kMkvMaxBlockAdditionID, max_block_additional_id_); + size += EbmlElementSize(libwebm::kMkvMaxBlockAdditionID, + 
max_block_additional_id_); if (codec_delay_) - size += EbmlElementSize(kMkvCodecDelay, codec_delay_); + size += EbmlElementSize(libwebm::kMkvCodecDelay, codec_delay_); if (seek_pre_roll_) - size += EbmlElementSize(kMkvSeekPreRoll, seek_pre_roll_); + size += EbmlElementSize(libwebm::kMkvSeekPreRoll, seek_pre_roll_); if (default_duration_) - size += EbmlElementSize(kMkvDefaultDuration, default_duration_); + size += EbmlElementSize(libwebm::kMkvDefaultDuration, default_duration_); if (content_encoding_entries_size_ > 0) { - uint64 content_encodings_size = 0; - for (uint32 i = 0; i < content_encoding_entries_size_; ++i) { + uint64_t content_encodings_size = 0; + for (uint32_t i = 0; i < content_encoding_entries_size_; ++i) { ContentEncoding* const encoding = content_encoding_entries_[i]; content_encodings_size += encoding->Size(); } - size += - EbmlMasterElementSize(kMkvContentEncodings, content_encodings_size) + - content_encodings_size; + size += EbmlMasterElementSize(libwebm::kMkvContentEncodings, + content_encodings_size) + + content_encodings_size; } return size; } -uint64 Track::Size() const { - uint64 size = PayloadSize(); - size += EbmlMasterElementSize(kMkvTrackEntry, size); +uint64_t Track::Size() const { + uint64_t size = PayloadSize(); + size += EbmlMasterElementSize(libwebm::kMkvTrackEntry, size); return size; } @@ -675,95 +717,97 @@ bool Track::Write(IMkvWriter* writer) const { // |size| may be bigger than what is written out in this function because // derived classes may write out more data in the Track element. 
- const uint64 payload_size = PayloadSize(); + const uint64_t payload_size = PayloadSize(); - if (!WriteEbmlMasterElement(writer, kMkvTrackEntry, payload_size)) + if (!WriteEbmlMasterElement(writer, libwebm::kMkvTrackEntry, payload_size)) return false; - uint64 size = EbmlElementSize(kMkvTrackNumber, number_); - size += EbmlElementSize(kMkvTrackUID, uid_); - size += EbmlElementSize(kMkvTrackType, type_); + uint64_t size = EbmlElementSize(libwebm::kMkvTrackNumber, number_); + size += EbmlElementSize(libwebm::kMkvTrackUID, uid_); + size += EbmlElementSize(libwebm::kMkvTrackType, type_); if (codec_id_) - size += EbmlElementSize(kMkvCodecID, codec_id_); + size += EbmlElementSize(libwebm::kMkvCodecID, codec_id_); if (codec_private_) - size += EbmlElementSize(kMkvCodecPrivate, codec_private_, + size += EbmlElementSize(libwebm::kMkvCodecPrivate, codec_private_, codec_private_length_); if (language_) - size += EbmlElementSize(kMkvLanguage, language_); + size += EbmlElementSize(libwebm::kMkvLanguage, language_); if (name_) - size += EbmlElementSize(kMkvName, name_); + size += EbmlElementSize(libwebm::kMkvName, name_); if (max_block_additional_id_) - size += EbmlElementSize(kMkvMaxBlockAdditionID, max_block_additional_id_); + size += EbmlElementSize(libwebm::kMkvMaxBlockAdditionID, + max_block_additional_id_); if (codec_delay_) - size += EbmlElementSize(kMkvCodecDelay, codec_delay_); + size += EbmlElementSize(libwebm::kMkvCodecDelay, codec_delay_); if (seek_pre_roll_) - size += EbmlElementSize(kMkvSeekPreRoll, seek_pre_roll_); + size += EbmlElementSize(libwebm::kMkvSeekPreRoll, seek_pre_roll_); if (default_duration_) - size += EbmlElementSize(kMkvDefaultDuration, default_duration_); + size += EbmlElementSize(libwebm::kMkvDefaultDuration, default_duration_); - const int64 payload_position = writer->Position(); + const int64_t payload_position = writer->Position(); if (payload_position < 0) return false; - if (!WriteEbmlElement(writer, kMkvTrackNumber, number_)) + if 
(!WriteEbmlElement(writer, libwebm::kMkvTrackNumber, number_)) return false; - if (!WriteEbmlElement(writer, kMkvTrackUID, uid_)) + if (!WriteEbmlElement(writer, libwebm::kMkvTrackUID, uid_)) return false; - if (!WriteEbmlElement(writer, kMkvTrackType, type_)) + if (!WriteEbmlElement(writer, libwebm::kMkvTrackType, type_)) return false; if (max_block_additional_id_) { - if (!WriteEbmlElement(writer, kMkvMaxBlockAdditionID, + if (!WriteEbmlElement(writer, libwebm::kMkvMaxBlockAdditionID, max_block_additional_id_)) { return false; } } if (codec_delay_) { - if (!WriteEbmlElement(writer, kMkvCodecDelay, codec_delay_)) + if (!WriteEbmlElement(writer, libwebm::kMkvCodecDelay, codec_delay_)) return false; } if (seek_pre_roll_) { - if (!WriteEbmlElement(writer, kMkvSeekPreRoll, seek_pre_roll_)) + if (!WriteEbmlElement(writer, libwebm::kMkvSeekPreRoll, seek_pre_roll_)) return false; } if (default_duration_) { - if (!WriteEbmlElement(writer, kMkvDefaultDuration, default_duration_)) + if (!WriteEbmlElement(writer, libwebm::kMkvDefaultDuration, + default_duration_)) return false; } if (codec_id_) { - if (!WriteEbmlElement(writer, kMkvCodecID, codec_id_)) + if (!WriteEbmlElement(writer, libwebm::kMkvCodecID, codec_id_)) return false; } if (codec_private_) { - if (!WriteEbmlElement(writer, kMkvCodecPrivate, codec_private_, + if (!WriteEbmlElement(writer, libwebm::kMkvCodecPrivate, codec_private_, codec_private_length_)) return false; } if (language_) { - if (!WriteEbmlElement(writer, kMkvLanguage, language_)) + if (!WriteEbmlElement(writer, libwebm::kMkvLanguage, language_)) return false; } if (name_) { - if (!WriteEbmlElement(writer, kMkvName, name_)) + if (!WriteEbmlElement(writer, libwebm::kMkvName, name_)) return false; } - int64 stop_position = writer->Position(); + int64_t stop_position = writer->Position(); if (stop_position < 0 || - stop_position - payload_position != static_cast<int64>(size)) + stop_position - payload_position != static_cast<int64_t>(size)) return 
false; if (content_encoding_entries_size_ > 0) { - uint64 content_encodings_size = 0; - for (uint32 i = 0; i < content_encoding_entries_size_; ++i) { + uint64_t content_encodings_size = 0; + for (uint32_t i = 0; i < content_encoding_entries_size_; ++i) { ContentEncoding* const encoding = content_encoding_entries_[i]; content_encodings_size += encoding->Size(); } - if (!WriteEbmlMasterElement(writer, kMkvContentEncodings, + if (!WriteEbmlMasterElement(writer, libwebm::kMkvContentEncodings, content_encodings_size)) return false; - for (uint32 i = 0; i < content_encoding_entries_size_; ++i) { + for (uint32_t i = 0; i < content_encoding_entries_size_; ++i) { ContentEncoding* const encoding = content_encoding_entries_[i]; if (!encoding->Write(writer)) return false; @@ -776,14 +820,14 @@ bool Track::Write(IMkvWriter* writer) const { return true; } -bool Track::SetCodecPrivate(const uint8* codec_private, uint64 length) { +bool Track::SetCodecPrivate(const uint8_t* codec_private, uint64_t length) { if (!codec_private || length < 1) return false; delete[] codec_private_; codec_private_ = - new (std::nothrow) uint8[static_cast<size_t>(length)]; // NOLINT + new (std::nothrow) uint8_t[static_cast<size_t>(length)]; // NOLINT if (!codec_private_) return false; @@ -844,6 +888,279 @@ void Track::set_name(const char* name) { /////////////////////////////////////////////////////////////// // +// Colour and its child elements + +uint64_t PrimaryChromaticity::PrimaryChromaticityPayloadSize( + libwebm::MkvId x_id, libwebm::MkvId y_id) const { + return EbmlElementSize(x_id, x) + EbmlElementSize(y_id, y); +} + +bool PrimaryChromaticity::Write(IMkvWriter* writer, libwebm::MkvId x_id, + libwebm::MkvId y_id) const { + return WriteEbmlElement(writer, x_id, x) && WriteEbmlElement(writer, y_id, y); +} + +uint64_t MasteringMetadata::MasteringMetadataSize() const { + uint64_t size = PayloadSize(); + + if (size > 0) + size += EbmlMasterElementSize(libwebm::kMkvMasteringMetadata, size); + + return 
size; +} + +bool MasteringMetadata::Write(IMkvWriter* writer) const { + const uint64_t size = PayloadSize(); + + // Don't write an empty element. + if (size == 0) + return true; + + if (!WriteEbmlMasterElement(writer, libwebm::kMkvMasteringMetadata, size)) + return false; + if (luminance_max != kValueNotPresent && + !WriteEbmlElement(writer, libwebm::kMkvLuminanceMax, luminance_max)) { + return false; + } + if (luminance_min != kValueNotPresent && + !WriteEbmlElement(writer, libwebm::kMkvLuminanceMin, luminance_min)) { + return false; + } + if (r_ && + !r_->Write(writer, libwebm::kMkvPrimaryRChromaticityX, + libwebm::kMkvPrimaryRChromaticityY)) { + return false; + } + if (g_ && + !g_->Write(writer, libwebm::kMkvPrimaryGChromaticityX, + libwebm::kMkvPrimaryGChromaticityY)) { + return false; + } + if (b_ && + !b_->Write(writer, libwebm::kMkvPrimaryBChromaticityX, + libwebm::kMkvPrimaryBChromaticityY)) { + return false; + } + if (white_point_ && + !white_point_->Write(writer, libwebm::kMkvWhitePointChromaticityX, + libwebm::kMkvWhitePointChromaticityY)) { + return false; + } + + return true; +} + +bool MasteringMetadata::SetChromaticity( + const PrimaryChromaticity* r, const PrimaryChromaticity* g, + const PrimaryChromaticity* b, const PrimaryChromaticity* white_point) { + PrimaryChromaticityPtr r_ptr(NULL); + if (r) { + if (!CopyChromaticity(r, &r_ptr)) + return false; + } + PrimaryChromaticityPtr g_ptr(NULL); + if (g) { + if (!CopyChromaticity(g, &g_ptr)) + return false; + } + PrimaryChromaticityPtr b_ptr(NULL); + if (b) { + if (!CopyChromaticity(b, &b_ptr)) + return false; + } + PrimaryChromaticityPtr wp_ptr(NULL); + if (white_point) { + if (!CopyChromaticity(white_point, &wp_ptr)) + return false; + } + + r_ = r_ptr.release(); + g_ = g_ptr.release(); + b_ = b_ptr.release(); + white_point_ = wp_ptr.release(); + return true; +} + +uint64_t MasteringMetadata::PayloadSize() const { + uint64_t size = 0; + + if (luminance_max != kValueNotPresent) + size += 
EbmlElementSize(libwebm::kMkvLuminanceMax, luminance_max); + if (luminance_min != kValueNotPresent) + size += EbmlElementSize(libwebm::kMkvLuminanceMin, luminance_min); + + if (r_) { + size += r_->PrimaryChromaticityPayloadSize( + libwebm::kMkvPrimaryRChromaticityX, libwebm::kMkvPrimaryRChromaticityY); + } + if (g_) { + size += g_->PrimaryChromaticityPayloadSize( + libwebm::kMkvPrimaryGChromaticityX, libwebm::kMkvPrimaryGChromaticityY); + } + if (b_) { + size += b_->PrimaryChromaticityPayloadSize( + libwebm::kMkvPrimaryBChromaticityX, libwebm::kMkvPrimaryBChromaticityY); + } + if (white_point_) { + size += white_point_->PrimaryChromaticityPayloadSize( + libwebm::kMkvWhitePointChromaticityX, + libwebm::kMkvWhitePointChromaticityY); + } + + return size; +} + +uint64_t Colour::ColourSize() const { + uint64_t size = PayloadSize(); + + if (size > 0) + size += EbmlMasterElementSize(libwebm::kMkvColour, size); + + return size; +} + +bool Colour::Write(IMkvWriter* writer) const { + const uint64_t size = PayloadSize(); + + // Don't write an empty element. 
+ if (size == 0) + return true; + + if (!WriteEbmlMasterElement(writer, libwebm::kMkvColour, size)) + return false; + + if (matrix_coefficients != kValueNotPresent && + !WriteEbmlElement(writer, libwebm::kMkvMatrixCoefficients, + matrix_coefficients)) { + return false; + } + if (bits_per_channel != kValueNotPresent && + !WriteEbmlElement(writer, libwebm::kMkvBitsPerChannel, + bits_per_channel)) { + return false; + } + if (chroma_subsampling_horz != kValueNotPresent && + !WriteEbmlElement(writer, libwebm::kMkvChromaSubsamplingHorz, + chroma_subsampling_horz)) { + return false; + } + if (chroma_subsampling_vert != kValueNotPresent && + !WriteEbmlElement(writer, libwebm::kMkvChromaSubsamplingVert, + chroma_subsampling_vert)) { + return false; + } + + if (cb_subsampling_horz != kValueNotPresent && + !WriteEbmlElement(writer, libwebm::kMkvCbSubsamplingHorz, + cb_subsampling_horz)) { + return false; + } + if (cb_subsampling_vert != kValueNotPresent && + !WriteEbmlElement(writer, libwebm::kMkvCbSubsamplingVert, + cb_subsampling_vert)) { + return false; + } + if (chroma_siting_horz != kValueNotPresent && + !WriteEbmlElement(writer, libwebm::kMkvChromaSitingHorz, + chroma_siting_horz)) { + return false; + } + if (chroma_siting_vert != kValueNotPresent && + !WriteEbmlElement(writer, libwebm::kMkvChromaSitingVert, + chroma_siting_vert)) { + return false; + } + if (range != kValueNotPresent && + !WriteEbmlElement(writer, libwebm::kMkvRange, range)) { + return false; + } + if (transfer_characteristics != kValueNotPresent && + !WriteEbmlElement(writer, libwebm::kMkvTransferCharacteristics, + transfer_characteristics)) { + return false; + } + if (primaries != kValueNotPresent && + !WriteEbmlElement(writer, libwebm::kMkvPrimaries, primaries)) { + return false; + } + if (max_cll != kValueNotPresent && + !WriteEbmlElement(writer, libwebm::kMkvMaxCLL, max_cll)) { + return false; + } + if (max_fall != kValueNotPresent && + !WriteEbmlElement(writer, libwebm::kMkvMaxFALL, max_fall)) { + 
return false; + } + + if (mastering_metadata_ && !mastering_metadata_->Write(writer)) + return false; + + return true; +} + +bool Colour::SetMasteringMetadata(const MasteringMetadata& mastering_metadata) { + std::auto_ptr<MasteringMetadata> mm_ptr(new MasteringMetadata()); + if (!mm_ptr.get()) + return false; + + mm_ptr->luminance_max = mastering_metadata.luminance_max; + mm_ptr->luminance_min = mastering_metadata.luminance_min; + + if (!mm_ptr->SetChromaticity(mastering_metadata.r(), mastering_metadata.g(), + mastering_metadata.b(), + mastering_metadata.white_point())) { + return false; + } + + delete mastering_metadata_; + mastering_metadata_ = mm_ptr.release(); + return true; +} + +uint64_t Colour::PayloadSize() const { + uint64_t size = 0; + + if (matrix_coefficients != kValueNotPresent) + size += + EbmlElementSize(libwebm::kMkvMatrixCoefficients, matrix_coefficients); + if (bits_per_channel != kValueNotPresent) + size += EbmlElementSize(libwebm::kMkvBitsPerChannel, bits_per_channel); + if (chroma_subsampling_horz != kValueNotPresent) + size += EbmlElementSize(libwebm::kMkvChromaSubsamplingHorz, + chroma_subsampling_horz); + if (chroma_subsampling_vert != kValueNotPresent) + size += EbmlElementSize(libwebm::kMkvChromaSubsamplingVert, + chroma_subsampling_vert); + if (cb_subsampling_horz != kValueNotPresent) + size += + EbmlElementSize(libwebm::kMkvCbSubsamplingHorz, cb_subsampling_horz); + if (cb_subsampling_vert != kValueNotPresent) + size += + EbmlElementSize(libwebm::kMkvCbSubsamplingVert, cb_subsampling_vert); + if (chroma_siting_horz != kValueNotPresent) + size += EbmlElementSize(libwebm::kMkvChromaSitingHorz, chroma_siting_horz); + if (chroma_siting_vert != kValueNotPresent) + size += EbmlElementSize(libwebm::kMkvChromaSitingVert, chroma_siting_vert); + if (range != kValueNotPresent) + size += EbmlElementSize(libwebm::kMkvRange, range); + if (transfer_characteristics != kValueNotPresent) + size += EbmlElementSize(libwebm::kMkvTransferCharacteristics, + 
transfer_characteristics); + if (primaries != kValueNotPresent) + size += EbmlElementSize(libwebm::kMkvPrimaries, primaries); + if (max_cll != kValueNotPresent) + size += EbmlElementSize(libwebm::kMkvMaxCLL, max_cll); + if (max_fall != kValueNotPresent) + size += EbmlElementSize(libwebm::kMkvMaxFALL, max_fall); + + if (mastering_metadata_) + size += mastering_metadata_->MasteringMetadataSize(); + + return size; +} + +/////////////////////////////////////////////////////////////// +// // VideoTrack Class VideoTrack::VideoTrack(unsigned int* seed) @@ -858,11 +1175,12 @@ VideoTrack::VideoTrack(unsigned int* seed) height_(0), stereo_mode_(0), alpha_mode_(0), - width_(0) {} + width_(0), + colour_(NULL) {} -VideoTrack::~VideoTrack() {} +VideoTrack::~VideoTrack() { delete colour_; } -bool VideoTrack::SetStereoMode(uint64 stereo_mode) { +bool VideoTrack::SetStereoMode(uint64_t stereo_mode) { if (stereo_mode != kMono && stereo_mode != kSideBySideLeftIsFirst && stereo_mode != kTopBottomRightIsFirst && stereo_mode != kTopBottomLeftIsFirst && @@ -873,7 +1191,7 @@ bool VideoTrack::SetStereoMode(uint64 stereo_mode) { return true; } -bool VideoTrack::SetAlphaMode(uint64 alpha_mode) { +bool VideoTrack::SetAlphaMode(uint64_t alpha_mode) { if (alpha_mode != kNoAlpha && alpha_mode != kAlpha) return false; @@ -881,11 +1199,11 @@ bool VideoTrack::SetAlphaMode(uint64 alpha_mode) { return true; } -uint64 VideoTrack::PayloadSize() const { - const uint64 parent_size = Track::PayloadSize(); +uint64_t VideoTrack::PayloadSize() const { + const uint64_t parent_size = Track::PayloadSize(); - uint64 size = VideoPayloadSize(); - size += EbmlMasterElementSize(kMkvVideo, size); + uint64_t size = VideoPayloadSize(); + size += EbmlMasterElementSize(libwebm::kMkvVideo, size); return parent_size + size; } @@ -894,88 +1212,122 @@ bool VideoTrack::Write(IMkvWriter* writer) const { if (!Track::Write(writer)) return false; - const uint64 size = VideoPayloadSize(); + const uint64_t size = 
VideoPayloadSize(); - if (!WriteEbmlMasterElement(writer, kMkvVideo, size)) + if (!WriteEbmlMasterElement(writer, libwebm::kMkvVideo, size)) return false; - const int64 payload_position = writer->Position(); + const int64_t payload_position = writer->Position(); if (payload_position < 0) return false; - if (!WriteEbmlElement(writer, kMkvPixelWidth, width_)) + if (!WriteEbmlElement(writer, libwebm::kMkvPixelWidth, width_)) return false; - if (!WriteEbmlElement(writer, kMkvPixelHeight, height_)) + if (!WriteEbmlElement(writer, libwebm::kMkvPixelHeight, height_)) return false; if (display_width_ > 0) { - if (!WriteEbmlElement(writer, kMkvDisplayWidth, display_width_)) + if (!WriteEbmlElement(writer, libwebm::kMkvDisplayWidth, display_width_)) return false; } if (display_height_ > 0) { - if (!WriteEbmlElement(writer, kMkvDisplayHeight, display_height_)) + if (!WriteEbmlElement(writer, libwebm::kMkvDisplayHeight, display_height_)) return false; } if (crop_left_ > 0) { - if (!WriteEbmlElement(writer, kMkvPixelCropLeft, crop_left_)) + if (!WriteEbmlElement(writer, libwebm::kMkvPixelCropLeft, crop_left_)) return false; } if (crop_right_ > 0) { - if (!WriteEbmlElement(writer, kMkvPixelCropRight, crop_right_)) + if (!WriteEbmlElement(writer, libwebm::kMkvPixelCropRight, crop_right_)) return false; } if (crop_top_ > 0) { - if (!WriteEbmlElement(writer, kMkvPixelCropTop, crop_top_)) + if (!WriteEbmlElement(writer, libwebm::kMkvPixelCropTop, crop_top_)) return false; } if (crop_bottom_ > 0) { - if (!WriteEbmlElement(writer, kMkvPixelCropBottom, crop_bottom_)) + if (!WriteEbmlElement(writer, libwebm::kMkvPixelCropBottom, crop_bottom_)) return false; } if (stereo_mode_ > kMono) { - if (!WriteEbmlElement(writer, kMkvStereoMode, stereo_mode_)) + if (!WriteEbmlElement(writer, libwebm::kMkvStereoMode, stereo_mode_)) return false; } if (alpha_mode_ > kNoAlpha) { - if (!WriteEbmlElement(writer, kMkvAlphaMode, alpha_mode_)) + if (!WriteEbmlElement(writer, libwebm::kMkvAlphaMode, 
alpha_mode_)) return false; } if (frame_rate_ > 0.0) { - if (!WriteEbmlElement(writer, kMkvFrameRate, + if (!WriteEbmlElement(writer, libwebm::kMkvFrameRate, static_cast<float>(frame_rate_))) { return false; } } + if (colour_) { + if (!colour_->Write(writer)) + return false; + } - const int64 stop_position = writer->Position(); + const int64_t stop_position = writer->Position(); if (stop_position < 0 || - stop_position - payload_position != static_cast<int64>(size)) { + stop_position - payload_position != static_cast<int64_t>(size)) { + return false; + } + + return true; +} + +bool VideoTrack::SetColour(const Colour& colour) { + std::auto_ptr<Colour> colour_ptr(new Colour()); + if (!colour_ptr.get()) return false; + + if (colour.mastering_metadata()) { + if (!colour_ptr->SetMasteringMetadata(*colour.mastering_metadata())) + return false; } + colour_ptr->matrix_coefficients = colour.matrix_coefficients; + colour_ptr->bits_per_channel = colour.bits_per_channel; + colour_ptr->chroma_subsampling_horz = colour.chroma_subsampling_horz; + colour_ptr->chroma_subsampling_vert = colour.chroma_subsampling_vert; + colour_ptr->cb_subsampling_horz = colour.cb_subsampling_horz; + colour_ptr->cb_subsampling_vert = colour.cb_subsampling_vert; + colour_ptr->chroma_siting_horz = colour.chroma_siting_horz; + colour_ptr->chroma_siting_vert = colour.chroma_siting_vert; + colour_ptr->range = colour.range; + colour_ptr->transfer_characteristics = colour.transfer_characteristics; + colour_ptr->primaries = colour.primaries; + colour_ptr->max_cll = colour.max_cll; + colour_ptr->max_fall = colour.max_fall; + colour_ = colour_ptr.release(); return true; } -uint64 VideoTrack::VideoPayloadSize() const { - uint64 size = EbmlElementSize(kMkvPixelWidth, width_); - size += EbmlElementSize(kMkvPixelHeight, height_); +uint64_t VideoTrack::VideoPayloadSize() const { + uint64_t size = EbmlElementSize(libwebm::kMkvPixelWidth, width_); + size += EbmlElementSize(libwebm::kMkvPixelHeight, height_); if 
(display_width_ > 0) - size += EbmlElementSize(kMkvDisplayWidth, display_width_); + size += EbmlElementSize(libwebm::kMkvDisplayWidth, display_width_); if (display_height_ > 0) - size += EbmlElementSize(kMkvDisplayHeight, display_height_); + size += EbmlElementSize(libwebm::kMkvDisplayHeight, display_height_); if (crop_left_ > 0) - size += EbmlElementSize(kMkvPixelCropLeft, crop_left_); + size += EbmlElementSize(libwebm::kMkvPixelCropLeft, crop_left_); if (crop_right_ > 0) - size += EbmlElementSize(kMkvPixelCropRight, crop_right_); + size += EbmlElementSize(libwebm::kMkvPixelCropRight, crop_right_); if (crop_top_ > 0) - size += EbmlElementSize(kMkvPixelCropTop, crop_top_); + size += EbmlElementSize(libwebm::kMkvPixelCropTop, crop_top_); if (crop_bottom_ > 0) - size += EbmlElementSize(kMkvPixelCropBottom, crop_bottom_); + size += EbmlElementSize(libwebm::kMkvPixelCropBottom, crop_bottom_); if (stereo_mode_ > kMono) - size += EbmlElementSize(kMkvStereoMode, stereo_mode_); + size += EbmlElementSize(libwebm::kMkvStereoMode, stereo_mode_); if (alpha_mode_ > kNoAlpha) - size += EbmlElementSize(kMkvAlphaMode, alpha_mode_); + size += EbmlElementSize(libwebm::kMkvAlphaMode, alpha_mode_); if (frame_rate_ > 0.0) - size += EbmlElementSize(kMkvFrameRate, static_cast<float>(frame_rate_)); + size += EbmlElementSize(libwebm::kMkvFrameRate, + static_cast<float>(frame_rate_)); + if (colour_) + size += colour_->ColourSize(); return size; } @@ -989,15 +1341,15 @@ AudioTrack::AudioTrack(unsigned int* seed) AudioTrack::~AudioTrack() {} -uint64 AudioTrack::PayloadSize() const { - const uint64 parent_size = Track::PayloadSize(); +uint64_t AudioTrack::PayloadSize() const { + const uint64_t parent_size = Track::PayloadSize(); - uint64 size = - EbmlElementSize(kMkvSamplingFrequency, static_cast<float>(sample_rate_)); - size += EbmlElementSize(kMkvChannels, channels_); + uint64_t size = EbmlElementSize(libwebm::kMkvSamplingFrequency, + static_cast<float>(sample_rate_)); + size += 
EbmlElementSize(libwebm::kMkvChannels, channels_); if (bit_depth_ > 0) - size += EbmlElementSize(kMkvBitDepth, bit_depth_); - size += EbmlMasterElementSize(kMkvAudio, size); + size += EbmlElementSize(libwebm::kMkvBitDepth, bit_depth_); + size += EbmlMasterElementSize(libwebm::kMkvAudio, size); return parent_size + size; } @@ -1007,31 +1359,31 @@ bool AudioTrack::Write(IMkvWriter* writer) const { return false; // Calculate AudioSettings size. - uint64 size = - EbmlElementSize(kMkvSamplingFrequency, static_cast<float>(sample_rate_)); - size += EbmlElementSize(kMkvChannels, channels_); + uint64_t size = EbmlElementSize(libwebm::kMkvSamplingFrequency, + static_cast<float>(sample_rate_)); + size += EbmlElementSize(libwebm::kMkvChannels, channels_); if (bit_depth_ > 0) - size += EbmlElementSize(kMkvBitDepth, bit_depth_); + size += EbmlElementSize(libwebm::kMkvBitDepth, bit_depth_); - if (!WriteEbmlMasterElement(writer, kMkvAudio, size)) + if (!WriteEbmlMasterElement(writer, libwebm::kMkvAudio, size)) return false; - const int64 payload_position = writer->Position(); + const int64_t payload_position = writer->Position(); if (payload_position < 0) return false; - if (!WriteEbmlElement(writer, kMkvSamplingFrequency, + if (!WriteEbmlElement(writer, libwebm::kMkvSamplingFrequency, static_cast<float>(sample_rate_))) return false; - if (!WriteEbmlElement(writer, kMkvChannels, channels_)) + if (!WriteEbmlElement(writer, libwebm::kMkvChannels, channels_)) return false; if (bit_depth_ > 0) - if (!WriteEbmlElement(writer, kMkvBitDepth, bit_depth_)) + if (!WriteEbmlElement(writer, libwebm::kMkvBitDepth, bit_depth_)) return false; - const int64 stop_position = writer->Position(); + const int64_t stop_position = writer->Position(); if (stop_position < 0 || - stop_position - payload_position != static_cast<int64>(size)) + stop_position - payload_position != static_cast<int64_t>(size)) return false; return true; @@ -1047,11 +1399,12 @@ const char Tracks::kVp8CodecId[] = "V_VP8"; const 
char Tracks::kVp9CodecId[] = "V_VP9"; const char Tracks::kVp10CodecId[] = "V_VP10"; -Tracks::Tracks() : track_entries_(NULL), track_entries_size_(0) {} +Tracks::Tracks() + : track_entries_(NULL), track_entries_size_(0), wrote_tracks_(false) {} Tracks::~Tracks() { if (track_entries_) { - for (uint32 i = 0; i < track_entries_size_; ++i) { + for (uint32_t i = 0; i < track_entries_size_; ++i) { Track* const track = track_entries_[i]; delete track; } @@ -1059,8 +1412,8 @@ Tracks::~Tracks() { } } -bool Tracks::AddTrack(Track* track, int32 number) { - if (number < 0) +bool Tracks::AddTrack(Track* track, int32_t number) { + if (number < 0 || wrote_tracks_) return false; // This muxer only supports track numbers in the range [1, 126], in @@ -1071,23 +1424,23 @@ bool Tracks::AddTrack(Track* track, int32 number) { if (number > 0x7E) return false; - uint32 track_num = number; + uint32_t track_num = number; if (track_num > 0) { // Check to make sure a track does not already have |track_num|. - for (uint32 i = 0; i < track_entries_size_; ++i) { + for (uint32_t i = 0; i < track_entries_size_; ++i) { if (track_entries_[i]->number() == track_num) return false; } } - const uint32 count = track_entries_size_ + 1; + const uint32_t count = track_entries_size_ + 1; Track** const track_entries = new (std::nothrow) Track*[count]; // NOLINT if (!track_entries) return false; - for (uint32 i = 0; i < track_entries_size_; ++i) { + for (uint32_t i = 0; i < track_entries_size_; ++i) { track_entries[i] = track_entries_[i]; } @@ -1101,7 +1454,7 @@ bool Tracks::AddTrack(Track* track, int32 number) { bool exit = false; do { exit = true; - for (uint32 i = 0; i < track_entries_size_; ++i) { + for (uint32_t i = 0; i < track_entries_size_; ++i) { if (track_entries[i]->number() == track_num) { track_num++; exit = false; @@ -1118,7 +1471,7 @@ bool Tracks::AddTrack(Track* track, int32 number) { return true; } -const Track* Tracks::GetTrackByIndex(uint32 index) const { +const Track* 
Tracks::GetTrackByIndex(uint32_t index) const { if (track_entries_ == NULL) return NULL; @@ -1128,9 +1481,9 @@ const Track* Tracks::GetTrackByIndex(uint32 index) const { return track_entries_[index]; } -Track* Tracks::GetTrackByNumber(uint64 track_number) const { - const int32 count = track_entries_size(); - for (int32 i = 0; i < count; ++i) { +Track* Tracks::GetTrackByNumber(uint64_t track_number) const { + const int32_t count = track_entries_size(); + for (int32_t i = 0; i < count; ++i) { if (track_entries_[i]->number() == track_number) return track_entries_[i]; } @@ -1138,7 +1491,7 @@ Track* Tracks::GetTrackByNumber(uint64 track_number) const { return NULL; } -bool Tracks::TrackIsAudio(uint64 track_number) const { +bool Tracks::TrackIsAudio(uint64_t track_number) const { const Track* const track = GetTrackByNumber(track_number); if (track->type() == kAudio) @@ -1147,7 +1500,7 @@ bool Tracks::TrackIsAudio(uint64 track_number) const { return false; } -bool Tracks::TrackIsVideo(uint64 track_number) const { +bool Tracks::TrackIsVideo(uint64_t track_number) const { const Track* const track = GetTrackByNumber(track_number); if (track->type() == kVideo) @@ -1157,9 +1510,9 @@ bool Tracks::TrackIsVideo(uint64 track_number) const { } bool Tracks::Write(IMkvWriter* writer) const { - uint64 size = 0; - const int32 count = track_entries_size(); - for (int32 i = 0; i < count; ++i) { + uint64_t size = 0; + const int32_t count = track_entries_size(); + for (int32_t i = 0; i < count; ++i) { const Track* const track = GetTrackByIndex(i); if (!track) @@ -1168,24 +1521,25 @@ bool Tracks::Write(IMkvWriter* writer) const { size += track->Size(); } - if (!WriteEbmlMasterElement(writer, kMkvTracks, size)) + if (!WriteEbmlMasterElement(writer, libwebm::kMkvTracks, size)) return false; - const int64 payload_position = writer->Position(); + const int64_t payload_position = writer->Position(); if (payload_position < 0) return false; - for (int32 i = 0; i < count; ++i) { + for (int32_t i = 
0; i < count; ++i) { const Track* const track = GetTrackByIndex(i); if (!track->Write(writer)) return false; } - const int64 stop_position = writer->Position(); + const int64_t stop_position = writer->Position(); if (stop_position < 0 || - stop_position - payload_position != static_cast<int64>(size)) + stop_position - payload_position != static_cast<int64_t>(size)) return false; + wrote_tracks_ = true; return true; } @@ -1195,9 +1549,10 @@ bool Tracks::Write(IMkvWriter* writer) const { bool Chapter::set_id(const char* id) { return StrCpy(id, &id_); } -void Chapter::set_time(const Segment& segment, uint64 start_ns, uint64 end_ns) { +void Chapter::set_time(const Segment& segment, uint64_t start_ns, + uint64_t end_ns) { const SegmentInfo* const info = segment.GetSegmentInfo(); - const uint64 timecode_scale = info->timecode_scale(); + const uint64_t timecode_scale = info->timecode_scale(); start_timecode_ = start_ns / timecode_scale; end_timecode_ = end_ns / timecode_scale; } @@ -1292,38 +1647,40 @@ bool Chapter::ExpandDisplaysArray() { return true; } -uint64 Chapter::WriteAtom(IMkvWriter* writer) const { - uint64 payload_size = EbmlElementSize(kMkvChapterStringUID, id_) + - EbmlElementSize(kMkvChapterUID, uid_) + - EbmlElementSize(kMkvChapterTimeStart, start_timecode_) + - EbmlElementSize(kMkvChapterTimeEnd, end_timecode_); +uint64_t Chapter::WriteAtom(IMkvWriter* writer) const { + uint64_t payload_size = + EbmlElementSize(libwebm::kMkvChapterStringUID, id_) + + EbmlElementSize(libwebm::kMkvChapterUID, uid_) + + EbmlElementSize(libwebm::kMkvChapterTimeStart, start_timecode_) + + EbmlElementSize(libwebm::kMkvChapterTimeEnd, end_timecode_); for (int idx = 0; idx < displays_count_; ++idx) { const Display& d = displays_[idx]; payload_size += d.WriteDisplay(NULL); } - const uint64 atom_size = - EbmlMasterElementSize(kMkvChapterAtom, payload_size) + payload_size; + const uint64_t atom_size = + EbmlMasterElementSize(libwebm::kMkvChapterAtom, payload_size) + + payload_size; 
if (writer == NULL) return atom_size; - const int64 start = writer->Position(); + const int64_t start = writer->Position(); - if (!WriteEbmlMasterElement(writer, kMkvChapterAtom, payload_size)) + if (!WriteEbmlMasterElement(writer, libwebm::kMkvChapterAtom, payload_size)) return 0; - if (!WriteEbmlElement(writer, kMkvChapterStringUID, id_)) + if (!WriteEbmlElement(writer, libwebm::kMkvChapterStringUID, id_)) return 0; - if (!WriteEbmlElement(writer, kMkvChapterUID, uid_)) + if (!WriteEbmlElement(writer, libwebm::kMkvChapterUID, uid_)) return 0; - if (!WriteEbmlElement(writer, kMkvChapterTimeStart, start_timecode_)) + if (!WriteEbmlElement(writer, libwebm::kMkvChapterTimeStart, start_timecode_)) return 0; - if (!WriteEbmlElement(writer, kMkvChapterTimeEnd, end_timecode_)) + if (!WriteEbmlElement(writer, libwebm::kMkvChapterTimeEnd, end_timecode_)) return 0; for (int idx = 0; idx < displays_count_; ++idx) { @@ -1333,9 +1690,9 @@ uint64 Chapter::WriteAtom(IMkvWriter* writer) const { return 0; } - const int64 stop = writer->Position(); + const int64_t stop = writer->Position(); - if (stop >= start && uint64(stop - start) != atom_size) + if (stop >= start && uint64_t(stop - start) != atom_size) return 0; return atom_size; @@ -1365,42 +1722,44 @@ bool Chapter::Display::set_country(const char* country) { return StrCpy(country, &country_); } -uint64 Chapter::Display::WriteDisplay(IMkvWriter* writer) const { - uint64 payload_size = EbmlElementSize(kMkvChapString, title_); +uint64_t Chapter::Display::WriteDisplay(IMkvWriter* writer) const { + uint64_t payload_size = EbmlElementSize(libwebm::kMkvChapString, title_); if (language_) - payload_size += EbmlElementSize(kMkvChapLanguage, language_); + payload_size += EbmlElementSize(libwebm::kMkvChapLanguage, language_); if (country_) - payload_size += EbmlElementSize(kMkvChapCountry, country_); + payload_size += EbmlElementSize(libwebm::kMkvChapCountry, country_); - const uint64 display_size = - 
EbmlMasterElementSize(kMkvChapterDisplay, payload_size) + payload_size; + const uint64_t display_size = + EbmlMasterElementSize(libwebm::kMkvChapterDisplay, payload_size) + + payload_size; if (writer == NULL) return display_size; - const int64 start = writer->Position(); + const int64_t start = writer->Position(); - if (!WriteEbmlMasterElement(writer, kMkvChapterDisplay, payload_size)) + if (!WriteEbmlMasterElement(writer, libwebm::kMkvChapterDisplay, + payload_size)) return 0; - if (!WriteEbmlElement(writer, kMkvChapString, title_)) + if (!WriteEbmlElement(writer, libwebm::kMkvChapString, title_)) return 0; if (language_) { - if (!WriteEbmlElement(writer, kMkvChapLanguage, language_)) + if (!WriteEbmlElement(writer, libwebm::kMkvChapLanguage, language_)) return 0; } if (country_) { - if (!WriteEbmlElement(writer, kMkvChapCountry, country_)) + if (!WriteEbmlElement(writer, libwebm::kMkvChapCountry, country_)) return 0; } - const int64 stop = writer->Position(); + const int64_t stop = writer->Position(); - if (stop >= start && uint64(stop - start) != display_size) + if (stop >= start && uint64_t(stop - start) != display_size) return 0; return display_size; @@ -1438,19 +1797,19 @@ bool Chapters::Write(IMkvWriter* writer) const { if (writer == NULL) return false; - const uint64 payload_size = WriteEdition(NULL); // return size only + const uint64_t payload_size = WriteEdition(NULL); // return size only - if (!WriteEbmlMasterElement(writer, kMkvChapters, payload_size)) + if (!WriteEbmlMasterElement(writer, libwebm::kMkvChapters, payload_size)) return false; - const int64 start = writer->Position(); + const int64_t start = writer->Position(); if (WriteEdition(writer) == 0) // error return false; - const int64 stop = writer->Position(); + const int64_t stop = writer->Position(); - if (stop >= start && uint64(stop - start) != payload_size) + if (stop >= start && uint64_t(stop - start) != payload_size) return false; return true; @@ -1480,36 +1839,37 @@ bool 
Chapters::ExpandChaptersArray() { return true; } -uint64 Chapters::WriteEdition(IMkvWriter* writer) const { - uint64 payload_size = 0; +uint64_t Chapters::WriteEdition(IMkvWriter* writer) const { + uint64_t payload_size = 0; for (int idx = 0; idx < chapters_count_; ++idx) { const Chapter& chapter = chapters_[idx]; payload_size += chapter.WriteAtom(NULL); } - const uint64 edition_size = - EbmlMasterElementSize(kMkvEditionEntry, payload_size) + payload_size; + const uint64_t edition_size = + EbmlMasterElementSize(libwebm::kMkvEditionEntry, payload_size) + + payload_size; if (writer == NULL) // return size only return edition_size; - const int64 start = writer->Position(); + const int64_t start = writer->Position(); - if (!WriteEbmlMasterElement(writer, kMkvEditionEntry, payload_size)) + if (!WriteEbmlMasterElement(writer, libwebm::kMkvEditionEntry, payload_size)) return 0; // error for (int idx = 0; idx < chapters_count_; ++idx) { const Chapter& chapter = chapters_[idx]; - const uint64 chapter_size = chapter.WriteAtom(writer); + const uint64_t chapter_size = chapter.WriteAtom(writer); if (chapter_size == 0) // error return 0; } - const int64 stop = writer->Position(); + const int64_t stop = writer->Position(); - if (stop >= start && uint64(stop - start) != edition_size) + if (stop >= start && uint64_t(stop - start) != edition_size) return 0; return edition_size; @@ -1581,23 +1941,23 @@ bool Tag::ExpandSimpleTagsArray() { return true; } -uint64 Tag::Write(IMkvWriter* writer) const { - uint64 payload_size = 0; +uint64_t Tag::Write(IMkvWriter* writer) const { + uint64_t payload_size = 0; for (int idx = 0; idx < simple_tags_count_; ++idx) { const SimpleTag& st = simple_tags_[idx]; payload_size += st.Write(NULL); } - const uint64 tag_size = - EbmlMasterElementSize(kMkvTag, payload_size) + payload_size; + const uint64_t tag_size = + EbmlMasterElementSize(libwebm::kMkvTag, payload_size) + payload_size; if (writer == NULL) return tag_size; - const int64 start = 
writer->Position(); + const int64_t start = writer->Position(); - if (!WriteEbmlMasterElement(writer, kMkvTag, payload_size)) + if (!WriteEbmlMasterElement(writer, libwebm::kMkvTag, payload_size)) return 0; for (int idx = 0; idx < simple_tags_count_; ++idx) { @@ -1607,9 +1967,9 @@ uint64 Tag::Write(IMkvWriter* writer) const { return 0; } - const int64 stop = writer->Position(); + const int64_t stop = writer->Position(); - if (stop >= start && uint64(stop - start) != tag_size) + if (stop >= start && uint64_t(stop - start) != tag_size) return 0; return tag_size; @@ -1635,31 +1995,32 @@ bool Tag::SimpleTag::set_tag_string(const char* tag_string) { return StrCpy(tag_string, &tag_string_); } -uint64 Tag::SimpleTag::Write(IMkvWriter* writer) const { - uint64 payload_size = EbmlElementSize(kMkvTagName, tag_name_); +uint64_t Tag::SimpleTag::Write(IMkvWriter* writer) const { + uint64_t payload_size = EbmlElementSize(libwebm::kMkvTagName, tag_name_); - payload_size += EbmlElementSize(kMkvTagString, tag_string_); + payload_size += EbmlElementSize(libwebm::kMkvTagString, tag_string_); - const uint64 simple_tag_size = - EbmlMasterElementSize(kMkvSimpleTag, payload_size) + payload_size; + const uint64_t simple_tag_size = + EbmlMasterElementSize(libwebm::kMkvSimpleTag, payload_size) + + payload_size; if (writer == NULL) return simple_tag_size; - const int64 start = writer->Position(); + const int64_t start = writer->Position(); - if (!WriteEbmlMasterElement(writer, kMkvSimpleTag, payload_size)) + if (!WriteEbmlMasterElement(writer, libwebm::kMkvSimpleTag, payload_size)) return 0; - if (!WriteEbmlElement(writer, kMkvTagName, tag_name_)) + if (!WriteEbmlElement(writer, libwebm::kMkvTagName, tag_name_)) return 0; - if (!WriteEbmlElement(writer, kMkvTagString, tag_string_)) + if (!WriteEbmlElement(writer, libwebm::kMkvTagString, tag_string_)) return 0; - const int64 stop = writer->Position(); + const int64_t stop = writer->Position(); - if (stop >= start && uint64(stop - start) != 
simple_tag_size) + if (stop >= start && uint64_t(stop - start) != simple_tag_size) return 0; return simple_tag_size; @@ -1694,29 +2055,29 @@ bool Tags::Write(IMkvWriter* writer) const { if (writer == NULL) return false; - uint64 payload_size = 0; + uint64_t payload_size = 0; for (int idx = 0; idx < tags_count_; ++idx) { const Tag& tag = tags_[idx]; payload_size += tag.Write(NULL); } - if (!WriteEbmlMasterElement(writer, kMkvTags, payload_size)) + if (!WriteEbmlMasterElement(writer, libwebm::kMkvTags, payload_size)) return false; - const int64 start = writer->Position(); + const int64_t start = writer->Position(); for (int idx = 0; idx < tags_count_; ++idx) { const Tag& tag = tags_[idx]; - const uint64 tag_size = tag.Write(writer); + const uint64_t tag_size = tag.Write(writer); if (tag_size == 0) // error return 0; } - const int64 stop = writer->Position(); + const int64_t stop = writer->Position(); - if (stop >= start && uint64(stop - start) != payload_size) + if (stop >= start && uint64_t(stop - start) != payload_size) return false; return true; @@ -1750,15 +2111,18 @@ bool Tags::ExpandTagsArray() { // // Cluster class -Cluster::Cluster(uint64 timecode, int64 cues_pos, uint64 timecode_scale) +Cluster::Cluster(uint64_t timecode, int64_t cues_pos, uint64_t timecode_scale, + bool write_last_frame_with_duration, bool fixed_size_timecode) : blocks_added_(0), finalized_(false), + fixed_size_timecode_(fixed_size_timecode), header_written_(false), payload_size_(0), position_for_cues_(cues_pos), size_position_(-1), timecode_(timecode), timecode_scale_(timecode_scale), + write_last_frame_with_duration_(write_last_frame_with_duration), writer_(NULL) {} Cluster::~Cluster() {} @@ -1771,24 +2135,27 @@ bool Cluster::Init(IMkvWriter* ptr_writer) { return true; } -bool Cluster::AddFrame(const Frame* const frame) { return DoWriteFrame(frame); } +bool Cluster::AddFrame(const Frame* const frame) { + return QueueOrWriteFrame(frame); +} -bool Cluster::AddFrame(const uint8* data, uint64 
length, uint64 track_number, - uint64 abs_timecode, bool is_key) { +bool Cluster::AddFrame(const uint8_t* data, uint64_t length, + uint64_t track_number, uint64_t abs_timecode, + bool is_key) { Frame frame; if (!frame.Init(data, length)) return false; frame.set_track_number(track_number); frame.set_timestamp(abs_timecode); frame.set_is_key(is_key); - return DoWriteFrame(&frame); + return QueueOrWriteFrame(&frame); } -bool Cluster::AddFrameWithAdditional(const uint8* data, uint64 length, - const uint8* additional, - uint64 additional_length, uint64 add_id, - uint64 track_number, uint64 abs_timecode, - bool is_key) { +bool Cluster::AddFrameWithAdditional(const uint8_t* data, uint64_t length, + const uint8_t* additional, + uint64_t additional_length, + uint64_t add_id, uint64_t track_number, + uint64_t abs_timecode, bool is_key) { if (!additional || additional_length == 0) { return false; } @@ -1800,13 +2167,13 @@ bool Cluster::AddFrameWithAdditional(const uint8* data, uint64 length, frame.set_track_number(track_number); frame.set_timestamp(abs_timecode); frame.set_is_key(is_key); - return DoWriteFrame(&frame); + return QueueOrWriteFrame(&frame); } -bool Cluster::AddFrameWithDiscardPadding(const uint8* data, uint64 length, - int64 discard_padding, - uint64 track_number, - uint64 abs_timecode, bool is_key) { +bool Cluster::AddFrameWithDiscardPadding(const uint8_t* data, uint64_t length, + int64_t discard_padding, + uint64_t track_number, + uint64_t abs_timecode, bool is_key) { Frame frame; if (!frame.Init(data, length)) return false; @@ -1814,11 +2181,12 @@ bool Cluster::AddFrameWithDiscardPadding(const uint8* data, uint64 length, frame.set_track_number(track_number); frame.set_timestamp(abs_timecode); frame.set_is_key(is_key); - return DoWriteFrame(&frame); + return QueueOrWriteFrame(&frame); } -bool Cluster::AddMetadata(const uint8* data, uint64 length, uint64 track_number, - uint64 abs_timecode, uint64 duration_timecode) { +bool Cluster::AddMetadata(const uint8_t* 
data, uint64_t length, + uint64_t track_number, uint64_t abs_timecode, + uint64_t duration_timecode) { Frame frame; if (!frame.Init(data, length)) return false; @@ -1826,17 +2194,62 @@ bool Cluster::AddMetadata(const uint8* data, uint64 length, uint64 track_number, frame.set_timestamp(abs_timecode); frame.set_duration(duration_timecode); frame.set_is_key(true); // All metadata blocks are keyframes. - return DoWriteFrame(&frame); + return QueueOrWriteFrame(&frame); } -void Cluster::AddPayloadSize(uint64 size) { payload_size_ += size; } +void Cluster::AddPayloadSize(uint64_t size) { payload_size_ += size; } bool Cluster::Finalize() { - if (!writer_ || finalized_ || size_position_ == -1) + return !write_last_frame_with_duration_ && Finalize(false, 0); +} + +bool Cluster::Finalize(bool set_last_frame_duration, uint64_t duration) { + if (!writer_ || finalized_) + return false; + + if (write_last_frame_with_duration_) { + // Write out held back Frames. This essentially performs a k-way merge + // across all tracks in the increasing order of timestamps. + while (!stored_frames_.empty()) { + Frame* frame = stored_frames_.begin()->second.front(); + + // Get the next frame to write (frame with least timestamp across all + // tracks). + for (FrameMapIterator frames_iterator = ++stored_frames_.begin(); + frames_iterator != stored_frames_.end(); ++frames_iterator) { + if (frames_iterator->second.front()->timestamp() < frame->timestamp()) { + frame = frames_iterator->second.front(); + } + } + + // Set the duration if it's the last frame for the track. + if (set_last_frame_duration && + stored_frames_[frame->track_number()].size() == 1 && + !frame->duration_set()) { + frame->set_duration(duration - frame->timestamp()); + if (!frame->is_key() && !frame->reference_block_timestamp_set()) { + frame->set_reference_block_timestamp( + last_block_timestamp_[frame->track_number()]); + } + } + + // Write the frame and remove it from |stored_frames_|. 
+ const bool wrote_frame = DoWriteFrame(frame); + stored_frames_[frame->track_number()].pop_front(); + if (stored_frames_[frame->track_number()].empty()) { + stored_frames_.erase(frame->track_number()); + } + delete frame; + if (!wrote_frame) + return false; + } + } + + if (size_position_ == -1) return false; if (writer_->Seekable()) { - const int64 pos = writer_->Position(); + const int64_t pos = writer_->Position(); if (writer_->Position(size_position_)) return false; @@ -1853,9 +2266,10 @@ bool Cluster::Finalize() { return true; } -uint64 Cluster::Size() const { - const uint64 element_size = - EbmlMasterElementSize(kMkvCluster, 0xFFFFFFFFFFFFFFFFULL) + payload_size_; +uint64_t Cluster::Size() const { + const uint64_t element_size = + EbmlMasterElementSize(libwebm::kMkvCluster, 0xFFFFFFFFFFFFFFFFULL) + + payload_size_; return element_size; } @@ -1871,15 +2285,15 @@ bool Cluster::PreWriteBlock() { return true; } -void Cluster::PostWriteBlock(uint64 element_size) { +void Cluster::PostWriteBlock(uint64_t element_size) { AddPayloadSize(element_size); ++blocks_added_; } -int64 Cluster::GetRelativeTimecode(int64 abs_timecode) const { - const int64 cluster_timecode = this->Cluster::timecode(); - const int64 rel_timecode = - static_cast<int64>(abs_timecode) - cluster_timecode; +int64_t Cluster::GetRelativeTimecode(int64_t abs_timecode) const { + const int64_t cluster_timecode = this->Cluster::timecode(); + const int64_t rel_timecode = + static_cast<int64_t>(abs_timecode) - cluster_timecode; if (rel_timecode < 0 || rel_timecode > kMaxBlockTimecode) return -1; @@ -1894,11 +2308,67 @@ bool Cluster::DoWriteFrame(const Frame* const frame) { if (!PreWriteBlock()) return false; - const uint64 element_size = WriteFrame(writer_, frame, this); + const uint64_t element_size = WriteFrame(writer_, frame, this); if (element_size == 0) return false; PostWriteBlock(element_size); + last_block_timestamp_[frame->track_number()] = frame->timestamp(); + return true; +} + +bool 
Cluster::QueueOrWriteFrame(const Frame* const frame) { + if (!frame || !frame->IsValid()) + return false; + + // If |write_last_frame_with_duration_| is not set, then write the frame right + // away. + if (!write_last_frame_with_duration_) { + return DoWriteFrame(frame); + } + + // Queue the current frame. + uint64_t track_number = frame->track_number(); + Frame* const frame_to_store = new Frame(); + frame_to_store->CopyFrom(*frame); + stored_frames_[track_number].push_back(frame_to_store); + + // Iterate through all queued frames in the current track except the last one + // and write it if it is okay to do so (i.e.) no other track has an held back + // frame with timestamp <= the timestamp of the frame in question. + std::vector<std::list<Frame*>::iterator> frames_to_erase; + for (std::list<Frame *>::iterator + current_track_iterator = stored_frames_[track_number].begin(), + end = --stored_frames_[track_number].end(); + current_track_iterator != end; ++current_track_iterator) { + const Frame* const frame_to_write = *current_track_iterator; + bool okay_to_write = true; + for (FrameMapIterator track_iterator = stored_frames_.begin(); + track_iterator != stored_frames_.end(); ++track_iterator) { + if (track_iterator->first == track_number) { + continue; + } + if (track_iterator->second.front()->timestamp() < + frame_to_write->timestamp()) { + okay_to_write = false; + break; + } + } + if (okay_to_write) { + const bool wrote_frame = DoWriteFrame(frame_to_write); + delete frame_to_write; + if (!wrote_frame) + return false; + frames_to_erase.push_back(current_track_iterator); + } else { + break; + } + } + for (std::vector<std::list<Frame*>::iterator>::iterator iterator = + frames_to_erase.begin(); + iterator != frames_to_erase.end(); ++iterator) { + stored_frames_[track_number].erase(*iterator); + } return true; } @@ -1906,7 +2376,7 @@ bool Cluster::WriteClusterHeader() { if (finalized_) return false; - if (WriteID(writer_, kMkvCluster)) + if (WriteID(writer_, 
libwebm::kMkvCluster)) return false; // Save for later. @@ -1917,9 +2387,12 @@ bool Cluster::WriteClusterHeader() { if (SerializeInt(writer_, kEbmlUnknownValue, 8)) return false; - if (!WriteEbmlElement(writer_, kMkvTimecode, timecode())) + if (!WriteEbmlElement(writer_, libwebm::kMkvTimecode, timecode(), + fixed_size_timecode_ ? 8 : 0)) { return false; - AddPayloadSize(EbmlElementSize(kMkvTimecode, timecode())); + } + AddPayloadSize(EbmlElementSize(libwebm::kMkvTimecode, timecode(), + fixed_size_timecode_ ? 8 : 0)); header_written_ = true; return true; @@ -1930,7 +2403,7 @@ bool Cluster::WriteClusterHeader() { // SeekHead Class SeekHead::SeekHead() : start_pos_(0ULL) { - for (int32 i = 0; i < kSeekEntryCount; ++i) { + for (int32_t i = 0; i < kSeekEntryCount; ++i) { seek_entry_id_[i] = 0; seek_entry_pos_[i] = 0; } @@ -1943,17 +2416,19 @@ bool SeekHead::Finalize(IMkvWriter* writer) const { if (start_pos_ == -1) return false; - uint64 payload_size = 0; - uint64 entry_size[kSeekEntryCount]; + uint64_t payload_size = 0; + uint64_t entry_size[kSeekEntryCount]; - for (int32 i = 0; i < kSeekEntryCount; ++i) { + for (int32_t i = 0; i < kSeekEntryCount; ++i) { if (seek_entry_id_[i] != 0) { - entry_size[i] = - EbmlElementSize(kMkvSeekID, static_cast<uint64>(seek_entry_id_[i])); - entry_size[i] += EbmlElementSize(kMkvSeekPosition, seek_entry_pos_[i]); + entry_size[i] = EbmlElementSize( + libwebm::kMkvSeekID, static_cast<uint64_t>(seek_entry_id_[i])); + entry_size[i] += + EbmlElementSize(libwebm::kMkvSeekPosition, seek_entry_pos_[i]); payload_size += - EbmlMasterElementSize(kMkvSeek, entry_size[i]) + entry_size[i]; + EbmlMasterElementSize(libwebm::kMkvSeek, entry_size[i]) + + entry_size[i]; } } @@ -1961,34 +2436,35 @@ bool SeekHead::Finalize(IMkvWriter* writer) const { if (payload_size == 0) return true; - const int64 pos = writer->Position(); + const int64_t pos = writer->Position(); if (writer->Position(start_pos_)) return false; - if (!WriteEbmlMasterElement(writer, 
kMkvSeekHead, payload_size)) + if (!WriteEbmlMasterElement(writer, libwebm::kMkvSeekHead, payload_size)) return false; - for (int32 i = 0; i < kSeekEntryCount; ++i) { + for (int32_t i = 0; i < kSeekEntryCount; ++i) { if (seek_entry_id_[i] != 0) { - if (!WriteEbmlMasterElement(writer, kMkvSeek, entry_size[i])) + if (!WriteEbmlMasterElement(writer, libwebm::kMkvSeek, entry_size[i])) return false; - if (!WriteEbmlElement(writer, kMkvSeekID, - static_cast<uint64>(seek_entry_id_[i]))) + if (!WriteEbmlElement(writer, libwebm::kMkvSeekID, + static_cast<uint64_t>(seek_entry_id_[i]))) return false; - if (!WriteEbmlElement(writer, kMkvSeekPosition, seek_entry_pos_[i])) + if (!WriteEbmlElement(writer, libwebm::kMkvSeekPosition, + seek_entry_pos_[i])) return false; } } - const uint64 total_entry_size = kSeekEntryCount * MaxEntrySize(); - const uint64 total_size = - EbmlMasterElementSize(kMkvSeekHead, total_entry_size) + + const uint64_t total_entry_size = kSeekEntryCount * MaxEntrySize(); + const uint64_t total_size = + EbmlMasterElementSize(libwebm::kMkvSeekHead, total_entry_size) + total_entry_size; - const int64 size_left = total_size - (writer->Position() - start_pos_); + const int64_t size_left = total_size - (writer->Position() - start_pos_); - const uint64 bytes_written = WriteVoidElement(writer, size_left); + const uint64_t bytes_written = WriteVoidElement(writer, size_left); if (!bytes_written) return false; @@ -2000,20 +2476,21 @@ bool SeekHead::Finalize(IMkvWriter* writer) const { } bool SeekHead::Write(IMkvWriter* writer) { - const uint64 entry_size = kSeekEntryCount * MaxEntrySize(); - const uint64 size = EbmlMasterElementSize(kMkvSeekHead, entry_size); + const uint64_t entry_size = kSeekEntryCount * MaxEntrySize(); + const uint64_t size = + EbmlMasterElementSize(libwebm::kMkvSeekHead, entry_size); start_pos_ = writer->Position(); - const uint64 bytes_written = WriteVoidElement(writer, size + entry_size); + const uint64_t bytes_written = WriteVoidElement(writer, 
size + entry_size); if (!bytes_written) return false; return true; } -bool SeekHead::AddSeekEntry(uint32 id, uint64 pos) { - for (int32 i = 0; i < kSeekEntryCount; ++i) { +bool SeekHead::AddSeekEntry(uint32_t id, uint64_t pos) { + for (int32_t i = 0; i < kSeekEntryCount; ++i) { if (seek_entry_id_[i] == 0) { seek_entry_id_[i] = id; seek_entry_pos_[i] = pos; @@ -2023,19 +2500,19 @@ bool SeekHead::AddSeekEntry(uint32 id, uint64 pos) { return false; } -uint32 SeekHead::GetId(int index) const { +uint32_t SeekHead::GetId(int index) const { if (index < 0 || index >= kSeekEntryCount) return UINT_MAX; return seek_entry_id_[index]; } -uint64 SeekHead::GetPosition(int index) const { +uint64_t SeekHead::GetPosition(int index) const { if (index < 0 || index >= kSeekEntryCount) return ULLONG_MAX; return seek_entry_pos_[index]; } -bool SeekHead::SetSeekEntry(int index, uint32 id, uint64 position) { +bool SeekHead::SetSeekEntry(int index, uint32_t id, uint64_t position) { if (index < 0 || index >= kSeekEntryCount) return false; seek_entry_id_[index] = id; @@ -2043,12 +2520,12 @@ bool SeekHead::SetSeekEntry(int index, uint32 id, uint64 position) { return true; } -uint64 SeekHead::MaxEntrySize() const { - const uint64 max_entry_payload_size = - EbmlElementSize(kMkvSeekID, 0xffffffffULL) + - EbmlElementSize(kMkvSeekPosition, 0xffffffffffffffffULL); - const uint64 max_entry_size = - EbmlMasterElementSize(kMkvSeek, max_entry_payload_size) + +uint64_t SeekHead::MaxEntrySize() const { + const uint64_t max_entry_payload_size = + EbmlElementSize(libwebm::kMkvSeekID, UINT64_C(0xffffffff)) + + EbmlElementSize(libwebm::kMkvSeekPosition, UINT64_C(0xffffffffffffffff)); + const uint64_t max_entry_size = + EbmlMasterElementSize(libwebm::kMkvSeek, max_entry_payload_size) + max_entry_payload_size; return max_entry_size; @@ -2072,10 +2549,10 @@ SegmentInfo::~SegmentInfo() { } bool SegmentInfo::Init() { - int32 major; - int32 minor; - int32 build; - int32 revision; + int32_t major; + int32_t minor; + 
int32_t build; + int32_t revision; GetVersion(&major, &minor, &build, &revision); char temp[256]; #ifdef _MSC_VER @@ -2115,12 +2592,12 @@ bool SegmentInfo::Finalize(IMkvWriter* writer) const { if (duration_pos_ == -1) return false; - const int64 pos = writer->Position(); + const int64_t pos = writer->Position(); if (writer->Position(duration_pos_)) return false; - if (!WriteEbmlElement(writer, kMkvDuration, + if (!WriteEbmlElement(writer, libwebm::kMkvDuration, static_cast<float>(duration_))) return false; @@ -2136,43 +2613,45 @@ bool SegmentInfo::Write(IMkvWriter* writer) { if (!writer || !muxing_app_ || !writing_app_) return false; - uint64 size = EbmlElementSize(kMkvTimecodeScale, timecode_scale_); + uint64_t size = EbmlElementSize(libwebm::kMkvTimecodeScale, timecode_scale_); if (duration_ > 0.0) - size += EbmlElementSize(kMkvDuration, static_cast<float>(duration_)); + size += + EbmlElementSize(libwebm::kMkvDuration, static_cast<float>(duration_)); if (date_utc_ != LLONG_MIN) - size += EbmlDateElementSize(kMkvDateUTC); - size += EbmlElementSize(kMkvMuxingApp, muxing_app_); - size += EbmlElementSize(kMkvWritingApp, writing_app_); + size += EbmlDateElementSize(libwebm::kMkvDateUTC); + size += EbmlElementSize(libwebm::kMkvMuxingApp, muxing_app_); + size += EbmlElementSize(libwebm::kMkvWritingApp, writing_app_); - if (!WriteEbmlMasterElement(writer, kMkvInfo, size)) + if (!WriteEbmlMasterElement(writer, libwebm::kMkvInfo, size)) return false; - const int64 payload_position = writer->Position(); + const int64_t payload_position = writer->Position(); if (payload_position < 0) return false; - if (!WriteEbmlElement(writer, kMkvTimecodeScale, timecode_scale_)) + if (!WriteEbmlElement(writer, libwebm::kMkvTimecodeScale, timecode_scale_)) return false; if (duration_ > 0.0) { // Save for later duration_pos_ = writer->Position(); - if (!WriteEbmlElement(writer, kMkvDuration, static_cast<float>(duration_))) + if (!WriteEbmlElement(writer, libwebm::kMkvDuration, + 
static_cast<float>(duration_))) return false; } if (date_utc_ != LLONG_MIN) - WriteEbmlDateElement(writer, kMkvDateUTC, date_utc_); + WriteEbmlDateElement(writer, libwebm::kMkvDateUTC, date_utc_); - if (!WriteEbmlElement(writer, kMkvMuxingApp, muxing_app_)) + if (!WriteEbmlElement(writer, libwebm::kMkvMuxingApp, muxing_app_)) return false; - if (!WriteEbmlElement(writer, kMkvWritingApp, writing_app_)) + if (!WriteEbmlElement(writer, libwebm::kMkvWritingApp, writing_app_)) return false; - const int64 stop_position = writer->Position(); + const int64_t stop_position = writer->Position(); if (stop_position < 0 || - stop_position - payload_position != static_cast<int64>(size)) + stop_position - payload_position != static_cast<int64_t>(size)) return false; return true; @@ -2244,6 +2723,8 @@ Segment::Segment() mode_(kFile), new_cuepoint_(false), output_cues_(true), + accurate_cluster_duration_(false), + fixed_size_cluster_timecode_(false), payload_pos_(0), size_position_(0), doc_type_version_(kDefaultDocTypeVersion), @@ -2260,7 +2741,7 @@ Segment::Segment() Segment::~Segment() { if (cluster_list_) { - for (int32 i = 0; i < cluster_list_size_; ++i) { + for (int32_t i = 0; i < cluster_list_size_; ++i) { Cluster* const cluster = cluster_list_[i]; delete cluster; } @@ -2268,7 +2749,7 @@ Segment::~Segment() { } if (frames_) { - for (int32 i = 0; i < frames_size_; ++i) { + for (int32_t i = 0; i < frames_size_; ++i) { Frame* const frame = frames_[i]; delete frame; } @@ -2292,13 +2773,13 @@ Segment::~Segment() { } } -void Segment::MoveCuesBeforeClustersHelper(uint64 diff, int32 index, - uint64* cues_size) { +void Segment::MoveCuesBeforeClustersHelper(uint64_t diff, int32_t index, + uint64_t* cues_size) { CuePoint* const cue_point = cues_.GetCueByIndex(index); if (cue_point == NULL) return; - const uint64 old_cue_point_size = cue_point->Size(); - const uint64 cluster_pos = cue_point->cluster_pos() + diff; + const uint64_t old_cue_point_size = cue_point->Size(); + const uint64_t 
cluster_pos = cue_point->cluster_pos() + diff; cue_point->set_cluster_pos(cluster_pos); // update the new cluster position // New size of the cue is computed as follows // Let a = current sum of size of all CuePoints @@ -2308,40 +2789,40 @@ void Segment::MoveCuesBeforeClustersHelper(uint64 diff, int32 index, // Let d = b + c. Now d is the |diff| passed to the next recursive call. // Let e = a + b. Now e is the |cues_size| passed to the next recursive // call. - const uint64 cue_point_size_diff = cue_point->Size() - old_cue_point_size; - const uint64 cue_size_diff = + const uint64_t cue_point_size_diff = cue_point->Size() - old_cue_point_size; + const uint64_t cue_size_diff = GetCodedUIntSize(*cues_size + cue_point_size_diff) - GetCodedUIntSize(*cues_size); *cues_size += cue_point_size_diff; diff = cue_size_diff + cue_point_size_diff; if (diff > 0) { - for (int32 i = 0; i < cues_.cue_entries_size(); ++i) { + for (int32_t i = 0; i < cues_.cue_entries_size(); ++i) { MoveCuesBeforeClustersHelper(diff, i, cues_size); } } } void Segment::MoveCuesBeforeClusters() { - const uint64 current_cue_size = cues_.Size(); - uint64 cue_size = 0; - for (int32 i = 0; i < cues_.cue_entries_size(); ++i) + const uint64_t current_cue_size = cues_.Size(); + uint64_t cue_size = 0; + for (int32_t i = 0; i < cues_.cue_entries_size(); ++i) cue_size += cues_.GetCueByIndex(i)->Size(); - for (int32 i = 0; i < cues_.cue_entries_size(); ++i) + for (int32_t i = 0; i < cues_.cue_entries_size(); ++i) MoveCuesBeforeClustersHelper(current_cue_size, i, &cue_size); // Adjust the Seek Entry to reflect the change in position // of Cluster and Cues - int32 cluster_index = 0; - int32 cues_index = 0; - for (int32 i = 0; i < SeekHead::kSeekEntryCount; ++i) { - if (seek_head_.GetId(i) == kMkvCluster) + int32_t cluster_index = 0; + int32_t cues_index = 0; + for (int32_t i = 0; i < SeekHead::kSeekEntryCount; ++i) { + if (seek_head_.GetId(i) == libwebm::kMkvCluster) cluster_index = i; - if (seek_head_.GetId(i) == 
kMkvCues) + if (seek_head_.GetId(i) == libwebm::kMkvCues) cues_index = i; } - seek_head_.SetSeekEntry(cues_index, kMkvCues, + seek_head_.SetSeekEntry(cues_index, libwebm::kMkvCues, seek_head_.GetPosition(cluster_index)); - seek_head_.SetSeekEntry(cluster_index, kMkvCluster, + seek_head_.SetSeekEntry(cluster_index, libwebm::kMkvCluster, cues_.Size() + seek_head_.GetPosition(cues_index)); } @@ -2359,8 +2840,8 @@ bool Segment::CopyAndMoveCuesBeforeClusters(mkvparser::IMkvReader* reader, IMkvWriter* writer) { if (!writer->Seekable() || chunking_) return false; - const int64 cluster_offset = - cluster_list_[0]->size_position() - GetUIntSize(kMkvCluster); + const int64_t cluster_offset = + cluster_list_[0]->size_position() - GetUIntSize(libwebm::kMkvCluster); // Copy the headers. if (!ChunkedCopy(reader, writer, 0, cluster_offset)) @@ -2383,8 +2864,8 @@ bool Segment::CopyAndMoveCuesBeforeClusters(mkvparser::IMkvReader* reader, return false; // Update the Segment size in case the Cues size has changed. - const int64 pos = writer->Position(); - const int64 segment_size = writer->Position() - payload_pos_; + const int64_t pos = writer->Position(); + const int64_t segment_size = writer->Position() - payload_pos_; if (writer->Position(size_position_) || WriteUIntSize(writer, segment_size, 8) || writer->Position(pos)) return false; @@ -2395,15 +2876,17 @@ bool Segment::Finalize() { if (WriteFramesAll() < 0) return false; - if (mode_ == kFile) { - if (cluster_list_size_ > 0) { - // Update last cluster's size - Cluster* const old_cluster = cluster_list_[cluster_list_size_ - 1]; + if (cluster_list_size_ > 0) { + // Update last cluster's size + Cluster* const old_cluster = cluster_list_[cluster_list_size_ - 1]; - if (!old_cluster || !old_cluster->Finalize()) - return false; - } + // For the last frame of the last Cluster, we don't write it as a BlockGroup + // with Duration unless the frame itself has duration set explicitly. 
+ if (!old_cluster || !old_cluster->Finalize(false, 0)) + return false; + } + if (mode_ == kFile) { if (chunking_ && chunk_writer_cluster_) { chunk_writer_cluster_->Close(); chunk_count_++; @@ -2417,7 +2900,7 @@ bool Segment::Finalize() { return false; if (output_cues_) - if (!seek_head_.AddSeekEntry(kMkvCues, MaxOffset())) + if (!seek_head_.AddSeekEntry(libwebm::kMkvCues, MaxOffset())) return false; if (chunking_) { @@ -2448,11 +2931,11 @@ bool Segment::Finalize() { if (size_position_ == -1) return false; - const int64 segment_size = MaxOffset(); + const int64_t segment_size = MaxOffset(); if (segment_size < 1) return false; - const int64 pos = writer_header_->Position(); + const int64_t pos = writer_header_->Position(); UpdateDocTypeVersion(); if (doc_type_version_ != doc_type_version_written_) { if (writer_header_->Position(0)) @@ -2490,7 +2973,7 @@ bool Segment::Finalize() { return true; } -Track* Segment::AddTrack(int32 number) { +Track* Segment::AddTrack(int32_t number) { Track* const track = new (std::nothrow) Track(&seed_); // NOLINT if (!track) @@ -2508,7 +2991,7 @@ Chapter* Segment::AddChapter() { return chapters_.AddChapter(&seed_); } Tag* Segment::AddTag() { return tags_.AddTag(); } -uint64 Segment::AddVideoTrack(int32 width, int32 height, int32 number) { +uint64_t Segment::AddVideoTrack(int32_t width, int32_t height, int32_t number) { VideoTrack* const track = new (std::nothrow) VideoTrack(&seed_); // NOLINT if (!track) return 0; @@ -2524,7 +3007,7 @@ uint64 Segment::AddVideoTrack(int32 width, int32 height, int32 number) { return track->number(); } -bool Segment::AddCuePoint(uint64 timestamp, uint64 track) { +bool Segment::AddCuePoint(uint64_t timestamp, uint64_t track) { if (cluster_list_size_ < 1) return false; @@ -2547,7 +3030,8 @@ bool Segment::AddCuePoint(uint64 timestamp, uint64 track) { return true; } -uint64 Segment::AddAudioTrack(int32 sample_rate, int32 channels, int32 number) { +uint64_t Segment::AddAudioTrack(int32_t sample_rate, int32_t 
channels, + int32_t number) { AudioTrack* const track = new (std::nothrow) AudioTrack(&seed_); // NOLINT if (!track) return 0; @@ -2562,8 +3046,8 @@ uint64 Segment::AddAudioTrack(int32 sample_rate, int32 channels, int32 number) { return track->number(); } -bool Segment::AddFrame(const uint8* data, uint64 length, uint64 track_number, - uint64 timestamp, bool is_key) { +bool Segment::AddFrame(const uint8_t* data, uint64_t length, + uint64_t track_number, uint64_t timestamp, bool is_key) { if (!data) return false; @@ -2576,11 +3060,11 @@ bool Segment::AddFrame(const uint8* data, uint64 length, uint64 track_number, return AddGenericFrame(&frame); } -bool Segment::AddFrameWithAdditional(const uint8* data, uint64 length, - const uint8* additional, - uint64 additional_length, uint64 add_id, - uint64 track_number, uint64 timestamp, - bool is_key) { +bool Segment::AddFrameWithAdditional(const uint8_t* data, uint64_t length, + const uint8_t* additional, + uint64_t additional_length, + uint64_t add_id, uint64_t track_number, + uint64_t timestamp, bool is_key) { if (!data || !additional) return false; @@ -2595,10 +3079,10 @@ bool Segment::AddFrameWithAdditional(const uint8* data, uint64 length, return AddGenericFrame(&frame); } -bool Segment::AddFrameWithDiscardPadding(const uint8* data, uint64 length, - int64 discard_padding, - uint64 track_number, uint64 timestamp, - bool is_key) { +bool Segment::AddFrameWithDiscardPadding(const uint8_t* data, uint64_t length, + int64_t discard_padding, + uint64_t track_number, + uint64_t timestamp, bool is_key) { if (!data) return false; @@ -2612,8 +3096,9 @@ bool Segment::AddFrameWithDiscardPadding(const uint8* data, uint64 length, return AddGenericFrame(&frame); } -bool Segment::AddMetadata(const uint8* data, uint64 length, uint64 track_number, - uint64 timestamp_ns, uint64 duration_ns) { +bool Segment::AddMetadata(const uint8_t* data, uint64_t length, + uint64_t track_number, uint64_t timestamp_ns, + uint64_t duration_ns) { if (!data) 
return false; @@ -2702,6 +3187,14 @@ bool Segment::AddGenericFrame(const Frame* frame) { void Segment::OutputCues(bool output_cues) { output_cues_ = output_cues; } +void Segment::AccurateClusterDuration(bool accurate_cluster_duration) { + accurate_cluster_duration_ = accurate_cluster_duration; +} + +void Segment::UseFixedSizeClusterTimecode(bool fixed_size_cluster_timecode) { + fixed_size_cluster_timecode_ = fixed_size_cluster_timecode; +} + bool Segment::SetChunking(bool chunking, const char* filename) { if (chunk_count_ > 0) return false; @@ -2781,7 +3274,7 @@ bool Segment::SetChunking(bool chunking, const char* filename) { return true; } -bool Segment::CuesTrack(uint64 track_number) { +bool Segment::CuesTrack(uint64_t track_number) { const Track* const track = GetTrackByNumber(track_number); if (!track) return false; @@ -2792,7 +3285,7 @@ bool Segment::CuesTrack(uint64 track_number) { void Segment::ForceNewClusterOnNextFrame() { force_new_cluster_ = true; } -Track* Segment::GetTrackByNumber(uint64 track_number) const { +Track* Segment::GetTrackByNumber(uint64_t track_number) const { return tracks_.GetTrackByNumber(track_number); } @@ -2803,11 +3296,11 @@ bool Segment::WriteSegmentHeader() { if (!WriteEbmlHeader(writer_header_, doc_type_version_)) return false; doc_type_version_written_ = doc_type_version_; - ebml_header_size_ = static_cast<int32>(writer_header_->Position()); + ebml_header_size_ = static_cast<int32_t>(writer_header_->Position()); // Write "unknown" (-1) as segment size value. If mode is kFile, Segment // will write over duration when the file is finalized. - if (WriteID(writer_header_, kMkvSegment)) + if (WriteID(writer_header_, libwebm::kMkvSegment)) return false; // Save for later. 
@@ -2831,25 +3324,25 @@ bool Segment::WriteSegmentHeader() { return false; } - if (!seek_head_.AddSeekEntry(kMkvInfo, MaxOffset())) + if (!seek_head_.AddSeekEntry(libwebm::kMkvInfo, MaxOffset())) return false; if (!segment_info_.Write(writer_header_)) return false; - if (!seek_head_.AddSeekEntry(kMkvTracks, MaxOffset())) + if (!seek_head_.AddSeekEntry(libwebm::kMkvTracks, MaxOffset())) return false; if (!tracks_.Write(writer_header_)) return false; if (chapters_.Count() > 0) { - if (!seek_head_.AddSeekEntry(kMkvChapters, MaxOffset())) + if (!seek_head_.AddSeekEntry(libwebm::kMkvChapters, MaxOffset())) return false; if (!chapters_.Write(writer_header_)) return false; } if (tags_.Count() > 0) { - if (!seek_head_.AddSeekEntry(kMkvTags, MaxOffset())) + if (!seek_head_.AddSeekEntry(libwebm::kMkvTags, MaxOffset())) return false; if (!tags_.Write(writer_header_)) return false; @@ -2870,7 +3363,7 @@ bool Segment::WriteSegmentHeader() { // Here we are testing whether to create a new cluster, given a frame // having time frame_timestamp_ns. // -int Segment::TestFrame(uint64 track_number, uint64 frame_timestamp_ns, +int Segment::TestFrame(uint64_t track_number, uint64_t frame_timestamp_ns, bool is_key) const { if (force_new_cluster_) return 1; @@ -2888,11 +3381,11 @@ int Segment::TestFrame(uint64 track_number, uint64 frame_timestamp_ns, // written to the existing cluster, or that a new cluster should be // created. 
- const uint64 timecode_scale = segment_info_.timecode_scale(); - const uint64 frame_timecode = frame_timestamp_ns / timecode_scale; + const uint64_t timecode_scale = segment_info_.timecode_scale(); + const uint64_t frame_timecode = frame_timestamp_ns / timecode_scale; const Cluster* const last_cluster = cluster_list_[cluster_list_size_ - 1]; - const uint64 last_cluster_timecode = last_cluster->timecode(); + const uint64_t last_cluster_timecode = last_cluster->timecode(); // For completeness we test for the case when the frame's timecode // is less than the cluster's timecode. Although in principle that @@ -2907,7 +3400,7 @@ int Segment::TestFrame(uint64 track_number, uint64 frame_timestamp_ns, // using a 16-bit signed integer), then we cannot write this frame // to that cluster, and so we must create a new cluster. - const int64 delta_timecode = frame_timecode - last_cluster_timecode; + const int64_t delta_timecode = frame_timecode - last_cluster_timecode; if (delta_timecode > kMaxBlockTimecode) return 2; @@ -2923,7 +3416,7 @@ int Segment::TestFrame(uint64 track_number, uint64 frame_timestamp_ns, // already, where "too many" is defined as "the total time of frames // in the cluster exceeds a threshold". - const uint64 delta_ns = delta_timecode * timecode_scale; + const uint64_t delta_ns = delta_timecode * timecode_scale; if (max_cluster_duration_ > 0 && delta_ns >= max_cluster_duration_) return 1; @@ -2932,7 +3425,7 @@ int Segment::TestFrame(uint64 track_number, uint64 frame_timestamp_ns, // cluster is created when the size of the current cluster exceeds a // threshold. 
- const uint64 cluster_size = last_cluster->payload_size(); + const uint64_t cluster_size = last_cluster->payload_size(); if (max_cluster_size_ > 0 && cluster_size >= max_cluster_size_) return 1; @@ -2942,19 +3435,19 @@ int Segment::TestFrame(uint64 track_number, uint64 frame_timestamp_ns, return 0; } -bool Segment::MakeNewCluster(uint64 frame_timestamp_ns) { - const int32 new_size = cluster_list_size_ + 1; +bool Segment::MakeNewCluster(uint64_t frame_timestamp_ns) { + const int32_t new_size = cluster_list_size_ + 1; if (new_size > cluster_list_capacity_) { // Add more clusters. - const int32 new_capacity = + const int32_t new_capacity = (cluster_list_capacity_ <= 0) ? 1 : cluster_list_capacity_ * 2; Cluster** const clusters = new (std::nothrow) Cluster*[new_capacity]; // NOLINT if (!clusters) return false; - for (int32 i = 0; i < cluster_list_size_; ++i) { + for (int32_t i = 0; i < cluster_list_size_; ++i) { clusters[i] = cluster_list_[i]; } @@ -2967,19 +3460,17 @@ bool Segment::MakeNewCluster(uint64 frame_timestamp_ns) { if (!WriteFramesLessThan(frame_timestamp_ns)) return false; - if (mode_ == kFile) { - if (cluster_list_size_ > 0) { - // Update old cluster's size - Cluster* const old_cluster = cluster_list_[cluster_list_size_ - 1]; + if (cluster_list_size_ > 0) { + // Update old cluster's size + Cluster* const old_cluster = cluster_list_[cluster_list_size_ - 1]; - if (!old_cluster || !old_cluster->Finalize()) - return false; - } - - if (output_cues_) - new_cuepoint_ = true; + if (!old_cluster || !old_cluster->Finalize(true, frame_timestamp_ns)) + return false; } + if (output_cues_) + new_cuepoint_ = true; + if (chunking_ && cluster_list_size_ > 0) { chunk_writer_cluster_->Close(); chunk_count_++; @@ -2990,24 +3481,25 @@ bool Segment::MakeNewCluster(uint64 frame_timestamp_ns) { return false; } - const uint64 timecode_scale = segment_info_.timecode_scale(); - const uint64 frame_timecode = frame_timestamp_ns / timecode_scale; + const uint64_t timecode_scale = 
segment_info_.timecode_scale(); + const uint64_t frame_timecode = frame_timestamp_ns / timecode_scale; - uint64 cluster_timecode = frame_timecode; + uint64_t cluster_timecode = frame_timecode; if (frames_size_ > 0) { const Frame* const f = frames_[0]; // earliest queued frame - const uint64 ns = f->timestamp(); - const uint64 tc = ns / timecode_scale; + const uint64_t ns = f->timestamp(); + const uint64_t tc = ns / timecode_scale; if (tc < cluster_timecode) cluster_timecode = tc; } Cluster*& cluster = cluster_list_[cluster_list_size_]; - const int64 offset = MaxOffset(); - cluster = new (std::nothrow) Cluster(cluster_timecode, // NOLINT - offset, segment_info_.timecode_scale()); + const int64_t offset = MaxOffset(); + cluster = new (std::nothrow) + Cluster(cluster_timecode, offset, segment_info_.timecode_scale(), + accurate_cluster_duration_, fixed_size_cluster_timecode_); if (!cluster) return false; @@ -3018,8 +3510,8 @@ bool Segment::MakeNewCluster(uint64 frame_timestamp_ns) { return true; } -bool Segment::DoNewClusterProcessing(uint64 track_number, - uint64 frame_timestamp_ns, bool is_key) { +bool Segment::DoNewClusterProcessing(uint64_t track_number, + uint64_t frame_timestamp_ns, bool is_key) { for (;;) { // Based on the characteristics of the current frame and current // cluster, decide whether to create a new cluster. 
@@ -3055,12 +3547,12 @@ bool Segment::CheckHeaderInfo() { if (!WriteSegmentHeader()) return false; - if (!seek_head_.AddSeekEntry(kMkvCluster, MaxOffset())) + if (!seek_head_.AddSeekEntry(libwebm::kMkvCluster, MaxOffset())) return false; if (output_cues_ && cues_track_ == 0) { // Check for a video track - for (uint32 i = 0; i < tracks_.track_entries_size(); ++i) { + for (uint32_t i = 0; i < tracks_.track_entries_size(); ++i) { const Track* const track = tracks_.GetTrackByIndex(i); if (!track) return false; @@ -3085,7 +3577,7 @@ bool Segment::CheckHeaderInfo() { } void Segment::UpdateDocTypeVersion() { - for (uint32 index = 0; index < tracks_.track_entries_size(); ++index) { + for (uint32_t index = 0; index < tracks_.track_entries_size(); ++index) { const Track* track = tracks_.GetTrackByIndex(index); if (track == NULL) break; @@ -3127,14 +3619,14 @@ bool Segment::UpdateChunkName(const char* ext, char** name) const { return true; } -int64 Segment::MaxOffset() { +int64_t Segment::MaxOffset() { if (!writer_header_) return -1; - int64 offset = writer_header_->Position() - payload_pos_; + int64_t offset = writer_header_->Position() - payload_pos_; if (chunking_) { - for (int32 i = 0; i < cluster_list_size_; ++i) { + for (int32_t i = 0; i < cluster_list_size_; ++i) { Cluster* const cluster = cluster_list_[i]; offset += cluster->Size(); } @@ -3147,11 +3639,11 @@ int64 Segment::MaxOffset() { } bool Segment::QueueFrame(Frame* frame) { - const int32 new_size = frames_size_ + 1; + const int32_t new_size = frames_size_ + 1; if (new_size > frames_capacity_) { // Add more frames. - const int32 new_capacity = (!frames_capacity_) ? 2 : frames_capacity_ * 2; + const int32_t new_capacity = (!frames_capacity_) ? 
2 : frames_capacity_ * 2; if (new_capacity < 1) return false; @@ -3160,7 +3652,7 @@ bool Segment::QueueFrame(Frame* frame) { if (!frames) return false; - for (int32 i = 0; i < frames_size_; ++i) { + for (int32_t i = 0; i < frames_size_; ++i) { frames[i] = frames_[i]; } @@ -3186,7 +3678,7 @@ int Segment::WriteFramesAll() { if (!cluster) return -1; - for (int32 i = 0; i < frames_size_; ++i) { + for (int32_t i = 0; i < frames_size_; ++i) { Frame*& frame = frames_[i]; // TODO(jzern/vigneshv): using Segment::AddGenericFrame here would limit the // places where |doc_type_version_| needs to be updated. @@ -3215,7 +3707,7 @@ int Segment::WriteFramesAll() { return result; } -bool Segment::WriteFramesLessThan(uint64 timestamp) { +bool Segment::WriteFramesLessThan(uint64_t timestamp) { // Check |cluster_list_size_| to see if this is the first cluster. If it is // the first cluster the audio frames that are less than the first video // timesatmp will be written in a later step. @@ -3227,11 +3719,11 @@ bool Segment::WriteFramesLessThan(uint64 timestamp) { if (!cluster) return false; - int32 shift_left = 0; + int32_t shift_left = 0; // TODO(fgalligan): Change this to use the durations of frames instead of // the next frame's start time if the duration is accurate. 
- for (int32 i = 1; i < frames_size_; ++i) { + for (int32_t i = 1; i < frames_size_; ++i) { const Frame* const frame_curr = frames_[i]; if (frame_curr->timestamp() > timestamp) @@ -3262,8 +3754,8 @@ bool Segment::WriteFramesLessThan(uint64 timestamp) { if (shift_left >= frames_size_) return false; - const int32 new_frames_size = frames_size_ - shift_left; - for (int32 i = 0; i < new_frames_size; ++i) { + const int32_t new_frames_size = frames_size_ - shift_left; + for (int32_t i = 0; i < new_frames_size; ++i) { frames_[i] = frames_[i + shift_left]; } diff --git a/libvpx/third_party/libwebm/mkvmuxer.hpp b/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.h index 03a002c93..55ba07196 100644 --- a/libvpx/third_party/libwebm/mkvmuxer.hpp +++ b/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.h @@ -6,24 +6,31 @@ // in the file PATENTS. All contributing project authors may // be found in the AUTHORS file in the root of the source tree. -#ifndef MKVMUXER_HPP -#define MKVMUXER_HPP +#ifndef MKVMUXER_MKVMUXER_H_ +#define MKVMUXER_MKVMUXER_H_ -#include "mkvmuxertypes.hpp" +#include <stdint.h> + +#include <cstddef> +#include <list> +#include <map> + +#include "common/webmids.h" +#include "mkvmuxer/mkvmuxertypes.h" // For a description of the WebM elements see // http://www.webmproject.org/code/specs/container/. namespace mkvparser { class IMkvReader; -} // end namespace +} // namespace mkvparser namespace mkvmuxer { class MkvWriter; class Segment; -const uint64 kMaxTrackNumber = 126; +const uint64_t kMaxTrackNumber = 126; /////////////////////////////////////////////////////////////// // Interface used by the mkvmuxer to write out the Mkv data. @@ -59,15 +66,15 @@ class IMkvWriter { // Writes out the EBML header for a WebM file. This function must be called // before any other libwebm writing functions are called. -bool WriteEbmlHeader(IMkvWriter* writer, uint64 doc_type_version); +bool WriteEbmlHeader(IMkvWriter* writer, uint64_t doc_type_version); // Deprecated. 
Writes out EBML header with doc_type_version as // kDefaultDocTypeVersion. Exists for backward compatibility. bool WriteEbmlHeader(IMkvWriter* writer); // Copies in Chunk from source to destination between the given byte positions -bool ChunkedCopy(mkvparser::IMkvReader* source, IMkvWriter* dst, int64 start, - int64 size); +bool ChunkedCopy(mkvparser::IMkvReader* source, IMkvWriter* dst, int64_t start, + int64_t size); /////////////////////////////////////////////////////////////// // Class to hold data the will be written to a block. @@ -81,10 +88,11 @@ class Frame { bool CopyFrom(const Frame& frame); // Copies |frame| data into |frame_|. Returns true on success. - bool Init(const uint8* frame, uint64 length); + bool Init(const uint8_t* frame, uint64_t length); // Copies |additional| data into |additional_|. Returns true on success. - bool AddAdditionalData(const uint8* additional, uint64 length, uint64 add_id); + bool AddAdditionalData(const uint8_t* additional, uint64_t length, + uint64_t add_id); // Returns true if the frame has valid parameters. bool IsValid() const; @@ -93,62 +101,70 @@ class Frame { // parameters. 
bool CanBeSimpleBlock() const; - uint64 add_id() const { return add_id_; } - const uint8* additional() const { return additional_; } - uint64 additional_length() const { return additional_length_; } - void set_duration(uint64 duration) { duration_ = duration; } - uint64 duration() const { return duration_; } - const uint8* frame() const { return frame_; } + uint64_t add_id() const { return add_id_; } + const uint8_t* additional() const { return additional_; } + uint64_t additional_length() const { return additional_length_; } + void set_duration(uint64_t duration); + uint64_t duration() const { return duration_; } + bool duration_set() const { return duration_set_; } + const uint8_t* frame() const { return frame_; } void set_is_key(bool key) { is_key_ = key; } bool is_key() const { return is_key_; } - uint64 length() const { return length_; } - void set_track_number(uint64 track_number) { track_number_ = track_number; } - uint64 track_number() const { return track_number_; } - void set_timestamp(uint64 timestamp) { timestamp_ = timestamp; } - uint64 timestamp() const { return timestamp_; } - void set_discard_padding(int64 discard_padding) { + uint64_t length() const { return length_; } + void set_track_number(uint64_t track_number) { track_number_ = track_number; } + uint64_t track_number() const { return track_number_; } + void set_timestamp(uint64_t timestamp) { timestamp_ = timestamp; } + uint64_t timestamp() const { return timestamp_; } + void set_discard_padding(int64_t discard_padding) { discard_padding_ = discard_padding; } - int64 discard_padding() const { return discard_padding_; } - void set_reference_block_timestamp(int64 reference_block_timestamp); - int64 reference_block_timestamp() const { return reference_block_timestamp_; } + int64_t discard_padding() const { return discard_padding_; } + void set_reference_block_timestamp(int64_t reference_block_timestamp); + int64_t reference_block_timestamp() const { + return reference_block_timestamp_; + } bool 
reference_block_timestamp_set() const { return reference_block_timestamp_set_; } private: // Id of the Additional data. - uint64 add_id_; + uint64_t add_id_; // Pointer to additional data. Owned by this class. - uint8* additional_; + uint8_t* additional_; // Length of the additional data. - uint64 additional_length_; + uint64_t additional_length_; // Duration of the frame in nanoseconds. - uint64 duration_; + uint64_t duration_; + + // Flag indicating that |duration_| has been set. Setting duration causes the + // frame to be written out as a Block with BlockDuration instead of as a + // SimpleBlock. + bool duration_set_; // Pointer to the data. Owned by this class. - uint8* frame_; + uint8_t* frame_; // Flag telling if the data should set the key flag of a block. bool is_key_; // Length of the data. - uint64 length_; + uint64_t length_; // Mkv track number the data is associated with. - uint64 track_number_; + uint64_t track_number_; // Timestamp of the data in nanoseconds. - uint64 timestamp_; + uint64_t timestamp_; // Discard padding for the frame. - int64 discard_padding_; + int64_t discard_padding_; // Reference block timestamp. - int64 reference_block_timestamp_; + int64_t reference_block_timestamp_; // Flag indicating if |reference_block_timestamp_| has been set. bool reference_block_timestamp_set_; @@ -164,19 +180,19 @@ class CuePoint { ~CuePoint(); // Returns the size in bytes for the entire CuePoint element. - uint64 Size() const; + uint64_t Size() const; // Output the CuePoint element to the writer. Returns true on success. 
bool Write(IMkvWriter* writer) const; - void set_time(uint64 time) { time_ = time; } - uint64 time() const { return time_; } - void set_track(uint64 track) { track_ = track; } - uint64 track() const { return track_; } - void set_cluster_pos(uint64 cluster_pos) { cluster_pos_ = cluster_pos; } - uint64 cluster_pos() const { return cluster_pos_; } - void set_block_number(uint64 block_number) { block_number_ = block_number; } - uint64 block_number() const { return block_number_; } + void set_time(uint64_t time) { time_ = time; } + uint64_t time() const { return time_; } + void set_track(uint64_t track) { track_ = track; } + uint64_t track() const { return track_; } + void set_cluster_pos(uint64_t cluster_pos) { cluster_pos_ = cluster_pos; } + uint64_t cluster_pos() const { return cluster_pos_; } + void set_block_number(uint64_t block_number) { block_number_ = block_number; } + uint64_t block_number() const { return block_number_; } void set_output_block_number(bool output_block_number) { output_block_number_ = output_block_number; } @@ -184,19 +200,19 @@ class CuePoint { private: // Returns the size in bytes for the payload of the CuePoint element. - uint64 PayloadSize() const; + uint64_t PayloadSize() const; // Absolute timecode according to the segment time base. - uint64 time_; + uint64_t time_; // The Track element associated with the CuePoint. - uint64 track_; + uint64_t track_; // The position of the Cluster containing the Block. - uint64 cluster_pos_; + uint64_t cluster_pos_; // Number of the Block within the Cluster, starting from 1. - uint64 block_number_; + uint64_t block_number_; // If true the muxer will write out the block number for the cue if the // block number is different than the default of 1. Default is set to true. @@ -217,15 +233,15 @@ class Cues { // Returns the cue point by index. Returns NULL if there is no cue point // match. 
- CuePoint* GetCueByIndex(int32 index) const; + CuePoint* GetCueByIndex(int32_t index) const; // Returns the total size of the Cues element - uint64 Size(); + uint64_t Size(); // Output the Cues element to the writer. Returns true on success. bool Write(IMkvWriter* writer) const; - int32 cue_entries_size() const { return cue_entries_size_; } + int32_t cue_entries_size() const { return cue_entries_size_; } void set_output_block_number(bool output_block_number) { output_block_number_ = output_block_number; } @@ -233,10 +249,10 @@ class Cues { private: // Number of allocated elements in |cue_entries_|. - int32 cue_entries_capacity_; + int32_t cue_entries_capacity_; // Number of CuePoints in |cue_entries_|. - int32 cue_entries_size_; + int32_t cue_entries_size_; // CuePoint list. CuePoint** cue_entries_; @@ -258,21 +274,21 @@ class ContentEncAESSettings { ~ContentEncAESSettings() {} // Returns the size in bytes for the ContentEncAESSettings element. - uint64 Size() const; + uint64_t Size() const; // Writes out the ContentEncAESSettings element to |writer|. Returns true on // success. bool Write(IMkvWriter* writer) const; - uint64 cipher_mode() const { return cipher_mode_; } + uint64_t cipher_mode() const { return cipher_mode_; } private: // Returns the size in bytes for the payload of the ContentEncAESSettings // element. - uint64 PayloadSize() const; + uint64_t PayloadSize() const; // Sub elements - uint64 cipher_mode_; + uint64_t cipher_mode_; LIBWEBM_DISALLOW_COPY_AND_ASSIGN(ContentEncAESSettings); }; @@ -291,45 +307,158 @@ class ContentEncoding { // Sets the content encryption id. Copies |length| bytes from |id| to // |enc_key_id_|. Returns true on success. - bool SetEncryptionID(const uint8* id, uint64 length); + bool SetEncryptionID(const uint8_t* id, uint64_t length); // Returns the size in bytes for the ContentEncoding element. - uint64 Size() const; + uint64_t Size() const; // Writes out the ContentEncoding element to |writer|. Returns true on // success. 
bool Write(IMkvWriter* writer) const; - uint64 enc_algo() const { return enc_algo_; } - uint64 encoding_order() const { return encoding_order_; } - uint64 encoding_scope() const { return encoding_scope_; } - uint64 encoding_type() const { return encoding_type_; } + uint64_t enc_algo() const { return enc_algo_; } + uint64_t encoding_order() const { return encoding_order_; } + uint64_t encoding_scope() const { return encoding_scope_; } + uint64_t encoding_type() const { return encoding_type_; } ContentEncAESSettings* enc_aes_settings() { return &enc_aes_settings_; } private: // Returns the size in bytes for the encoding elements. - uint64 EncodingSize(uint64 compresion_size, uint64 encryption_size) const; + uint64_t EncodingSize(uint64_t compresion_size, + uint64_t encryption_size) const; // Returns the size in bytes for the encryption elements. - uint64 EncryptionSize() const; + uint64_t EncryptionSize() const; // Track element names - uint64 enc_algo_; - uint8* enc_key_id_; - uint64 encoding_order_; - uint64 encoding_scope_; - uint64 encoding_type_; + uint64_t enc_algo_; + uint8_t* enc_key_id_; + uint64_t encoding_order_; + uint64_t encoding_scope_; + uint64_t encoding_type_; // ContentEncAESSettings element. ContentEncAESSettings enc_aes_settings_; // Size of the ContentEncKeyID data in bytes. - uint64 enc_key_id_length_; + uint64_t enc_key_id_length_; LIBWEBM_DISALLOW_COPY_AND_ASSIGN(ContentEncoding); }; /////////////////////////////////////////////////////////////// +// Colour element. 
+struct PrimaryChromaticity { + PrimaryChromaticity(float x_val, float y_val) : x(x_val), y(y_val) {} + PrimaryChromaticity() : x(0), y(0) {} + ~PrimaryChromaticity() {} + uint64_t PrimaryChromaticityPayloadSize(libwebm::MkvId x_id, + libwebm::MkvId y_id) const; + bool Write(IMkvWriter* writer, libwebm::MkvId x_id, + libwebm::MkvId y_id) const; + + float x; + float y; +}; + +class MasteringMetadata { + public: + static const float kValueNotPresent; + + MasteringMetadata() + : luminance_max(kValueNotPresent), + luminance_min(kValueNotPresent), + r_(NULL), + g_(NULL), + b_(NULL), + white_point_(NULL) {} + ~MasteringMetadata() { + delete r_; + delete g_; + delete b_; + delete white_point_; + } + + // Returns total size of the MasteringMetadata element. + uint64_t MasteringMetadataSize() const; + bool Write(IMkvWriter* writer) const; + + // Copies non-null chromaticity. + bool SetChromaticity(const PrimaryChromaticity* r, + const PrimaryChromaticity* g, + const PrimaryChromaticity* b, + const PrimaryChromaticity* white_point); + const PrimaryChromaticity* r() const { return r_; } + const PrimaryChromaticity* g() const { return g_; } + const PrimaryChromaticity* b() const { return b_; } + const PrimaryChromaticity* white_point() const { return white_point_; } + + float luminance_max; + float luminance_min; + + private: + // Returns size of MasteringMetadata child elements. 
+ uint64_t PayloadSize() const; + + PrimaryChromaticity* r_; + PrimaryChromaticity* g_; + PrimaryChromaticity* b_; + PrimaryChromaticity* white_point_; +}; + +class Colour { + public: + static const uint64_t kValueNotPresent; + Colour() + : matrix_coefficients(kValueNotPresent), + bits_per_channel(kValueNotPresent), + chroma_subsampling_horz(kValueNotPresent), + chroma_subsampling_vert(kValueNotPresent), + cb_subsampling_horz(kValueNotPresent), + cb_subsampling_vert(kValueNotPresent), + chroma_siting_horz(kValueNotPresent), + chroma_siting_vert(kValueNotPresent), + range(kValueNotPresent), + transfer_characteristics(kValueNotPresent), + primaries(kValueNotPresent), + max_cll(kValueNotPresent), + max_fall(kValueNotPresent), + mastering_metadata_(NULL) {} + ~Colour() { delete mastering_metadata_; } + + // Returns total size of the Colour element. + uint64_t ColourSize() const; + bool Write(IMkvWriter* writer) const; + + // Deep copies |mastering_metadata|. + bool SetMasteringMetadata(const MasteringMetadata& mastering_metadata); + + const MasteringMetadata* mastering_metadata() const { + return mastering_metadata_; + } + + uint64_t matrix_coefficients; + uint64_t bits_per_channel; + uint64_t chroma_subsampling_horz; + uint64_t chroma_subsampling_vert; + uint64_t cb_subsampling_horz; + uint64_t cb_subsampling_vert; + uint64_t chroma_siting_horz; + uint64_t chroma_siting_vert; + uint64_t range; + uint64_t transfer_characteristics; + uint64_t primaries; + uint64_t max_cll; + uint64_t max_fall; + + private: + // Returns size of Colour child elements. + uint64_t PayloadSize() const; + + MasteringMetadata* mastering_metadata_; +}; + +/////////////////////////////////////////////////////////////// // Track element. class Track { public: @@ -342,76 +471,76 @@ class Track { // Returns the ContentEncoding by index. Returns NULL if there is no // ContentEncoding match. 
- ContentEncoding* GetContentEncodingByIndex(uint32 index) const; + ContentEncoding* GetContentEncodingByIndex(uint32_t index) const; // Returns the size in bytes for the payload of the Track element. - virtual uint64 PayloadSize() const; + virtual uint64_t PayloadSize() const; // Returns the size in bytes of the Track element. - virtual uint64 Size() const; + virtual uint64_t Size() const; // Output the Track element to the writer. Returns true on success. virtual bool Write(IMkvWriter* writer) const; // Sets the CodecPrivate element of the Track element. Copies |length| // bytes from |codec_private| to |codec_private_|. Returns true on success. - bool SetCodecPrivate(const uint8* codec_private, uint64 length); + bool SetCodecPrivate(const uint8_t* codec_private, uint64_t length); void set_codec_id(const char* codec_id); const char* codec_id() const { return codec_id_; } - const uint8* codec_private() const { return codec_private_; } + const uint8_t* codec_private() const { return codec_private_; } void set_language(const char* language); const char* language() const { return language_; } - void set_max_block_additional_id(uint64 max_block_additional_id) { + void set_max_block_additional_id(uint64_t max_block_additional_id) { max_block_additional_id_ = max_block_additional_id; } - uint64 max_block_additional_id() const { return max_block_additional_id_; } + uint64_t max_block_additional_id() const { return max_block_additional_id_; } void set_name(const char* name); const char* name() const { return name_; } - void set_number(uint64 number) { number_ = number; } - uint64 number() const { return number_; } - void set_type(uint64 type) { type_ = type; } - uint64 type() const { return type_; } - void set_uid(uint64 uid) { uid_ = uid; } - uint64 uid() const { return uid_; } - void set_codec_delay(uint64 codec_delay) { codec_delay_ = codec_delay; } - uint64 codec_delay() const { return codec_delay_; } - void set_seek_pre_roll(uint64 seek_pre_roll) { + void 
set_number(uint64_t number) { number_ = number; } + uint64_t number() const { return number_; } + void set_type(uint64_t type) { type_ = type; } + uint64_t type() const { return type_; } + void set_uid(uint64_t uid) { uid_ = uid; } + uint64_t uid() const { return uid_; } + void set_codec_delay(uint64_t codec_delay) { codec_delay_ = codec_delay; } + uint64_t codec_delay() const { return codec_delay_; } + void set_seek_pre_roll(uint64_t seek_pre_roll) { seek_pre_roll_ = seek_pre_roll; } - uint64 seek_pre_roll() const { return seek_pre_roll_; } - void set_default_duration(uint64 default_duration) { + uint64_t seek_pre_roll() const { return seek_pre_roll_; } + void set_default_duration(uint64_t default_duration) { default_duration_ = default_duration; } - uint64 default_duration() const { return default_duration_; } + uint64_t default_duration() const { return default_duration_; } - uint64 codec_private_length() const { return codec_private_length_; } - uint32 content_encoding_entries_size() const { + uint64_t codec_private_length() const { return codec_private_length_; } + uint32_t content_encoding_entries_size() const { return content_encoding_entries_size_; } private: // Track element names. char* codec_id_; - uint8* codec_private_; + uint8_t* codec_private_; char* language_; - uint64 max_block_additional_id_; + uint64_t max_block_additional_id_; char* name_; - uint64 number_; - uint64 type_; - uint64 uid_; - uint64 codec_delay_; - uint64 seek_pre_roll_; - uint64 default_duration_; + uint64_t number_; + uint64_t type_; + uint64_t uid_; + uint64_t codec_delay_; + uint64_t seek_pre_roll_; + uint64_t default_duration_; // Size of the CodecPrivate data in bytes. - uint64 codec_private_length_; + uint64_t codec_private_length_; // ContentEncoding element list. ContentEncoding** content_encoding_entries_; // Number of ContentEncoding elements added. 
- uint32 content_encoding_entries_size_; + uint32_t content_encoding_entries_size_; LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Track); }; @@ -437,56 +566,63 @@ class VideoTrack : public Track { // Returns the size in bytes for the payload of the Track element plus the // video specific elements. - virtual uint64 PayloadSize() const; + virtual uint64_t PayloadSize() const; // Output the VideoTrack element to the writer. Returns true on success. virtual bool Write(IMkvWriter* writer) const; // Sets the video's stereo mode. Returns true on success. - bool SetStereoMode(uint64 stereo_mode); + bool SetStereoMode(uint64_t stereo_mode); // Sets the video's alpha mode. Returns true on success. - bool SetAlphaMode(uint64 alpha_mode); - - void set_display_height(uint64 height) { display_height_ = height; } - uint64 display_height() const { return display_height_; } - void set_display_width(uint64 width) { display_width_ = width; } - uint64 display_width() const { return display_width_; } - - void set_crop_left(uint64 crop_left) { crop_left_ = crop_left; } - uint64 crop_left() const { return crop_left_; } - void set_crop_right(uint64 crop_right) { crop_right_ = crop_right; } - uint64 crop_right() const { return crop_right_; } - void set_crop_top(uint64 crop_top) { crop_top_ = crop_top; } - uint64 crop_top() const { return crop_top_; } - void set_crop_bottom(uint64 crop_bottom) { crop_bottom_ = crop_bottom; } - uint64 crop_bottom() const { return crop_bottom_; } + bool SetAlphaMode(uint64_t alpha_mode); + + void set_display_height(uint64_t height) { display_height_ = height; } + uint64_t display_height() const { return display_height_; } + void set_display_width(uint64_t width) { display_width_ = width; } + uint64_t display_width() const { return display_width_; } + + void set_crop_left(uint64_t crop_left) { crop_left_ = crop_left; } + uint64_t crop_left() const { return crop_left_; } + void set_crop_right(uint64_t crop_right) { crop_right_ = crop_right; } + uint64_t crop_right() const 
{ return crop_right_; } + void set_crop_top(uint64_t crop_top) { crop_top_ = crop_top; } + uint64_t crop_top() const { return crop_top_; } + void set_crop_bottom(uint64_t crop_bottom) { crop_bottom_ = crop_bottom; } + uint64_t crop_bottom() const { return crop_bottom_; } void set_frame_rate(double frame_rate) { frame_rate_ = frame_rate; } double frame_rate() const { return frame_rate_; } - void set_height(uint64 height) { height_ = height; } - uint64 height() const { return height_; } - uint64 stereo_mode() { return stereo_mode_; } - uint64 alpha_mode() { return alpha_mode_; } - void set_width(uint64 width) { width_ = width; } - uint64 width() const { return width_; } + void set_height(uint64_t height) { height_ = height; } + uint64_t height() const { return height_; } + uint64_t stereo_mode() { return stereo_mode_; } + uint64_t alpha_mode() { return alpha_mode_; } + void set_width(uint64_t width) { width_ = width; } + uint64_t width() const { return width_; } + + Colour* colour() { return colour_; } + + // Deep copies |colour|. + bool SetColour(const Colour& colour); private: // Returns the size in bytes of the Video element. - uint64 VideoPayloadSize() const; + uint64_t VideoPayloadSize() const; // Video track element names. - uint64 display_height_; - uint64 display_width_; - uint64 crop_left_; - uint64 crop_right_; - uint64 crop_top_; - uint64 crop_bottom_; + uint64_t display_height_; + uint64_t display_width_; + uint64_t crop_left_; + uint64_t crop_right_; + uint64_t crop_top_; + uint64_t crop_bottom_; double frame_rate_; - uint64 height_; - uint64 stereo_mode_; - uint64 alpha_mode_; - uint64 width_; + uint64_t height_; + uint64_t stereo_mode_; + uint64_t alpha_mode_; + uint64_t width_; + + Colour* colour_; LIBWEBM_DISALLOW_COPY_AND_ASSIGN(VideoTrack); }; @@ -501,22 +637,22 @@ class AudioTrack : public Track { // Returns the size in bytes for the payload of the Track element plus the // audio specific elements. 
- virtual uint64 PayloadSize() const; + virtual uint64_t PayloadSize() const; // Output the AudioTrack element to the writer. Returns true on success. virtual bool Write(IMkvWriter* writer) const; - void set_bit_depth(uint64 bit_depth) { bit_depth_ = bit_depth; } - uint64 bit_depth() const { return bit_depth_; } - void set_channels(uint64 channels) { channels_ = channels; } - uint64 channels() const { return channels_; } + void set_bit_depth(uint64_t bit_depth) { bit_depth_ = bit_depth; } + uint64_t bit_depth() const { return bit_depth_; } + void set_channels(uint64_t channels) { channels_ = channels; } + uint64_t channels() const { return channels_; } void set_sample_rate(double sample_rate) { sample_rate_ = sample_rate; } double sample_rate() const { return sample_rate_; } private: // Audio track element names. - uint64 bit_depth_; - uint64 channels_; + uint64_t bit_depth_; + uint64_t channels_; double sample_rate_; LIBWEBM_DISALLOW_COPY_AND_ASSIGN(AudioTrack); @@ -542,32 +678,35 @@ class Tracks { // deleted by the Tracks object. Returns true on success. |number| is the // number to use for the track. |number| must be >= 0. If |number| == 0 // then the muxer will decide on the track number. - bool AddTrack(Track* track, int32 number); + bool AddTrack(Track* track, int32_t number); // Returns the track by index. Returns NULL if there is no track match. - const Track* GetTrackByIndex(uint32 idx) const; + const Track* GetTrackByIndex(uint32_t idx) const; // Search the Tracks and return the track that matches |tn|. Returns NULL // if there is no track match. - Track* GetTrackByNumber(uint64 track_number) const; + Track* GetTrackByNumber(uint64_t track_number) const; // Returns true if the track number is an audio track. - bool TrackIsAudio(uint64 track_number) const; + bool TrackIsAudio(uint64_t track_number) const; // Returns true if the track number is a video track. 
- bool TrackIsVideo(uint64 track_number) const; + bool TrackIsVideo(uint64_t track_number) const; // Output the Tracks element to the writer. Returns true on success. bool Write(IMkvWriter* writer) const; - uint32 track_entries_size() const { return track_entries_size_; } + uint32_t track_entries_size() const { return track_entries_size_; } private: // Track element list. Track** track_entries_; // Number of Track elements added. - uint32 track_entries_size_; + uint32_t track_entries_size_; + + // Whether or not Tracks element has already been written via IMkvWriter. + mutable bool wrote_tracks_; LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Tracks); }; @@ -585,12 +724,12 @@ class Chapter { // Converts the nanosecond start and stop times of this chapter to // their corresponding timecode values, and stores them that way. - void set_time(const Segment& segment, uint64 start_time_ns, - uint64 end_time_ns); + void set_time(const Segment& segment, uint64_t start_time_ns, + uint64_t end_time_ns); // Sets the uid for this chapter. Primarily used to enable // deterministic output from the muxer. - void set_uid(const uint64 uid) { uid_ = uid; } + void set_uid(const uint64_t uid) { uid_ = uid; } // Add a title string to this chapter, per the semantics described // here: @@ -637,7 +776,7 @@ class Chapter { // If |writer| is non-NULL, serialize the Display sub-element of // the Atom into the stream. Returns the Display element size on // success, 0 if error. - uint64 WriteDisplay(IMkvWriter* writer) const; + uint64_t WriteDisplay(IMkvWriter* writer) const; private: char* title_; @@ -670,20 +809,20 @@ class Chapter { // If |writer| is non-NULL, serialize the Atom sub-element into the // stream. Returns the total size of the element on success, 0 if // error. - uint64 WriteAtom(IMkvWriter* writer) const; + uint64_t WriteAtom(IMkvWriter* writer) const; // The string identifier for this chapter (corresponds to WebVTT cue // identifier). char* id_; // Start timecode of the chapter. 
- uint64 start_timecode_; + uint64_t start_timecode_; // Stop timecode of the chapter. - uint64 end_timecode_; + uint64_t end_timecode_; // The binary identifier for this chapter. - uint64 uid_; + uint64_t uid_; // The Atom element can contain multiple Display sub-elements, as // the same logical title can be rendered in different languages. @@ -723,7 +862,7 @@ class Chapters { // If |writer| is non-NULL, serialize the Edition sub-element of the // Chapters element into the stream. Returns the Edition element // size on success, 0 if error. - uint64 WriteEdition(IMkvWriter* writer) const; + uint64_t WriteEdition(IMkvWriter* writer) const; // Total length of the chapters_ array. int chapters_size_; @@ -768,7 +907,7 @@ class Tag { // If |writer| is non-NULL, serialize the SimpleTag sub-element of // the Atom into the stream. Returns the SimpleTag element size on // success, 0 if error. - uint64 Write(IMkvWriter* writer) const; + uint64_t Write(IMkvWriter* writer) const; private: char* tag_name_; @@ -795,7 +934,7 @@ class Tag { // If |writer| is non-NULL, serialize the Tag sub-element into the // stream. Returns the total size of the element on success, 0 if // error. - uint64 Write(IMkvWriter* writer) const; + uint64_t Write(IMkvWriter* writer) const; // The Atom element can contain multiple SimpleTag sub-elements SimpleTag* simple_tags_; @@ -853,7 +992,9 @@ class Cluster { // |timecode| is the absolute timecode of the cluster. |cues_pos| is the // position for the cluster within the segment that should be written in // the cues element. |timecode_scale| is the timecode scale of the segment. 
- Cluster(uint64 timecode, int64 cues_pos, uint64 timecode_scale); + Cluster(uint64_t timecode, int64_t cues_pos, uint64_t timecode_scale, + bool write_last_frame_with_duration = false, + bool fixed_size_timecode = false); ~Cluster(); bool Init(IMkvWriter* ptr_writer); @@ -872,8 +1013,8 @@ class Cluster { // timecode: Absolute (not relative to cluster) timestamp of the // frame, expressed in timecode units. // is_key: Flag telling whether or not this frame is a key frame. - bool AddFrame(const uint8* data, uint64 length, uint64 track_number, - uint64 timecode, // timecode units (absolute) + bool AddFrame(const uint8_t* data, uint64_t length, uint64_t track_number, + uint64_t timecode, // timecode units (absolute) bool is_key); // Adds a frame to be output in the file. The frame is written out through @@ -889,10 +1030,11 @@ class Cluster { // abs_timecode: Absolute (not relative to cluster) timestamp of the // frame, expressed in timecode units. // is_key: Flag telling whether or not this frame is a key frame. - bool AddFrameWithAdditional(const uint8* data, uint64 length, - const uint8* additional, uint64 additional_length, - uint64 add_id, uint64 track_number, - uint64 abs_timecode, bool is_key); + bool AddFrameWithAdditional(const uint8_t* data, uint64_t length, + const uint8_t* additional, + uint64_t additional_length, uint64_t add_id, + uint64_t track_number, uint64_t abs_timecode, + bool is_key); // Adds a frame to be output in the file. The frame is written out through // |writer_| if successful. Returns true on success. @@ -905,9 +1047,10 @@ class Cluster { // abs_timecode: Absolute (not relative to cluster) timestamp of the // frame, expressed in timecode units. // is_key: Flag telling whether or not this frame is a key frame. 
- bool AddFrameWithDiscardPadding(const uint8* data, uint64 length, - int64 discard_padding, uint64 track_number, - uint64 abs_timecode, bool is_key); + bool AddFrameWithDiscardPadding(const uint8_t* data, uint64_t length, + int64_t discard_padding, + uint64_t track_number, uint64_t abs_timecode, + bool is_key); // Writes a frame of metadata to the output medium; returns true on // success. @@ -923,31 +1066,53 @@ class Cluster { // The metadata frame is written as a block group, with a duration // sub-element but no reference time sub-elements (indicating that // it is considered a keyframe, per Matroska semantics). - bool AddMetadata(const uint8* data, uint64 length, uint64 track_number, - uint64 timecode, uint64 duration); + bool AddMetadata(const uint8_t* data, uint64_t length, uint64_t track_number, + uint64_t timecode, uint64_t duration); // Increments the size of the cluster's data in bytes. - void AddPayloadSize(uint64 size); + void AddPayloadSize(uint64_t size); // Closes the cluster so no more data can be written to it. Will update the - // cluster's size if |writer_| is seekable. Returns true on success. + // cluster's size if |writer_| is seekable. Returns true on success. This + // variant of Finalize() fails when |write_last_frame_with_duration_| is set + // to true. bool Finalize(); + // Closes the cluster so no more data can be written to it. Will update the + // cluster's size if |writer_| is seekable. Returns true on success. + // Inputs: + // set_last_frame_duration: Boolean indicating whether or not the duration + // of the last frame should be set. If set to + // false, the |duration| value is ignored and + // |write_last_frame_with_duration_| will not be + // honored. + // duration: Duration of the Cluster in timecode scale. + bool Finalize(bool set_last_frame_duration, uint64_t duration); + // Returns the size in bytes for the entire Cluster element. 
- uint64 Size() const; + uint64_t Size() const; // Given |abs_timecode|, calculates timecode relative to most recent timecode. // Returns -1 on failure, or a relative timecode. - int64 GetRelativeTimecode(int64 abs_timecode) const; - - int64 size_position() const { return size_position_; } - int32 blocks_added() const { return blocks_added_; } - uint64 payload_size() const { return payload_size_; } - int64 position_for_cues() const { return position_for_cues_; } - uint64 timecode() const { return timecode_; } - uint64 timecode_scale() const { return timecode_scale_; } + int64_t GetRelativeTimecode(int64_t abs_timecode) const; + + int64_t size_position() const { return size_position_; } + int32_t blocks_added() const { return blocks_added_; } + uint64_t payload_size() const { return payload_size_; } + int64_t position_for_cues() const { return position_for_cues_; } + uint64_t timecode() const { return timecode_; } + uint64_t timecode_scale() const { return timecode_scale_; } + void set_write_last_frame_with_duration(bool write_last_frame_with_duration) { + write_last_frame_with_duration_ = write_last_frame_with_duration; + } + bool write_last_frame_with_duration() const { + return write_last_frame_with_duration_; + } private: + // Iterator type for the |stored_frames_| map. + typedef std::map<uint64_t, std::list<Frame*> >::iterator FrameMapIterator; + // Utility method that confirms that blocks can still be added, and that the // cluster header has been written. Used by |DoWriteFrame*|. Returns true // when successful. @@ -955,37 +1120,58 @@ class Cluster { // Utility method used by the |DoWriteFrame*| methods that handles the book // keeping required after each block is written. - void PostWriteBlock(uint64 element_size); + void PostWriteBlock(uint64_t element_size); // Does some verification and calls WriteFrame. 
bool DoWriteFrame(const Frame* const frame); + // Either holds back the given frame, or writes it out depending on whether or + // not |write_last_frame_with_duration_| is set. + bool QueueOrWriteFrame(const Frame* const frame); + // Outputs the Cluster header to |writer_|. Returns true on success. bool WriteClusterHeader(); // Number of blocks added to the cluster. - int32 blocks_added_; + int32_t blocks_added_; // Flag telling if the cluster has been closed. bool finalized_; + // Flag indicating whether the cluster's timecode will always be written out + // using 8 bytes. + bool fixed_size_timecode_; + // Flag telling if the cluster's header has been written. bool header_written_; // The size of the cluster elements in bytes. - uint64 payload_size_; + uint64_t payload_size_; // The file position used for cue points. - const int64 position_for_cues_; + const int64_t position_for_cues_; // The file position of the cluster's size element. - int64 size_position_; + int64_t size_position_; // The absolute timecode of the cluster. - const uint64 timecode_; + const uint64_t timecode_; // The timecode scale of the Segment containing the cluster. - const uint64 timecode_scale_; + const uint64_t timecode_scale_; + + // Flag indicating whether the last frame of the cluster should be written as + // a Block with Duration. If set to true, then it will result in holding back + // of frames and the parameterized version of Finalize() must be called to + // finish writing the Cluster. + bool write_last_frame_with_duration_; + + // Map used to hold back frames, if required. Track number is the key. + std::map<uint64_t, std::list<Frame*> > stored_frames_; + + // Map from track number to the timestamp of the last block written for that + // track. + std::map<uint64_t, uint64_t> last_block_timestamp_; // Pointer to the writer object. Not owned by this class. 
IMkvWriter* writer_; @@ -1006,42 +1192,42 @@ class SeekHead { // Adds a seek entry to be written out when the element is finalized. |id| // must be the coded mkv element id. |pos| is the file position of the // element. Returns true on success. - bool AddSeekEntry(uint32 id, uint64 pos); + bool AddSeekEntry(uint32_t id, uint64_t pos); // Writes out SeekHead and SeekEntry elements. Returns true on success. bool Finalize(IMkvWriter* writer) const; // Returns the id of the Seek Entry at the given index. Returns -1 if index is // out of range. - uint32 GetId(int index) const; + uint32_t GetId(int index) const; // Returns the position of the Seek Entry at the given index. Returns -1 if // index is out of range. - uint64 GetPosition(int index) const; + uint64_t GetPosition(int index) const; // Sets the Seek Entry id and position at given index. // Returns true on success. - bool SetSeekEntry(int index, uint32 id, uint64 position); + bool SetSeekEntry(int index, uint32_t id, uint64_t position); // Reserves space by writing out a Void element which will be updated with // a SeekHead element later. Returns true on success. bool Write(IMkvWriter* writer); // We are going to put a cap on the number of Seek Entries. - const static int32 kSeekEntryCount = 5; + const static int32_t kSeekEntryCount = 5; private: // Returns the maximum size in bytes of one seek entry. - uint64 MaxEntrySize() const; + uint64_t MaxEntrySize() const; // Seek entry id element list. - uint32 seek_entry_id_[kSeekEntryCount]; + uint32_t seek_entry_id_[kSeekEntryCount]; // Seek entry pos element list. - uint64 seek_entry_pos_[kSeekEntryCount]; + uint64_t seek_entry_pos_[kSeekEntryCount]; // The file position of SeekHead element. 
- int64 start_pos_; + int64_t start_pos_; LIBWEBM_DISALLOW_COPY_AND_ASSIGN(SeekHead); }; @@ -1067,12 +1253,12 @@ class SegmentInfo { double duration() const { return duration_; } void set_muxing_app(const char* app); const char* muxing_app() const { return muxing_app_; } - void set_timecode_scale(uint64 scale) { timecode_scale_ = scale; } - uint64 timecode_scale() const { return timecode_scale_; } + void set_timecode_scale(uint64_t scale) { timecode_scale_ = scale; } + uint64_t timecode_scale() const { return timecode_scale_; } void set_writing_app(const char* app); const char* writing_app() const { return writing_app_; } - void set_date_utc(int64 date_utc) { date_utc_ = date_utc; } - int64 date_utc() const { return date_utc_; } + void set_date_utc(int64_t date_utc) { date_utc_ = date_utc; } + int64_t date_utc() const { return date_utc_; } private: // Segment Information element names. @@ -1081,14 +1267,14 @@ class SegmentInfo { double duration_; // Set to libwebm-%d.%d.%d.%d, major, minor, build, revision. char* muxing_app_; - uint64 timecode_scale_; + uint64_t timecode_scale_; // Initially set to libwebm-%d.%d.%d.%d, major, minor, build, revision. char* writing_app_; // LLONG_MIN when DateUTC is not set. - int64 date_utc_; + int64_t date_utc_; // The file position of the duration element. - int64 duration_pos_; + int64_t duration_pos_; LIBWEBM_DISALLOW_COPY_AND_ASSIGN(SegmentInfo); }; @@ -1108,8 +1294,8 @@ class Segment { kBeforeClusters = 0x1 // Position Cues before Clusters }; - const static uint32 kDefaultDocTypeVersion = 2; - const static uint64 kDefaultMaxClusterDuration = 30000000000ULL; + const static uint32_t kDefaultDocTypeVersion = 2; + const static uint64_t kDefaultMaxClusterDuration = 30000000000ULL; Segment(); ~Segment(); @@ -1123,13 +1309,13 @@ class Segment { // error. |number| is the number to use for the track. |number| // must be >= 0. If |number| == 0 then the muxer will decide on the // track number. 
- Track* AddTrack(int32 number); + Track* AddTrack(int32_t number); // Adds a Vorbis audio track to the segment. Returns the number of the track // on success, 0 on error. |number| is the number to use for the audio track. // |number| must be >= 0. If |number| == 0 then the muxer will decide on // the track number. - uint64 AddAudioTrack(int32 sample_rate, int32 channels, int32 number); + uint64_t AddAudioTrack(int32_t sample_rate, int32_t channels, int32_t number); // Adds an empty chapter to the chapters of this segment. Returns // non-NULL on success. After adding the chapter, the caller should @@ -1145,7 +1331,7 @@ class Segment { // nanoseconds of the cue's time. |track| is the Track of the Cue. This // function must be called after AddFrame to calculate the correct // BlockNumber for the CuePoint. Returns true on success. - bool AddCuePoint(uint64 timestamp, uint64 track); + bool AddCuePoint(uint64_t timestamp, uint64_t track); // Adds a frame to be output in the file. Returns true on success. // Inputs: @@ -1155,8 +1341,8 @@ class Segment { // functions. // timestamp: Timestamp of the frame in nanoseconds from 0. // is_key: Flag telling whether or not this frame is a key frame. - bool AddFrame(const uint8* data, uint64 length, uint64 track_number, - uint64 timestamp_ns, bool is_key); + bool AddFrame(const uint8_t* data, uint64_t length, uint64_t track_number, + uint64_t timestamp_ns, bool is_key); // Writes a frame of metadata to the output medium; returns true on // success. @@ -1172,8 +1358,8 @@ class Segment { // The metadata frame is written as a block group, with a duration // sub-element but no reference time sub-elements (indicating that // it is considered a keyframe, per Matroska semantics). 
- bool AddMetadata(const uint8* data, uint64 length, uint64 track_number, - uint64 timestamp_ns, uint64 duration_ns); + bool AddMetadata(const uint8_t* data, uint64_t length, uint64_t track_number, + uint64_t timestamp_ns, uint64_t duration_ns); // Writes a frame with additional data to the output medium; returns true on // success. @@ -1188,10 +1374,11 @@ class Segment { // timestamp: Absolute timestamp of the frame, expressed in nanosecond // units. // is_key: Flag telling whether or not this frame is a key frame. - bool AddFrameWithAdditional(const uint8* data, uint64 length, - const uint8* additional, uint64 additional_length, - uint64 add_id, uint64 track_number, - uint64 timestamp, bool is_key); + bool AddFrameWithAdditional(const uint8_t* data, uint64_t length, + const uint8_t* additional, + uint64_t additional_length, uint64_t add_id, + uint64_t track_number, uint64_t timestamp, + bool is_key); // Writes a frame with DiscardPadding to the output medium; returns true on // success. @@ -1204,9 +1391,10 @@ class Segment { // timestamp: Absolute timestamp of the frame, expressed in nanosecond // units. // is_key: Flag telling whether or not this frame is a key frame. - bool AddFrameWithDiscardPadding(const uint8* data, uint64 length, - int64 discard_padding, uint64 track_number, - uint64 timestamp, bool is_key); + bool AddFrameWithDiscardPadding(const uint8_t* data, uint64_t length, + int64_t discard_padding, + uint64_t track_number, uint64_t timestamp, + bool is_key); // Writes a Frame to the output medium. Chooses the correct way of writing // the frame (Block vs SimpleBlock) based on the parameters passed. @@ -1218,7 +1406,7 @@ class Segment { // success, 0 on error. |number| is the number to use for the video track. // |number| must be >= 0. If |number| == 0 then the muxer will decide on // the track number. 
- uint64 AddVideoTrack(int32 width, int32 height, int32 number); + uint64_t AddVideoTrack(int32_t width, int32_t height, int32_t number); // This function must be called after Finalize() if you need a copy of the // output with Cues written before the Clusters. It will return false if the @@ -1237,7 +1425,7 @@ class Segment { // Sets which track to use for the Cues element. Must have added the track // before calling this function. Returns true on success. |track_number| is // returned by the Add track functions. - bool CuesTrack(uint64 track_number); + bool CuesTrack(uint64_t track_number); // This will force the muxer to create a new Cluster when the next frame is // added. @@ -1257,11 +1445,17 @@ class Segment { // Search the Tracks and return the track that matches |track_number|. // Returns NULL if there is no track match. - Track* GetTrackByNumber(uint64 track_number) const; + Track* GetTrackByNumber(uint64_t track_number) const; // Toggles whether to output a cues element. void OutputCues(bool output_cues); + // Toggles whether to write the last frame in each Cluster with Duration. + void AccurateClusterDuration(bool accurate_cluster_duration); + + // Toggles whether to write the Cluster Timecode using exactly 8 bytes. + void UseFixedSizeClusterTimecode(bool fixed_size_cluster_timecode); + // Sets if the muxer will output files in chunks or not. |chunking| is a // flag telling whether or not to turn on chunking. |filename| is the base // filename for the chunk files. 
The header chunk file will be named @@ -1274,15 +1468,15 @@ class Segment { bool SetChunking(bool chunking, const char* filename); bool chunking() const { return chunking_; } - uint64 cues_track() const { return cues_track_; } - void set_max_cluster_duration(uint64 max_cluster_duration) { + uint64_t cues_track() const { return cues_track_; } + void set_max_cluster_duration(uint64_t max_cluster_duration) { max_cluster_duration_ = max_cluster_duration; } - uint64 max_cluster_duration() const { return max_cluster_duration_; } - void set_max_cluster_size(uint64 max_cluster_size) { + uint64_t max_cluster_duration() const { return max_cluster_duration_; } + void set_max_cluster_size(uint64_t max_cluster_size) { max_cluster_size_ = max_cluster_size; } - uint64 max_cluster_size() const { return max_cluster_size_; } + uint64_t max_cluster_size() const { return max_cluster_size_; } void set_mode(Mode mode) { mode_ = mode; } Mode mode() const { return mode_; } CuesPosition cues_position() const { return cues_position_; } @@ -1306,7 +1500,7 @@ class Segment { // Returns the maximum offset within the segment's payload. When chunking // this function is needed to determine offsets of elements within the // chunked files. Returns -1 on error. - int64 MaxOffset(); + int64_t MaxOffset(); // Adds the frame to our frame array. bool QueueFrame(Frame* frame); @@ -1318,7 +1512,7 @@ class Segment { // Output all frames that are queued that have an end time that is less // then |timestamp|. Returns true on success and if there are no frames // queued. - bool WriteFramesLessThan(uint64 timestamp); + bool WriteFramesLessThan(uint64_t timestamp); // Outputs the segment header, Segment Information element, SeekHead element, // and Tracks element to |writer_|. 
@@ -1332,16 +1526,17 @@ class Segment { // 0 = do not create a new cluster, and write frame to the existing cluster // 1 = create a new cluster, and write frame to that new cluster // 2 = create a new cluster, and re-run test - int TestFrame(uint64 track_num, uint64 timestamp_ns, bool key) const; + int TestFrame(uint64_t track_num, uint64_t timestamp_ns, bool key) const; // Create a new cluster, using the earlier of the first enqueued // frame, or the indicated time. Returns true on success. - bool MakeNewCluster(uint64 timestamp_ns); + bool MakeNewCluster(uint64_t timestamp_ns); // Checks whether a new cluster needs to be created, and if so // creates a new cluster. Returns false if creation of a new cluster // was necessary but creation was not successful. - bool DoNewClusterProcessing(uint64 track_num, uint64 timestamp_ns, bool key); + bool DoNewClusterProcessing(uint64_t track_num, uint64_t timestamp_ns, + bool key); // Adjusts Cue Point values (to place Cues before Clusters) so that they // reflect the correct offsets. @@ -1355,7 +1550,8 @@ class Segment { // accounted for. // index - index in the list of Cues which is currently being adjusted. // cue_size - sum of size of all the CuePoint elements. - void MoveCuesBeforeClustersHelper(uint64 diff, int index, uint64* cue_size); + void MoveCuesBeforeClustersHelper(uint64_t diff, int index, + uint64_t* cue_size); // Seeds the random number generator used to make UIDs. unsigned int seed_; @@ -1394,22 +1590,22 @@ class Segment { char* chunking_base_name_; // File position offset where the Clusters end. - int64 cluster_end_offset_; + int64_t cluster_end_offset_; // List of clusters. Cluster** cluster_list_; // Number of cluster pointers allocated in the cluster list. - int32 cluster_list_capacity_; + int32_t cluster_list_capacity_; // Number of clusters in the cluster list. 
- int32 cluster_list_size_; + int32_t cluster_list_size_; // Indicates whether Cues should be written before or after Clusters CuesPosition cues_position_; // Track number that is associated with the cues element for this segment. - uint64 cues_track_; + uint64_t cues_track_; // Tells the muxer to force a new cluster on the next Block. bool force_new_cluster_; @@ -1421,10 +1617,10 @@ class Segment { Frame** frames_; // Number of frame pointers allocated in the frame list. - int32 frames_capacity_; + int32_t frames_capacity_; // Number of frames in the frame list. - int32 frames_size_; + int32_t frames_size_; // Flag telling if a video track has been added to the segment. bool has_video_; @@ -1433,23 +1629,23 @@ class Segment { bool header_written_; // Duration of the last block in nanoseconds. - uint64 last_block_duration_; + uint64_t last_block_duration_; // Last timestamp in nanoseconds added to a cluster. - uint64 last_timestamp_; + uint64_t last_timestamp_; // Last timestamp in nanoseconds by track number added to a cluster. - uint64 last_track_timestamp_[kMaxTrackNumber]; + uint64_t last_track_timestamp_[kMaxTrackNumber]; // Maximum time in nanoseconds for a cluster duration. This variable is a // guideline and some clusters may have a longer duration. Default is 30 // seconds. - uint64 max_cluster_duration_; + uint64_t max_cluster_duration_; // Maximum size in bytes for a cluster. This variable is a guideline and // some clusters may have a larger size. Default is 0 which signifies that // the muxer will decide the size. - uint64 max_cluster_size_; + uint64_t max_cluster_size_; // The mode that segment is in. If set to |kLive| the writer must not // seek backwards. @@ -1462,22 +1658,29 @@ class Segment { // Flag whether or not the muxer should output a Cues element. bool output_cues_; + // Flag whether or not the last frame in each Cluster will have a Duration + // element in it. 
+ bool accurate_cluster_duration_; + + // Flag whether or not to write the Cluster Timecode using exactly 8 bytes. + bool fixed_size_cluster_timecode_; + // The size of the EBML header, used to validate the header if // WriteEbmlHeader() is called more than once. - int32 ebml_header_size_; + int32_t ebml_header_size_; // The file position of the segment's payload. - int64 payload_pos_; + int64_t payload_pos_; // The file position of the element's size. - int64 size_position_; + int64_t size_position_; // Current DocTypeVersion (|doc_type_version_|) and that written in // WriteSegmentHeader(). // WriteEbmlHeader() will be called from Finalize() if |doc_type_version_| // differs from |doc_type_version_written_|. - uint32 doc_type_version_; - uint32 doc_type_version_written_; + uint32_t doc_type_version_; + uint32_t doc_type_version_written_; // Pointer to the writer objects. Not owned by this class. IMkvWriter* writer_cluster_; @@ -1487,6 +1690,6 @@ class Segment { LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Segment); }; -} // end namespace mkvmuxer +} // namespace mkvmuxer -#endif // MKVMUXER_HPP +#endif // MKVMUXER_MKVMUXER_H_ diff --git a/libvpx/third_party/libwebm/mkvmuxertypes.hpp b/libvpx/third_party/libwebm/mkvmuxer/mkvmuxertypes.h index d0fc9fec8..e5db12160 100644 --- a/libvpx/third_party/libwebm/mkvmuxertypes.hpp +++ b/libvpx/third_party/libwebm/mkvmuxer/mkvmuxertypes.h @@ -6,25 +6,23 @@ // in the file PATENTS. All contributing project authors may // be found in the AUTHORS file in the root of the source tree. 
-#ifndef MKVMUXERTYPES_HPP -#define MKVMUXERTYPES_HPP - -// Copied from Chromium basictypes.h -// A macro to disallow the copy constructor and operator= functions -// This should be used in the private: declarations for a class -#define LIBWEBM_DISALLOW_COPY_AND_ASSIGN(TypeName) \ - TypeName(const TypeName&); \ - void operator=(const TypeName&) +#ifndef MKVMUXER_MKVMUXERTYPES_H_ +#define MKVMUXER_MKVMUXERTYPES_H_ namespace mkvmuxer { - typedef unsigned char uint8; typedef short int16; typedef int int32; typedef unsigned int uint32; typedef long long int64; typedef unsigned long long uint64; +} // namespace mkvmuxer -} // end namespace mkvmuxer +// Copied from Chromium basictypes.h +// A macro to disallow the copy constructor and operator= functions +// This should be used in the private: declarations for a class +#define LIBWEBM_DISALLOW_COPY_AND_ASSIGN(TypeName) \ + TypeName(const TypeName&); \ + void operator=(const TypeName&) -#endif // MKVMUXERTYPES_HPP +#endif // MKVMUXER_MKVMUXERTYPES_HPP_ diff --git a/libvpx/third_party/libwebm/mkvmuxerutil.cpp b/libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.cc index 27ab15d51..3562b8ab8 100644 --- a/libvpx/third_party/libwebm/mkvmuxerutil.cpp +++ b/libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.cc @@ -6,7 +6,7 @@ // in the file PATENTS. All contributing project authors may // be found in the AUTHORS file in the root of the source tree. -#include "mkvmuxerutil.hpp" +#include "mkvmuxer/mkvmuxerutil.h" #ifdef __ANDROID__ #include <fcntl.h> @@ -20,13 +20,9 @@ #include <ctime> #include <new> -#include "mkvwriter.hpp" -#include "webmids.hpp" - -#ifdef _MSC_VER -// Disable MSVC warnings that suggest making code non-portable. -#pragma warning(disable : 4996) -#endif +#include "common/webmids.h" +#include "mkvmuxer/mkvmuxer.h" +#include "mkvmuxer/mkvwriter.h" namespace mkvmuxer { @@ -35,64 +31,68 @@ namespace { // Date elements are always 8 octets in size. 
const int kDateElementSize = 8; -uint64 WriteBlock(IMkvWriter* writer, const Frame* const frame, int64 timecode, - uint64 timecode_scale) { - uint64 block_additional_elem_size = 0; - uint64 block_addid_elem_size = 0; - uint64 block_more_payload_size = 0; - uint64 block_more_elem_size = 0; - uint64 block_additions_payload_size = 0; - uint64 block_additions_elem_size = 0; +uint64_t WriteBlock(IMkvWriter* writer, const Frame* const frame, + int64_t timecode, uint64_t timecode_scale) { + uint64_t block_additional_elem_size = 0; + uint64_t block_addid_elem_size = 0; + uint64_t block_more_payload_size = 0; + uint64_t block_more_elem_size = 0; + uint64_t block_additions_payload_size = 0; + uint64_t block_additions_elem_size = 0; if (frame->additional()) { - block_additional_elem_size = EbmlElementSize( - kMkvBlockAdditional, frame->additional(), frame->additional_length()); - block_addid_elem_size = EbmlElementSize(kMkvBlockAddID, frame->add_id()); + block_additional_elem_size = + EbmlElementSize(libwebm::kMkvBlockAdditional, frame->additional(), + frame->additional_length()); + block_addid_elem_size = + EbmlElementSize(libwebm::kMkvBlockAddID, frame->add_id()); block_more_payload_size = block_addid_elem_size + block_additional_elem_size; block_more_elem_size = - EbmlMasterElementSize(kMkvBlockMore, block_more_payload_size) + + EbmlMasterElementSize(libwebm::kMkvBlockMore, block_more_payload_size) + block_more_payload_size; block_additions_payload_size = block_more_elem_size; block_additions_elem_size = - EbmlMasterElementSize(kMkvBlockAdditions, + EbmlMasterElementSize(libwebm::kMkvBlockAdditions, block_additions_payload_size) + block_additions_payload_size; } - uint64 discard_padding_elem_size = 0; + uint64_t discard_padding_elem_size = 0; if (frame->discard_padding() != 0) { discard_padding_elem_size = - EbmlElementSize(kMkvDiscardPadding, frame->discard_padding()); + EbmlElementSize(libwebm::kMkvDiscardPadding, frame->discard_padding()); } - const uint64 
reference_block_timestamp = + const uint64_t reference_block_timestamp = frame->reference_block_timestamp() / timecode_scale; - uint64 reference_block_elem_size = 0; + uint64_t reference_block_elem_size = 0; if (!frame->is_key()) { reference_block_elem_size = - EbmlElementSize(kMkvReferenceBlock, reference_block_timestamp); + EbmlElementSize(libwebm::kMkvReferenceBlock, reference_block_timestamp); } - const uint64 duration = frame->duration() / timecode_scale; - uint64 block_duration_elem_size = 0; + const uint64_t duration = frame->duration() / timecode_scale; + uint64_t block_duration_elem_size = 0; if (duration > 0) - block_duration_elem_size = EbmlElementSize(kMkvBlockDuration, duration); + block_duration_elem_size = + EbmlElementSize(libwebm::kMkvBlockDuration, duration); - const uint64 block_payload_size = 4 + frame->length(); - const uint64 block_elem_size = - EbmlMasterElementSize(kMkvBlock, block_payload_size) + block_payload_size; + const uint64_t block_payload_size = 4 + frame->length(); + const uint64_t block_elem_size = + EbmlMasterElementSize(libwebm::kMkvBlock, block_payload_size) + + block_payload_size; - const uint64 block_group_payload_size = + const uint64_t block_group_payload_size = block_elem_size + block_additions_elem_size + block_duration_elem_size + discard_padding_elem_size + reference_block_elem_size; - if (!WriteEbmlMasterElement(writer, kMkvBlockGroup, + if (!WriteEbmlMasterElement(writer, libwebm::kMkvBlockGroup, block_group_payload_size)) { return 0; } - if (!WriteEbmlMasterElement(writer, kMkvBlock, block_payload_size)) + if (!WriteEbmlMasterElement(writer, libwebm::kMkvBlock, block_payload_size)) return 0; if (WriteUInt(writer, frame->track_number())) @@ -105,77 +105,81 @@ uint64 WriteBlock(IMkvWriter* writer, const Frame* const frame, int64 timecode, if (SerializeInt(writer, 0, 1)) return 0; - if (writer->Write(frame->frame(), static_cast<uint32>(frame->length()))) + if (writer->Write(frame->frame(), 
static_cast<uint32_t>(frame->length()))) return 0; if (frame->additional()) { - if (!WriteEbmlMasterElement(writer, kMkvBlockAdditions, + if (!WriteEbmlMasterElement(writer, libwebm::kMkvBlockAdditions, block_additions_payload_size)) { return 0; } - if (!WriteEbmlMasterElement(writer, kMkvBlockMore, block_more_payload_size)) + if (!WriteEbmlMasterElement(writer, libwebm::kMkvBlockMore, + block_more_payload_size)) return 0; - if (!WriteEbmlElement(writer, kMkvBlockAddID, frame->add_id())) + if (!WriteEbmlElement(writer, libwebm::kMkvBlockAddID, frame->add_id())) return 0; - if (!WriteEbmlElement(writer, kMkvBlockAdditional, frame->additional(), - frame->additional_length())) { + if (!WriteEbmlElement(writer, libwebm::kMkvBlockAdditional, + frame->additional(), frame->additional_length())) { return 0; } } if (frame->discard_padding() != 0 && - !WriteEbmlElement(writer, kMkvDiscardPadding, frame->discard_padding())) { + !WriteEbmlElement(writer, libwebm::kMkvDiscardPadding, + frame->discard_padding())) { return false; } if (!frame->is_key() && - !WriteEbmlElement(writer, kMkvReferenceBlock, + !WriteEbmlElement(writer, libwebm::kMkvReferenceBlock, reference_block_timestamp)) { return false; } - if (duration > 0 && !WriteEbmlElement(writer, kMkvBlockDuration, duration)) { + if (duration > 0 && + !WriteEbmlElement(writer, libwebm::kMkvBlockDuration, duration)) { return false; } - return EbmlMasterElementSize(kMkvBlockGroup, block_group_payload_size) + + return EbmlMasterElementSize(libwebm::kMkvBlockGroup, + block_group_payload_size) + block_group_payload_size; } -uint64 WriteSimpleBlock(IMkvWriter* writer, const Frame* const frame, - int64 timecode) { - if (WriteID(writer, kMkvSimpleBlock)) +uint64_t WriteSimpleBlock(IMkvWriter* writer, const Frame* const frame, + int64_t timecode) { + if (WriteID(writer, libwebm::kMkvSimpleBlock)) return 0; - const int32 size = static_cast<int32>(frame->length()) + 4; + const int32_t size = static_cast<int32_t>(frame->length()) + 4; if 
(WriteUInt(writer, size)) return 0; - if (WriteUInt(writer, static_cast<uint64>(frame->track_number()))) + if (WriteUInt(writer, static_cast<uint64_t>(frame->track_number()))) return 0; if (SerializeInt(writer, timecode, 2)) return 0; - uint64 flags = 0; + uint64_t flags = 0; if (frame->is_key()) flags |= 0x80; if (SerializeInt(writer, flags, 1)) return 0; - if (writer->Write(frame->frame(), static_cast<uint32>(frame->length()))) + if (writer->Write(frame->frame(), static_cast<uint32_t>(frame->length()))) return 0; - return GetUIntSize(kMkvSimpleBlock) + GetCodedUIntSize(size) + 4 + - frame->length(); + return static_cast<uint64_t>(GetUIntSize(libwebm::kMkvSimpleBlock) + + GetCodedUIntSize(size) + 4 + frame->length()); } } // namespace -int32 GetCodedUIntSize(uint64 value) { +int32_t GetCodedUIntSize(uint64_t value) { if (value < 0x000000000000007FULL) return 1; else if (value < 0x0000000000003FFFULL) @@ -193,7 +197,7 @@ int32 GetCodedUIntSize(uint64 value) { return 8; } -int32 GetUIntSize(uint64 value) { +int32_t GetUIntSize(uint64_t value) { if (value < 0x0000000000000100ULL) return 1; else if (value < 0x0000000000010000ULL) @@ -211,26 +215,26 @@ int32 GetUIntSize(uint64 value) { return 8; } -int32 GetIntSize(int64 value) { +int32_t GetIntSize(int64_t value) { // Doubling the requested value ensures positive values with their high bit // set are written with 0-padding to avoid flipping the signedness. - const uint64 v = (value < 0) ? value ^ -1LL : value; + const uint64_t v = (value < 0) ? 
value ^ -1LL : value; return GetUIntSize(2 * v); } -uint64 EbmlMasterElementSize(uint64 type, uint64 value) { +uint64_t EbmlMasterElementSize(uint64_t type, uint64_t value) { // Size of EBML ID - int32 ebml_size = GetUIntSize(type); + int32_t ebml_size = GetUIntSize(type); // Datasize ebml_size += GetCodedUIntSize(value); - return ebml_size; + return static_cast<uint64_t>(ebml_size); } -uint64 EbmlElementSize(uint64 type, int64 value) { +uint64_t EbmlElementSize(uint64_t type, int64_t value) { // Size of EBML ID - int32 ebml_size = GetUIntSize(type); + int32_t ebml_size = GetUIntSize(type); // Datasize ebml_size += GetIntSize(value); @@ -238,15 +242,20 @@ uint64 EbmlElementSize(uint64 type, int64 value) { // Size of Datasize ebml_size++; - return ebml_size; + return static_cast<uint64_t>(ebml_size); +} + +uint64_t EbmlElementSize(uint64_t type, uint64_t value) { + return EbmlElementSize(type, value, 0); } -uint64 EbmlElementSize(uint64 type, uint64 value) { +uint64_t EbmlElementSize(uint64_t type, uint64_t value, uint64_t fixed_size) { // Size of EBML ID - int32 ebml_size = GetUIntSize(type); + uint64_t ebml_size = static_cast<uint64_t>(GetUIntSize(type)); // Datasize - ebml_size += GetUIntSize(value); + ebml_size += + (fixed_size > 0) ? 
fixed_size : static_cast<uint64_t>(GetUIntSize(value)); // Size of Datasize ebml_size++; @@ -254,9 +263,9 @@ uint64 EbmlElementSize(uint64 type, uint64 value) { return ebml_size; } -uint64 EbmlElementSize(uint64 type, float /* value */) { +uint64_t EbmlElementSize(uint64_t type, float /* value */) { // Size of EBML ID - uint64 ebml_size = GetUIntSize(type); + uint64_t ebml_size = static_cast<uint64_t>(GetUIntSize(type)); // Datasize ebml_size += sizeof(float); @@ -267,12 +276,12 @@ uint64 EbmlElementSize(uint64 type, float /* value */) { return ebml_size; } -uint64 EbmlElementSize(uint64 type, const char* value) { +uint64_t EbmlElementSize(uint64_t type, const char* value) { if (!value) return 0; // Size of EBML ID - uint64 ebml_size = GetUIntSize(type); + uint64_t ebml_size = static_cast<uint64_t>(GetUIntSize(type)); // Datasize ebml_size += strlen(value); @@ -283,12 +292,12 @@ uint64 EbmlElementSize(uint64 type, const char* value) { return ebml_size; } -uint64 EbmlElementSize(uint64 type, const uint8* value, uint64 size) { +uint64_t EbmlElementSize(uint64_t type, const uint8_t* value, uint64_t size) { if (!value) return 0; // Size of EBML ID - uint64 ebml_size = GetUIntSize(type); + uint64_t ebml_size = static_cast<uint64_t>(GetUIntSize(type)); // Datasize ebml_size += size; @@ -299,9 +308,9 @@ uint64 EbmlElementSize(uint64 type, const uint8* value, uint64 size) { return ebml_size; } -uint64 EbmlDateElementSize(uint64 type) { +uint64_t EbmlDateElementSize(uint64_t type) { // Size of EBML ID - uint64 ebml_size = GetUIntSize(type); + uint64_t ebml_size = static_cast<uint64_t>(GetUIntSize(type)); // Datasize ebml_size += kDateElementSize; @@ -312,18 +321,18 @@ uint64 EbmlDateElementSize(uint64 type) { return ebml_size; } -int32 SerializeInt(IMkvWriter* writer, int64 value, int32 size) { +int32_t SerializeInt(IMkvWriter* writer, int64_t value, int32_t size) { if (!writer || size < 1 || size > 8) return -1; - for (int32 i = 1; i <= size; ++i) { - const int32 
byte_count = size - i; - const int32 bit_count = byte_count * 8; + for (int32_t i = 1; i <= size; ++i) { + const int32_t byte_count = size - i; + const int32_t bit_count = byte_count * 8; - const int64 bb = value >> bit_count; - const uint8 b = static_cast<uint8>(bb); + const int64_t bb = value >> bit_count; + const uint8_t b = static_cast<uint8_t>(bb); - const int32 status = writer->Write(&b, 1); + const int32_t status = writer->Write(&b, 1); if (status < 0) return status; @@ -332,26 +341,26 @@ int32 SerializeInt(IMkvWriter* writer, int64 value, int32 size) { return 0; } -int32 SerializeFloat(IMkvWriter* writer, float f) { +int32_t SerializeFloat(IMkvWriter* writer, float f) { if (!writer) return -1; - assert(sizeof(uint32) == sizeof(float)); + assert(sizeof(uint32_t) == sizeof(float)); // This union is merely used to avoid a reinterpret_cast from float& to // uint32& which will result in violation of strict aliasing. union U32 { - uint32 u32; + uint32_t u32; float f; } value; value.f = f; - for (int32 i = 1; i <= 4; ++i) { - const int32 byte_count = 4 - i; - const int32 bit_count = byte_count * 8; + for (int32_t i = 1; i <= 4; ++i) { + const int32_t byte_count = 4 - i; + const int32_t bit_count = byte_count * 8; - const uint8 byte = static_cast<uint8>(value.u32 >> bit_count); + const uint8_t byte = static_cast<uint8_t>(value.u32 >> bit_count); - const int32 status = writer->Write(&byte, 1); + const int32_t status = writer->Write(&byte, 1); if (status < 0) return status; @@ -360,21 +369,21 @@ int32 SerializeFloat(IMkvWriter* writer, float f) { return 0; } -int32 WriteUInt(IMkvWriter* writer, uint64 value) { +int32_t WriteUInt(IMkvWriter* writer, uint64_t value) { if (!writer) return -1; - int32 size = GetCodedUIntSize(value); + int32_t size = GetCodedUIntSize(value); return WriteUIntSize(writer, value, size); } -int32 WriteUIntSize(IMkvWriter* writer, uint64 value, int32 size) { +int32_t WriteUIntSize(IMkvWriter* writer, uint64_t value, int32_t size) { if (!writer 
|| size < 0 || size > 8) return -1; if (size > 0) { - const uint64 bit = 1LL << (size * 7); + const uint64_t bit = 1LL << (size * 7); if (value > (bit - 2)) return -1; @@ -382,11 +391,11 @@ int32 WriteUIntSize(IMkvWriter* writer, uint64 value, int32 size) { value |= bit; } else { size = 1; - int64 bit; + int64_t bit; for (;;) { bit = 1LL << (size * 7); - const uint64 max = bit - 2; + const uint64_t max = bit - 2; if (value <= max) break; @@ -403,18 +412,18 @@ int32 WriteUIntSize(IMkvWriter* writer, uint64 value, int32 size) { return SerializeInt(writer, value, size); } -int32 WriteID(IMkvWriter* writer, uint64 type) { +int32_t WriteID(IMkvWriter* writer, uint64_t type) { if (!writer) return -1; writer->ElementStartNotify(type, writer->Position()); - const int32 size = GetUIntSize(type); + const int32_t size = GetUIntSize(type); return SerializeInt(writer, type, size); } -bool WriteEbmlMasterElement(IMkvWriter* writer, uint64 type, uint64 size) { +bool WriteEbmlMasterElement(IMkvWriter* writer, uint64_t type, uint64_t size) { if (!writer) return false; @@ -427,41 +436,51 @@ bool WriteEbmlMasterElement(IMkvWriter* writer, uint64 type, uint64 size) { return true; } -bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value) { +bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, uint64_t value) { + return WriteEbmlElement(writer, type, value, 0); +} + +bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, uint64_t value, + uint64_t fixed_size) { if (!writer) return false; if (WriteID(writer, type)) return false; - const uint64 size = GetUIntSize(value); + uint64_t size = static_cast<uint64_t>(GetUIntSize(value)); + if (fixed_size > 0) { + if (size > fixed_size) + return false; + size = fixed_size; + } if (WriteUInt(writer, size)) return false; - if (SerializeInt(writer, value, static_cast<int32>(size))) + if (SerializeInt(writer, value, static_cast<int32_t>(size))) return false; return true; } -bool WriteEbmlElement(IMkvWriter* writer, uint64 type, 
int64 value) { +bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, int64_t value) { if (!writer) return false; if (WriteID(writer, type)) return 0; - const uint64 size = GetIntSize(value); + const uint64_t size = GetIntSize(value); if (WriteUInt(writer, size)) return false; - if (SerializeInt(writer, value, static_cast<int32>(size))) + if (SerializeInt(writer, value, static_cast<int32_t>(size))) return false; return true; } -bool WriteEbmlElement(IMkvWriter* writer, uint64 type, float value) { +bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, float value) { if (!writer) return false; @@ -477,25 +496,25 @@ bool WriteEbmlElement(IMkvWriter* writer, uint64 type, float value) { return true; } -bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const char* value) { +bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, const char* value) { if (!writer || !value) return false; if (WriteID(writer, type)) return false; - const uint64 length = strlen(value); + const uint64_t length = strlen(value); if (WriteUInt(writer, length)) return false; - if (writer->Write(value, static_cast<const uint32>(length))) + if (writer->Write(value, static_cast<const uint32_t>(length))) return false; return true; } -bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const uint8* value, - uint64 size) { +bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, const uint8_t* value, + uint64_t size) { if (!writer || !value || size < 1) return false; @@ -505,13 +524,13 @@ bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const uint8* value, if (WriteUInt(writer, size)) return false; - if (writer->Write(value, static_cast<uint32>(size))) + if (writer->Write(value, static_cast<uint32_t>(size))) return false; return true; } -bool WriteEbmlDateElement(IMkvWriter* writer, uint64 type, int64 value) { +bool WriteEbmlDateElement(IMkvWriter* writer, uint64_t type, int64_t value) { if (!writer) return false; @@ -527,8 +546,8 @@ bool WriteEbmlDateElement(IMkvWriter* writer, 
uint64 type, int64 value) { return true; } -uint64 WriteFrame(IMkvWriter* writer, const Frame* const frame, - Cluster* cluster) { +uint64_t WriteFrame(IMkvWriter* writer, const Frame* const frame, + Cluster* cluster) { if (!writer || !frame || !frame->IsValid() || !cluster || !cluster->timecode_scale()) return 0; @@ -537,7 +556,7 @@ uint64 WriteFrame(IMkvWriter* writer, const Frame* const frame, // timecode for the cluster itself (remember that block timecode // is a signed, 16-bit integer). However, as a simplification we // only permit non-negative cluster-relative timecodes for blocks. - const int64 relative_timecode = cluster->GetRelativeTimecode( + const int64_t relative_timecode = cluster->GetRelativeTimecode( frame->timestamp() / cluster->timecode_scale()); if (relative_timecode < 0 || relative_timecode > kMaxBlockTimecode) return 0; @@ -548,53 +567,53 @@ uint64 WriteFrame(IMkvWriter* writer, const Frame* const frame, cluster->timecode_scale()); } -uint64 WriteVoidElement(IMkvWriter* writer, uint64 size) { +uint64_t WriteVoidElement(IMkvWriter* writer, uint64_t size) { if (!writer) return false; // Subtract one for the void ID and the coded size. 
- uint64 void_entry_size = size - 1 - GetCodedUIntSize(size - 1); - uint64 void_size = - EbmlMasterElementSize(kMkvVoid, void_entry_size) + void_entry_size; + uint64_t void_entry_size = size - 1 - GetCodedUIntSize(size - 1); + uint64_t void_size = + EbmlMasterElementSize(libwebm::kMkvVoid, void_entry_size) + + void_entry_size; if (void_size != size) return 0; - const int64 payload_position = writer->Position(); + const int64_t payload_position = writer->Position(); if (payload_position < 0) return 0; - if (WriteID(writer, kMkvVoid)) + if (WriteID(writer, libwebm::kMkvVoid)) return 0; if (WriteUInt(writer, void_entry_size)) return 0; - const uint8 value = 0; - for (int32 i = 0; i < static_cast<int32>(void_entry_size); ++i) { + const uint8_t value = 0; + for (int32_t i = 0; i < static_cast<int32_t>(void_entry_size); ++i) { if (writer->Write(&value, 1)) return 0; } - const int64 stop_position = writer->Position(); + const int64_t stop_position = writer->Position(); if (stop_position < 0 || - stop_position - payload_position != static_cast<int64>(void_size)) + stop_position - payload_position != static_cast<int64_t>(void_size)) return 0; return void_size; } -void GetVersion(int32* major, int32* minor, int32* build, int32* revision) { +void GetVersion(int32_t* major, int32_t* minor, int32_t* build, + int32_t* revision) { *major = 0; *minor = 2; *build = 1; *revision = 0; } -} // namespace mkvmuxer - -mkvmuxer::uint64 mkvmuxer::MakeUID(unsigned int* seed) { - uint64 uid = 0; +uint64_t MakeUID(unsigned int* seed) { + uint64_t uid = 0; #ifdef __MINGW32__ srand(*seed); @@ -606,24 +625,26 @@ mkvmuxer::uint64 mkvmuxer::MakeUID(unsigned int* seed) { // TODO(fgalligan): Move random number generation to platform specific code. 
#ifdef _MSC_VER (void)seed; - const int32 nn = rand(); + const int32_t nn = rand(); #elif __ANDROID__ - int32 temp_num = 1; + int32_t temp_num = 1; int fd = open("/dev/urandom", O_RDONLY); if (fd != -1) { - read(fd, &temp_num, sizeof(int32)); + read(fd, &temp_num, sizeof(temp_num)); close(fd); } - const int32 nn = temp_num; + const int32_t nn = temp_num; #elif defined __MINGW32__ - const int32 nn = rand(); + const int32_t nn = rand(); #else - const int32 nn = rand_r(seed); + const int32_t nn = rand_r(seed); #endif - const int32 n = 0xFF & (nn >> 4); // throw away low-order bits + const int32_t n = 0xFF & (nn >> 4); // throw away low-order bits uid |= n; } return uid; } + +} // namespace mkvmuxer diff --git a/libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.h b/libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.h new file mode 100644 index 000000000..0e21a2dcb --- /dev/null +++ b/libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.h @@ -0,0 +1,95 @@ +// Copyright (c) 2012 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +#ifndef MKVMUXER_MKVMUXERUTIL_H_ +#define MKVMUXER_MKVMUXERUTIL_H_ + +#include <stdint.h> + +namespace mkvmuxer { +class Cluster; +class Frame; +class IMkvWriter; + +const uint64_t kEbmlUnknownValue = 0x01FFFFFFFFFFFFFFULL; +const int64_t kMaxBlockTimecode = 0x07FFFLL; + +// Writes out |value| in Big Endian order. Returns 0 on success. +int32_t SerializeInt(IMkvWriter* writer, int64_t value, int32_t size); + +// Returns the size in bytes of the element. 
+int32_t GetUIntSize(uint64_t value); +int32_t GetIntSize(int64_t value); +int32_t GetCodedUIntSize(uint64_t value); +uint64_t EbmlMasterElementSize(uint64_t type, uint64_t value); +uint64_t EbmlElementSize(uint64_t type, int64_t value); +uint64_t EbmlElementSize(uint64_t type, uint64_t value); +uint64_t EbmlElementSize(uint64_t type, float value); +uint64_t EbmlElementSize(uint64_t type, const char* value); +uint64_t EbmlElementSize(uint64_t type, const uint8_t* value, uint64_t size); +uint64_t EbmlDateElementSize(uint64_t type); + +// Returns the size in bytes of the element assuming that the element was +// written using |fixed_size| bytes. If |fixed_size| is set to zero, then it +// computes the necessary number of bytes based on |value|. +uint64_t EbmlElementSize(uint64_t type, uint64_t value, uint64_t fixed_size); + +// Creates an EBML coded number from |value| and writes it out. The size of +// the coded number is determined by the value of |value|. |value| must not +// be in a coded form. Returns 0 on success. +int32_t WriteUInt(IMkvWriter* writer, uint64_t value); + +// Creates an EBML coded number from |value| and writes it out. The size of +// the coded number is determined by the value of |size|. |value| must not +// be in a coded form. Returns 0 on success. +int32_t WriteUIntSize(IMkvWriter* writer, uint64_t value, int32_t size); + +// Output an Mkv master element. Returns true if the element was written. +bool WriteEbmlMasterElement(IMkvWriter* writer, uint64_t value, uint64_t size); + +// Outputs an Mkv ID, calls |IMkvWriter::ElementStartNotify|, and passes the +// ID to |SerializeInt|. Returns 0 on success. +int32_t WriteID(IMkvWriter* writer, uint64_t type); + +// Output an Mkv non-master element. Returns true if the element was written. 
+bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, uint64_t value); +bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, int64_t value); +bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, float value); +bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, const char* value); +bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, const uint8_t* value, + uint64_t size); +bool WriteEbmlDateElement(IMkvWriter* writer, uint64_t type, int64_t value); + +// Output an Mkv non-master element using fixed size. The element will be +// written out using exactly |fixed_size| bytes. If |fixed_size| is set to zero +// then it computes the necessary number of bytes based on |value|. Returns true +// if the element was written. +bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, uint64_t value, + uint64_t fixed_size); + +// Output a Mkv Frame. It decides the correct element to write (Block vs +// SimpleBlock) based on the parameters of the Frame. +uint64_t WriteFrame(IMkvWriter* writer, const Frame* const frame, + Cluster* cluster); + +// Output a void element. |size| must be the entire size in bytes that will be +// void. The function will calculate the size of the void header and subtract +// it from |size|. +uint64_t WriteVoidElement(IMkvWriter* writer, uint64_t size); + +// Returns the version number of the muxer in |major|, |minor|, |build|, +// and |revision|. +void GetVersion(int32_t* major, int32_t* minor, int32_t* build, + int32_t* revision); + +// Returns a random number to be used for UID, using |seed| to seed +// the random-number generator (see POSIX rand_r() for semantics). 
+uint64_t MakeUID(unsigned int* seed); + +} // namespace mkvmuxer + +#endif // MKVMUXER_MKVMUXERUTIL_H_ diff --git a/libvpx/third_party/libwebm/mkvwriter.cpp b/libvpx/third_party/libwebm/mkvmuxer/mkvwriter.cc index 75d4350c7..ca48e149c 100644 --- a/libvpx/third_party/libwebm/mkvwriter.cpp +++ b/libvpx/third_party/libwebm/mkvmuxer/mkvwriter.cc @@ -6,14 +6,12 @@ // in the file PATENTS. All contributing project authors may // be found in the AUTHORS file in the root of the source tree. -#include "mkvwriter.hpp" +#include "mkvmuxer/mkvwriter.h" #ifdef _MSC_VER #include <share.h> // for _SH_DENYWR #endif -#include <new> - namespace mkvmuxer { MkvWriter::MkvWriter() : file_(NULL), writer_owns_file_(true) {} diff --git a/libvpx/third_party/libwebm/mkvwriter.hpp b/libvpx/third_party/libwebm/mkvmuxer/mkvwriter.h index 684560c92..4227c6374 100644 --- a/libvpx/third_party/libwebm/mkvwriter.hpp +++ b/libvpx/third_party/libwebm/mkvmuxer/mkvwriter.h @@ -6,13 +6,13 @@ // in the file PATENTS. All contributing project authors may // be found in the AUTHORS file in the root of the source tree. -#ifndef MKVWRITER_HPP -#define MKVWRITER_HPP +#ifndef MKVMUXER_MKVWRITER_H_ +#define MKVMUXER_MKVWRITER_H_ #include <stdio.h> -#include "mkvmuxer.hpp" -#include "mkvmuxertypes.hpp" +#include "mkvmuxer/mkvmuxer.h" +#include "mkvmuxer/mkvmuxertypes.h" namespace mkvmuxer { @@ -46,6 +46,6 @@ class MkvWriter : public IMkvWriter { LIBWEBM_DISALLOW_COPY_AND_ASSIGN(MkvWriter); }; -} // end namespace mkvmuxer +} // namespace mkvmuxer -#endif // MKVWRITER_HPP +#endif // MKVMUXER_MKVWRITER_H_ diff --git a/libvpx/third_party/libwebm/mkvmuxerutil.hpp b/libvpx/third_party/libwebm/mkvmuxerutil.hpp deleted file mode 100644 index e31857694..000000000 --- a/libvpx/third_party/libwebm/mkvmuxerutil.hpp +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright (c) 2012 The WebM project authors. All Rights Reserved. 
-// -// Use of this source code is governed by a BSD-style license -// that can be found in the LICENSE file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. - -#ifndef MKVMUXERUTIL_HPP -#define MKVMUXERUTIL_HPP - -#include "mkvmuxer.hpp" -#include "mkvmuxertypes.hpp" - -namespace mkvmuxer { - -class IMkvWriter; - -const uint64 kEbmlUnknownValue = 0x01FFFFFFFFFFFFFFULL; -const int64 kMaxBlockTimecode = 0x07FFFLL; - -// Writes out |value| in Big Endian order. Returns 0 on success. -int32 SerializeInt(IMkvWriter* writer, int64 value, int32 size); - -// Returns the size in bytes of the element. -int32 GetUIntSize(uint64 value); -int32 GetIntSize(int64 value); -int32 GetCodedUIntSize(uint64 value); -uint64 EbmlMasterElementSize(uint64 type, uint64 value); -uint64 EbmlElementSize(uint64 type, int64 value); -uint64 EbmlElementSize(uint64 type, uint64 value); -uint64 EbmlElementSize(uint64 type, float value); -uint64 EbmlElementSize(uint64 type, const char* value); -uint64 EbmlElementSize(uint64 type, const uint8* value, uint64 size); -uint64 EbmlDateElementSize(uint64 type); - -// Creates an EBML coded number from |value| and writes it out. The size of -// the coded number is determined by the value of |value|. |value| must not -// be in a coded form. Returns 0 on success. -int32 WriteUInt(IMkvWriter* writer, uint64 value); - -// Creates an EBML coded number from |value| and writes it out. The size of -// the coded number is determined by the value of |size|. |value| must not -// be in a coded form. Returns 0 on success. -int32 WriteUIntSize(IMkvWriter* writer, uint64 value, int32 size); - -// Output an Mkv master element. Returns true if the element was written. 
-bool WriteEbmlMasterElement(IMkvWriter* writer, uint64 value, uint64 size); - -// Outputs an Mkv ID, calls |IMkvWriter::ElementStartNotify|, and passes the -// ID to |SerializeInt|. Returns 0 on success. -int32 WriteID(IMkvWriter* writer, uint64 type); - -// Output an Mkv non-master element. Returns true if the element was written. -bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value); -bool WriteEbmlElement(IMkvWriter* writer, uint64 type, int64 value); -bool WriteEbmlElement(IMkvWriter* writer, uint64 type, float value); -bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const char* value); -bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const uint8* value, - uint64 size); -bool WriteEbmlDateElement(IMkvWriter* writer, uint64 type, int64 value); - -// Output a Mkv Frame. It decides the correct element to write (Block vs -// SimpleBlock) based on the parameters of the Frame. -uint64 WriteFrame(IMkvWriter* writer, const Frame* const frame, - Cluster* cluster); - -// Output a void element. |size| must be the entire size in bytes that will be -// void. The function will calculate the size of the void header and subtract -// it from |size|. -uint64 WriteVoidElement(IMkvWriter* writer, uint64 size); - -// Returns the version number of the muxer in |major|, |minor|, |build|, -// and |revision|. -void GetVersion(int32* major, int32* minor, int32* build, int32* revision); - -// Returns a random number to be used for UID, using |seed| to seed -// the random-number generator (see POSIX rand_r() for semantics). -uint64 MakeUID(unsigned int* seed); - -} // end namespace mkvmuxer - -#endif // MKVMUXERUTIL_HPP diff --git a/libvpx/third_party/libwebm/mkvparser.cpp b/libvpx/third_party/libwebm/mkvparser/mkvparser.cc index f2855d506..21801154d 100644 --- a/libvpx/third_party/libwebm/mkvparser.cpp +++ b/libvpx/third_party/libwebm/mkvparser/mkvparser.cc @@ -5,8 +5,7 @@ // tree. 
An additional intellectual property rights grant can be found // in the file PATENTS. All contributing project authors may // be found in the AUTHORS file in the root of the source tree. - -#include "mkvparser.hpp" +#include "mkvparser/mkvparser.h" #if defined(_MSC_VER) && _MSC_VER < 1800 #include <float.h> // _isnan() / _finite() @@ -14,19 +13,18 @@ #endif #include <cassert> +#include <cfloat> #include <climits> #include <cmath> #include <cstring> +#include <memory> #include <new> -#include "webmids.hpp" - -#ifdef _MSC_VER -// Disable MSVC warnings that suggest making code non-portable. -#pragma warning(disable : 4996) -#endif +#include "common/webmids.h" namespace mkvparser { +const float MasteringMetadata::kValueNotPresent = FLT_MAX; +const long long Colour::kValueNotPresent = LLONG_MAX; #ifdef MSC_COMPAT inline bool isnan(double val) { return !!_isnan(val); } @@ -38,8 +36,9 @@ inline bool isinf(double val) { return std::isinf(val); } IMkvReader::~IMkvReader() {} -template<typename Type> Type* SafeArrayAlloc(unsigned long long num_elements, - unsigned long long element_size) { +template <typename Type> +Type* SafeArrayAlloc(unsigned long long num_elements, + unsigned long long element_size) { if (num_elements == 0 || element_size == 0) return NULL; @@ -350,9 +349,8 @@ long UnserializeString(IMkvReader* pReader, long long pos, long long size, return 0; } -long ParseElementHeader(IMkvReader* pReader, long long& pos, - long long stop, long long& id, - long long& size) { +long ParseElementHeader(IMkvReader* pReader, long long& pos, long long stop, + long long& id, long long& size) { if (stop >= 0 && pos >= stop) return E_FILE_FORMAT_INVALID; @@ -386,7 +384,7 @@ long ParseElementHeader(IMkvReader* pReader, long long& pos, // pos now designates payload - if (stop >= 0 && pos >= stop) + if (stop >= 0 && pos > stop) return E_FILE_FORMAT_INVALID; return 0; // success @@ -520,7 +518,6 @@ long long EBMLHeader::Parse(IMkvReader* pReader, long long& pos) { return status; pos 
= 0; - long long end = (available >= 1024) ? 1024 : available; // Scan until we find what looks like the first byte of the EBML header. const long long kMaxScanBytes = (available >= 1024) ? 1024 : available; @@ -544,8 +541,10 @@ long long EBMLHeader::Parse(IMkvReader* pReader, long long& pos) { long len = 0; const long long ebml_id = ReadID(pReader, pos, len); - // TODO(tomfinegan): Move Matroska ID constants into a common namespace. - if (len != 4 || ebml_id != mkvmuxer::kMkvEBML) + if (ebml_id == E_BUFFER_NOT_FULL) + return E_BUFFER_NOT_FULL; + + if (len != 4 || ebml_id != libwebm::kMkvEBML) return E_FILE_FORMAT_INVALID; // Move read pos forward to the EBML header size field. @@ -584,7 +583,7 @@ long long EBMLHeader::Parse(IMkvReader* pReader, long long& pos) { if ((available - pos) < result) return pos + result; - end = pos + result; + const long long end = pos + result; Init(); @@ -599,27 +598,27 @@ long long EBMLHeader::Parse(IMkvReader* pReader, long long& pos) { if (size == 0) return E_FILE_FORMAT_INVALID; - if (id == mkvmuxer::kMkvEBMLVersion) { + if (id == libwebm::kMkvEBMLVersion) { m_version = UnserializeUInt(pReader, pos, size); if (m_version <= 0) return E_FILE_FORMAT_INVALID; - } else if (id == mkvmuxer::kMkvEBMLReadVersion) { + } else if (id == libwebm::kMkvEBMLReadVersion) { m_readVersion = UnserializeUInt(pReader, pos, size); if (m_readVersion <= 0) return E_FILE_FORMAT_INVALID; - } else if (id == mkvmuxer::kMkvEBMLMaxIDLength) { + } else if (id == libwebm::kMkvEBMLMaxIDLength) { m_maxIdLength = UnserializeUInt(pReader, pos, size); if (m_maxIdLength <= 0) return E_FILE_FORMAT_INVALID; - } else if (id == mkvmuxer::kMkvEBMLMaxSizeLength) { + } else if (id == libwebm::kMkvEBMLMaxSizeLength) { m_maxSizeLength = UnserializeUInt(pReader, pos, size); if (m_maxSizeLength <= 0) return E_FILE_FORMAT_INVALID; - } else if (id == mkvmuxer::kMkvDocType) { + } else if (id == libwebm::kMkvDocType) { if (m_docType) return E_FILE_FORMAT_INVALID; @@ -627,12 +626,12 
@@ long long EBMLHeader::Parse(IMkvReader* pReader, long long& pos) { if (status) // error return status; - } else if (id == mkvmuxer::kMkvDocTypeVersion) { + } else if (id == libwebm::kMkvDocTypeVersion) { m_docTypeVersion = UnserializeUInt(pReader, pos, size); if (m_docTypeVersion <= 0) return E_FILE_FORMAT_INVALID; - } else if (id == mkvmuxer::kMkvDocTypeReadVersion) { + } else if (id == libwebm::kMkvDocTypeReadVersion) { m_docTypeReadVersion = UnserializeUInt(pReader, pos, size); if (m_docTypeReadVersion <= 0) @@ -650,8 +649,8 @@ long long EBMLHeader::Parse(IMkvReader* pReader, long long& pos) { return E_FILE_FORMAT_INVALID; // Make sure EBMLMaxIDLength and EBMLMaxSizeLength are valid. - if (m_maxIdLength <= 0 || m_maxIdLength > 4 || - m_maxSizeLength <= 0 || m_maxSizeLength > 8) + if (m_maxIdLength <= 0 || m_maxIdLength > 4 || m_maxSizeLength <= 0 || + m_maxSizeLength > 8) return E_FILE_FORMAT_INVALID; return 0; @@ -786,7 +785,7 @@ long long Segment::CreateInstance(IMkvReader* pReader, long long pos, // Handle "unknown size" for live streaming of webm files. 
const long long unknown_size = (1LL << (7 * len)) - 1; - if (id == mkvmuxer::kMkvSegment) { + if (id == libwebm::kMkvSegment) { if (size == unknown_size) size = -1; @@ -878,7 +877,7 @@ long long Segment::ParseHeaders() { if (id < 0) return E_FILE_FORMAT_INVALID; - if (id == mkvmuxer::kMkvCluster) + if (id == libwebm::kMkvCluster) break; pos += len; // consume ID @@ -930,7 +929,7 @@ long long Segment::ParseHeaders() { if ((pos + size) > available) return pos + size; - if (id == mkvmuxer::kMkvInfo) { + if (id == libwebm::kMkvInfo) { if (m_pInfo) return E_FILE_FORMAT_INVALID; @@ -944,7 +943,7 @@ long long Segment::ParseHeaders() { if (status) return status; - } else if (id == mkvmuxer::kMkvTracks) { + } else if (id == libwebm::kMkvTracks) { if (m_pTracks) return E_FILE_FORMAT_INVALID; @@ -958,7 +957,7 @@ long long Segment::ParseHeaders() { if (status) return status; - } else if (id == mkvmuxer::kMkvCues) { + } else if (id == libwebm::kMkvCues) { if (m_pCues == NULL) { m_pCues = new (std::nothrow) Cues(this, pos, size, element_start, element_size); @@ -966,7 +965,7 @@ long long Segment::ParseHeaders() { if (m_pCues == NULL) return -1; } - } else if (id == mkvmuxer::kMkvSeekHead) { + } else if (id == libwebm::kMkvSeekHead) { if (m_pSeekHead == NULL) { m_pSeekHead = new (std::nothrow) SeekHead(this, pos, size, element_start, element_size); @@ -979,7 +978,7 @@ long long Segment::ParseHeaders() { if (status) return status; } - } else if (id == mkvmuxer::kMkvChapters) { + } else if (id == libwebm::kMkvChapters) { if (m_pChapters == NULL) { m_pChapters = new (std::nothrow) Chapters(this, pos, size, element_start, element_size); @@ -992,7 +991,7 @@ long long Segment::ParseHeaders() { if (status) return status; } - } else if (id == mkvmuxer::kMkvTags) { + } else if (id == libwebm::kMkvTags) { if (m_pTags == NULL) { m_pTags = new (std::nothrow) Tags(this, pos, size, element_start, element_size); @@ -1131,7 +1130,7 @@ long Segment::DoLoadCluster(long long& pos, long& len) { 
return E_FILE_FORMAT_INVALID; } - if (id == mkvmuxer::kMkvCues) { + if (id == libwebm::kMkvCues) { if (size == unknown_size) { // Cues element of unknown size: Not supported. return E_FILE_FORMAT_INVALID; @@ -1149,7 +1148,7 @@ long Segment::DoLoadCluster(long long& pos, long& len) { continue; } - if (id != mkvmuxer::kMkvCluster) { + if (id != libwebm::kMkvCluster) { // Besides the Segment, Libwebm allows only cluster elements of unknown // size. Fail the parse upon encountering a non-cluster element reporting // unknown size. @@ -1466,7 +1465,7 @@ long Segment::Load() { return E_FILE_FORMAT_INVALID; for (;;) { - const int status = LoadCluster(); + const long status = LoadCluster(); if (status < 0) // error return status; @@ -1512,9 +1511,9 @@ long SeekHead::Parse() { if (status < 0) // error return status; - if (id == mkvmuxer::kMkvSeek) + if (id == libwebm::kMkvSeek) ++entry_count; - else if (id == mkvmuxer::kMkvVoid) + else if (id == libwebm::kMkvVoid) ++void_element_count; pos += size; // consume payload @@ -1553,14 +1552,14 @@ long SeekHead::Parse() { if (status < 0) // error return status; - if (id == mkvmuxer::kMkvSeek) { + if (id == libwebm::kMkvSeek) { if (ParseEntry(pReader, pos, size, pEntry)) { Entry& e = *pEntry++; e.element_start = idpos; e.element_size = (pos + size) - idpos; } - } else if (id == mkvmuxer::kMkvVoid) { + } else if (id == libwebm::kMkvVoid) { VoidElement& e = *pVoidElement++; e.element_start = idpos; @@ -1664,7 +1663,7 @@ long Segment::ParseCues(long long off, long long& pos, long& len) { const long long id = ReadID(m_pReader, idpos, len); - if (id != mkvmuxer::kMkvCues) + if (id != libwebm::kMkvCues) return E_FILE_FORMAT_INVALID; pos += len; // consume ID @@ -1746,7 +1745,7 @@ bool SeekHead::ParseEntry(IMkvReader* pReader, long long start, long long size_, if (seekIdId < 0) return false; - if (seekIdId != mkvmuxer::kMkvSeekID) + if (seekIdId != libwebm::kMkvSeekID) return false; if ((pos + len) > stop) @@ -1790,7 +1789,7 @@ bool 
SeekHead::ParseEntry(IMkvReader* pReader, long long start, long long size_, const long long seekPosId = ReadID(pReader, pos, len); - if (seekPosId != mkvmuxer::kMkvSeekPosition) + if (seekPosId != libwebm::kMkvSeekPosition) return false; if ((pos + len) > stop) @@ -1900,7 +1899,7 @@ bool Cues::Init() const { return false; } - if (id == mkvmuxer::kMkvCuePoint) { + if (id == libwebm::kMkvCuePoint) { if (!PreloadCuePoint(cue_points_size, idpos)) return false; } @@ -1975,7 +1974,7 @@ bool Cues::LoadCuePoint() const { if ((m_pos + size) > stop) return false; - if (id != mkvmuxer::kMkvCuePoint) { + if (id != libwebm::kMkvCuePoint) { m_pos += size; // consume payload if (m_pos > stop) return false; @@ -2105,8 +2104,8 @@ const CuePoint* Cues::GetLast() const { } const CuePoint* Cues::GetNext(const CuePoint* pCurr) const { - if (pCurr == NULL || pCurr->GetTimeCode() < 0 || - m_cue_points == NULL || m_count < 1) { + if (pCurr == NULL || pCurr->GetTimeCode() < 0 || m_cue_points == NULL || + m_count < 1) { return NULL; } @@ -2286,7 +2285,7 @@ bool CuePoint::Load(IMkvReader* pReader) { long len; const long long id = ReadID(pReader, pos_, len); - if (id != mkvmuxer::kMkvCuePoint) + if (id != libwebm::kMkvCuePoint) return false; pos_ += len; // consume ID @@ -2326,10 +2325,10 @@ bool CuePoint::Load(IMkvReader* pReader) { return false; } - if (id == mkvmuxer::kMkvCueTime) + if (id == libwebm::kMkvCueTime) m_timecode = UnserializeUInt(pReader, pos, size); - else if (id == mkvmuxer::kMkvCueTrackPositions) + else if (id == libwebm::kMkvCueTrackPositions) ++m_track_positions_count; pos += size; // consume payload @@ -2368,7 +2367,7 @@ bool CuePoint::Load(IMkvReader* pReader) { pos += len; // consume Size field assert((pos + size) <= stop); - if (id == mkvmuxer::kMkvCueTrackPositions) { + if (id == libwebm::kMkvCueTrackPositions) { TrackPosition& tp = *p++; if (!tp.Parse(pReader, pos, size)) { return false; @@ -2417,11 +2416,11 @@ bool CuePoint::TrackPosition::Parse(IMkvReader* 
pReader, long long start_, return false; } - if (id == mkvmuxer::kMkvCueTrack) + if (id == libwebm::kMkvCueTrack) m_track = UnserializeUInt(pReader, pos, size); - else if (id == mkvmuxer::kMkvCueClusterPosition) + else if (id == libwebm::kMkvCueClusterPosition) m_pos = UnserializeUInt(pReader, pos, size); - else if (id == mkvmuxer::kMkvCueBlockNumber) + else if (id == libwebm::kMkvCueBlockNumber) m_block = UnserializeUInt(pReader, pos, size); pos += size; // consume payload @@ -2555,7 +2554,7 @@ const Cluster* Segment::GetNext(const Cluster* pCurr) { return NULL; const long long id = ReadID(m_pReader, pos, len); - if (id != mkvmuxer::kMkvCluster) + if (id != libwebm::kMkvCluster) return NULL; pos += len; // consume ID @@ -2612,7 +2611,7 @@ const Cluster* Segment::GetNext(const Cluster* pCurr) { if (size == 0) // weird continue; - if (id == mkvmuxer::kMkvCluster) { + if (id == libwebm::kMkvCluster) { const long long off_next_ = idpos - m_start; long long pos_; @@ -2762,7 +2761,7 @@ long Segment::ParseNext(const Cluster* pCurr, const Cluster*& pResult, const long long id = ReadUInt(m_pReader, pos, len); - if (id != mkvmuxer::kMkvCluster) + if (id != libwebm::kMkvCluster) return -1; pos += len; // consume ID @@ -2927,7 +2926,7 @@ long Segment::DoParseNext(const Cluster*& pResult, long long& pos, long& len) { return E_FILE_FORMAT_INVALID; } - if (id == mkvmuxer::kMkvCues) { + if (id == libwebm::kMkvCues) { if (size == unknown_size) return E_FILE_FORMAT_INVALID; @@ -2953,7 +2952,7 @@ long Segment::DoParseNext(const Cluster*& pResult, long long& pos, long& len) { continue; } - if (id != mkvmuxer::kMkvCluster) { // not a Cluster ID + if (id != libwebm::kMkvCluster) { // not a Cluster ID if (size == unknown_size) return E_FILE_FORMAT_INVALID; @@ -3091,7 +3090,7 @@ long Segment::DoParseNext(const Cluster*& pResult, long long& pos, long& len) { // that we have exhausted the sub-element's inside the cluster // whose ID we parsed earlier. 
- if (id == mkvmuxer::kMkvCluster || id == mkvmuxer::kMkvCues) + if (id == libwebm::kMkvCluster || id == libwebm::kMkvCues) break; pos += len; // consume ID (of sub-element) @@ -3259,7 +3258,7 @@ long Chapters::Parse() { if (size == 0) // weird continue; - if (id == mkvmuxer::kMkvEditionEntry) { + if (id == libwebm::kMkvEditionEntry) { status = ParseEdition(pos, size); if (status < 0) // error @@ -3375,7 +3374,7 @@ long Chapters::Edition::Parse(IMkvReader* pReader, long long pos, if (size == 0) continue; - if (id == mkvmuxer::kMkvChapterAtom) { + if (id == libwebm::kMkvChapterAtom) { status = ParseAtom(pReader, pos, size); if (status < 0) // error @@ -3508,17 +3507,17 @@ long Chapters::Atom::Parse(IMkvReader* pReader, long long pos, long long size) { if (size == 0) // 0 length payload, skip. continue; - if (id == mkvmuxer::kMkvChapterDisplay) { + if (id == libwebm::kMkvChapterDisplay) { status = ParseDisplay(pReader, pos, size); if (status < 0) // error return status; - } else if (id == mkvmuxer::kMkvChapterStringUID) { + } else if (id == libwebm::kMkvChapterStringUID) { status = UnserializeString(pReader, pos, size, m_string_uid); if (status < 0) // error return status; - } else if (id == mkvmuxer::kMkvChapterUID) { + } else if (id == libwebm::kMkvChapterUID) { long long val; status = UnserializeInt(pReader, pos, size, val); @@ -3526,14 +3525,14 @@ long Chapters::Atom::Parse(IMkvReader* pReader, long long pos, long long size) { return status; m_uid = static_cast<unsigned long long>(val); - } else if (id == mkvmuxer::kMkvChapterTimeStart) { + } else if (id == libwebm::kMkvChapterTimeStart) { const long long val = UnserializeUInt(pReader, pos, size); if (val < 0) // error return static_cast<long>(val); m_start_timecode = val; - } else if (id == mkvmuxer::kMkvChapterTimeEnd) { + } else if (id == libwebm::kMkvChapterTimeEnd) { const long long val = UnserializeUInt(pReader, pos, size); if (val < 0) // error @@ -3661,17 +3660,17 @@ long 
Chapters::Display::Parse(IMkvReader* pReader, long long pos, if (size == 0) // No payload. continue; - if (id == mkvmuxer::kMkvChapString) { + if (id == libwebm::kMkvChapString) { status = UnserializeString(pReader, pos, size, m_string); if (status) return status; - } else if (id == mkvmuxer::kMkvChapLanguage) { + } else if (id == libwebm::kMkvChapLanguage) { status = UnserializeString(pReader, pos, size, m_language); if (status) return status; - } else if (id == mkvmuxer::kMkvChapCountry) { + } else if (id == libwebm::kMkvChapCountry) { status = UnserializeString(pReader, pos, size, m_country); if (status) @@ -3724,7 +3723,7 @@ long Tags::Parse() { if (size == 0) // 0 length tag, read another continue; - if (id == mkvmuxer::kMkvTag) { + if (id == libwebm::kMkvTag) { status = ParseTag(pos, size); if (status < 0) @@ -3840,7 +3839,7 @@ long Tags::Tag::Parse(IMkvReader* pReader, long long pos, long long size) { if (size == 0) // 0 length tag, read another continue; - if (id == mkvmuxer::kMkvSimpleTag) { + if (id == libwebm::kMkvSimpleTag) { status = ParseSimpleTag(pReader, pos, size); if (status < 0) @@ -3931,12 +3930,12 @@ long Tags::SimpleTag::Parse(IMkvReader* pReader, long long pos, if (size == 0) // weird continue; - if (id == mkvmuxer::kMkvTagName) { + if (id == libwebm::kMkvTagName) { status = UnserializeString(pReader, pos, size, m_tag_name); if (status) return status; - } else if (id == mkvmuxer::kMkvTagString) { + } else if (id == libwebm::kMkvTagString) { status = UnserializeString(pReader, pos, size, m_tag_string); if (status) @@ -3996,12 +3995,12 @@ long SegmentInfo::Parse() { if (status < 0) // error return status; - if (id == mkvmuxer::kMkvTimecodeScale) { + if (id == libwebm::kMkvTimecodeScale) { m_timecodeScale = UnserializeUInt(pReader, pos, size); if (m_timecodeScale <= 0) return E_FILE_FORMAT_INVALID; - } else if (id == mkvmuxer::kMkvDuration) { + } else if (id == libwebm::kMkvDuration) { const long status = UnserializeFloat(pReader, pos, size, 
m_duration); if (status < 0) @@ -4009,19 +4008,19 @@ long SegmentInfo::Parse() { if (m_duration < 0) return E_FILE_FORMAT_INVALID; - } else if (id == mkvmuxer::kMkvMuxingApp) { + } else if (id == libwebm::kMkvMuxingApp) { const long status = UnserializeString(pReader, pos, size, m_pMuxingAppAsUTF8); if (status) return status; - } else if (id == mkvmuxer::kMkvWritingApp) { + } else if (id == libwebm::kMkvWritingApp) { const long status = UnserializeString(pReader, pos, size, m_pWritingAppAsUTF8); if (status) return status; - } else if (id == mkvmuxer::kMkvTitle) { + } else if (id == libwebm::kMkvTitle) { const long status = UnserializeString(pReader, pos, size, m_pTitleAsUTF8); if (status) @@ -4176,7 +4175,7 @@ long ContentEncoding::ParseContentEncAESSettingsEntry( if (status < 0) // error return status; - if (id == mkvmuxer::kMkvAESSettingsCipherMode) { + if (id == libwebm::kMkvAESSettingsCipherMode) { aes->cipher_mode = UnserializeUInt(pReader, pos, size); if (aes->cipher_mode != 1) return E_FILE_FORMAT_INVALID; @@ -4207,10 +4206,10 @@ long ContentEncoding::ParseContentEncodingEntry(long long start, long long size, if (status < 0) // error return status; - if (id == mkvmuxer::kMkvContentCompression) + if (id == libwebm::kMkvContentCompression) ++compression_count; - if (id == mkvmuxer::kMkvContentEncryption) + if (id == libwebm::kMkvContentEncryption) ++encryption_count; pos += size; // consume payload @@ -4246,15 +4245,15 @@ long ContentEncoding::ParseContentEncodingEntry(long long start, long long size, if (status < 0) // error return status; - if (id == mkvmuxer::kMkvContentEncodingOrder) { + if (id == libwebm::kMkvContentEncodingOrder) { encoding_order_ = UnserializeUInt(pReader, pos, size); - } else if (id == mkvmuxer::kMkvContentEncodingScope) { + } else if (id == libwebm::kMkvContentEncodingScope) { encoding_scope_ = UnserializeUInt(pReader, pos, size); if (encoding_scope_ < 1) return -1; - } else if (id == mkvmuxer::kMkvContentEncodingType) { + } else if 
(id == libwebm::kMkvContentEncodingType) { encoding_type_ = UnserializeUInt(pReader, pos, size); - } else if (id == mkvmuxer::kMkvContentCompression) { + } else if (id == libwebm::kMkvContentCompression) { ContentCompression* const compression = new (std::nothrow) ContentCompression(); if (!compression) @@ -4266,7 +4265,7 @@ long ContentEncoding::ParseContentEncodingEntry(long long start, long long size, return status; } *compression_entries_end_++ = compression; - } else if (id == mkvmuxer::kMkvContentEncryption) { + } else if (id == libwebm::kMkvContentEncryption) { ContentEncryption* const encryption = new (std::nothrow) ContentEncryption(); if (!encryption) @@ -4307,13 +4306,13 @@ long ContentEncoding::ParseCompressionEntry(long long start, long long size, if (status < 0) // error return status; - if (id == mkvmuxer::kMkvContentCompAlgo) { + if (id == libwebm::kMkvContentCompAlgo) { long long algo = UnserializeUInt(pReader, pos, size); if (algo < 0) return E_FILE_FORMAT_INVALID; compression->algo = algo; valid = true; - } else if (id == mkvmuxer::kMkvContentCompSettings) { + } else if (id == libwebm::kMkvContentCompSettings) { if (size <= 0) return E_FILE_FORMAT_INVALID; @@ -4360,11 +4359,11 @@ long ContentEncoding::ParseEncryptionEntry(long long start, long long size, if (status < 0) // error return status; - if (id == mkvmuxer::kMkvContentEncAlgo) { + if (id == libwebm::kMkvContentEncAlgo) { encryption->algo = UnserializeUInt(pReader, pos, size); if (encryption->algo != 5) return E_FILE_FORMAT_INVALID; - } else if (id == mkvmuxer::kMkvContentEncKeyID) { + } else if (id == libwebm::kMkvContentEncKeyID) { delete[] encryption->key_id; encryption->key_id = NULL; encryption->key_id_len = 0; @@ -4386,7 +4385,7 @@ long ContentEncoding::ParseEncryptionEntry(long long start, long long size, encryption->key_id = buf; encryption->key_id_len = buflen; - } else if (id == mkvmuxer::kMkvContentSignature) { + } else if (id == libwebm::kMkvContentSignature) { delete[] 
encryption->signature; encryption->signature = NULL; encryption->signature_len = 0; @@ -4408,7 +4407,7 @@ long ContentEncoding::ParseEncryptionEntry(long long start, long long size, encryption->signature = buf; encryption->signature_len = buflen; - } else if (id == mkvmuxer::kMkvContentSigKeyID) { + } else if (id == libwebm::kMkvContentSigKeyID) { delete[] encryption->sig_key_id; encryption->sig_key_id = NULL; encryption->sig_key_id_len = 0; @@ -4430,11 +4429,11 @@ long ContentEncoding::ParseEncryptionEntry(long long start, long long size, encryption->sig_key_id = buf; encryption->sig_key_id_len = buflen; - } else if (id == mkvmuxer::kMkvContentSigAlgo) { + } else if (id == libwebm::kMkvContentSigAlgo) { encryption->sig_algo = UnserializeUInt(pReader, pos, size); - } else if (id == mkvmuxer::kMkvContentSigHashAlgo) { + } else if (id == libwebm::kMkvContentSigHashAlgo) { encryption->sig_hash_algo = UnserializeUInt(pReader, pos, size); - } else if (id == mkvmuxer::kMkvContentEncAESSettings) { + } else if (id == libwebm::kMkvContentEncAESSettings) { const long status = ParseContentEncAESSettingsEntry( pos, size, pReader, &encryption->aes_settings); if (status) @@ -4921,7 +4920,7 @@ long Track::ParseContentEncodingsEntry(long long start, long long size) { return status; // pos now designates start of element - if (id == mkvmuxer::kMkvContentEncoding) + if (id == libwebm::kMkvContentEncoding) ++count; pos += size; // consume payload @@ -4946,7 +4945,7 @@ long Track::ParseContentEncodingsEntry(long long start, long long size) { return status; // pos now designates start of element - if (id == mkvmuxer::kMkvContentEncoding) { + if (id == libwebm::kMkvContentEncoding) { ContentEncoding* const content_encoding = new (std::nothrow) ContentEncoding(); if (!content_encoding) @@ -4978,9 +4977,222 @@ BlockEntry::Kind Track::EOSBlock::GetKind() const { return kBlockEOS; } const Block* Track::EOSBlock::GetBlock() const { return NULL; } +bool PrimaryChromaticity::Parse(IMkvReader* 
reader, long long read_pos, + long long value_size, bool is_x, + PrimaryChromaticity** chromaticity) { + if (!reader) + return false; + + std::auto_ptr<PrimaryChromaticity> chromaticity_ptr; + + if (!*chromaticity) { + chromaticity_ptr.reset(new PrimaryChromaticity()); + } else { + chromaticity_ptr.reset(*chromaticity); + } + + if (!chromaticity_ptr.get()) + return false; + + float* value = is_x ? &chromaticity_ptr->x : &chromaticity_ptr->y; + + double parser_value = 0; + const long long value_parse_status = + UnserializeFloat(reader, read_pos, value_size, parser_value); + + *value = static_cast<float>(parser_value); + + if (value_parse_status < 0 || *value < 0.0 || *value > 1.0) + return false; + + *chromaticity = chromaticity_ptr.release(); + return true; +} + +bool MasteringMetadata::Parse(IMkvReader* reader, long long mm_start, + long long mm_size, MasteringMetadata** mm) { + if (!reader || *mm) + return false; + + std::auto_ptr<MasteringMetadata> mm_ptr(new MasteringMetadata()); + if (!mm_ptr.get()) + return false; + + const long long mm_end = mm_start + mm_size; + long long read_pos = mm_start; + + while (read_pos < mm_end) { + long long child_id = 0; + long long child_size = 0; + + const long long status = + ParseElementHeader(reader, read_pos, mm_end, child_id, child_size); + if (status < 0) + return false; + + if (child_id == libwebm::kMkvLuminanceMax) { + double value = 0; + const long long value_parse_status = + UnserializeFloat(reader, read_pos, child_size, value); + mm_ptr->luminance_max = static_cast<float>(value); + if (value_parse_status < 0 || mm_ptr->luminance_max < 0.0 || + mm_ptr->luminance_max > 9999.99) { + return false; + } + } else if (child_id == libwebm::kMkvLuminanceMin) { + double value = 0; + const long long value_parse_status = + UnserializeFloat(reader, read_pos, child_size, value); + mm_ptr->luminance_min = static_cast<float>(value); + if (value_parse_status < 0 || mm_ptr->luminance_min < 0.0 || + mm_ptr->luminance_min > 999.9999) { 
+ return false; + } + } else { + bool is_x = false; + PrimaryChromaticity** chromaticity; + switch (child_id) { + case libwebm::kMkvPrimaryRChromaticityX: + case libwebm::kMkvPrimaryRChromaticityY: + is_x = child_id == libwebm::kMkvPrimaryRChromaticityX; + chromaticity = &mm_ptr->r; + break; + case libwebm::kMkvPrimaryGChromaticityX: + case libwebm::kMkvPrimaryGChromaticityY: + is_x = child_id == libwebm::kMkvPrimaryGChromaticityX; + chromaticity = &mm_ptr->g; + break; + case libwebm::kMkvPrimaryBChromaticityX: + case libwebm::kMkvPrimaryBChromaticityY: + is_x = child_id == libwebm::kMkvPrimaryBChromaticityX; + chromaticity = &mm_ptr->b; + break; + case libwebm::kMkvWhitePointChromaticityX: + case libwebm::kMkvWhitePointChromaticityY: + is_x = child_id == libwebm::kMkvWhitePointChromaticityX; + chromaticity = &mm_ptr->white_point; + break; + default: + return false; + } + const bool value_parse_status = PrimaryChromaticity::Parse( + reader, read_pos, child_size, is_x, chromaticity); + if (!value_parse_status) + return false; + } + + read_pos += child_size; + if (read_pos > mm_end) + return false; + } + + *mm = mm_ptr.release(); + return true; +} + +bool Colour::Parse(IMkvReader* reader, long long colour_start, + long long colour_size, Colour** colour) { + if (!reader || *colour) + return false; + + std::auto_ptr<Colour> colour_ptr(new Colour()); + if (!colour_ptr.get()) + return false; + + const long long colour_end = colour_start + colour_size; + long long read_pos = colour_start; + + while (read_pos < colour_end) { + long long child_id = 0; + long long child_size = 0; + + const long status = + ParseElementHeader(reader, read_pos, colour_end, child_id, child_size); + if (status < 0) + return false; + + if (child_id == libwebm::kMkvMatrixCoefficients) { + colour_ptr->matrix_coefficients = + UnserializeUInt(reader, read_pos, child_size); + if (colour_ptr->matrix_coefficients < 0) + return false; + } else if (child_id == libwebm::kMkvBitsPerChannel) { + 
colour_ptr->bits_per_channel = + UnserializeUInt(reader, read_pos, child_size); + if (colour_ptr->bits_per_channel < 0) + return false; + } else if (child_id == libwebm::kMkvChromaSubsamplingHorz) { + colour_ptr->chroma_subsampling_horz = + UnserializeUInt(reader, read_pos, child_size); + if (colour_ptr->chroma_subsampling_horz < 0) + return false; + } else if (child_id == libwebm::kMkvChromaSubsamplingVert) { + colour_ptr->chroma_subsampling_vert = + UnserializeUInt(reader, read_pos, child_size); + if (colour_ptr->chroma_subsampling_vert < 0) + return false; + } else if (child_id == libwebm::kMkvCbSubsamplingHorz) { + colour_ptr->cb_subsampling_horz = + UnserializeUInt(reader, read_pos, child_size); + if (colour_ptr->cb_subsampling_horz < 0) + return false; + } else if (child_id == libwebm::kMkvCbSubsamplingVert) { + colour_ptr->cb_subsampling_vert = + UnserializeUInt(reader, read_pos, child_size); + if (colour_ptr->cb_subsampling_vert < 0) + return false; + } else if (child_id == libwebm::kMkvChromaSitingHorz) { + colour_ptr->chroma_siting_horz = + UnserializeUInt(reader, read_pos, child_size); + if (colour_ptr->chroma_siting_horz < 0) + return false; + } else if (child_id == libwebm::kMkvChromaSitingVert) { + colour_ptr->chroma_siting_vert = + UnserializeUInt(reader, read_pos, child_size); + if (colour_ptr->chroma_siting_vert < 0) + return false; + } else if (child_id == libwebm::kMkvRange) { + colour_ptr->range = UnserializeUInt(reader, read_pos, child_size); + if (colour_ptr->range < 0) + return false; + } else if (child_id == libwebm::kMkvTransferCharacteristics) { + colour_ptr->transfer_characteristics = + UnserializeUInt(reader, read_pos, child_size); + if (colour_ptr->transfer_characteristics < 0) + return false; + } else if (child_id == libwebm::kMkvPrimaries) { + colour_ptr->primaries = UnserializeUInt(reader, read_pos, child_size); + if (colour_ptr->primaries < 0) + return false; + } else if (child_id == libwebm::kMkvMaxCLL) { + colour_ptr->max_cll = 
UnserializeUInt(reader, read_pos, child_size); + if (colour_ptr->max_cll < 0) + return false; + } else if (child_id == libwebm::kMkvMaxFALL) { + colour_ptr->max_fall = UnserializeUInt(reader, read_pos, child_size); + if (colour_ptr->max_fall < 0) + return false; + } else if (child_id == libwebm::kMkvMasteringMetadata) { + if (!MasteringMetadata::Parse(reader, read_pos, child_size, + &colour_ptr->mastering_metadata)) + return false; + } else { + return false; + } + + read_pos += child_size; + if (read_pos > colour_end) + return false; + } + *colour = colour_ptr.release(); + return true; +} + VideoTrack::VideoTrack(Segment* pSegment, long long element_start, long long element_size) - : Track(pSegment, element_start, element_size) {} + : Track(pSegment, element_start, element_size), m_colour(NULL) {} + +VideoTrack::~VideoTrack() { delete m_colour; } long VideoTrack::Parse(Segment* pSegment, const Info& info, long long element_start, long long element_size, @@ -5011,6 +5223,8 @@ long VideoTrack::Parse(Segment* pSegment, const Info& info, const long long stop = pos + s.size; + Colour* colour = NULL; + while (pos < stop) { long long id, size; @@ -5019,37 +5233,37 @@ long VideoTrack::Parse(Segment* pSegment, const Info& info, if (status < 0) // error return status; - if (id == mkvmuxer::kMkvPixelWidth) { + if (id == libwebm::kMkvPixelWidth) { width = UnserializeUInt(pReader, pos, size); if (width <= 0) return E_FILE_FORMAT_INVALID; - } else if (id == mkvmuxer::kMkvPixelHeight) { + } else if (id == libwebm::kMkvPixelHeight) { height = UnserializeUInt(pReader, pos, size); if (height <= 0) return E_FILE_FORMAT_INVALID; - } else if (id == mkvmuxer::kMkvDisplayWidth) { + } else if (id == libwebm::kMkvDisplayWidth) { display_width = UnserializeUInt(pReader, pos, size); if (display_width <= 0) return E_FILE_FORMAT_INVALID; - } else if (id == mkvmuxer::kMkvDisplayHeight) { + } else if (id == libwebm::kMkvDisplayHeight) { display_height = UnserializeUInt(pReader, pos, size); if 
(display_height <= 0) return E_FILE_FORMAT_INVALID; - } else if (id == mkvmuxer::kMkvDisplayUnit) { + } else if (id == libwebm::kMkvDisplayUnit) { display_unit = UnserializeUInt(pReader, pos, size); if (display_unit < 0) return E_FILE_FORMAT_INVALID; - } else if (id == mkvmuxer::kMkvStereoMode) { + } else if (id == libwebm::kMkvStereoMode) { stereo_mode = UnserializeUInt(pReader, pos, size); if (stereo_mode < 0) return E_FILE_FORMAT_INVALID; - } else if (id == mkvmuxer::kMkvFrameRate) { + } else if (id == libwebm::kMkvFrameRate) { const long status = UnserializeFloat(pReader, pos, size, rate); if (status < 0) @@ -5057,6 +5271,9 @@ long VideoTrack::Parse(Segment* pSegment, const Info& info, if (rate <= 0) return E_FILE_FORMAT_INVALID; + } else if (id == libwebm::kMkvColour) { + if (!Colour::Parse(pReader, pos, size, &colour)) + return E_FILE_FORMAT_INVALID; } pos += size; // consume payload @@ -5087,6 +5304,7 @@ long VideoTrack::Parse(Segment* pSegment, const Info& info, pTrack->m_display_unit = display_unit; pTrack->m_stereo_mode = stereo_mode; pTrack->m_rate = rate; + pTrack->m_colour = colour; pResult = pTrack; return 0; // success @@ -5185,6 +5403,8 @@ long VideoTrack::Seek(long long time_ns, const BlockEntry*& pResult) const { return 0; } +Colour* VideoTrack::GetColour() const { return m_colour; } + long long VideoTrack::GetWidth() const { return m_width; } long long VideoTrack::GetHeight() const { return m_height; } @@ -5239,7 +5459,7 @@ long AudioTrack::Parse(Segment* pSegment, const Info& info, if (status < 0) // error return status; - if (id == mkvmuxer::kMkvSamplingFrequency) { + if (id == libwebm::kMkvSamplingFrequency) { status = UnserializeFloat(pReader, pos, size, rate); if (status < 0) @@ -5247,12 +5467,12 @@ long AudioTrack::Parse(Segment* pSegment, const Info& info, if (rate <= 0) return E_FILE_FORMAT_INVALID; - } else if (id == mkvmuxer::kMkvChannels) { + } else if (id == libwebm::kMkvChannels) { channels = UnserializeUInt(pReader, pos, size); if 
(channels <= 0) return E_FILE_FORMAT_INVALID; - } else if (id == mkvmuxer::kMkvBitDepth) { + } else if (id == libwebm::kMkvBitDepth) { bit_depth = UnserializeUInt(pReader, pos, size); if (bit_depth <= 0) @@ -5325,7 +5545,7 @@ long Tracks::Parse() { if (size == 0) // weird continue; - if (id == mkvmuxer::kMkvTrackEntry) + if (id == libwebm::kMkvTrackEntry) ++count; pos += size; // consume payload @@ -5367,7 +5587,7 @@ long Tracks::Parse() { const long long element_size = payload_stop - element_start; - if (id == mkvmuxer::kMkvTrackEntry) { + if (id == libwebm::kMkvTrackEntry) { Track*& pTrack = *m_trackEntriesEnd; pTrack = NULL; @@ -5443,16 +5663,16 @@ long Tracks::ParseTrackEntry(long long track_start, long long track_size, const long long start = pos; - if (id == mkvmuxer::kMkvVideo) { + if (id == libwebm::kMkvVideo) { v.start = start; v.size = size; - } else if (id == mkvmuxer::kMkvAudio) { + } else if (id == libwebm::kMkvAudio) { a.start = start; a.size = size; - } else if (id == mkvmuxer::kMkvContentEncodings) { + } else if (id == libwebm::kMkvContentEncodings) { e.start = start; e.size = size; - } else if (id == mkvmuxer::kMkvTrackUID) { + } else if (id == libwebm::kMkvTrackUID) { if (size > 8) return E_FILE_FORMAT_INVALID; @@ -5474,49 +5694,49 @@ long Tracks::ParseTrackEntry(long long track_start, long long track_size, ++pos_; } - } else if (id == mkvmuxer::kMkvTrackNumber) { + } else if (id == libwebm::kMkvTrackNumber) { const long long num = UnserializeUInt(pReader, pos, size); if ((num <= 0) || (num > 127)) return E_FILE_FORMAT_INVALID; info.number = static_cast<long>(num); - } else if (id == mkvmuxer::kMkvTrackType) { + } else if (id == libwebm::kMkvTrackType) { const long long type = UnserializeUInt(pReader, pos, size); if ((type <= 0) || (type > 254)) return E_FILE_FORMAT_INVALID; info.type = static_cast<long>(type); - } else if (id == mkvmuxer::kMkvName) { + } else if (id == libwebm::kMkvName) { const long status = UnserializeString(pReader, pos, size, 
info.nameAsUTF8); if (status) return status; - } else if (id == mkvmuxer::kMkvLanguage) { + } else if (id == libwebm::kMkvLanguage) { const long status = UnserializeString(pReader, pos, size, info.language); if (status) return status; - } else if (id == mkvmuxer::kMkvDefaultDuration) { + } else if (id == libwebm::kMkvDefaultDuration) { const long long duration = UnserializeUInt(pReader, pos, size); if (duration < 0) return E_FILE_FORMAT_INVALID; info.defaultDuration = static_cast<unsigned long long>(duration); - } else if (id == mkvmuxer::kMkvCodecID) { + } else if (id == libwebm::kMkvCodecID) { const long status = UnserializeString(pReader, pos, size, info.codecId); if (status) return status; - } else if (id == mkvmuxer::kMkvFlagLacing) { + } else if (id == libwebm::kMkvFlagLacing) { lacing = UnserializeUInt(pReader, pos, size); if ((lacing < 0) || (lacing > 1)) return E_FILE_FORMAT_INVALID; - } else if (id == mkvmuxer::kMkvCodecPrivate) { + } else if (id == libwebm::kMkvCodecPrivate) { delete[] info.codecPrivate; info.codecPrivate = NULL; info.codecPrivateSize = 0; @@ -5539,15 +5759,15 @@ long Tracks::ParseTrackEntry(long long track_start, long long track_size, info.codecPrivate = buf; info.codecPrivateSize = buflen; } - } else if (id == mkvmuxer::kMkvCodecName) { + } else if (id == libwebm::kMkvCodecName) { const long status = UnserializeString(pReader, pos, size, info.codecNameAsUTF8); if (status) return status; - } else if (id == mkvmuxer::kMkvCodecDelay) { + } else if (id == libwebm::kMkvCodecDelay) { info.codecDelay = UnserializeUInt(pReader, pos, size); - } else if (id == mkvmuxer::kMkvSeekPreRoll) { + } else if (id == libwebm::kMkvSeekPreRoll) { info.seekPreRoll = UnserializeUInt(pReader, pos, size); } @@ -5730,7 +5950,7 @@ long Cluster::Load(long long& pos, long& len) const { if (id_ < 0) // error return static_cast<long>(id_); - if (id_ != mkvmuxer::kMkvCluster) + if (id_ != libwebm::kMkvCluster) return E_FILE_FORMAT_INVALID; pos += len; // consume id @@ 
-5812,10 +6032,10 @@ long Cluster::Load(long long& pos, long& len) const { // that we have exhausted the sub-element's inside the cluster // whose ID we parsed earlier. - if (id == mkvmuxer::kMkvCluster) + if (id == libwebm::kMkvCluster) break; - if (id == mkvmuxer::kMkvCues) + if (id == libwebm::kMkvCues) break; pos += len; // consume ID field @@ -5864,7 +6084,7 @@ long Cluster::Load(long long& pos, long& len) const { if ((cluster_stop >= 0) && ((pos + size) > cluster_stop)) return E_FILE_FORMAT_INVALID; - if (id == mkvmuxer::kMkvTimecode) { + if (id == libwebm::kMkvTimecode) { len = static_cast<long>(size); if ((pos + size) > avail) @@ -5879,10 +6099,10 @@ long Cluster::Load(long long& pos, long& len) const { if (bBlock) break; - } else if (id == mkvmuxer::kMkvBlockGroup) { + } else if (id == libwebm::kMkvBlockGroup) { bBlock = true; break; - } else if (id == mkvmuxer::kMkvSimpleBlock) { + } else if (id == libwebm::kMkvSimpleBlock) { bBlock = true; break; } @@ -5980,7 +6200,7 @@ long Cluster::Parse(long long& pos, long& len) const { // that we have exhausted the sub-element's inside the cluster // whose ID we parsed earlier. 
- if ((id == mkvmuxer::kMkvCluster) || (id == mkvmuxer::kMkvCues)) { + if ((id == libwebm::kMkvCluster) || (id == libwebm::kMkvCues)) { if (m_element_size < 0) m_element_size = pos - m_element_start; @@ -6035,8 +6255,7 @@ long Cluster::Parse(long long& pos, long& len) const { if (cluster_stop >= 0) { if (block_stop > cluster_stop) { - if (id == mkvmuxer::kMkvBlockGroup || - id == mkvmuxer::kMkvSimpleBlock) { + if (id == libwebm::kMkvBlockGroup || id == libwebm::kMkvSimpleBlock) { return E_FILE_FORMAT_INVALID; } @@ -6054,10 +6273,10 @@ long Cluster::Parse(long long& pos, long& len) const { Cluster* const this_ = const_cast<Cluster*>(this); - if (id == mkvmuxer::kMkvBlockGroup) + if (id == libwebm::kMkvBlockGroup) return this_->ParseBlockGroup(size, pos, len); - if (id == mkvmuxer::kMkvSimpleBlock) + if (id == libwebm::kMkvSimpleBlock) return this_->ParseSimpleBlock(size, pos, len); pos += size; // consume payload @@ -6188,8 +6407,7 @@ long Cluster::ParseSimpleBlock(long long block_size, long long& pos, return E_BUFFER_NOT_FULL; } - status = CreateBlock(mkvmuxer::kMkvSimpleBlock, - block_start, block_size, + status = CreateBlock(libwebm::kMkvSimpleBlock, block_start, block_size, 0); // DiscardPadding if (status != 0) @@ -6299,14 +6517,14 @@ long Cluster::ParseBlockGroup(long long payload_size, long long& pos, if (size == unknown_size) return E_FILE_FORMAT_INVALID; - if (id == mkvmuxer::kMkvDiscardPadding) { + if (id == libwebm::kMkvDiscardPadding) { status = UnserializeInt(pReader, pos, size, discard_padding); if (status < 0) // error return status; } - if (id != mkvmuxer::kMkvBlock) { + if (id != libwebm::kMkvBlock) { pos += size; // consume sub-part of block group if (pos > payload_stop) @@ -6399,8 +6617,8 @@ long Cluster::ParseBlockGroup(long long payload_size, long long& pos, if (pos != payload_stop) return E_FILE_FORMAT_INVALID; - status = CreateBlock(mkvmuxer::kMkvBlockGroup, - payload_start, payload_size, discard_padding); + status = 
CreateBlock(libwebm::kMkvBlockGroup, payload_start, payload_size, + discard_padding); if (status != 0) return status; @@ -6565,7 +6783,7 @@ long Cluster::HasBlockEntries( if (id < 0) // error return static_cast<long>(id); - if (id != mkvmuxer::kMkvCluster) + if (id != libwebm::kMkvCluster) return E_PARSE_FAILED; pos += len; // consume Cluster ID field @@ -6653,10 +6871,10 @@ long Cluster::HasBlockEntries( // that we have exhausted the sub-element's inside the cluster // whose ID we parsed earlier. - if (id == mkvmuxer::kMkvCluster) + if (id == libwebm::kMkvCluster) return 0; // no entries found - if (id == mkvmuxer::kMkvCues) + if (id == libwebm::kMkvCues) return 0; // no entries found pos += len; // consume id field @@ -6708,10 +6926,10 @@ long Cluster::HasBlockEntries( if ((cluster_stop >= 0) && ((pos + size) > cluster_stop)) return E_FILE_FORMAT_INVALID; - if (id == mkvmuxer::kMkvBlockGroup) + if (id == libwebm::kMkvBlockGroup) return 1; // have at least one entry - if (id == mkvmuxer::kMkvSimpleBlock) + if (id == libwebm::kMkvSimpleBlock) return 1; // have at least one entry pos += size; // consume payload @@ -6786,7 +7004,7 @@ long long Cluster::GetLastTime() const { long Cluster::CreateBlock(long long id, long long pos, // absolute pos of payload long long size, long long discard_padding) { - if (id != mkvmuxer::kMkvBlockGroup && id != mkvmuxer::kMkvSimpleBlock) + if (id != libwebm::kMkvBlockGroup && id != libwebm::kMkvSimpleBlock) return E_PARSE_FAILED; if (m_entries_count < 0) { // haven't parsed anything yet @@ -6826,7 +7044,7 @@ long Cluster::CreateBlock(long long id, } } - if (id == mkvmuxer::kMkvBlockGroup) + if (id == libwebm::kMkvBlockGroup) return CreateBlockGroup(pos, size, discard_padding); else return CreateSimpleBlock(pos, size); @@ -6871,12 +7089,12 @@ long Cluster::CreateBlockGroup(long long start_offset, long long size, pos += len; // consume size - if (id == mkvmuxer::kMkvBlock) { + if (id == libwebm::kMkvBlock) { if (bpos < 0) { // Block ID 
bpos = pos; bsize = size; } - } else if (id == mkvmuxer::kMkvBlockDuration) { + } else if (id == libwebm::kMkvBlockDuration) { if (size > 8) return E_FILE_FORMAT_INVALID; @@ -6884,7 +7102,7 @@ long Cluster::CreateBlockGroup(long long start_offset, long long size, if (duration < 0) return E_FILE_FORMAT_INVALID; - } else if (id == mkvmuxer::kMkvReferenceBlock) { + } else if (id == libwebm::kMkvReferenceBlock) { if (size > 8 || size <= 0) return E_FILE_FORMAT_INVALID; const long size_ = static_cast<long>(size); @@ -7231,7 +7449,6 @@ const BlockEntry* Cluster::GetEntry(const CuePoint& cp, BlockEntry::BlockEntry(Cluster* p, long idx) : m_pCluster(p), m_index(idx) {} BlockEntry::~BlockEntry() {} -bool BlockEntry::EOS() const { return (GetKind() == kBlockEOS); } const Cluster* BlockEntry::GetCluster() const { return m_pCluster; } long BlockEntry::GetIndex() const { return m_index; } @@ -7555,7 +7772,6 @@ long Block::Parse(const Cluster* pCluster) { if (pf >= pf_end) return E_FILE_FORMAT_INVALID; - const Frame& prev = *pf++; assert(prev.len == frame_size); if (prev.len != frame_size) @@ -7581,7 +7797,7 @@ long Block::Parse(const Cluster* pCluster) { if (pos > stop) return E_FILE_FORMAT_INVALID; - const int exp = 7 * len - 1; + const long exp = 7 * len - 1; const long long bias = (1LL << exp) - 1LL; const long long delta_size = delta_size_ - bias; @@ -7721,4 +7937,4 @@ long Block::Frame::Read(IMkvReader* pReader, unsigned char* buf) const { long long Block::GetDiscardPadding() const { return m_discard_padding; } -} // end namespace mkvparser +} // namespace mkvparser diff --git a/libvpx/third_party/libwebm/mkvparser.hpp b/libvpx/third_party/libwebm/mkvparser/mkvparser.h index 75ef69d76..42e6e88ab 100644 --- a/libvpx/third_party/libwebm/mkvparser.hpp +++ b/libvpx/third_party/libwebm/mkvparser/mkvparser.h @@ -5,13 +5,10 @@ // tree. An additional intellectual property rights grant can be found // in the file PATENTS. 
All contributing project authors may // be found in the AUTHORS file in the root of the source tree. - -#ifndef MKVPARSER_HPP -#define MKVPARSER_HPP +#ifndef MKVPARSER_MKVPARSER_H_ +#define MKVPARSER_MKVPARSER_H_ #include <cstddef> -#include <cstdio> -#include <cstdlib> namespace mkvparser { @@ -28,8 +25,9 @@ class IMkvReader { virtual ~IMkvReader(); }; -template<typename Type> Type* SafeArrayAlloc(unsigned long long num_elements, - unsigned long long element_size); +template <typename Type> +Type* SafeArrayAlloc(unsigned long long num_elements, + unsigned long long element_size); long long GetUIntLength(IMkvReader*, long long, long&); long long ReadUInt(IMkvReader*, long long, long&); long long ReadID(IMkvReader* pReader, long long pos, long& len); @@ -128,7 +126,7 @@ class BlockEntry { public: virtual ~BlockEntry(); - bool EOS() const; + bool EOS() const { return (GetKind() == kBlockEOS); } const Cluster* GetCluster() const; long GetIndex() const; virtual const Block* GetBlock() const = 0; @@ -391,6 +389,90 @@ class Track { ContentEncoding** content_encoding_entries_end_; }; +struct PrimaryChromaticity { + PrimaryChromaticity() : x(0), y(0) {} + ~PrimaryChromaticity() {} + static bool Parse(IMkvReader* reader, long long read_pos, + long long value_size, bool is_x, + PrimaryChromaticity** chromaticity); + float x; + float y; +}; + +struct MasteringMetadata { + static const float kValueNotPresent; + + MasteringMetadata() + : r(NULL), + g(NULL), + b(NULL), + white_point(NULL), + luminance_max(kValueNotPresent), + luminance_min(kValueNotPresent) {} + ~MasteringMetadata() { + delete r; + delete g; + delete b; + delete white_point; + } + + static bool Parse(IMkvReader* reader, long long element_start, + long long element_size, + MasteringMetadata** mastering_metadata); + + PrimaryChromaticity* r; + PrimaryChromaticity* g; + PrimaryChromaticity* b; + PrimaryChromaticity* white_point; + float luminance_max; + float luminance_min; +}; + +struct Colour { + static const 
long long kValueNotPresent; + + // Unless otherwise noted all values assigned upon construction are the + // equivalent of unspecified/default. + Colour() + : matrix_coefficients(kValueNotPresent), + bits_per_channel(kValueNotPresent), + chroma_subsampling_horz(kValueNotPresent), + chroma_subsampling_vert(kValueNotPresent), + cb_subsampling_horz(kValueNotPresent), + cb_subsampling_vert(kValueNotPresent), + chroma_siting_horz(kValueNotPresent), + chroma_siting_vert(kValueNotPresent), + range(kValueNotPresent), + transfer_characteristics(kValueNotPresent), + primaries(kValueNotPresent), + max_cll(kValueNotPresent), + max_fall(kValueNotPresent), + mastering_metadata(NULL) {} + ~Colour() { + delete mastering_metadata; + mastering_metadata = NULL; + } + + static bool Parse(IMkvReader* reader, long long element_start, + long long element_size, Colour** colour); + + long long matrix_coefficients; + long long bits_per_channel; + long long chroma_subsampling_horz; + long long chroma_subsampling_vert; + long long cb_subsampling_horz; + long long cb_subsampling_vert; + long long chroma_siting_horz; + long long chroma_siting_vert; + long long range; + long long transfer_characteristics; + long long primaries; + long long max_cll; + long long max_fall; + + MasteringMetadata* mastering_metadata; +}; + class VideoTrack : public Track { VideoTrack(const VideoTrack&); VideoTrack& operator=(const VideoTrack&); @@ -398,6 +480,7 @@ class VideoTrack : public Track { VideoTrack(Segment*, long long element_start, long long element_size); public: + virtual ~VideoTrack(); static long Parse(Segment*, const Info&, long long element_start, long long element_size, VideoTrack*&); @@ -412,6 +495,8 @@ class VideoTrack : public Track { bool VetEntry(const BlockEntry*) const; long Seek(long long time_ns, const BlockEntry*&) const; + Colour* GetColour() const; + private: long long m_width; long long m_height; @@ -421,6 +506,8 @@ class VideoTrack : public Track { long long m_stereo_mode; double 
m_rate; + + Colour* m_colour; }; class AudioTrack : public Track { @@ -1013,7 +1100,7 @@ class Segment { const BlockEntry* GetBlock(const CuePoint&, const CuePoint::TrackPosition&); }; -} // end namespace mkvparser +} // namespace mkvparser inline long mkvparser::Segment::LoadCluster() { long long pos; @@ -1022,4 +1109,4 @@ inline long mkvparser::Segment::LoadCluster() { return LoadCluster(pos, size); } -#endif // MKVPARSER_HPP +#endif // MKVPARSER_MKVPARSER_H_ diff --git a/libvpx/third_party/libwebm/mkvreader.cpp b/libvpx/third_party/libwebm/mkvparser/mkvreader.cc index eaf9e0a79..9f90d8c4f 100644 --- a/libvpx/third_party/libwebm/mkvreader.cpp +++ b/libvpx/third_party/libwebm/mkvparser/mkvreader.cc @@ -5,8 +5,7 @@ // tree. An additional intellectual property rights grant can be found // in the file PATENTS. All contributing project authors may // be found in the AUTHORS file in the root of the source tree. - -#include "mkvreader.hpp" +#include "mkvparser/mkvreader.h" #include <cassert> @@ -129,4 +128,4 @@ int MkvReader::Read(long long offset, long len, unsigned char* buffer) { return 0; // success } -} // end namespace mkvparser +} // namespace mkvparser
\ No newline at end of file diff --git a/libvpx/third_party/libwebm/mkvreader.hpp b/libvpx/third_party/libwebm/mkvparser/mkvreader.h index 82ebad544..9831ecf64 100644 --- a/libvpx/third_party/libwebm/mkvreader.hpp +++ b/libvpx/third_party/libwebm/mkvparser/mkvreader.h @@ -5,13 +5,13 @@ // tree. An additional intellectual property rights grant can be found // in the file PATENTS. All contributing project authors may // be found in the AUTHORS file in the root of the source tree. +#ifndef MKVPARSER_MKVREADER_H_ +#define MKVPARSER_MKVREADER_H_ -#ifndef MKVREADER_HPP -#define MKVREADER_HPP - -#include "mkvparser.hpp" #include <cstdio> +#include "mkvparser/mkvparser.h" + namespace mkvparser { class MkvReader : public IMkvReader { @@ -40,6 +40,6 @@ class MkvReader : public IMkvReader { bool reader_owns_file_; }; -} // end namespace mkvparser +} // namespace mkvparser -#endif // MKVREADER_HPP +#endif // MKVPARSER_MKVREADER_H_ diff --git a/libvpx/third_party/x86inc/README.libvpx b/libvpx/third_party/x86inc/README.libvpx index e91e305a2..8d3cd966d 100644 --- a/libvpx/third_party/x86inc/README.libvpx +++ b/libvpx/third_party/x86inc/README.libvpx @@ -1,5 +1,5 @@ -URL: http://git.videolan.org/?p=x264.git -Version: a95584945dd9ce3acc66c6cd8f6796bc4404d40d +URL: https://git.videolan.org/git/x264.git +Version: d23d18655249944c1ca894b451e2c82c7a584c62 License: ISC License File: LICENSE @@ -13,12 +13,8 @@ Prefix functions with vpx by default. Manage name mangling (prefixing with '_') manually because 'PREFIX' does not exist in libvpx. Expand PIC default to macho64 and respect CONFIG_PIC from libvpx -Catch all elf formats for 'hidden' status and SECTION notes. -Avoid 'amdnop' when building with nasm. Set 'private_extern' visibility for macho targets. Copy PIC 'GLOBAL' macros from x86_abi_support.asm Use .text instead of .rodata on macho to avoid broken tables in PIC mode. 
Use .text with no alignment for aout Only use 'hidden' visibility with Chromium -Move '%use smartalign' for nasm out of 'INIT_CPUFLAGS' and before - 'ALIGNMODE'. diff --git a/libvpx/third_party/x86inc/x86inc.asm b/libvpx/third_party/x86inc/x86inc.asm index be59de311..b647dff2f 100644 --- a/libvpx/third_party/x86inc/x86inc.asm +++ b/libvpx/third_party/x86inc/x86inc.asm @@ -1,7 +1,7 @@ ;***************************************************************************** ;* x86inc.asm: x264asm abstraction layer ;***************************************************************************** -;* Copyright (C) 2005-2015 x264 project +;* Copyright (C) 2005-2016 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Anton Mitrofanov <BugMaster@narod.ru> @@ -66,16 +66,35 @@ %endif %endif -%ifidn __OUTPUT_FORMAT__,elf32 - %define mangle(x) x +%define FORMAT_ELF 0 +%ifidn __OUTPUT_FORMAT__,elf + %define FORMAT_ELF 1 +%elifidn __OUTPUT_FORMAT__,elf32 + %define FORMAT_ELF 1 %elifidn __OUTPUT_FORMAT__,elf64 - %define mangle(x) x -%elifidn __OUTPUT_FORMAT__,x64 - %define mangle(x) x -%elifidn __OUTPUT_FORMAT__,win64 - %define mangle(x) x + %define FORMAT_ELF 1 +%endif + +%define FORMAT_MACHO 0 +%ifidn __OUTPUT_FORMAT__,macho32 + %define FORMAT_MACHO 1 +%elifidn __OUTPUT_FORMAT__,macho64 + %define FORMAT_MACHO 1 +%endif + +; Set PREFIX for libvpx builds. +%if FORMAT_ELF + %undef PREFIX +%elif WIN64 + %undef PREFIX %else + %define PREFIX +%endif + +%ifdef PREFIX %define mangle(x) _ %+ x +%else + %define mangle(x) x %endif ; In some instances macho32 tables get misaligned when using .rodata. @@ -94,14 +113,6 @@ %endif %endmacro -%macro SECTION_TEXT 0-1 16 - %ifidn __OUTPUT_FORMAT__,aout - SECTION .text - %else - SECTION .text align=%1 - %endif -%endmacro - ; PIC macros are copied from vpx_ports/x86_abi_support.asm. The "define PIC" ; from original code is added in for 64bit. 
%ifidn __OUTPUT_FORMAT__,elf32 @@ -119,7 +130,7 @@ %if ABI_IS_32BIT %if CONFIG_PIC=1 %ifidn __OUTPUT_FORMAT__,elf32 - %define GET_GOT_SAVE_ARG 1 + %define GET_GOT_DEFINED 1 %define WRT_PLT wrt ..plt %macro GET_GOT 1 extern _GLOBAL_OFFSET_TABLE_ @@ -138,7 +149,7 @@ %define RESTORE_GOT pop %1 %endmacro %elifidn __OUTPUT_FORMAT__,macho32 - %define GET_GOT_SAVE_ARG 1 + %define GET_GOT_DEFINED 1 %macro GET_GOT 1 push %1 call %%get_got @@ -149,6 +160,8 @@ %undef RESTORE_GOT %define RESTORE_GOT pop %1 %endmacro + %else + %define GET_GOT_DEFINED 0 %endif %endif @@ -186,8 +199,16 @@ %ifdef PIC default rel %endif + +%ifndef GET_GOT_DEFINED + %define GET_GOT_DEFINED 0 +%endif ; Done with PIC macros +%ifdef __NASM_VER__ + %use smartalign +%endif + ; Macros to eliminate most code duplication between x86_32 and x86_64: ; Currently this works only for leaf functions which load all their arguments ; into registers at the start, and make no other use of the stack. Luckily that @@ -235,6 +256,7 @@ %define r%1w %2w %define r%1b %2b %define r%1h %2h + %define %2q %2 %if %0 == 2 %define r%1m %2d %define r%1mp %2 @@ -259,9 +281,9 @@ %define e%1h %3 %define r%1b %2 %define e%1b %2 -%if ARCH_X86_64 == 0 - %define r%1 e%1 -%endif + %if ARCH_X86_64 == 0 + %define r%1 e%1 + %endif %endmacro DECLARE_REG_SIZE ax, al, ah @@ -371,7 +393,7 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 %macro ASSERT 1 %if (%1) == 0 - %error assert failed + %error assertion ``%1'' failed %endif %endmacro @@ -462,8 +484,10 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 %if %1 != 0 && required_stack_alignment > STACK_ALIGNMENT %if %1 > 0 %assign regs_used (regs_used + 1) - %elif ARCH_X86_64 && regs_used == num_args && num_args <= 4 + UNIX64 * 2 - %warning "Stack pointer will overwrite register argument" + %endif + %if ARCH_X86_64 && regs_used < 5 + UNIX64 * 3 + ; Ensure that we don't clobber any registers containing arguments + %assign regs_used 5 + UNIX64 * 3 %endif %endif %endif @@ -577,9 
+601,9 @@ DECLARE_REG 14, R15, 120 %macro RET 0 WIN64_RESTORE_XMM_INTERNAL rsp POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7 -%if mmsize == 32 - vzeroupper -%endif + %if mmsize == 32 + vzeroupper + %endif AUTO_REP_RET %endmacro @@ -616,17 +640,17 @@ DECLARE_REG 14, R15, 72 %define has_epilogue regs_used > 9 || mmsize == 32 || stack_size > 0 %macro RET 0 -%if stack_size_padded > 0 -%if required_stack_alignment > STACK_ALIGNMENT - mov rsp, rstkm -%else - add rsp, stack_size_padded -%endif -%endif + %if stack_size_padded > 0 + %if required_stack_alignment > STACK_ALIGNMENT + mov rsp, rstkm + %else + add rsp, stack_size_padded + %endif + %endif POP_IF_USED 14, 13, 12, 11, 10, 9 -%if mmsize == 32 - vzeroupper -%endif + %if mmsize == 32 + vzeroupper + %endif AUTO_REP_RET %endmacro @@ -672,29 +696,29 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14 %define has_epilogue regs_used > 3 || mmsize == 32 || stack_size > 0 %macro RET 0 -%if stack_size_padded > 0 -%if required_stack_alignment > STACK_ALIGNMENT - mov rsp, rstkm -%else - add rsp, stack_size_padded -%endif -%endif + %if stack_size_padded > 0 + %if required_stack_alignment > STACK_ALIGNMENT + mov rsp, rstkm + %else + add rsp, stack_size_padded + %endif + %endif POP_IF_USED 6, 5, 4, 3 -%if mmsize == 32 - vzeroupper -%endif + %if mmsize == 32 + vzeroupper + %endif AUTO_REP_RET %endmacro %endif ;====================================================================== %if WIN64 == 0 -%macro WIN64_SPILL_XMM 1 -%endmacro -%macro WIN64_RESTORE_XMM 1 -%endmacro -%macro WIN64_PUSH_XMM 0 -%endmacro + %macro WIN64_SPILL_XMM 1 + %endmacro + %macro WIN64_RESTORE_XMM 1 + %endmacro + %macro WIN64_PUSH_XMM 0 + %endmacro %endif ; On AMD cpus <=K10, an ordinary ret is slow if it immediately follows either @@ -707,24 +731,26 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14 %else rep ret %endif + annotate_function_size %endmacro %define last_branch_adr $$ %macro AUTO_REP_RET 0 - %ifndef cpuflags - times ((last_branch_adr-$)>>31)+1 rep ; times 1 iff $ != 
last_branch_adr. - %elif notcpuflag(ssse3) - times ((last_branch_adr-$)>>31)+1 rep + %if notcpuflag(ssse3) + times ((last_branch_adr-$)>>31)+1 rep ; times 1 iff $ == last_branch_adr. %endif ret + annotate_function_size %endmacro %macro BRANCH_INSTR 0-* %rep %0 %macro %1 1-2 %1 %2 %1 - %%branch_instr: - %xdefine last_branch_adr %%branch_instr + %if notcpuflag(ssse3) + %%branch_instr equ $ + %xdefine last_branch_adr %%branch_instr + %endif %endmacro %rotate 1 %endrep @@ -739,6 +765,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, %elif %2 jmp %1 %endif + annotate_function_size %endmacro ;============================================================================= @@ -760,6 +787,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, cglobal_internal 0, %1 %+ SUFFIX, %2 %endmacro %macro cglobal_internal 2-3+ + annotate_function_size %if %1 %xdefine %%FUNCTION_PREFIX private_prefix ; libvpx explicitly sets visibility in shared object builds. 
Avoid @@ -780,17 +808,10 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, CAT_XDEFINE cglobaled_, %2, 1 %endif %xdefine current_function %2 - %ifidn __OUTPUT_FORMAT__,elf32 - global %2:function %%VISIBILITY - %elifidn __OUTPUT_FORMAT__,elf64 + %xdefine current_function_section __SECT__ + %if FORMAT_ELF global %2:function %%VISIBILITY - %elifidn __OUTPUT_FORMAT__,macho32 - %ifdef __NASM_VER__ - global %2 - %else - global %2:private_extern - %endif - %elifidn __OUTPUT_FORMAT__,macho64 + %elif FORMAT_MACHO %ifdef __NASM_VER__ global %2 %else @@ -820,16 +841,16 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, ; like cextern, but without the prefix %macro cextern_naked 1 - %xdefine %1 mangle(%1) + %ifdef PREFIX + %xdefine %1 mangle(%1) + %endif CAT_XDEFINE cglobaled_, %1, 1 extern %1 %endmacro %macro const 1-2+ %xdefine %1 mangle(private_prefix %+ _ %+ %1) - %ifidn __OUTPUT_FORMAT__,elf32 - global %1:data hidden - %elifidn __OUTPUT_FORMAT__,elf64 + %if FORMAT_ELF global %1:data hidden %else global %1 @@ -837,14 +858,29 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, %1: %2 %endmacro -; This is needed for ELF, otherwise the GNU linker assumes the stack is -; executable by default. -%ifidn __OUTPUT_FORMAT__,elf32 -SECTION .note.GNU-stack noalloc noexec nowrite progbits -%elifidn __OUTPUT_FORMAT__,elf64 -SECTION .note.GNU-stack noalloc noexec nowrite progbits +; This is needed for ELF, otherwise the GNU linker assumes the stack is executable by default. +%if FORMAT_ELF + [SECTION .note.GNU-stack noalloc noexec nowrite progbits] %endif +; Tell debuggers how large the function was. +; This may be invoked multiple times per function; we rely on later instances overriding earlier ones. 
+; This is invoked by RET and similar macros, and also cglobal does it for the previous function, +; but if the last function in a source file doesn't use any of the standard macros for its epilogue, +; then its size might be unspecified. +%macro annotate_function_size 0 + %ifdef __YASM_VER__ + %ifdef current_function + %if FORMAT_ELF + current_function_section + %%ecf equ $ + size current_function %%ecf - current_function + __SECT__ + %endif + %endif + %endif +%endmacro + ; cpuflags %assign cpuflags_mmx (1<<0) @@ -873,12 +909,9 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits %assign cpuflags_bmi1 (1<<22)|cpuflags_lzcnt %assign cpuflags_bmi2 (1<<23)|cpuflags_bmi1 -%define cpuflag(x) ((cpuflags & (cpuflags_ %+ x)) == (cpuflags_ %+ x)) -%define notcpuflag(x) ((cpuflags & (cpuflags_ %+ x)) != (cpuflags_ %+ x)) - -%ifdef __NASM_VER__ - %use smartalign -%endif +; Returns a boolean value expressing whether or not the specified cpuflag is enabled. +%define cpuflag(x) (((((cpuflags & (cpuflags_ %+ x)) ^ (cpuflags_ %+ x)) - 1) >> 31) & 1) +%define notcpuflag(x) (cpuflag(x) ^ 1) ; Takes an arbitrary number of cpuflags from the above list. ; All subsequent functions (up to the next INIT_CPUFLAGS) is built for the specified cpu. 
@@ -915,12 +948,18 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits %endif %endif - %ifdef __NASM_VER__ - ALIGNMODE k7 - %elif ARCH_X86_64 || cpuflag(sse2) - CPU amdnop + %if ARCH_X86_64 || cpuflag(sse2) + %ifdef __NASM_VER__ + ALIGNMODE k8 + %else + CPU amdnop + %endif %else - CPU basicnop + %ifdef __NASM_VER__ + ALIGNMODE nop + %else + CPU basicnop + %endif %endif %endmacro @@ -949,14 +988,14 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits %define movnta movntq %assign %%i 0 %rep 8 - CAT_XDEFINE m, %%i, mm %+ %%i - CAT_XDEFINE nnmm, %%i, %%i - %assign %%i %%i+1 + CAT_XDEFINE m, %%i, mm %+ %%i + CAT_XDEFINE nnmm, %%i, %%i + %assign %%i %%i+1 %endrep %rep 8 - CAT_UNDEF m, %%i - CAT_UNDEF nnmm, %%i - %assign %%i %%i+1 + CAT_UNDEF m, %%i + CAT_UNDEF nnmm, %%i + %assign %%i %%i+1 %endrep INIT_CPUFLAGS %1 %endmacro @@ -967,7 +1006,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits %define mmsize 16 %define num_mmregs 8 %if ARCH_X86_64 - %define num_mmregs 16 + %define num_mmregs 16 %endif %define mova movdqa %define movu movdqu @@ -975,9 +1014,9 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits %define movnta movntdq %assign %%i 0 %rep num_mmregs - CAT_XDEFINE m, %%i, xmm %+ %%i - CAT_XDEFINE nnxmm, %%i, %%i - %assign %%i %%i+1 + CAT_XDEFINE m, %%i, xmm %+ %%i + CAT_XDEFINE nnxmm, %%i, %%i + %assign %%i %%i+1 %endrep INIT_CPUFLAGS %1 %endmacro @@ -988,7 +1027,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits %define mmsize 32 %define num_mmregs 8 %if ARCH_X86_64 - %define num_mmregs 16 + %define num_mmregs 16 %endif %define mova movdqa %define movu movdqu @@ -996,9 +1035,9 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits %define movnta movntdq %assign %%i 0 %rep num_mmregs - CAT_XDEFINE m, %%i, ymm %+ %%i - CAT_XDEFINE nnymm, %%i, %%i - %assign %%i %%i+1 + CAT_XDEFINE m, %%i, ymm %+ %%i + CAT_XDEFINE nnymm, %%i, %%i + %assign %%i %%i+1 %endrep INIT_CPUFLAGS %1 %endmacro @@ -1022,7 +1061,7 @@ INIT_XMM %assign 
i 0 %rep 16 DECLARE_MMCAST i -%assign i i+1 + %assign i i+1 %endrep ; I often want to use macros that permute their arguments. e.g. there's no @@ -1040,23 +1079,23 @@ INIT_XMM ; doesn't cost any cycles. %macro PERMUTE 2-* ; takes a list of pairs to swap -%rep %0/2 - %xdefine %%tmp%2 m%2 - %rotate 2 -%endrep -%rep %0/2 - %xdefine m%1 %%tmp%2 - CAT_XDEFINE nn, m%1, %1 - %rotate 2 -%endrep + %rep %0/2 + %xdefine %%tmp%2 m%2 + %rotate 2 + %endrep + %rep %0/2 + %xdefine m%1 %%tmp%2 + CAT_XDEFINE nn, m%1, %1 + %rotate 2 + %endrep %endmacro %macro SWAP 2+ ; swaps a single chain (sometimes more concise than pairs) -%ifnum %1 ; SWAP 0, 1, ... - SWAP_INTERNAL_NUM %1, %2 -%else ; SWAP m0, m1, ... - SWAP_INTERNAL_NAME %1, %2 -%endif + %ifnum %1 ; SWAP 0, 1, ... + SWAP_INTERNAL_NUM %1, %2 + %else ; SWAP m0, m1, ... + SWAP_INTERNAL_NAME %1, %2 + %endif %endmacro %macro SWAP_INTERNAL_NUM 2-* @@ -1066,7 +1105,7 @@ INIT_XMM %xdefine m%2 %%tmp CAT_XDEFINE nn, m%1, %1 CAT_XDEFINE nn, m%2, %2 - %rotate 1 + %rotate 1 %endrep %endmacro @@ -1074,7 +1113,7 @@ INIT_XMM %xdefine %%args nn %+ %1 %rep %0-1 %xdefine %%args %%args, nn %+ %2 - %rotate 1 + %rotate 1 %endrep SWAP_INTERNAL_NUM %%args %endmacro @@ -1091,7 +1130,7 @@ INIT_XMM %assign %%i 0 %rep num_mmregs CAT_XDEFINE %%f, %%i, m %+ %%i - %assign %%i %%i+1 + %assign %%i %%i+1 %endrep %endmacro @@ -1101,20 +1140,20 @@ INIT_XMM %rep num_mmregs CAT_XDEFINE m, %%i, %1_m %+ %%i CAT_XDEFINE nn, m %+ %%i, %%i - %assign %%i %%i+1 + %assign %%i %%i+1 %endrep %endif %endmacro ; Append cpuflags to the callee's name iff the appended name is known and the plain name isn't %macro call 1 - call_internal %1, %1 %+ SUFFIX + call_internal %1 %+ SUFFIX, %1 %endmacro %macro call_internal 2 - %xdefine %%i %1 - %ifndef cglobaled_%1 - %ifdef cglobaled_%2 - %xdefine %%i %2 + %xdefine %%i %2 + %ifndef cglobaled_%2 + %ifdef cglobaled_%1 + %xdefine %%i %1 %endif %endif call %%i @@ -1157,7 +1196,7 @@ INIT_XMM %endif CAT_XDEFINE sizeofxmm, i, 16 CAT_XDEFINE 
sizeofymm, i, 32 -%assign i i+1 + %assign i i+1 %endrep %undef i @@ -1534,7 +1573,7 @@ AVX_INSTR pfmul, 3dnow, 1, 0, 1 %else CAT_XDEFINE q, j, i %endif -%assign i i+1 + %assign i i+1 %endrep %undef i %undef j @@ -1557,55 +1596,54 @@ FMA_INSTR pmacsdd, pmulld, paddd ; sse4 emulation FMA_INSTR pmacsdql, pmuldq, paddq ; sse4 emulation FMA_INSTR pmadcswd, pmaddwd, paddd -; convert FMA4 to FMA3 if possible -%macro FMA4_INSTR 4 - %macro %1 4-8 %1, %2, %3, %4 - %if cpuflag(fma4) - v%5 %1, %2, %3, %4 - %elifidn %1, %2 - v%6 %1, %4, %3 ; %1 = %1 * %3 + %4 - %elifidn %1, %3 - v%7 %1, %2, %4 ; %1 = %2 * %1 + %4 - %elifidn %1, %4 - v%8 %1, %2, %3 ; %1 = %2 * %3 + %1 - %else - %error fma3 emulation of ``%5 %1, %2, %3, %4'' is not supported - %endif - %endmacro -%endmacro - -FMA4_INSTR fmaddpd, fmadd132pd, fmadd213pd, fmadd231pd -FMA4_INSTR fmaddps, fmadd132ps, fmadd213ps, fmadd231ps -FMA4_INSTR fmaddsd, fmadd132sd, fmadd213sd, fmadd231sd -FMA4_INSTR fmaddss, fmadd132ss, fmadd213ss, fmadd231ss - -FMA4_INSTR fmaddsubpd, fmaddsub132pd, fmaddsub213pd, fmaddsub231pd -FMA4_INSTR fmaddsubps, fmaddsub132ps, fmaddsub213ps, fmaddsub231ps -FMA4_INSTR fmsubaddpd, fmsubadd132pd, fmsubadd213pd, fmsubadd231pd -FMA4_INSTR fmsubaddps, fmsubadd132ps, fmsubadd213ps, fmsubadd231ps - -FMA4_INSTR fmsubpd, fmsub132pd, fmsub213pd, fmsub231pd -FMA4_INSTR fmsubps, fmsub132ps, fmsub213ps, fmsub231ps -FMA4_INSTR fmsubsd, fmsub132sd, fmsub213sd, fmsub231sd -FMA4_INSTR fmsubss, fmsub132ss, fmsub213ss, fmsub231ss - -FMA4_INSTR fnmaddpd, fnmadd132pd, fnmadd213pd, fnmadd231pd -FMA4_INSTR fnmaddps, fnmadd132ps, fnmadd213ps, fnmadd231ps -FMA4_INSTR fnmaddsd, fnmadd132sd, fnmadd213sd, fnmadd231sd -FMA4_INSTR fnmaddss, fnmadd132ss, fnmadd213ss, fnmadd231ss - -FMA4_INSTR fnmsubpd, fnmsub132pd, fnmsub213pd, fnmsub231pd -FMA4_INSTR fnmsubps, fnmsub132ps, fnmsub213ps, fnmsub231ps -FMA4_INSTR fnmsubsd, fnmsub132sd, fnmsub213sd, fnmsub231sd -FMA4_INSTR fnmsubss, fnmsub132ss, fnmsub213ss, fnmsub231ss - -; workaround: 
vpbroadcastq is broken in x86_32 due to a yasm bug -%if ARCH_X86_64 == 0 -%macro vpbroadcastq 2 -%if sizeof%1 == 16 - movddup %1, %2 -%else - vbroadcastsd %1, %2 -%endif -%endmacro +; Macros for consolidating FMA3 and FMA4 using 4-operand (dst, src1, src2, src3) syntax. +; FMA3 is only possible if dst is the same as one of the src registers. +; Either src2 or src3 can be a memory operand. +%macro FMA4_INSTR 2-* + %push fma4_instr + %xdefine %$prefix %1 + %rep %0 - 1 + %macro %$prefix%2 4-6 %$prefix, %2 + %if notcpuflag(fma3) && notcpuflag(fma4) + %error use of ``%5%6'' fma instruction in cpuname function: current_function + %elif cpuflag(fma4) + v%5%6 %1, %2, %3, %4 + %elifidn %1, %2 + ; If %3 or %4 is a memory operand it needs to be encoded as the last operand. + %ifid %3 + v%{5}213%6 %2, %3, %4 + %else + v%{5}132%6 %2, %4, %3 + %endif + %elifidn %1, %3 + v%{5}213%6 %3, %2, %4 + %elifidn %1, %4 + v%{5}231%6 %4, %2, %3 + %else + %error fma3 emulation of ``%5%6 %1, %2, %3, %4'' is not supported + %endif + %endmacro + %rotate 1 + %endrep + %pop +%endmacro + +FMA4_INSTR fmadd, pd, ps, sd, ss +FMA4_INSTR fmaddsub, pd, ps +FMA4_INSTR fmsub, pd, ps, sd, ss +FMA4_INSTR fmsubadd, pd, ps +FMA4_INSTR fnmadd, pd, ps, sd, ss +FMA4_INSTR fnmsub, pd, ps, sd, ss + +; workaround: vpbroadcastq is broken in x86_32 due to a yasm bug (fixed in 1.3.0) +%ifdef __YASM_VER__ + %if __YASM_VERSION_ID__ < 0x01030000 && ARCH_X86_64 == 0 + %macro vpbroadcastq 2 + %if sizeof%1 == 16 + movddup %1, %2 + %else + vbroadcastsd %1, %2 + %endif + %endmacro + %endif %endif diff --git a/libvpx/tools/gen_authors.sh b/libvpx/tools/gen_authors.sh index e1246f08a..4cfd81ec3 100755 --- a/libvpx/tools/gen_authors.sh +++ b/libvpx/tools/gen_authors.sh @@ -6,7 +6,7 @@ cat <<EOF # This file is automatically generated from the git commit history # by tools/gen_authors.sh. -$(git log --pretty=format:"%aN <%aE>" | sort | uniq) +$(git log --pretty=format:"%aN <%aE>" | sort | uniq | grep -v corp.google) Google Inc. 
The Mozilla Foundation The Xiph.Org Foundation diff --git a/libvpx/tools_common.c b/libvpx/tools_common.c index 20b259ca9..17c0d44f5 100644 --- a/libvpx/tools_common.c +++ b/libvpx/tools_common.c @@ -16,11 +16,11 @@ #include "./tools_common.h" -#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER +#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER #include "vpx/vp8cx.h" #endif -#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER || CONFIG_VP10_DECODER +#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER #include "vpx/vp8dx.h" #endif @@ -133,10 +133,6 @@ int read_yuv_frame(struct VpxInputContext *input_ctx, vpx_image_t *yuv_frame) { #if CONFIG_ENCODERS static const VpxInterface vpx_encoders[] = { -#if CONFIG_VP10_ENCODER - {"vp10", VP10_FOURCC, &vpx_codec_vp10_cx}, -#endif - #if CONFIG_VP8_ENCODER {"vp8", VP8_FOURCC, &vpx_codec_vp8_cx}, #endif @@ -178,10 +174,6 @@ static const VpxInterface vpx_decoders[] = { #if CONFIG_VP9_DECODER {"vp9", VP9_FOURCC, &vpx_codec_vp9_dx}, #endif - -#if CONFIG_VP10_DECODER - {"vp10", VP10_FOURCC, &vpx_codec_vp10_dx}, -#endif }; int get_vpx_decoder_count(void) { diff --git a/libvpx/tools_common.h b/libvpx/tools_common.h index 98347b6f2..310b5695f 100644 --- a/libvpx/tools_common.h +++ b/libvpx/tools_common.h @@ -62,7 +62,6 @@ #define VP8_FOURCC 0x30385056 #define VP9_FOURCC 0x30395056 -#define VP10_FOURCC 0x303a5056 enum VideoFileType { FILE_TYPE_RAW, diff --git a/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c b/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c index 9824a3193..bb6ea76ba 100644 --- a/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c +++ b/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c @@ -21,114 +21,6 @@ static const uint8_t bifilter4_coeff[8][2] = { { 16, 112} }; -void vp8_bilinear_predict4x4_neon( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8; - uint8x8_t d26u8, d27u8, d28u8, d29u8, 
d30u8; - uint8x16_t q1u8, q2u8; - uint16x8_t q1u16, q2u16; - uint16x8_t q7u16, q8u16, q9u16; - uint64x2_t q4u64, q5u64; - uint64x1_t d12u64; - uint32x2x2_t d0u32x2, d1u32x2, d2u32x2, d3u32x2; - - if (xoffset == 0) { // skip_1stpass_filter - uint32x2_t d28u32 = vdup_n_u32(0); - uint32x2_t d29u32 = vdup_n_u32(0); - uint32x2_t d30u32 = vdup_n_u32(0); - - d28u32 = vld1_lane_u32((const uint32_t *)src_ptr, d28u32, 0); - src_ptr += src_pixels_per_line; - d28u32 = vld1_lane_u32((const uint32_t *)src_ptr, d28u32, 1); - src_ptr += src_pixels_per_line; - d29u32 = vld1_lane_u32((const uint32_t *)src_ptr, d29u32, 0); - src_ptr += src_pixels_per_line; - d29u32 = vld1_lane_u32((const uint32_t *)src_ptr, d29u32, 1); - src_ptr += src_pixels_per_line; - d30u32 = vld1_lane_u32((const uint32_t *)src_ptr, d30u32, 0); - d28u8 = vreinterpret_u8_u32(d28u32); - d29u8 = vreinterpret_u8_u32(d29u32); - d30u8 = vreinterpret_u8_u32(d30u32); - } else { - d2u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d3u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d4u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d5u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d6u8 = vld1_u8(src_ptr); - - q1u8 = vcombine_u8(d2u8, d3u8); - q2u8 = vcombine_u8(d4u8, d5u8); - - d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]); - - q4u64 = vshrq_n_u64(vreinterpretq_u64_u8(q1u8), 8); - q5u64 = vshrq_n_u64(vreinterpretq_u64_u8(q2u8), 8); - d12u64 = vshr_n_u64(vreinterpret_u64_u8(d6u8), 8); - - d0u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q1u8)), - vreinterpret_u32_u8(vget_high_u8(q1u8))); - d1u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q2u8)), - vreinterpret_u32_u8(vget_high_u8(q2u8))); - d2u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q4u64)), - vreinterpret_u32_u64(vget_high_u64(q4u64))); - d3u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)), - vreinterpret_u32_u64(vget_high_u64(q5u64))); - - q7u16 = 
vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d0u8); - q8u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d0u8); - q9u16 = vmull_u8(d6u8, d0u8); - - q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d2u32x2.val[0]), d1u8); - q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d3u32x2.val[0]), d1u8); - q9u16 = vmlal_u8(q9u16, vreinterpret_u8_u64(d12u64), d1u8); - - d28u8 = vqrshrn_n_u16(q7u16, 7); - d29u8 = vqrshrn_n_u16(q8u16, 7); - d30u8 = vqrshrn_n_u16(q9u16, 7); - } - - // secondpass_filter - if (yoffset == 0) { // skip_2ndpass_filter - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 0); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 1); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d29u8), 0); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d29u8), 1); - } else { - d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]); - - q1u16 = vmull_u8(d28u8, d0u8); - q2u16 = vmull_u8(d29u8, d0u8); - - d26u8 = vext_u8(d28u8, d29u8, 4); - d27u8 = vext_u8(d29u8, d30u8, 4); - - q1u16 = vmlal_u8(q1u16, d26u8, d1u8); - q2u16 = vmlal_u8(q2u16, d27u8, d1u8); - - d2u8 = vqrshrn_n_u16(q1u16, 7); - d3u8 = vqrshrn_n_u16(q2u16, 7); - - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 0); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 1); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 0); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 1); - } - return; -} - void vp8_bilinear_predict8x4_neon( unsigned char *src_ptr, int src_pixels_per_line, diff --git a/libvpx/vp8/common/arm/neon/sixtappredict_neon.c b/libvpx/vp8/common/arm/neon/sixtappredict_neon.c index 4c2efc92b..49d8d221f 100644 --- a/libvpx/vp8/common/arm/neon/sixtappredict_neon.c +++ b/libvpx/vp8/common/arm/neon/sixtappredict_neon.c @@ -22,383 
+22,6 @@ static const int8_t vp8_sub_pel_filters[8][8] = { {0, -1, 12, 123, -6, 0, 0, 0}, }; -void vp8_sixtap_predict4x4_neon( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - unsigned char *src; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d18u8, d19u8, d20u8, d21u8; - uint8x8_t d23u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8, d30u8, d31u8; - int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8; - uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16; - uint16x8_t q8u16, q9u16, q10u16, q11u16, q12u16; - int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16; - int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16; - uint8x16_t q3u8, q4u8, q5u8, q6u8, q11u8; - uint64x2_t q3u64, q4u64, q5u64, q6u64, q9u64, q10u64; - uint32x2x2_t d0u32x2, d1u32x2; - - if (xoffset == 0) { // secondpass_filter4x4_only - uint32x2_t d27u32 = vdup_n_u32(0); - uint32x2_t d28u32 = vdup_n_u32(0); - uint32x2_t d29u32 = vdup_n_u32(0); - uint32x2_t d30u32 = vdup_n_u32(0); - uint32x2_t d31u32 = vdup_n_u32(0); - - // load second_pass filter - dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - // load src data - src = src_ptr - src_pixels_per_line * 2; - d27u32 = vld1_lane_u32((const uint32_t *)src, d27u32, 0); - src += src_pixels_per_line; - d27u32 = vld1_lane_u32((const uint32_t *)src, d27u32, 1); - src += src_pixels_per_line; - d28u32 = vld1_lane_u32((const uint32_t *)src, d28u32, 0); - src += src_pixels_per_line; - d28u32 = vld1_lane_u32((const uint32_t *)src, d28u32, 1); - src 
+= src_pixels_per_line; - d29u32 = vld1_lane_u32((const uint32_t *)src, d29u32, 0); - src += src_pixels_per_line; - d29u32 = vld1_lane_u32((const uint32_t *)src, d29u32, 1); - src += src_pixels_per_line; - d30u32 = vld1_lane_u32((const uint32_t *)src, d30u32, 0); - src += src_pixels_per_line; - d30u32 = vld1_lane_u32((const uint32_t *)src, d30u32, 1); - src += src_pixels_per_line; - d31u32 = vld1_lane_u32((const uint32_t *)src, d31u32, 0); - - d27u8 = vreinterpret_u8_u32(d27u32); - d28u8 = vreinterpret_u8_u32(d28u32); - d29u8 = vreinterpret_u8_u32(d29u32); - d30u8 = vreinterpret_u8_u32(d30u32); - d31u8 = vreinterpret_u8_u32(d31u32); - - d23u8 = vext_u8(d27u8, d28u8, 4); - d24u8 = vext_u8(d28u8, d29u8, 4); - d25u8 = vext_u8(d29u8, d30u8, 4); - d26u8 = vext_u8(d30u8, d31u8, 4); - - q3u16 = vmull_u8(d27u8, d0u8); - q4u16 = vmull_u8(d28u8, d0u8); - q5u16 = vmull_u8(d25u8, d5u8); - q6u16 = vmull_u8(d26u8, d5u8); - - q3u16 = vmlsl_u8(q3u16, d29u8, d4u8); - q4u16 = vmlsl_u8(q4u16, d30u8, d4u8); - q5u16 = vmlsl_u8(q5u16, d23u8, d1u8); - q6u16 = vmlsl_u8(q6u16, d24u8, d1u8); - - q3u16 = vmlal_u8(q3u16, d28u8, d2u8); - q4u16 = vmlal_u8(q4u16, d29u8, d2u8); - q5u16 = vmlal_u8(q5u16, d24u8, d3u8); - q6u16 = vmlal_u8(q6u16, d25u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - - q5s16 = vqaddq_s16(q5s16, q3s16); - q6s16 = vqaddq_s16(q6s16, q4s16); - - d3u8 = vqrshrun_n_s16(q5s16, 7); - d4u8 = vqrshrun_n_s16(q6s16, 7); - - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 0); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 1); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d4u8), 0); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d4u8), 1); - return; - } - - // load first_pass filter - dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]); - d0s8 
= vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - // First pass: output_height lines x output_width columns (9x4) - - if (yoffset == 0) // firstpass_filter4x4_only - src = src_ptr - 2; - else - src = src_ptr - 2 - (src_pixels_per_line * 2); - - q3u8 = vld1q_u8(src); - src += src_pixels_per_line; - q4u8 = vld1q_u8(src); - src += src_pixels_per_line; - q5u8 = vld1q_u8(src); - src += src_pixels_per_line; - q6u8 = vld1q_u8(src); - src += src_pixels_per_line; - - d18u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5); - d19u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5); - d20u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5); - d21u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5); - - // vswp here - q3u8 = vcombine_u8(vget_low_u8(q3u8), vget_low_u8(q4u8)); - q5u8 = vcombine_u8(vget_low_u8(q5u8), vget_low_u8(q6u8)); - - d0u32x2 = vzip_u32(vreinterpret_u32_u8(d18u8), // d18 d19 - vreinterpret_u32_u8(d19u8)); - d1u32x2 = vzip_u32(vreinterpret_u32_u8(d20u8), // d20 d21 - vreinterpret_u32_u8(d21u8)); - q7u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d5u8); - q8u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d5u8); - - // keep original src data in q4 q6 - q4u64 = vreinterpretq_u64_u8(q3u8); - q6u64 = vreinterpretq_u64_u8(q5u8); - - d0u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q3u8)), // d6 d7 - vreinterpret_u32_u8(vget_high_u8(q3u8))); - d1u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q5u8)), // d10 d11 - vreinterpret_u32_u8(vget_high_u8(q5u8))); - q9u64 = vshrq_n_u64(q4u64, 8); - q10u64 = vshrq_n_u64(q6u64, 8); - q7u16 = 
vmlal_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d0u8); - q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d0u8); - - d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q9u64)), // d18 d19 - vreinterpret_u32_u64(vget_high_u64(q9u64))); - d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q10u64)), // d20 d211 - vreinterpret_u32_u64(vget_high_u64(q10u64))); - q3u64 = vshrq_n_u64(q4u64, 32); - q5u64 = vshrq_n_u64(q6u64, 32); - q7u16 = vmlsl_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d1u8); - q8u16 = vmlsl_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d1u8); - - d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q3u64)), // d6 d7 - vreinterpret_u32_u64(vget_high_u64(q3u64))); - d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)), // d10 d11 - vreinterpret_u32_u64(vget_high_u64(q5u64))); - q9u64 = vshrq_n_u64(q4u64, 16); - q10u64 = vshrq_n_u64(q6u64, 16); - q7u16 = vmlsl_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d4u8); - q8u16 = vmlsl_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d4u8); - - d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q9u64)), // d18 d19 - vreinterpret_u32_u64(vget_high_u64(q9u64))); - d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q10u64)), // d20 d211 - vreinterpret_u32_u64(vget_high_u64(q10u64))); - q3u64 = vshrq_n_u64(q4u64, 24); - q5u64 = vshrq_n_u64(q6u64, 24); - q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d2u8); - q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d2u8); - - d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q3u64)), // d6 d7 - vreinterpret_u32_u64(vget_high_u64(q3u64))); - d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)), // d10 d11 - vreinterpret_u32_u64(vget_high_u64(q5u64))); - q9u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d3u8); - q10u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d3u8); - - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 
= vreinterpretq_s16_u16(q10u16); - q7s16 = vqaddq_s16(q7s16, q9s16); - q8s16 = vqaddq_s16(q8s16, q10s16); - - d27u8 = vqrshrun_n_s16(q7s16, 7); - d28u8 = vqrshrun_n_s16(q8s16, 7); - - if (yoffset == 0) { // firstpass_filter4x4_only - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d27u8), 0); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d27u8), 1); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 0); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 1); - return; - } - - // First Pass on rest 5-line data - q3u8 = vld1q_u8(src); - src += src_pixels_per_line; - q4u8 = vld1q_u8(src); - src += src_pixels_per_line; - q5u8 = vld1q_u8(src); - src += src_pixels_per_line; - q6u8 = vld1q_u8(src); - src += src_pixels_per_line; - q11u8 = vld1q_u8(src); - - d18u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5); - d19u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5); - d20u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5); - d21u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5); - - // vswp here - q3u8 = vcombine_u8(vget_low_u8(q3u8), vget_low_u8(q4u8)); - q5u8 = vcombine_u8(vget_low_u8(q5u8), vget_low_u8(q6u8)); - - d0u32x2 = vzip_u32(vreinterpret_u32_u8(d18u8), // d18 d19 - vreinterpret_u32_u8(d19u8)); - d1u32x2 = vzip_u32(vreinterpret_u32_u8(d20u8), // d20 d21 - vreinterpret_u32_u8(d21u8)); - d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 5); - q7u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d5u8); - q8u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d5u8); - q12u16 = vmull_u8(d31u8, d5u8); - - q4u64 = vreinterpretq_u64_u8(q3u8); - q6u64 = vreinterpretq_u64_u8(q5u8); - - d0u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q3u8)), // d6 d7 - vreinterpret_u32_u8(vget_high_u8(q3u8))); - d1u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q5u8)), // d10 d11 - vreinterpret_u32_u8(vget_high_u8(q5u8))); - q9u64 = 
vshrq_n_u64(q4u64, 8); - q10u64 = vshrq_n_u64(q6u64, 8); - q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d0u8); - q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d0u8); - q12u16 = vmlal_u8(q12u16, vget_low_u8(q11u8), d0u8); - - d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q9u64)), // d18 d19 - vreinterpret_u32_u64(vget_high_u64(q9u64))); - d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q10u64)), // d20 d211 - vreinterpret_u32_u64(vget_high_u64(q10u64))); - q3u64 = vshrq_n_u64(q4u64, 32); - q5u64 = vshrq_n_u64(q6u64, 32); - d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 1); - q7u16 = vmlsl_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d1u8); - q8u16 = vmlsl_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d1u8); - q12u16 = vmlsl_u8(q12u16, d31u8, d1u8); - - d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q3u64)), // d6 d7 - vreinterpret_u32_u64(vget_high_u64(q3u64))); - d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)), // d10 d11 - vreinterpret_u32_u64(vget_high_u64(q5u64))); - q9u64 = vshrq_n_u64(q4u64, 16); - q10u64 = vshrq_n_u64(q6u64, 16); - d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 4); - q7u16 = vmlsl_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d4u8); - q8u16 = vmlsl_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d4u8); - q12u16 = vmlsl_u8(q12u16, d31u8, d4u8); - - d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q9u64)), // d18 d19 - vreinterpret_u32_u64(vget_high_u64(q9u64))); - d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q10u64)), // d20 d211 - vreinterpret_u32_u64(vget_high_u64(q10u64))); - q3u64 = vshrq_n_u64(q4u64, 24); - q5u64 = vshrq_n_u64(q6u64, 24); - d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 2); - q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d2u8); - q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d2u8); - q12u16 = vmlal_u8(q12u16, d31u8, d2u8); - - d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q3u64)), 
// d6 d7 - vreinterpret_u32_u64(vget_high_u64(q3u64))); - d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)), // d10 d11 - vreinterpret_u32_u64(vget_high_u64(q5u64))); - d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 3); - q9u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d3u8); - q10u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d3u8); - q11u16 = vmull_u8(d31u8, d3u8); - - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - q11s16 = vreinterpretq_s16_u16(q11u16); - q12s16 = vreinterpretq_s16_u16(q12u16); - q7s16 = vqaddq_s16(q7s16, q9s16); - q8s16 = vqaddq_s16(q8s16, q10s16); - q12s16 = vqaddq_s16(q12s16, q11s16); - - d29u8 = vqrshrun_n_s16(q7s16, 7); - d30u8 = vqrshrun_n_s16(q8s16, 7); - d31u8 = vqrshrun_n_s16(q12s16, 7); - - // Second pass: 4x4 - dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - d23u8 = vext_u8(d27u8, d28u8, 4); - d24u8 = vext_u8(d28u8, d29u8, 4); - d25u8 = vext_u8(d29u8, d30u8, 4); - d26u8 = vext_u8(d30u8, d31u8, 4); - - q3u16 = vmull_u8(d27u8, d0u8); - q4u16 = vmull_u8(d28u8, d0u8); - q5u16 = vmull_u8(d25u8, d5u8); - q6u16 = vmull_u8(d26u8, d5u8); - - q3u16 = vmlsl_u8(q3u16, d29u8, d4u8); - q4u16 = vmlsl_u8(q4u16, d30u8, d4u8); - q5u16 = vmlsl_u8(q5u16, d23u8, d1u8); - q6u16 = vmlsl_u8(q6u16, d24u8, d1u8); - - q3u16 = vmlal_u8(q3u16, d28u8, d2u8); - q4u16 = vmlal_u8(q4u16, d29u8, d2u8); - q5u16 = vmlal_u8(q5u16, d24u8, d3u8); - q6u16 = 
vmlal_u8(q6u16, d25u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - - q5s16 = vqaddq_s16(q5s16, q3s16); - q6s16 = vqaddq_s16(q6s16, q4s16); - - d3u8 = vqrshrun_n_s16(q5s16, 7); - d4u8 = vqrshrun_n_s16(q6s16, 7); - - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 0); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 1); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d4u8), 0); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d4u8), 1); - return; -} - void vp8_sixtap_predict8x4_neon( unsigned char *src_ptr, int src_pixels_per_line, diff --git a/libvpx/vp8/common/findnearmv.h b/libvpx/vp8/common/findnearmv.h index 155847ca2..472a7b5d8 100644 --- a/libvpx/vp8/common/findnearmv.h +++ b/libvpx/vp8/common/findnearmv.h @@ -104,7 +104,7 @@ vp8_prob *vp8_mv_ref_probs( extern const unsigned char vp8_mbsplit_offset[4][16]; -static INLINE int left_block_mv(const MODE_INFO *cur_mb, int b) +static INLINE uint32_t left_block_mv(const MODE_INFO *cur_mb, int b) { if (!(b & 3)) { @@ -119,7 +119,8 @@ static INLINE int left_block_mv(const MODE_INFO *cur_mb, int b) return (cur_mb->bmi + b - 1)->mv.as_int; } -static INLINE int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride) +static INLINE uint32_t above_block_mv(const MODE_INFO *cur_mb, int b, + int mi_stride) { if (!(b >> 2)) { diff --git a/libvpx/vp8/common/generic/systemdependent.c b/libvpx/vp8/common/generic/systemdependent.c index 28dc262ae..6d5f302d7 100644 --- a/libvpx/vp8/common/generic/systemdependent.c +++ b/libvpx/vp8/common/generic/systemdependent.c @@ -94,6 +94,8 @@ void vp8_machine_specific_config(VP8_COMMON *ctx) { #if CONFIG_MULTITHREAD ctx->processor_core_count = get_cpu_count(); +#else + (void)ctx; #endif /* CONFIG_MULTITHREAD */ #if ARCH_ARM diff --git 
a/libvpx/vp8/common/mips/msa/postproc_msa.c b/libvpx/vp8/common/mips/msa/postproc_msa.c index c88f30238..23dcde2eb 100644 --- a/libvpx/vp8/common/mips/msa/postproc_msa.c +++ b/libvpx/vp8/common/mips/msa/postproc_msa.c @@ -10,6 +10,7 @@ #include <stdlib.h> #include "./vp8_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "vp8/common/mips/msa/vp8_macros_msa.h" static const int16_t vp8_rv_msa[] = @@ -798,54 +799,3 @@ void vp8_mbpost_proc_down_msa(uint8_t *dst_ptr, int32_t pitch, int32_t rows, } } } - -void vp8_plane_add_noise_msa(uint8_t *start_ptr, char *noise, - char blackclamp[16], char whiteclamp[16], - char bothclamp[16], - uint32_t width, uint32_t height, - int32_t pitch) -{ - uint32_t i, j; - - for (i = 0; i < height / 2; ++i) - { - uint8_t *pos0_ptr = start_ptr + (2 * i) * pitch; - int8_t *ref0_ptr = (int8_t *) (noise + (rand() & 0xff)); - uint8_t *pos1_ptr = start_ptr + (2 * i + 1) * pitch; - int8_t *ref1_ptr = (int8_t *) (noise + (rand() & 0xff)); - for (j = width / 16; j--;) - { - v16i8 temp00_s, temp01_s; - v16u8 temp00, temp01, black_clamp, white_clamp; - v16u8 pos0, ref0, pos1, ref1; - v16i8 const127 = __msa_ldi_b(127); - - pos0 = LD_UB(pos0_ptr); - ref0 = LD_UB(ref0_ptr); - pos1 = LD_UB(pos1_ptr); - ref1 = LD_UB(ref1_ptr); - black_clamp = (v16u8)__msa_fill_b(blackclamp[0]); - white_clamp = (v16u8)__msa_fill_b(whiteclamp[0]); - temp00 = (pos0 < black_clamp); - pos0 = __msa_bmnz_v(pos0, black_clamp, temp00); - temp01 = (pos1 < black_clamp); - pos1 = __msa_bmnz_v(pos1, black_clamp, temp01); - XORI_B2_128_UB(pos0, pos1); - temp00_s = __msa_adds_s_b((v16i8)white_clamp, const127); - temp00 = (v16u8)(temp00_s < pos0); - pos0 = (v16u8)__msa_bmnz_v((v16u8)pos0, (v16u8)temp00_s, temp00); - temp01_s = __msa_adds_s_b((v16i8)white_clamp, const127); - temp01 = (temp01_s < pos1); - pos1 = (v16u8)__msa_bmnz_v((v16u8)pos1, (v16u8)temp01_s, temp01); - XORI_B2_128_UB(pos0, pos1); - pos0 += ref0; - ST_UB(pos0, pos0_ptr); - pos1 += ref1; - ST_UB(pos1, pos1_ptr); - pos0_ptr += 
16; - pos1_ptr += 16; - ref0_ptr += 16; - ref1_ptr += 16; - } - } -} diff --git a/libvpx/vp8/common/postproc.c b/libvpx/vp8/common/postproc.c index 322b61383..6baf00f1e 100644 --- a/libvpx/vp8/common/postproc.c +++ b/libvpx/vp8/common/postproc.c @@ -10,6 +10,7 @@ #include "vpx_config.h" +#include "vpx_dsp_rtcd.h" #include "vp8_rtcd.h" #include "vpx_scale_rtcd.h" #include "vpx_scale/yv12config.h" @@ -490,54 +491,6 @@ static void fillrd(struct postproc_state *state, int q, int a) state->last_noise = a; } -/**************************************************************************** - * - * ROUTINE : plane_add_noise_c - * - * INPUTS : unsigned char *Start starting address of buffer to add gaussian - * noise to - * unsigned int Width width of plane - * unsigned int Height height of plane - * int Pitch distance between subsequent lines of frame - * int q quantizer used to determine amount of noise - * to add - * - * OUTPUTS : None. - * - * RETURNS : void. - * - * FUNCTION : adds gaussian noise to a plane of pixels - * - * SPECIAL NOTES : None. - * - ****************************************************************************/ -void vp8_plane_add_noise_c(unsigned char *Start, char *noise, - char blackclamp[16], - char whiteclamp[16], - char bothclamp[16], - unsigned int Width, unsigned int Height, int Pitch) -{ - unsigned int i, j; - (void)bothclamp; - - for (i = 0; i < Height; i++) - { - unsigned char *Pos = Start + i * Pitch; - char *Ref = (char *)(noise + (rand() & 0xff)); - - for (j = 0; j < Width; j++) - { - if (Pos[j] < blackclamp[0]) - Pos[j] = blackclamp[0]; - - if (Pos[j] > 255 + whiteclamp[0]) - Pos[j] = 255 + whiteclamp[0]; - - Pos[j] += Ref[j]; - } - } -} - /* Blend the macro block with a solid colored square. Leave the * edges unblended to give distinction to macro blocks in areas * filled with the same color block. 
@@ -828,7 +781,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t fillrd(&oci->postproc_state, 63 - q, noise_level); } - vp8_plane_add_noise + vpx_plane_add_noise (oci->post_proc_buffer.y_buffer, oci->postproc_state.noise, oci->postproc_state.blackclamp, diff --git a/libvpx/vp8/common/reconintra4x4.h b/libvpx/vp8/common/reconintra4x4.h index 869841ee3..5dc5d13a5 100644 --- a/libvpx/vp8/common/reconintra4x4.h +++ b/libvpx/vp8/common/reconintra4x4.h @@ -17,8 +17,8 @@ extern "C" { #endif -static void intra_prediction_down_copy(MACROBLOCKD *xd, - unsigned char *above_right_src) +static INLINE void intra_prediction_down_copy(MACROBLOCKD *xd, + unsigned char *above_right_src) { int dst_stride = xd->dst.y_stride; unsigned char *above_right_dst = xd->dst.y_buffer - dst_stride + 16; diff --git a/libvpx/vp8/common/rtcd_defs.pl b/libvpx/vp8/common/rtcd_defs.pl index 6799c2787..856ede189 100644 --- a/libvpx/vp8/common/rtcd_defs.pl +++ b/libvpx/vp8/common/rtcd_defs.pl @@ -167,10 +167,6 @@ if (vpx_config("CONFIG_POSTPROC") eq "yes") { add_proto qw/void vp8_post_proc_down_and_across_mb_row/, "unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size"; specialize qw/vp8_post_proc_down_and_across_mb_row sse2 msa/; - add_proto qw/void vp8_plane_add_noise/, "unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch"; - specialize qw/vp8_plane_add_noise mmx sse2 msa/; - $vp8_plane_add_noise_sse2=vp8_plane_add_noise_wmt; - add_proto qw/void vp8_blend_mb_inner/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"; # no asm yet @@ -209,7 +205,6 @@ $vp8_sixtap_predict8x4_media=vp8_sixtap_predict8x4_armv6; $vp8_sixtap_predict8x4_dspr2=vp8_sixtap_predict8x4_dspr2; add_proto qw/void vp8_sixtap_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char 
*dst, int dst_pitch"; -#TODO(johannkoenig): fix the neon version https://code.google.com/p/webm/issues/detail?id=817 specialize qw/vp8_sixtap_predict4x4 mmx ssse3 media dspr2 msa/; $vp8_sixtap_predict4x4_media=vp8_sixtap_predict4x4_armv6; $vp8_sixtap_predict4x4_dspr2=vp8_sixtap_predict4x4_dspr2; @@ -227,7 +222,6 @@ specialize qw/vp8_bilinear_predict8x4 mmx media neon msa/; $vp8_bilinear_predict8x4_media=vp8_bilinear_predict8x4_armv6; add_proto qw/void vp8_bilinear_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; -#TODO(johannkoenig): fix the neon version https://code.google.com/p/webm/issues/detail?id=892 specialize qw/vp8_bilinear_predict4x4 mmx media msa/; $vp8_bilinear_predict4x4_media=vp8_bilinear_predict4x4_armv6; diff --git a/libvpx/vp8/common/threading.h b/libvpx/vp8/common/threading.h index 01c82dbb8..183b49b8f 100644 --- a/libvpx/vp8/common/threading.h +++ b/libvpx/vp8/common/threading.h @@ -12,6 +12,8 @@ #ifndef VP8_COMMON_THREADING_H_ #define VP8_COMMON_THREADING_H_ +#include "./vpx_config.h" + #ifdef __cplusplus extern "C" { #endif @@ -19,17 +21,15 @@ extern "C" { #if CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD /* Thread management macros */ -#ifdef _WIN32 +#if defined(_WIN32) && !HAVE_PTHREAD_H /* Win32 */ #include <process.h> #include <windows.h> -#define THREAD_FUNCTION DWORD WINAPI +#define THREAD_FUNCTION unsigned int __stdcall #define THREAD_FUNCTION_RETURN DWORD #define THREAD_SPECIFIC_INDEX DWORD #define pthread_t HANDLE #define pthread_attr_t DWORD -#define pthread_create(thhandle,attr,thfunc,tharg) (int)((*thhandle=(HANDLE)_beginthreadex(NULL,0,(unsigned int (__stdcall *)(void *))thfunc,tharg,0,NULL))==NULL) -#define pthread_join(thread, result) ((WaitForSingleObject((thread),INFINITE)!=WAIT_OBJECT_0) || !CloseHandle(thread)) #define pthread_detach(thread) if(thread!=NULL)CloseHandle(thread) #define thread_sleep(nms) Sleep(nms) #define pthread_cancel(thread) terminate_thread(thread,0) @@ 
-44,14 +44,11 @@ extern "C" { #include <os2.h> #include <stdlib.h> -#define THREAD_FUNCTION void -#define THREAD_FUNCTION_RETURN void +#define THREAD_FUNCTION void * +#define THREAD_FUNCTION_RETURN void * #define THREAD_SPECIFIC_INDEX PULONG #define pthread_t TID #define pthread_attr_t ULONG -#define pthread_create(thhandle,attr,thfunc,tharg) \ - ((int)((*(thhandle)=_beginthread(thfunc,NULL,1024*1024,tharg))==-1)) -#define pthread_join(thread, result) ((int)DosWaitThread(&(thread),0)) #define pthread_detach(thread) 0 #define thread_sleep(nms) DosSleep(nms) #define pthread_cancel(thread) DosKillThread(thread) @@ -81,8 +78,8 @@ extern "C" { #define ts_key_create(ts_key, destructor) pthread_key_create (&(ts_key), destructor); #endif -/* Syncrhronization macros: Win32 and Pthreads */ -#ifdef _WIN32 +/* Synchronization macros: Win32 and Pthreads */ +#if defined(_WIN32) && !HAVE_PTHREAD_H #define sem_t HANDLE #define pause(voidpara) __asm PAUSE #define sem_init(sem, sem_attr1, sem_init_value) (int)((*sem = CreateSemaphore(NULL,0,32768,NULL))==NULL) @@ -185,6 +182,47 @@ static inline int sem_destroy(sem_t * sem) #define x86_pause_hint() #endif +#include "vpx_util/vpx_thread.h" + +static INLINE void mutex_lock(pthread_mutex_t *const mutex) { + const int kMaxTryLocks = 4000; + int locked = 0; + int i; + + for (i = 0; i < kMaxTryLocks; ++i) { + if (!pthread_mutex_trylock(mutex)) { + locked = 1; + break; + } + } + + if (!locked) + pthread_mutex_lock(mutex); +} + +static INLINE int protected_read(pthread_mutex_t *const mutex, const int *p) { + int ret; + mutex_lock(mutex); + ret = *p; + pthread_mutex_unlock(mutex); + return ret; +} + +static INLINE void sync_read(pthread_mutex_t *const mutex, int mb_col, + const int *last_row_current_mb_col, + const int nsync) { + while (mb_col > (protected_read(mutex, last_row_current_mb_col) - nsync)) { + x86_pause_hint(); + thread_sleep(0); + } +} + +static INLINE void protected_write(pthread_mutex_t *mutex, int *p, int v) { + 
mutex_lock(mutex); + *p = v; + pthread_mutex_unlock(mutex); +} + #endif /* CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD */ #ifdef __cplusplus diff --git a/libvpx/vp8/common/vp8_loopfilter.c b/libvpx/vp8/common/vp8_loopfilter.c index 8b55dff92..756ad488f 100644 --- a/libvpx/vp8/common/vp8_loopfilter.c +++ b/libvpx/vp8/common/vp8_loopfilter.c @@ -141,8 +141,8 @@ void vp8_loop_filter_frame_init(VP8_COMMON *cm, else /* Delta Value */ { lvl_seg += mbd->segment_feature_data[MB_LVL_ALT_LF][seg]; - lvl_seg = (lvl_seg > 0) ? ((lvl_seg > 63) ? 63: lvl_seg) : 0; } + lvl_seg = (lvl_seg > 0) ? ((lvl_seg > 63) ? 63: lvl_seg) : 0; } if (!mbd->mode_ref_lf_delta_enabled) diff --git a/libvpx/vp8/common/x86/postproc_mmx.asm b/libvpx/vp8/common/x86/postproc_mmx.asm index a2b16327f..1a89e7ead 100644 --- a/libvpx/vp8/common/x86/postproc_mmx.asm +++ b/libvpx/vp8/common/x86/postproc_mmx.asm @@ -241,68 +241,6 @@ sym(vp8_mbpost_proc_down_mmx): %undef flimit2 -;void vp8_plane_add_noise_mmx (unsigned char *Start, unsigned char *noise, -; unsigned char blackclamp[16], -; unsigned char whiteclamp[16], -; unsigned char bothclamp[16], -; unsigned int Width, unsigned int Height, int Pitch) -global sym(vp8_plane_add_noise_mmx) PRIVATE -sym(vp8_plane_add_noise_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 8 - GET_GOT rbx - push rsi - push rdi - ; end prolog - -.addnoise_loop: - call sym(LIBVPX_RAND) WRT_PLT - mov rcx, arg(1) ;noise - and rax, 0xff - add rcx, rax - - ; we rely on the fact that the clamping vectors are stored contiguously - ; in black/white/both order. 
Note that we have to reload this here because - ; rdx could be trashed by rand() - mov rdx, arg(2) ; blackclamp - - - mov rdi, rcx - movsxd rcx, dword arg(5) ;[Width] - mov rsi, arg(0) ;Pos - xor rax,rax - -.addnoise_nextset: - movq mm1,[rsi+rax] ; get the source - - psubusb mm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise - paddusb mm1, [rdx+32] ;bothclamp - psubusb mm1, [rdx+16] ;whiteclamp - - movq mm2,[rdi+rax] ; get the noise for this line - paddb mm1,mm2 ; add it in - movq [rsi+rax],mm1 ; store the result - - add rax,8 ; move to the next line - - cmp rax, rcx - jl .addnoise_nextset - - movsxd rax, dword arg(7) ; Pitch - add arg(0), rax ; Start += Pitch - sub dword arg(6), 1 ; Height -= 1 - jg .addnoise_loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - SECTION_RODATA align 16 Blur: diff --git a/libvpx/vp8/common/x86/postproc_sse2.asm b/libvpx/vp8/common/x86/postproc_sse2.asm index fed4ee5cc..de17afa5c 100644 --- a/libvpx/vp8/common/x86/postproc_sse2.asm +++ b/libvpx/vp8/common/x86/postproc_sse2.asm @@ -655,68 +655,6 @@ sym(vp8_mbpost_proc_across_ip_xmm): %undef flimit4 -;void vp8_plane_add_noise_wmt (unsigned char *Start, unsigned char *noise, -; unsigned char blackclamp[16], -; unsigned char whiteclamp[16], -; unsigned char bothclamp[16], -; unsigned int Width, unsigned int Height, int Pitch) -global sym(vp8_plane_add_noise_wmt) PRIVATE -sym(vp8_plane_add_noise_wmt): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 8 - GET_GOT rbx - push rsi - push rdi - ; end prolog - -.addnoise_loop: - call sym(LIBVPX_RAND) WRT_PLT - mov rcx, arg(1) ;noise - and rax, 0xff - add rcx, rax - - ; we rely on the fact that the clamping vectors are stored contiguously - ; in black/white/both order. 
Note that we have to reload this here because - ; rdx could be trashed by rand() - mov rdx, arg(2) ; blackclamp - - - mov rdi, rcx - movsxd rcx, dword arg(5) ;[Width] - mov rsi, arg(0) ;Pos - xor rax,rax - -.addnoise_nextset: - movdqu xmm1,[rsi+rax] ; get the source - - psubusb xmm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise - paddusb xmm1, [rdx+32] ;bothclamp - psubusb xmm1, [rdx+16] ;whiteclamp - - movdqu xmm2,[rdi+rax] ; get the noise for this line - paddb xmm1,xmm2 ; add it in - movdqu [rsi+rax],xmm1 ; store the result - - add rax,16 ; move to the next line - - cmp rax, rcx - jl .addnoise_nextset - - movsxd rax, dword arg(7) ; Pitch - add arg(0), rax ; Start += Pitch - sub dword arg(6), 1 ; Height -= 1 - jg .addnoise_loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - SECTION_RODATA align 16 four8s: diff --git a/libvpx/vp8/decoder/dboolhuff.c b/libvpx/vp8/decoder/dboolhuff.c index 8a7e33205..5cdd2a249 100644 --- a/libvpx/vp8/decoder/dboolhuff.c +++ b/libvpx/vp8/decoder/dboolhuff.c @@ -44,7 +44,7 @@ void vp8dx_bool_decoder_fill(BOOL_DECODER *br) int shift = VP8_BD_VALUE_SIZE - CHAR_BIT - (count + CHAR_BIT); size_t bytes_left = br->user_buffer_end - bufptr; size_t bits_left = bytes_left * CHAR_BIT; - int x = (int)(shift + CHAR_BIT - bits_left); + int x = shift + CHAR_BIT - (int)bits_left; int loop_end = 0; unsigned char decrypted[sizeof(VP8_BD_VALUE) + 1]; diff --git a/libvpx/vp8/decoder/dboolhuff.h b/libvpx/vp8/decoder/dboolhuff.h index cc9eaaf43..1b1bbf868 100644 --- a/libvpx/vp8/decoder/dboolhuff.h +++ b/libvpx/vp8/decoder/dboolhuff.h @@ -83,7 +83,7 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) { } { - register unsigned int shift = vp8_norm[range]; + register int shift = vp8_norm[range]; range <<= shift; value <<= shift; count -= shift; diff --git a/libvpx/vp8/decoder/decodeframe.c b/libvpx/vp8/decoder/decodeframe.c index f0d760373..51acdbb9c 100644 --- 
a/libvpx/vp8/decoder/decodeframe.c +++ b/libvpx/vp8/decoder/decodeframe.c @@ -73,10 +73,9 @@ void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd) /* Delta Value */ else - { QIndex = pc->base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id]; - QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; /* Clamp to valid range */ - } + + QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; /* Clamp to valid range */ } else QIndex = pc->base_qindex; @@ -145,8 +144,6 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, */ pbi->frame_corrupt_residual = 1; memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); - vp8_conceal_corrupt_mb(xd); - corruption_detected = 1; @@ -626,8 +623,7 @@ static void decode_mb_rows(VP8D_COMP *pbi) */ vp8_interpolate_motion(xd, mb_row, mb_col, - pc->mb_rows, pc->mb_cols, - pc->mode_info_stride); + pc->mb_rows, pc->mb_cols); } } #endif @@ -987,7 +983,8 @@ int vp8_decode_frame(VP8D_COMP *pbi) VP8_COMMON *const pc = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; const unsigned char *data = pbi->fragments.ptrs[0]; - const unsigned char *data_end = data + pbi->fragments.sizes[0]; + const unsigned int data_sz = pbi->fragments.sizes[0]; + const unsigned char *data_end = data + data_sz; ptrdiff_t first_partition_length_in_bytes; int i, j, k, l; @@ -1023,7 +1020,7 @@ int vp8_decode_frame(VP8D_COMP *pbi) const unsigned char *clear = data; if (pbi->decrypt_cb) { - int n = (int)VPXMIN(sizeof(clear_buffer), data_end - data); + int n = (int)VPXMIN(sizeof(clear_buffer), data_sz); pbi->decrypt_cb(pbi->decrypt_state, data, clear_buffer, n); clear = clear_buffer; } diff --git a/libvpx/vp8/decoder/error_concealment.c b/libvpx/vp8/decoder/error_concealment.c index 0b846a08b..a73813fc0 100644 --- a/libvpx/vp8/decoder/error_concealment.c +++ b/libvpx/vp8/decoder/error_concealment.c @@ -194,7 +194,7 @@ void vp8_calculate_overlaps(MB_OVERLAP *overlap_ul, return; } - if (new_row <= (-4 << 3) || new_col <= (-4 << 3)) + if 
(new_row <= -32 || new_col <= -32) { /* outside the frame */ return; @@ -558,8 +558,7 @@ static void interpolate_mvs(MACROBLOCKD *mb, void vp8_interpolate_motion(MACROBLOCKD *mb, int mb_row, int mb_col, - int mb_rows, int mb_cols, - int mi_stride) + int mb_rows, int mb_cols) { /* Find relevant neighboring blocks */ EC_BLOCK neighbors[NUM_NEIGHBORS]; @@ -585,13 +584,3 @@ void vp8_interpolate_motion(MACROBLOCKD *mb, mb->mode_info_context->mbmi.partitioning = 3; mb->mode_info_context->mbmi.segment_id = 0; } - -void vp8_conceal_corrupt_mb(MACROBLOCKD *xd) -{ - /* This macroblock has corrupt residual, use the motion compensated - image (predictor) for concealment */ - - /* The build predictor functions now output directly into the dst buffer, - * so the copies are no longer necessary */ - -} diff --git a/libvpx/vp8/decoder/error_concealment.h b/libvpx/vp8/decoder/error_concealment.h index 9a1e02486..b6b49725b 100644 --- a/libvpx/vp8/decoder/error_concealment.h +++ b/libvpx/vp8/decoder/error_concealment.h @@ -34,13 +34,7 @@ void vp8_estimate_missing_mvs(VP8D_COMP *pbi); * (mb_row, mb_col). */ void vp8_interpolate_motion(MACROBLOCKD *mb, int mb_row, int mb_col, - int mb_rows, int mb_cols, - int mi_stride); - -/* Conceal a macroblock with corrupt residual. - * Copies the prediction signal to the reconstructed image. 
- */ -void vp8_conceal_corrupt_mb(MACROBLOCKD *xd); + int mb_rows, int mb_cols); #ifdef __cplusplus } // extern "C" diff --git a/libvpx/vp8/decoder/onyxd_int.h b/libvpx/vp8/decoder/onyxd_int.h index aa2cc57f7..313fe01c0 100644 --- a/libvpx/vp8/decoder/onyxd_int.h +++ b/libvpx/vp8/decoder/onyxd_int.h @@ -81,7 +81,7 @@ typedef struct VP8D_COMP #if CONFIG_MULTITHREAD /* variable for threading */ - volatile int b_multithreaded_rd; + int b_multithreaded_rd; int max_threads; int current_mb_col_main; unsigned int decoding_thread_count; @@ -90,6 +90,8 @@ typedef struct VP8D_COMP int mt_baseline_filter_level[MAX_MB_SEGMENTS]; int sync_range; int *mt_current_mb_col; /* Each row remembers its already decoded column. */ + pthread_mutex_t *pmutex; + pthread_mutex_t mt_mutex; /* mutex for b_multithreaded_rd */ unsigned char **mt_yabove_row; /* mb_rows x width */ unsigned char **mt_uabove_row; diff --git a/libvpx/vp8/decoder/threading.c b/libvpx/vp8/decoder/threading.c index 7c7184c78..3c1b8387e 100644 --- a/libvpx/vp8/decoder/threading.c +++ b/libvpx/vp8/decoder/threading.c @@ -52,9 +52,6 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D mbd->subpixel_predict8x8 = xd->subpixel_predict8x8; mbd->subpixel_predict16x16 = xd->subpixel_predict16x16; - mbd->mode_info_context = pc->mi + pc->mode_info_stride * (i + 1); - mbd->mode_info_stride = pc->mode_info_stride; - mbd->frame_type = pc->frame_type; mbd->pre = xd->pre; mbd->dst = xd->dst; @@ -139,8 +136,6 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, */ pbi->frame_corrupt_residual = 1; memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); - vp8_conceal_corrupt_mb(xd); - corruption_detected = 1; @@ -298,8 +293,8 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row) { - volatile const int *last_row_current_mb_col; - volatile int *current_mb_col; + const int *last_row_current_mb_col; + int 
*current_mb_col; int mb_row; VP8_COMMON *pc = &pbi->common; const int nsync = pbi->sync_range; @@ -337,6 +332,9 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row) xd->up_available = (start_mb_row != 0); + xd->mode_info_context = pc->mi + pc->mode_info_stride * start_mb_row; + xd->mode_info_stride = pc->mode_info_stride; + for (mb_row = start_mb_row; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1)) { int recon_yoffset, recon_uvoffset; @@ -405,17 +403,15 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row) xd->dst.uv_stride); } - for (mb_col = 0; mb_col < pc->mb_cols; mb_col++) - { - *current_mb_col = mb_col - 1; + for (mb_col = 0; mb_col < pc->mb_cols; mb_col++) { + if (((mb_col - 1) % nsync) == 0) { + pthread_mutex_t *mutex = &pbi->pmutex[mb_row]; + protected_write(mutex, current_mb_col, mb_col - 1); + } - if ((mb_col & (nsync - 1)) == 0) - { - while (mb_col > (*last_row_current_mb_col - nsync)) - { - x86_pause_hint(); - thread_sleep(0); - } + if (mb_row && !(mb_col & (nsync - 1))) { + pthread_mutex_t *mutex = &pbi->pmutex[mb_row-1]; + sync_read(mutex, mb_col, last_row_current_mb_col, nsync); } /* Distance of MB to the various image edges. 
@@ -449,8 +445,7 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row) */ vp8_interpolate_motion(xd, mb_row, mb_col, - pc->mb_rows, pc->mb_cols, - pc->mode_info_stride); + pc->mb_rows, pc->mb_cols); } } #endif @@ -604,7 +599,7 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row) xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); /* last MB of row is ready just after extension is done */ - *current_mb_col = mb_col + nsync; + protected_write(&pbi->pmutex[mb_row], current_mb_col, mb_col + nsync); ++xd->mode_info_context; /* skip prediction column */ xd->up_available = 1; @@ -629,12 +624,12 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data) while (1) { - if (pbi->b_multithreaded_rd == 0) + if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd) == 0) break; if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0) { - if (pbi->b_multithreaded_rd == 0) + if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd) == 0) break; else { @@ -657,6 +652,7 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi) pbi->b_multithreaded_rd = 0; pbi->allocated_decoding_thread_count = 0; + pthread_mutex_init(&pbi->mt_mutex, NULL); /* limit decoding threads to the max number of token partitions */ core_count = (pbi->max_threads > 8) ? 
8 : pbi->max_threads; @@ -699,8 +695,17 @@ void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows) { int i; - if (pbi->b_multithreaded_rd) + if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd)) { + /* De-allocate mutex */ + if (pbi->pmutex != NULL) { + for (i = 0; i < mb_rows; i++) { + pthread_mutex_destroy(&pbi->pmutex[i]); + } + vpx_free(pbi->pmutex); + pbi->pmutex = NULL; + } + vpx_free(pbi->mt_current_mb_col); pbi->mt_current_mb_col = NULL ; @@ -781,7 +786,7 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) int i; int uv_width; - if (pbi->b_multithreaded_rd) + if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd)) { vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows); @@ -796,6 +801,15 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) uv_width = width >>1; + /* Allocate mutex */ + CHECK_MEM_ERROR(pbi->pmutex, vpx_malloc(sizeof(*pbi->pmutex) * + pc->mb_rows)); + if (pbi->pmutex) { + for (i = 0; i < pc->mb_rows; i++) { + pthread_mutex_init(&pbi->pmutex[i], NULL); + } + } + /* Allocate an int for each mb row. 
*/ CALLOC_ARRAY(pbi->mt_current_mb_col, pc->mb_rows); @@ -831,11 +845,11 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) void vp8_decoder_remove_threads(VP8D_COMP *pbi) { /* shutdown MB Decoding thread; */ - if (pbi->b_multithreaded_rd) + if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd)) { int i; - pbi->b_multithreaded_rd = 0; + protected_write(&pbi->mt_mutex, &pbi->b_multithreaded_rd, 0); /* allow all threads to exit */ for (i = 0; i < pbi->allocated_decoding_thread_count; i++) @@ -863,6 +877,7 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi) vpx_free(pbi->de_thread_data); pbi->de_thread_data = NULL; } + pthread_mutex_destroy(&pbi->mt_mutex); } void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd) diff --git a/libvpx/vp8/encoder/bitstream.c b/libvpx/vp8/encoder/bitstream.c index f3d91b552..3196422c2 100644 --- a/libvpx/vp8/encoder/bitstream.c +++ b/libvpx/vp8/encoder/bitstream.c @@ -163,7 +163,7 @@ void vp8_pack_tokens(vp8_writer *w, const TOKENEXTRA *p, int xcount) { const TOKENEXTRA *stop = p + xcount; unsigned int split; - unsigned int shift; + int shift; int count = w->count; unsigned int range = w->range; unsigned int lowvalue = w->lowvalue; diff --git a/libvpx/vp8/encoder/boolhuff.h b/libvpx/vp8/encoder/boolhuff.h index 7c012a829..e66a2dbd8 100644 --- a/libvpx/vp8/encoder/boolhuff.h +++ b/libvpx/vp8/encoder/boolhuff.h @@ -65,7 +65,7 @@ static void vp8_encode_bool(BOOL_CODER *br, int bit, int probability) int count = br->count; unsigned int range = br->range; unsigned int lowvalue = br->lowvalue; - register unsigned int shift; + register int shift; #ifdef VP8_ENTROPY_STATS #if defined(SECTIONBITS_OUTPUT) diff --git a/libvpx/vp8/encoder/denoising.c b/libvpx/vp8/encoder/denoising.c index d197f8f81..26ce120b4 100644 --- a/libvpx/vp8/encoder/denoising.c +++ b/libvpx/vp8/encoder/denoising.c @@ -23,7 +23,7 @@ static const unsigned int NOISE_MOTION_THRESHOLD = 25 * 25; */ static const unsigned int SSE_DIFF_THRESHOLD 
= 16 * 16 * 20; static const unsigned int SSE_THRESHOLD = 16 * 16 * 40; -static const unsigned int SSE_THRESHOLD_HIGH = 16 * 16 * 60; +static const unsigned int SSE_THRESHOLD_HIGH = 16 * 16 * 80; /* * The filter function was modified to reduce the computational complexity. @@ -440,6 +440,11 @@ int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height, denoiser->yv12_last_source.frame_size); denoiser->denoise_state = vpx_calloc((num_mb_rows * num_mb_cols), 1); + if (!denoiser->denoise_state) + { + vp8_denoiser_free(denoiser); + return 1; + } memset(denoiser->denoise_state, 0, (num_mb_rows * num_mb_cols)); vp8_denoiser_set_parameters(denoiser, mode); denoiser->nmse_source_diff = 0; @@ -492,7 +497,8 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser, loop_filter_info_n *lfi_n, int mb_row, int mb_col, - int block_index) + int block_index, + int consec_zero_last) { int mv_row; @@ -523,7 +529,7 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser, // Bias on zero motion vector sse. const int zero_bias = denoiser->denoise_pars.denoise_mv_bias; zero_mv_sse = (unsigned int)((int64_t)zero_mv_sse * zero_bias / 100); - sse_diff = zero_mv_sse - best_sse; + sse_diff = (int)zero_mv_sse - (int)best_sse; saved_mbmi = *mbmi; @@ -566,59 +572,69 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser, best_sse = zero_mv_sse; } - saved_pre = filter_xd->pre; - saved_dst = filter_xd->dst; - - /* Compensate the running average. */ - filter_xd->pre.y_buffer = src->y_buffer + recon_yoffset; - filter_xd->pre.u_buffer = src->u_buffer + recon_uvoffset; - filter_xd->pre.v_buffer = src->v_buffer + recon_uvoffset; - /* Write the compensated running average to the destination buffer. 
*/ - filter_xd->dst.y_buffer = dst->y_buffer + recon_yoffset; - filter_xd->dst.u_buffer = dst->u_buffer + recon_uvoffset; - filter_xd->dst.v_buffer = dst->v_buffer + recon_uvoffset; - - if (!x->skip) - { - vp8_build_inter_predictors_mb(filter_xd); - } - else - { - vp8_build_inter16x16_predictors_mb(filter_xd, - filter_xd->dst.y_buffer, - filter_xd->dst.u_buffer, - filter_xd->dst.v_buffer, - filter_xd->dst.y_stride, - filter_xd->dst.uv_stride); + mv_row = x->best_sse_mv.as_mv.row; + mv_col = x->best_sse_mv.as_mv.col; + motion_magnitude2 = mv_row * mv_row + mv_col * mv_col; + motion_threshold = denoiser->denoise_pars.scale_motion_thresh * + NOISE_MOTION_THRESHOLD; + + if (motion_magnitude2 < + denoiser->denoise_pars.scale_increase_filter * NOISE_MOTION_THRESHOLD) + x->increase_denoising = 1; + + sse_thresh = denoiser->denoise_pars.scale_sse_thresh * SSE_THRESHOLD; + if (x->increase_denoising) + sse_thresh = + denoiser->denoise_pars.scale_sse_thresh * SSE_THRESHOLD_HIGH; + + if (best_sse > sse_thresh || motion_magnitude2 > motion_threshold) + decision = COPY_BLOCK; + + // If block is considered skin, don't denoise if the block + // (1) is selected as non-zero motion for current frame, or + // (2) has not been selected as ZERO_LAST mode at least x past frames + // in a row. + // TODO(marpan): Parameter "x" should be varied with framerate. + // In particualar, should be reduced for layers (base layer/LAST). + if (x->is_skin && (consec_zero_last < 2 || motion_magnitude2 > 0)) + decision = COPY_BLOCK; + + if (decision == FILTER_BLOCK) { + saved_pre = filter_xd->pre; + saved_dst = filter_xd->dst; + + /* Compensate the running average. */ + filter_xd->pre.y_buffer = src->y_buffer + recon_yoffset; + filter_xd->pre.u_buffer = src->u_buffer + recon_uvoffset; + filter_xd->pre.v_buffer = src->v_buffer + recon_uvoffset; + /* Write the compensated running average to the destination buffer. 
*/ + filter_xd->dst.y_buffer = dst->y_buffer + recon_yoffset; + filter_xd->dst.u_buffer = dst->u_buffer + recon_uvoffset; + filter_xd->dst.v_buffer = dst->v_buffer + recon_uvoffset; + + if (!x->skip) + { + vp8_build_inter_predictors_mb(filter_xd); + } + else + { + vp8_build_inter16x16_predictors_mb(filter_xd, + filter_xd->dst.y_buffer, + filter_xd->dst.u_buffer, + filter_xd->dst.v_buffer, + filter_xd->dst.y_stride, + filter_xd->dst.uv_stride); + } + filter_xd->pre = saved_pre; + filter_xd->dst = saved_dst; + *mbmi = saved_mbmi; } - filter_xd->pre = saved_pre; - filter_xd->dst = saved_dst; - *mbmi = saved_mbmi; - - } - - mv_row = x->best_sse_mv.as_mv.row; - mv_col = x->best_sse_mv.as_mv.col; - motion_magnitude2 = mv_row * mv_row + mv_col * mv_col; - motion_threshold = denoiser->denoise_pars.scale_motion_thresh * - NOISE_MOTION_THRESHOLD; - - // If block is considered to be skin area, lower the motion threshold. - // In current version set threshold = 1, so only denoise very low - // (i.e., zero) mv on skin. - if (x->is_skin) - motion_threshold = 1; - - if (motion_magnitude2 < - denoiser->denoise_pars.scale_increase_filter * NOISE_MOTION_THRESHOLD) - x->increase_denoising = 1; - - sse_thresh = denoiser->denoise_pars.scale_sse_thresh * SSE_THRESHOLD; - if (x->increase_denoising) - sse_thresh = denoiser->denoise_pars.scale_sse_thresh * SSE_THRESHOLD_HIGH; - - if (best_sse > sse_thresh || motion_magnitude2 > motion_threshold) + } else { + // zero_frame should always be 1 for real-time mode, as the + // ZEROMV mode is always checked, so we should never go into this branch. + // If case ZEROMV is not checked, then we will force no denoise (COPY). 
decision = COPY_BLOCK; + } if (decision == FILTER_BLOCK) { diff --git a/libvpx/vp8/encoder/denoising.h b/libvpx/vp8/encoder/denoising.h index 9a379a6a1..8c126c1cb 100644 --- a/libvpx/vp8/encoder/denoising.h +++ b/libvpx/vp8/encoder/denoising.h @@ -18,8 +18,8 @@ extern "C" { #endif -#define SUM_DIFF_THRESHOLD (16 * 16 * 2) -#define SUM_DIFF_THRESHOLD_HIGH (600) // ~(16 * 16 * 1.5) +#define SUM_DIFF_THRESHOLD 512 +#define SUM_DIFF_THRESHOLD_HIGH 600 #define MOTION_MAGNITUDE_THRESHOLD (8*3) #define SUM_DIFF_THRESHOLD_UV (96) // (8 * 8 * 1.5) @@ -108,7 +108,8 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser, loop_filter_info_n *lfi_n, int mb_row, int mb_col, - int block_index); + int block_index, + int consec_zero_last); #ifdef __cplusplus } // extern "C" diff --git a/libvpx/vp8/encoder/encodeframe.c b/libvpx/vp8/encoder/encodeframe.c index b0aaa2f0b..9b05cd1fc 100644 --- a/libvpx/vp8/encoder/encodeframe.c +++ b/libvpx/vp8/encoder/encodeframe.c @@ -386,8 +386,8 @@ void encode_mb_row(VP8_COMP *cpi, #if CONFIG_MULTITHREAD const int nsync = cpi->mt_sync_range; const int rightmost_col = cm->mb_cols + nsync; - volatile const int *last_row_current_mb_col; - volatile int *current_mb_col = &cpi->mt_current_mb_col[mb_row]; + const int *last_row_current_mb_col; + int *current_mb_col = &cpi->mt_current_mb_col[mb_row]; if ((cpi->b_multi_threaded != 0) && (mb_row != 0)) last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1]; @@ -461,17 +461,15 @@ void encode_mb_row(VP8_COMP *cpi, vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); #if CONFIG_MULTITHREAD - if (cpi->b_multi_threaded != 0) - { - *current_mb_col = mb_col - 1; /* set previous MB done */ + if (cpi->b_multi_threaded != 0) { + if (((mb_col - 1) % nsync) == 0) { + pthread_mutex_t *mutex = &cpi->pmutex[mb_row]; + protected_write(mutex, current_mb_col, mb_col - 1); + } - if ((mb_col & (nsync - 1)) == 0) - { - while (mb_col > (*last_row_current_mb_col - nsync)) - { - x86_pause_hint(); - 
thread_sleep(0); - } + if (mb_row && !(mb_col & (nsync - 1))) { + pthread_mutex_t *mutex = &cpi->pmutex[mb_row-1]; + sync_read(mutex, mb_col, last_row_current_mb_col, nsync); } } #endif @@ -616,7 +614,7 @@ void encode_mb_row(VP8_COMP *cpi, #if CONFIG_MULTITHREAD if (cpi->b_multi_threaded != 0) - *current_mb_col = rightmost_col; + protected_write(&cpi->pmutex[mb_row], current_mb_col, rightmost_col); #endif /* this is to account for the border */ diff --git a/libvpx/vp8/encoder/ethreading.c b/libvpx/vp8/encoder/ethreading.c index 4e234ccd5..2a0c2987b 100644 --- a/libvpx/vp8/encoder/ethreading.c +++ b/libvpx/vp8/encoder/ethreading.c @@ -26,12 +26,13 @@ static THREAD_FUNCTION thread_loopfilter(void *p_data) while (1) { - if (cpi->b_multi_threaded == 0) + if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded) == 0) break; if (sem_wait(&cpi->h_event_start_lpf) == 0) { - if (cpi->b_multi_threaded == 0) /* we're shutting down */ + /* we're shutting down */ + if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded) == 0) break; vp8_loopfilter_frame(cpi, cm); @@ -53,7 +54,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) while (1) { - if (cpi->b_multi_threaded == 0) + if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded) == 0) break; if (sem_wait(&cpi->h_event_start_encoding[ithread]) == 0) @@ -72,9 +73,14 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) int *segment_counts = mbri->segment_counts; int *totalrate = &mbri->totalrate; - if (cpi->b_multi_threaded == 0) /* we're shutting down */ + /* we're shutting down */ + if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded) == 0) break; + xd->mode_info_context = cm->mi + cm->mode_info_stride * + (ithread + 1); + xd->mode_info_stride = cm->mode_info_stride; + for (mb_row = ithread + 1; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1)) { @@ -85,8 +91,8 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; int recon_uv_stride = 
cm->yv12_fb[ref_fb_idx].uv_stride; int map_index = (mb_row * cm->mb_cols); - volatile const int *last_row_current_mb_col; - volatile int *current_mb_col = &cpi->mt_current_mb_col[mb_row]; + const int *last_row_current_mb_col; + int *current_mb_col = &cpi->mt_current_mb_col[mb_row]; #if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) vp8_writer *w = &cpi->bc[1 + (mb_row % num_part)]; @@ -113,15 +119,14 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) /* for each macroblock col in image */ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { - *current_mb_col = mb_col - 1; + if (((mb_col - 1) % nsync) == 0) { + pthread_mutex_t *mutex = &cpi->pmutex[mb_row]; + protected_write(mutex, current_mb_col, mb_col - 1); + } - if ((mb_col & (nsync - 1)) == 0) - { - while (mb_col > (*last_row_current_mb_col - nsync)) - { - x86_pause_hint(); - thread_sleep(0); - } + if (mb_row && !(mb_col & (nsync - 1))) { + pthread_mutex_t *mutex = &cpi->pmutex[mb_row-1]; + sync_read(mutex, mb_col, last_row_current_mb_col, nsync); } #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING @@ -296,7 +301,8 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); - *current_mb_col = mb_col + nsync; + protected_write(&cpi->pmutex[mb_row], current_mb_col, + mb_col + nsync); /* this is to account for the border */ xd->mode_info_context++; @@ -473,9 +479,6 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi, mb->partition_info = x->pi + x->e_mbd.mode_info_stride * (i + 1); - mbd->mode_info_context = cm->mi + x->e_mbd.mode_info_stride * (i + 1); - mbd->mode_info_stride = cm->mode_info_stride; - mbd->frame_type = cm->frame_type; mb->src = * cpi->Source; @@ -515,7 +518,8 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi) cpi->b_multi_threaded = 0; cpi->encoding_thread_count = 0; - cpi->b_lpf_running = 0; + + pthread_mutex_init(&cpi->mt_mutex, NULL); if (cm->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1) { @@ -580,7 +584,7 @@ int 
vp8cx_create_encoder_threads(VP8_COMP *cpi) if(rc) { /* shutdown other threads */ - cpi->b_multi_threaded = 0; + protected_write(&cpi->mt_mutex, &cpi->b_multi_threaded, 0); for(--ithread; ithread >= 0; ithread--) { pthread_join(cpi->h_encoding_thread[ithread], 0); @@ -594,6 +598,8 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi) vpx_free(cpi->mb_row_ei); vpx_free(cpi->en_thread_data); + pthread_mutex_destroy(&cpi->mt_mutex); + return -1; } @@ -611,7 +617,7 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi) if(rc) { /* shutdown other threads */ - cpi->b_multi_threaded = 0; + protected_write(&cpi->mt_mutex, &cpi->b_multi_threaded, 0); for(--ithread; ithread >= 0; ithread--) { sem_post(&cpi->h_event_start_encoding[ithread]); @@ -628,6 +634,8 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi) vpx_free(cpi->mb_row_ei); vpx_free(cpi->en_thread_data); + pthread_mutex_destroy(&cpi->mt_mutex); + return -2; } } @@ -637,10 +645,10 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi) void vp8cx_remove_encoder_threads(VP8_COMP *cpi) { - if (cpi->b_multi_threaded) + if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded)) { /* shutdown other threads */ - cpi->b_multi_threaded = 0; + protected_write(&cpi->mt_mutex, &cpi->b_multi_threaded, 0); { int i; @@ -666,5 +674,6 @@ void vp8cx_remove_encoder_threads(VP8_COMP *cpi) vpx_free(cpi->mb_row_ei); vpx_free(cpi->en_thread_data); } + pthread_mutex_destroy(&cpi->mt_mutex); } #endif diff --git a/libvpx/vp8/encoder/firstpass.c b/libvpx/vp8/encoder/firstpass.c index 4c2acc774..c526a3e89 100644 --- a/libvpx/vp8/encoder/firstpass.c +++ b/libvpx/vp8/encoder/firstpass.c @@ -18,6 +18,7 @@ #include "onyx_int.h" #include "vpx_dsp/variance.h" #include "encodeintra.h" +#include "vp8/common/common.h" #include "vp8/common/setupintrarecon.h" #include "vp8/common/systemdependent.h" #include "mcomp.h" @@ -2417,7 +2418,7 @@ void vp8_second_pass(VP8_COMP *cpi) int tmp_q; int frames_left = (int)(cpi->twopass.total_stats.count - 
cpi->common.current_video_frame); - FIRSTPASS_STATS this_frame = {0}; + FIRSTPASS_STATS this_frame; FIRSTPASS_STATS this_frame_copy; double this_frame_intra_error; @@ -2425,6 +2426,8 @@ void vp8_second_pass(VP8_COMP *cpi) int overhead_bits; + vp8_zero(this_frame); + if (!cpi->twopass.stats_in) { return ; @@ -2808,7 +2811,8 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) * static scene. */ if ( detect_transition_to_still( cpi, i, - (cpi->key_frame_frequency-i), + ((int)(cpi->key_frame_frequency) - + (int)i), loop_decay_rate, decay_accumulator ) ) { diff --git a/libvpx/vp8/encoder/lookahead.c b/libvpx/vp8/encoder/lookahead.c index ce2ce08c1..662338574 100644 --- a/libvpx/vp8/encoder/lookahead.c +++ b/libvpx/vp8/encoder/lookahead.c @@ -181,6 +181,7 @@ vp8_lookahead_pop(struct lookahead_ctx *ctx, { struct lookahead_entry* buf = NULL; + assert(ctx != NULL); if(ctx->sz && (drain || ctx->sz == ctx->max_sz - 1)) { buf = pop(ctx, &ctx->read_idx); diff --git a/libvpx/vp8/encoder/mcomp.c b/libvpx/vp8/encoder/mcomp.c index 768c764ce..e20c1ea7b 100644 --- a/libvpx/vp8/encoder/mcomp.c +++ b/libvpx/vp8/encoder/mcomp.c @@ -1591,7 +1591,6 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int col_min = ref_col - distance; int col_max = ref_col + distance; - // TODO(johannkoenig): check if this alignment is necessary. 
DECLARE_ALIGNED(16, unsigned int, sad_array8[8]); unsigned int sad_array[3]; diff --git a/libvpx/vp8/encoder/onyx_if.c b/libvpx/vp8/encoder/onyx_if.c index df5bcf688..d5a0fff35 100644 --- a/libvpx/vp8/encoder/onyx_if.c +++ b/libvpx/vp8/encoder/onyx_if.c @@ -477,6 +477,18 @@ static void dealloc_compressor_data(VP8_COMP *cpi) cpi->mb.pip = 0; #if CONFIG_MULTITHREAD + /* De-allocate mutex */ + if (cpi->pmutex != NULL) { + VP8_COMMON *const pc = &cpi->common; + int i; + + for (i = 0; i < pc->mb_rows; i++) { + pthread_mutex_destroy(&cpi->pmutex[i]); + } + vpx_free(cpi->pmutex); + cpi->pmutex = NULL; + } + vpx_free(cpi->mt_current_mb_col); cpi->mt_current_mb_col = NULL; #endif @@ -1180,6 +1192,9 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi) int width = cm->Width; int height = cm->Height; +#if CONFIG_MULTITHREAD + int prev_mb_rows = cm->mb_rows; +#endif if (vp8_alloc_frame_buffers(cm, width, height)) vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, @@ -1271,6 +1286,25 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi) if (cpi->oxcf.multi_threaded > 1) { + int i; + + /* De-allocate and re-allocate mutex */ + if (cpi->pmutex != NULL) { + for (i = 0; i < prev_mb_rows; i++) { + pthread_mutex_destroy(&cpi->pmutex[i]); + } + vpx_free(cpi->pmutex); + cpi->pmutex = NULL; + } + + CHECK_MEM_ERROR(cpi->pmutex, vpx_malloc(sizeof(*cpi->pmutex) * + cm->mb_rows)); + if (cpi->pmutex) { + for (i = 0; i < cm->mb_rows; i++) { + pthread_mutex_init(&cpi->pmutex[i], NULL); + } + } + vpx_free(cpi->mt_current_mb_col); CHECK_MEM_ERROR(cpi->mt_current_mb_col, vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows)); @@ -1284,9 +1318,11 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi) #if CONFIG_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0) { vp8_denoiser_free(&cpi->denoiser); - vp8_denoiser_allocate(&cpi->denoiser, width, height, - cm->mb_rows, cm->mb_cols, - cpi->oxcf.noise_sensitivity); + if (vp8_denoiser_allocate(&cpi->denoiser, width, height, + cm->mb_rows, cm->mb_cols, 
+ cpi->oxcf.noise_sensitivity)) + vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, + "Failed to allocate denoiser"); } #endif } @@ -1487,7 +1523,8 @@ static void update_layer_contexts (VP8_COMP *cpi) void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) { VP8_COMMON *cm = &cpi->common; - int last_w, last_h, prev_number_of_layers; + int last_w, last_h; + unsigned int prev_number_of_layers; if (!cpi) return; @@ -1495,15 +1532,6 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) if (!oxcf) return; -#if CONFIG_MULTITHREAD - /* wait for the last picture loopfilter thread done */ - if (cpi->b_lpf_running) - { - sem_wait(&cpi->h_event_end_lpf); - cpi->b_lpf_running = 0; - } -#endif - if (cm->version != oxcf->Version) { cm->version = oxcf->Version; @@ -1759,10 +1787,8 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) if (last_w != cpi->oxcf.Width || last_h != cpi->oxcf.Height) cpi->force_next_frame_intra = 1; - if (((cm->Width + 15) & 0xfffffff0) != - cm->yv12_fb[cm->lst_fb_idx].y_width || - ((cm->Height + 15) & 0xfffffff0) != - cm->yv12_fb[cm->lst_fb_idx].y_height || + if (((cm->Width + 15) & ~15) != cm->yv12_fb[cm->lst_fb_idx].y_width || + ((cm->Height + 15) & ~15) != cm->yv12_fb[cm->lst_fb_idx].y_height || cm->yv12_fb[cm->lst_fb_idx].y_width == 0) { dealloc_raw_frame_buffers(cpi); @@ -1798,9 +1824,11 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) { int width = (cpi->oxcf.Width + 15) & ~15; int height = (cpi->oxcf.Height + 15) & ~15; - vp8_denoiser_allocate(&cpi->denoiser, width, height, - cm->mb_rows, cm->mb_cols, - cpi->oxcf.noise_sensitivity); + if (vp8_denoiser_allocate(&cpi->denoiser, width, height, + cm->mb_rows, cm->mb_cols, + cpi->oxcf.noise_sensitivity)) + vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, + "Failed to allocate denoiser"); } } #endif @@ -2228,6 +2256,8 @@ void vp8_remove_compressor(VP8_COMP **ptr) double total_encode_time = (cpi->time_receive_data + cpi->time_compress_data) / 1000.000; double dr 
= (double)cpi->bytes * 8.0 / 1000.0 / time_encoded; + const double target_rate = (double)cpi->oxcf.target_bandwidth / 1000; + const double rate_err = ((100.0 * (dr - target_rate)) / target_rate); if (cpi->b_calculate_psnr) { @@ -2273,12 +2303,14 @@ void vp8_remove_compressor(VP8_COMP **ptr) cpi->summed_weights, 8.0); fprintf(f, "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\t" - "GLPsnrP\tVPXSSIM\t Time(us)\n"); + "GLPsnrP\tVPXSSIM\t Time(us) Rc-Err " + "Abs Err\n"); fprintf(f, "%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t" - "%7.3f\t%8.0f\n", + "%7.3f\t%8.0f %7.2f %7.2f\n", dr, cpi->total / cpi->count, total_psnr, cpi->totalp / cpi->count, total_psnr2, - total_ssim, total_encode_time); + total_ssim, total_encode_time, + rate_err, fabs(rate_err)); } } @@ -3600,15 +3632,6 @@ static void encode_frame_to_data_rate /* Clear down mmx registers to allow floating point in what follows */ vp8_clear_system_state(); -#if CONFIG_MULTITHREAD - /* wait for the last picture loopfilter thread done */ - if (cpi->b_lpf_running) - { - sem_wait(&cpi->h_event_end_lpf); - cpi->b_lpf_running = 0; - } -#endif - if(cpi->force_next_frame_intra) { cm->frame_type = KEY_FRAME; /* delayed intra frame */ @@ -4337,8 +4360,6 @@ static void encode_frame_to_data_rate vp8_setup_key_frame(cpi); } - - #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING { if(cpi->oxcf.error_resilient_mode) @@ -4804,7 +4825,6 @@ static void encode_frame_to_data_rate { /* start loopfilter in separate thread */ sem_post(&cpi->h_event_start_lpf); - cpi->b_lpf_running = 1; } else #endif @@ -4836,11 +4856,10 @@ static void encode_frame_to_data_rate vp8_pack_bitstream(cpi, dest, dest_end, size); #if CONFIG_MULTITHREAD - /* if PSNR packets are generated we have to wait for the lpf */ - if (cpi->b_lpf_running && cpi->b_calculate_psnr) + /* wait for the lpf thread done */ + if (cpi->b_multi_threaded) { sem_wait(&cpi->h_event_end_lpf); - cpi->b_lpf_running = 0; } #endif @@ -5201,7 +5220,7 @@ static void Pass2Encode(VP8_COMP *cpi, unsigned long 
*size, unsigned char *dest, vp8_second_pass(cpi); encode_frame_to_data_rate(cpi, size, dest, dest_end, frame_flags); - cpi->twopass.bits_left -= 8 * *size; + cpi->twopass.bits_left -= 8 * (int)(*size); if (!cpi->common.refresh_alt_ref_frame) { @@ -5800,14 +5819,6 @@ int vp8_get_preview_raw_frame(VP8_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp8_ppfla { int ret; -#if CONFIG_MULTITHREAD - if(cpi->b_lpf_running) - { - sem_wait(&cpi->h_event_end_lpf); - cpi->b_lpf_running = 0; - } -#endif - #if CONFIG_POSTPROC cpi->common.show_frame_mi = cpi->common.mi; ret = vp8_post_proc_frame(&cpi->common, dest, flags); @@ -5845,7 +5856,7 @@ int vp8_set_roimap(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigne return -1; // Check number of rows and columns match - if (cpi->common.mb_rows != rows || cpi->common.mb_cols != cols) + if (cpi->common.mb_rows != (int)rows || cpi->common.mb_cols != (int)cols) return -1; // Range check the delta Q values and convert the external Q range values @@ -5901,7 +5912,7 @@ int vp8_set_roimap(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigne int vp8_set_active_map(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigned int cols) { - if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) + if ((int)rows == cpi->common.mb_rows && (int)cols == cpi->common.mb_cols) { if (map) { diff --git a/libvpx/vp8/encoder/onyx_int.h b/libvpx/vp8/encoder/onyx_int.h index 317e4b9e4..44fbbd456 100644 --- a/libvpx/vp8/encoder/onyx_int.h +++ b/libvpx/vp8/encoder/onyx_int.h @@ -371,7 +371,7 @@ typedef struct VP8_COMP double key_frame_rate_correction_factor; double gf_rate_correction_factor; - unsigned int frames_since_golden; + int frames_since_golden; /* Count down till next GF */ int frames_till_gf_update_due; @@ -530,11 +530,12 @@ typedef struct VP8_COMP #if CONFIG_MULTITHREAD /* multithread data */ + pthread_mutex_t *pmutex; + pthread_mutex_t mt_mutex; /* mutex for b_multi_threaded */ int * mt_current_mb_col; int mt_sync_range; int 
b_multi_threaded; int encoding_thread_count; - int b_lpf_running; pthread_t *h_encoding_thread; pthread_t h_filter_thread; diff --git a/libvpx/vp8/encoder/pickinter.c b/libvpx/vp8/encoder/pickinter.c index d0fff3f04..24b332dcd 100644 --- a/libvpx/vp8/encoder/pickinter.c +++ b/libvpx/vp8/encoder/pickinter.c @@ -36,6 +36,8 @@ extern unsigned int cnt_pm; #endif +#define MODEL_MODE 1 + extern const int vp8_ref_frame_order[MAX_MODES]; extern const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES]; @@ -45,18 +47,22 @@ extern const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES]; // skin color classifier is defined. // Fixed-point skin color model parameters. -static const int skin_mean[2] = {7463, 9614}; // q6 +static const int skin_mean[5][2] = + {{7463, 9614}, {6400, 10240}, {7040, 10240}, {8320, 9280}, {6800, 9614}}; static const int skin_inv_cov[4] = {4107, 1663, 1663, 2157}; // q16 -static const int skin_threshold = 1570636; // q18 +static const int skin_threshold[6] = {1570636, 1400000, 800000, 800000, 800000, + 800000}; // q18 // Evaluates the Mahalanobis distance measure for the input CbCr values. 
-static int evaluate_skin_color_difference(int cb, int cr) -{ +static int evaluate_skin_color_difference(int cb, int cr, int idx) { const int cb_q6 = cb << 6; const int cr_q6 = cr << 6; - const int cb_diff_q12 = (cb_q6 - skin_mean[0]) * (cb_q6 - skin_mean[0]); - const int cbcr_diff_q12 = (cb_q6 - skin_mean[0]) * (cr_q6 - skin_mean[1]); - const int cr_diff_q12 = (cr_q6 - skin_mean[1]) * (cr_q6 - skin_mean[1]); + const int cb_diff_q12 = + (cb_q6 - skin_mean[idx][0]) * (cb_q6 - skin_mean[idx][0]); + const int cbcr_diff_q12 = + (cb_q6 - skin_mean[idx][0]) * (cr_q6 - skin_mean[idx][1]); + const int cr_diff_q12 = + (cr_q6 - skin_mean[idx][1]) * (cr_q6 - skin_mean[idx][1]); const int cb_diff_q2 = (cb_diff_q12 + (1 << 9)) >> 10; const int cbcr_diff_q2 = (cbcr_diff_q12 + (1 << 9)) >> 10; const int cr_diff_q2 = (cr_diff_q12 + (1 << 9)) >> 10; @@ -67,6 +73,52 @@ static int evaluate_skin_color_difference(int cb, int cr) return skin_diff; } +// Checks if the input yCbCr values corresponds to skin color. +static int is_skin_color(int y, int cb, int cr, int consec_zeromv) +{ + if (y < 40 || y > 220) + { + return 0; + } + else + { + if (MODEL_MODE == 0) + { + return (evaluate_skin_color_difference(cb, cr, 0) < skin_threshold[0]); + } + else + { + int i = 0; + // No skin if block has been zero motion for long consecutive time. + if (consec_zeromv > 60) + return 0; + // Exit on grey. + if (cb == 128 && cr == 128) + return 0; + // Exit on very strong cb. + if (cb > 150 && cr < 110) + return 0; + for (; i < 5; i++) { + int skin_color_diff = evaluate_skin_color_difference(cb, cr, i); + if (skin_color_diff < skin_threshold[i + 1]) { + if (y < 60 && skin_color_diff > 3 * (skin_threshold[i + 1] >> 2)) + return 0; + else if (consec_zeromv > 25 && + skin_color_diff > (skin_threshold[i + 1] >> 1)) + return 0; + else + return 1; + } + // Exit if difference is much large than the threshold. 
+ if (skin_color_diff > (skin_threshold[i + 1] << 3)) { + return 0; + } + } + return 0; + } + } +} + static int macroblock_corner_grad(unsigned char* signal, int stride, int offsetx, int offsety, int sgnx, int sgny) { @@ -157,16 +209,6 @@ static int check_dot_artifact_candidate(VP8_COMP *cpi, return 0; } -// Checks if the input yCbCr values corresponds to skin color. -static int is_skin_color(int y, int cb, int cr) -{ - if (y < 40 || y > 220) - { - return 0; - } - return (evaluate_skin_color_difference(cb, cr) < skin_threshold); -} - int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, int_mv *bestmv, int_mv *ref_mv, int error_per_bit, @@ -828,8 +870,10 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, x->src.v_buffer[4 * x->src.uv_stride + 3] + x->src.v_buffer[4 * x->src.uv_stride + 4]) >> 2; x->is_skin = 0; - if (!cpi->oxcf.screen_content_mode) - x->is_skin = is_skin_color(y, cb, cr); + if (!cpi->oxcf.screen_content_mode) { + int block_index = mb_row * cpi->common.mb_cols + mb_col; + x->is_skin = is_skin_color(y, cb, cr, cpi->consec_zero_last[block_index]); + } } #if CONFIG_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity) { @@ -1433,7 +1477,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse, recon_yoffset, recon_uvoffset, &cpi->common.lf_info, mb_row, mb_col, - block_index); + block_index, + cpi->consec_zero_last_mvbias[block_index]); // Reevaluate ZEROMV after denoising: for large noise content // (i.e., cpi->mse_source_denoised is above threshold), do this for all diff --git a/libvpx/vp8/encoder/rdopt.c b/libvpx/vp8/encoder/rdopt.c index ab0ad1599..6507ae9f1 100644 --- a/libvpx/vp8/encoder/rdopt.c +++ b/libvpx/vp8/encoder/rdopt.c @@ -1899,7 +1899,8 @@ static int calculate_final_rd_costs(int this_rd, int prob_skip_cost; prob_skip_cost = vp8_cost_bit(cpi->prob_skip_false, 1); - prob_skip_cost -= 
vp8_cost_bit(cpi->prob_skip_false, 0); + prob_skip_cost -= + (int)vp8_cost_bit(cpi->prob_skip_false, 0); rd->rate2 += prob_skip_cost; *other_cost += prob_skip_cost; } @@ -2530,7 +2531,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse, recon_yoffset, recon_uvoffset, &cpi->common.lf_info, mb_row, mb_col, - block_index); + block_index, 0); /* Reevaluate ZEROMV after denoising. */ if (best_mode.mbmode.ref_frame == INTRA_FRAME && diff --git a/libvpx/vp8/encoder/vp8_quantize.c b/libvpx/vp8/encoder/vp8_quantize.c index ee922c9d6..0d101ba5a 100644 --- a/libvpx/vp8/encoder/vp8_quantize.c +++ b/libvpx/vp8/encoder/vp8_quantize.c @@ -227,12 +227,12 @@ static void invert_quant(int improved_quant, short *quant, if(improved_quant) { unsigned t; - int l; + int l, m; t = d; for(l = 0; t > 1; l++) t>>=1; - t = 1 + (1<<(16+l))/d; - *quant = (short)(t - (1<<16)); + m = 1 + (1<<(16+l))/d; + *quant = (short)(m - (1<<16)); *shift = l; /* use multiplication and constant shift by 16 */ *shift = 1 << (16 - *shift); diff --git a/libvpx/vp8/vp8_cx_iface.c b/libvpx/vp8/vp8_cx_iface.c index c125ae84d..22a82b734 100644 --- a/libvpx/vp8/vp8_cx_iface.c +++ b/libvpx/vp8/vp8_cx_iface.c @@ -22,6 +22,7 @@ #include "vpx/vp8cx.h" #include "vp8/encoder/firstpass.h" #include "vp8/common/onyx.h" +#include "vp8/common/common.h" #include <stdlib.h> #include <string.h> @@ -760,7 +761,7 @@ static void pick_quickcompress_mode(vpx_codec_alg_priv_t *ctx, unsigned long duration, unsigned long deadline) { - unsigned int new_qc; + int new_qc; #if !(CONFIG_REALTIME_ONLY) /* Use best quality mode if no deadline is given. 
*/ @@ -782,10 +783,13 @@ static void pick_quickcompress_mode(vpx_codec_alg_priv_t *ctx, } #else + (void)duration; new_qc = MODE_REALTIME; #endif - if (ctx->cfg.g_pass == VPX_RC_FIRST_PASS) + if (deadline == VPX_DL_REALTIME) + new_qc = MODE_REALTIME; + else if (ctx->cfg.g_pass == VPX_RC_FIRST_PASS) new_qc = MODE_FIRSTPASS; else if (ctx->cfg.g_pass == VPX_RC_LAST_PASS) new_qc = (new_qc == MODE_BESTQUALITY) @@ -1116,7 +1120,8 @@ static vpx_image_t *vp8e_get_preview(vpx_codec_alg_priv_t *ctx) { YV12_BUFFER_CONFIG sd; - vp8_ppflags_t flags = {0}; + vp8_ppflags_t flags; + vp8_zero(flags); if (ctx->preview_ppcfg.post_proc_flag) { @@ -1162,31 +1167,6 @@ static vpx_image_t *vp8e_get_preview(vpx_codec_alg_priv_t *ctx) return NULL; } -static vpx_codec_err_t vp8e_update_entropy(vpx_codec_alg_priv_t *ctx, - va_list args) -{ - int update = va_arg(args, int); - vp8_update_entropy(ctx->cpi, update); - return VPX_CODEC_OK; - -} - -static vpx_codec_err_t vp8e_update_reference(vpx_codec_alg_priv_t *ctx, - va_list args) -{ - int update = va_arg(args, int); - vp8_update_reference(ctx->cpi, update); - return VPX_CODEC_OK; -} - -static vpx_codec_err_t vp8e_use_reference(vpx_codec_alg_priv_t *ctx, - va_list args) -{ - int reference_flag = va_arg(args, int); - vp8_use_as_reference(ctx->cpi, reference_flag); - return VPX_CODEC_OK; -} - static vpx_codec_err_t vp8e_set_frame_flags(vpx_codec_alg_priv_t *ctx, va_list args) { @@ -1330,8 +1310,8 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] = 30, /* rc_resize_up_thresold */ VPX_VBR, /* rc_end_usage */ - {0}, /* rc_twopass_stats_in */ - {0}, /* rc_firstpass_mb_stats_in */ + {NULL, 0}, /* rc_twopass_stats_in */ + {NULL, 0}, /* rc_firstpass_mb_stats_in */ 256, /* rc_target_bandwidth */ 4, /* rc_min_quantizer */ 63, /* rc_max_quantizer */ @@ -1359,6 +1339,8 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] = {0}, /* ts_rate_decimator */ 0, /* ts_periodicity */ {0}, /* ts_layer_id */ + {0}, /* layer_target_bitrate */ + 0 /* 
temporal_layering_mode */ }}, }; diff --git a/libvpx/vp8/vp8_dx_iface.c b/libvpx/vp8/vp8_dx_iface.c index a12a2ad0e..fc9288d62 100644 --- a/libvpx/vp8/vp8_dx_iface.c +++ b/libvpx/vp8/vp8_dx_iface.c @@ -9,6 +9,7 @@ */ +#include <assert.h> #include <stdlib.h> #include <string.h> #include "./vp8_rtcd.h" @@ -67,10 +68,11 @@ struct vpx_codec_alg_priv FRAGMENT_DATA fragments; }; -static void vp8_init_ctx(vpx_codec_ctx_t *ctx) +static int vp8_init_ctx(vpx_codec_ctx_t *ctx) { vpx_codec_alg_priv_t *priv = (vpx_codec_alg_priv_t *)vpx_calloc(1, sizeof(*priv)); + if (!priv) return 1; ctx->priv = (vpx_codec_priv_t *)priv; ctx->priv->init_flags = ctx->init_flags; @@ -85,6 +87,8 @@ static void vp8_init_ctx(vpx_codec_ctx_t *ctx) priv->cfg = *ctx->config.dec; ctx->config.dec = &priv->cfg; } + + return 0; } static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx, @@ -103,7 +107,7 @@ static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx, * information becomes known. */ if (!ctx->priv) { - vp8_init_ctx(ctx); + if (vp8_init_ctx(ctx)) return VPX_CODEC_MEM_ERROR; priv = (vpx_codec_alg_priv_t *)ctx->priv; /* initialize number of fragments to zero */ @@ -151,6 +155,8 @@ static vpx_codec_err_t vp8_peek_si_internal(const uint8_t *data, { vpx_codec_err_t res = VPX_CODEC_OK; + assert(data != NULL); + if(data + data_sz <= data) { res = VPX_CODEC_INVALID_PARAM; @@ -516,7 +522,8 @@ static vpx_image_t *vp8_get_frame(vpx_codec_alg_priv_t *ctx, { YV12_BUFFER_CONFIG sd; int64_t time_stamp = 0, time_end_stamp = 0; - vp8_ppflags_t flags = {0}; + vp8_ppflags_t flags; + vp8_zero(flags); if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) { @@ -810,11 +817,12 @@ CODEC_INTERFACE(vpx_codec_vp8_dx) = }, { /* encoder functions */ 0, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL + NULL, /* vpx_codec_enc_cfg_map_t */ + NULL, /* vpx_codec_encode_fn_t */ + NULL, /* vpx_codec_get_cx_data_fn_t */ + NULL, /* vpx_codec_enc_config_set_fn_t */ + NULL, /* vpx_codec_get_global_headers_fn_t */ + NULL, /* 
vpx_codec_get_preview_frame_fn_t */ + NULL /* vpx_codec_enc_mr_get_mem_loc_fn_t */ } }; diff --git a/libvpx/vp9/common/vp9_alloccommon.c b/libvpx/vp9/common/vp9_alloccommon.c index 24c6c54ed..7dd1005d3 100644 --- a/libvpx/vp9/common/vp9_alloccommon.c +++ b/libvpx/vp9/common/vp9_alloccommon.c @@ -119,6 +119,20 @@ void vp9_free_context_buffers(VP9_COMMON *cm) { cm->lf.lfm = NULL; } + +int vp9_alloc_loop_filter(VP9_COMMON *cm) { + vpx_free(cm->lf.lfm); + // Each lfm holds bit masks for all the 8x8 blocks in a 64x64 region. The + // stride and rows are rounded up / truncated to a multiple of 8. + cm->lf.lfm_stride = (cm->mi_cols + (MI_BLOCK_SIZE - 1)) >> 3; + cm->lf.lfm = (LOOP_FILTER_MASK *)vpx_calloc( + ((cm->mi_rows + (MI_BLOCK_SIZE - 1)) >> 3) * cm->lf.lfm_stride, + sizeof(*cm->lf.lfm)); + if (!cm->lf.lfm) + return 1; + return 0; +} + int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) { int new_mi_size; @@ -151,15 +165,8 @@ int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) { cm->above_context_alloc_cols = cm->mi_cols; } - vpx_free(cm->lf.lfm); - - // Each lfm holds bit masks for all the 8x8 blocks in a 64x64 region. The - // stride and rows are rounded up / truncated to a multiple of 8. 
- cm->lf.lfm_stride = (cm->mi_cols + (MI_BLOCK_SIZE - 1)) >> 3; - cm->lf.lfm = (LOOP_FILTER_MASK *)vpx_calloc( - ((cm->mi_rows + (MI_BLOCK_SIZE - 1)) >> 3) * cm->lf.lfm_stride, - sizeof(*cm->lf.lfm)); - if (!cm->lf.lfm) goto fail; + if (vp9_alloc_loop_filter(cm)) + goto fail; return 0; diff --git a/libvpx/vp9/common/vp9_alloccommon.h b/libvpx/vp9/common/vp9_alloccommon.h index c0e51a6ce..e53955b99 100644 --- a/libvpx/vp9/common/vp9_alloccommon.h +++ b/libvpx/vp9/common/vp9_alloccommon.h @@ -23,6 +23,7 @@ struct BufferPool; void vp9_remove_common(struct VP9Common *cm); +int vp9_alloc_loop_filter(struct VP9Common *cm); int vp9_alloc_context_buffers(struct VP9Common *cm, int width, int height); void vp9_init_context_buffers(struct VP9Common *cm); void vp9_free_context_buffers(struct VP9Common *cm); diff --git a/libvpx/vp9/common/vp9_blockd.c b/libvpx/vp9/common/vp9_blockd.c index 0e104ee59..7bab27d4f 100644 --- a/libvpx/vp9/common/vp9_blockd.c +++ b/libvpx/vp9/common/vp9_blockd.c @@ -13,7 +13,7 @@ PREDICTION_MODE vp9_left_block_mode(const MODE_INFO *cur_mi, const MODE_INFO *left_mi, int b) { if (b == 0 || b == 2) { - if (!left_mi || is_inter_block(&left_mi->mbmi)) + if (!left_mi || is_inter_block(left_mi)) return DC_PRED; return get_y_mode(left_mi, b + 1); @@ -26,7 +26,7 @@ PREDICTION_MODE vp9_left_block_mode(const MODE_INFO *cur_mi, PREDICTION_MODE vp9_above_block_mode(const MODE_INFO *cur_mi, const MODE_INFO *above_mi, int b) { if (b == 0 || b == 1) { - if (!above_mi || is_inter_block(&above_mi->mbmi)) + if (!above_mi || is_inter_block(above_mi)) return DC_PRED; return get_y_mode(above_mi, b + 2); @@ -40,12 +40,12 @@ void vp9_foreach_transformed_block_in_plane( const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane, foreach_transformed_block_visitor visit, void *arg) { const struct macroblockd_plane *const pd = &xd->plane[plane]; - const MB_MODE_INFO* mbmi = &xd->mi[0]->mbmi; + const MODE_INFO* mi = xd->mi[0]; // block and transform sizes, in number of 4x4 blocks 
log 2 ("*_b") // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8 // transform size varies per plane, look it up in a common way. - const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) - : mbmi->tx_size; + const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) + : mi->tx_size; const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; diff --git a/libvpx/vp9/common/vp9_blockd.h b/libvpx/vp9/common/vp9_blockd.h index 61eb59162..3d26fb2b5 100644 --- a/libvpx/vp9/common/vp9_blockd.h +++ b/libvpx/vp9/common/vp9_blockd.h @@ -64,7 +64,7 @@ typedef struct { typedef int8_t MV_REFERENCE_FRAME; // This structure now relates to 8x8 block regions. -typedef struct { +typedef struct MODE_INFO { // Common for both INTER and INTRA blocks BLOCK_SIZE sb_type; PREDICTION_MODE mode; @@ -82,24 +82,21 @@ typedef struct { // TODO(slavarnway): Delete and use bmi[3].as_mv[] instead. int_mv mv[2]; -} MB_MODE_INFO; -typedef struct MODE_INFO { - MB_MODE_INFO mbmi; b_mode_info bmi[4]; } MODE_INFO; static INLINE PREDICTION_MODE get_y_mode(const MODE_INFO *mi, int block) { - return mi->mbmi.sb_type < BLOCK_8X8 ? mi->bmi[block].as_mode - : mi->mbmi.mode; + return mi->sb_type < BLOCK_8X8 ? 
mi->bmi[block].as_mode + : mi->mode; } -static INLINE int is_inter_block(const MB_MODE_INFO *mbmi) { - return mbmi->ref_frame[0] > INTRA_FRAME; +static INLINE int is_inter_block(const MODE_INFO *mi) { + return mi->ref_frame[0] > INTRA_FRAME; } -static INLINE int has_second_ref(const MB_MODE_INFO *mbmi) { - return mbmi->ref_frame[1] > INTRA_FRAME; +static INLINE int has_second_ref(const MODE_INFO *mi) { + return mi->ref_frame[1] > INTRA_FRAME; } PREDICTION_MODE vp9_left_block_mode(const MODE_INFO *cur_mi, @@ -160,11 +157,9 @@ typedef struct macroblockd { MODE_INFO **mi; MODE_INFO *left_mi; MODE_INFO *above_mi; - MB_MODE_INFO *left_mbmi; - MB_MODE_INFO *above_mbmi; - int up_available; - int left_available; + unsigned int max_blocks_wide; + unsigned int max_blocks_high; const vpx_prob (*partition_probs)[PARTITION_TYPES - 1]; @@ -212,19 +207,19 @@ extern const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES]; static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type, const MACROBLOCKD *xd) { - const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + const MODE_INFO *const mi = xd->mi[0]; - if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mbmi)) + if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mi)) return DCT_DCT; - return intra_mode_to_tx_type_lookup[mbmi->mode]; + return intra_mode_to_tx_type_lookup[mi->mode]; } static INLINE TX_TYPE get_tx_type_4x4(PLANE_TYPE plane_type, const MACROBLOCKD *xd, int ib) { const MODE_INFO *const mi = xd->mi[0]; - if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(&mi->mbmi)) + if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mi)) return DCT_DCT; return intra_mode_to_tx_type_lookup[get_y_mode(mi, ib)]; @@ -242,9 +237,9 @@ static INLINE TX_SIZE get_uv_tx_size_impl(TX_SIZE y_tx_size, BLOCK_SIZE bsize, } } -static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi, +static INLINE TX_SIZE get_uv_tx_size(const MODE_INFO *mi, const struct macroblockd_plane *pd) { - return 
get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type, pd->subsampling_x, + return get_uv_tx_size_impl(mi->tx_size, mi->sb_type, pd->subsampling_x, pd->subsampling_y); } diff --git a/libvpx/vp9/common/vp9_common.h b/libvpx/vp9/common/vp9_common.h index 76e7cd440..908fa80a3 100644 --- a/libvpx/vp9/common/vp9_common.h +++ b/libvpx/vp9/common/vp9_common.h @@ -67,7 +67,6 @@ static INLINE int get_unsigned_bits(unsigned int num_values) { #define VP9_FRAME_MARKER 0x2 - #ifdef __cplusplus } // extern "C" #endif diff --git a/libvpx/vp9/common/vp9_common_data.c b/libvpx/vp9/common/vp9_common_data.c index a6dae6a1c..3409d0484 100644 --- a/libvpx/vp9/common/vp9_common_data.c +++ b/libvpx/vp9/common/vp9_common_data.c @@ -159,3 +159,18 @@ const struct { {0, 8 }, // 64X32 - {0b0000, 0b1000} {0, 0 }, // 64X64 - {0b0000, 0b0000} }; + +#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH +const uint8_t need_top_left[INTRA_MODES] = { + 0, // DC_PRED + 0, // V_PRED + 0, // H_PRED + 0, // D45_PRED + 1, // D135_PRED + 1, // D117_PRED + 1, // D153_PRED + 0, // D207_PRED + 0, // D63_PRED + 1, // TM_PRED +}; +#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH diff --git a/libvpx/vp9/common/vp9_common_data.h b/libvpx/vp9/common/vp9_common_data.h index 95a117961..0ae24dad5 100644 --- a/libvpx/vp9/common/vp9_common_data.h +++ b/libvpx/vp9/common/vp9_common_data.h @@ -33,6 +33,9 @@ extern const TX_SIZE max_txsize_lookup[BLOCK_SIZES]; extern const BLOCK_SIZE txsize_to_bsize[TX_SIZES]; extern const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES]; extern const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2]; +#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH +extern const uint8_t need_top_left[INTRA_MODES]; +#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH #ifdef __cplusplus } // extern "C" diff --git a/libvpx/vp9/common/vp9_debugmodes.c b/libvpx/vp9/common/vp9_debugmodes.c index 3d80103d2..d9c1fd968 100644 --- a/libvpx/vp9/common/vp9_debugmodes.c +++ 
b/libvpx/vp9/common/vp9_debugmodes.c @@ -35,7 +35,7 @@ static void print_mi_data(VP9_COMMON *cm, FILE *file, const char *descriptor, fprintf(file, "%c ", prefix); for (mi_col = 0; mi_col < cols; mi_col++) { fprintf(file, "%2d ", - *((int*) ((char *) (&mi[0]->mbmi) + + *((int*) ((char *) (mi[0]) + member_offset))); mi++; } @@ -53,18 +53,18 @@ void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, const char *file) { int rows = cm->mi_rows; int cols = cm->mi_cols; - print_mi_data(cm, mvs, "Partitions:", offsetof(MB_MODE_INFO, sb_type)); - print_mi_data(cm, mvs, "Modes:", offsetof(MB_MODE_INFO, mode)); - print_mi_data(cm, mvs, "Ref frame:", offsetof(MB_MODE_INFO, ref_frame[0])); - print_mi_data(cm, mvs, "Transform:", offsetof(MB_MODE_INFO, tx_size)); - print_mi_data(cm, mvs, "UV Modes:", offsetof(MB_MODE_INFO, uv_mode)); + print_mi_data(cm, mvs, "Partitions:", offsetof(MODE_INFO, sb_type)); + print_mi_data(cm, mvs, "Modes:", offsetof(MODE_INFO, mode)); + print_mi_data(cm, mvs, "Ref frame:", offsetof(MODE_INFO, ref_frame[0])); + print_mi_data(cm, mvs, "Transform:", offsetof(MODE_INFO, tx_size)); + print_mi_data(cm, mvs, "UV Modes:", offsetof(MODE_INFO, uv_mode)); // output skip infomation. 
log_frame_info(cm, "Skips:", mvs); for (mi_row = 0; mi_row < rows; mi_row++) { fprintf(mvs, "S "); for (mi_col = 0; mi_col < cols; mi_col++) { - fprintf(mvs, "%2d ", mi[0]->mbmi.skip); + fprintf(mvs, "%2d ", mi[0]->skip); mi++; } fprintf(mvs, "\n"); @@ -78,8 +78,8 @@ void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, const char *file) { for (mi_row = 0; mi_row < rows; mi_row++) { fprintf(mvs, "V "); for (mi_col = 0; mi_col < cols; mi_col++) { - fprintf(mvs, "%4d:%4d ", mi[0]->mbmi.mv[0].as_mv.row, - mi[0]->mbmi.mv[0].as_mv.col); + fprintf(mvs, "%4d:%4d ", mi[0]->mv[0].as_mv.row, + mi[0]->mv[0].as_mv.col); mi++; } fprintf(mvs, "\n"); diff --git a/libvpx/vp9/common/vp9_entropy.c b/libvpx/vp9/common/vp9_entropy.c index 579857bc9..7b490af34 100644 --- a/libvpx/vp9/common/vp9_entropy.c +++ b/libvpx/vp9/common/vp9_entropy.c @@ -36,20 +36,6 @@ const vpx_prob vp9_cat6_prob[] = { 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129 }; #if CONFIG_VP9_HIGHBITDEPTH -const vpx_prob vp9_cat1_prob_high10[] = { 159 }; -const vpx_prob vp9_cat2_prob_high10[] = { 165, 145 }; -const vpx_prob vp9_cat3_prob_high10[] = { 173, 148, 140 }; -const vpx_prob vp9_cat4_prob_high10[] = { 176, 155, 140, 135 }; -const vpx_prob vp9_cat5_prob_high10[] = { 180, 157, 141, 134, 130 }; -const vpx_prob vp9_cat6_prob_high10[] = { - 255, 255, 254, 254, 254, 252, 249, 243, - 230, 196, 177, 153, 140, 133, 130, 129 -}; -const vpx_prob vp9_cat1_prob_high12[] = { 159 }; -const vpx_prob vp9_cat2_prob_high12[] = { 165, 145 }; -const vpx_prob vp9_cat3_prob_high12[] = { 173, 148, 140 }; -const vpx_prob vp9_cat4_prob_high12[] = { 176, 155, 140, 135 }; -const vpx_prob vp9_cat5_prob_high12[] = { 180, 157, 141, 134, 130 }; const vpx_prob vp9_cat6_prob_high12[] = { 255, 255, 255, 255, 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129 @@ -403,7 +389,6 @@ const vpx_prob vp9_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES] = { {255, 241, 243, 255, 236, 255, 252, 254}, {255, 243, 245, 
255, 237, 255, 252, 254}, {255, 246, 247, 255, 239, 255, 253, 255}, - {255, 246, 247, 255, 239, 255, 253, 255}, }; static const vp9_coeff_probs_model default_coef_probs_4x4[PLANE_TYPES] = { @@ -743,8 +728,8 @@ static const vp9_coeff_probs_model default_coef_probs_32x32[PLANE_TYPES] = { }; static void extend_to_full_distribution(vpx_prob *probs, vpx_prob p) { - memcpy(probs, vp9_pareto8_full[p = 0 ? 0 : p - 1], - MODEL_NODES * sizeof(vpx_prob)); + assert(p != 0); + memcpy(probs, vp9_pareto8_full[p - 1], MODEL_NODES * sizeof(vpx_prob)); } void vp9_model_to_full_probs(const vpx_prob *model, vpx_prob *full) { diff --git a/libvpx/vp9/common/vp9_entropy.h b/libvpx/vp9/common/vp9_entropy.h index 21611ed6d..63b3bff5d 100644 --- a/libvpx/vp9/common/vp9_entropy.h +++ b/libvpx/vp9/common/vp9_entropy.h @@ -138,7 +138,7 @@ static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) { // 1, 3, 5, 7, ..., 253, 255 // In between probabilities are interpolated linearly -#define COEFF_PROB_MODELS 256 +#define COEFF_PROB_MODELS 255 #define UNCONSTRAINED_NODES 3 diff --git a/libvpx/vp9/common/vp9_entropymv.c b/libvpx/vp9/common/vp9_entropymv.c index 3acfe1448..566ae91cf 100644 --- a/libvpx/vp9/common/vp9_entropymv.c +++ b/libvpx/vp9/common/vp9_entropymv.c @@ -11,9 +11,6 @@ #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_entropymv.h" -// Integer pel reference mv threshold for use of high-precision 1/8 mv -#define COMPANDED_MVREF_THRESH 8 - const vpx_tree_index vp9_mv_joint_tree[TREE_SIZE(MV_JOINTS)] = { -MV_JOINT_ZERO, 2, -MV_JOINT_HNZVZ, 4, @@ -127,11 +124,6 @@ MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset) { return c; } -int vp9_use_mv_hp(const MV *ref) { - return (abs(ref->row) >> 3) < COMPANDED_MVREF_THRESH && - (abs(ref->col) >> 3) < COMPANDED_MVREF_THRESH; -} - static void inc_mv_component(int v, nmv_component_counts *comp_counts, int incr, int usehp) { int s, z, c, o, d, e, f; diff --git a/libvpx/vp9/common/vp9_entropymv.h 
b/libvpx/vp9/common/vp9_entropymv.h index 8c817bf7b..2f05ad44b 100644 --- a/libvpx/vp9/common/vp9_entropymv.h +++ b/libvpx/vp9/common/vp9_entropymv.h @@ -27,7 +27,14 @@ struct VP9Common; void vp9_init_mv_probs(struct VP9Common *cm); void vp9_adapt_mv_probs(struct VP9Common *cm, int usehp); -int vp9_use_mv_hp(const MV *ref); + +// Integer pel reference mv threshold for use of high-precision 1/8 mv +#define COMPANDED_MVREF_THRESH 8 + +static INLINE int use_mv_hp(const MV *ref) { + return (abs(ref->row) >> 3) < COMPANDED_MVREF_THRESH && + (abs(ref->col) >> 3) < COMPANDED_MVREF_THRESH; +} #define MV_UPDATE_PROB 252 diff --git a/libvpx/vp9/common/vp9_idct.c b/libvpx/vp9/common/vp9_idct.c index d12cd76db..1b420143b 100644 --- a/libvpx/vp9/common/vp9_idct.c +++ b/libvpx/vp9/common/vp9_idct.c @@ -174,6 +174,9 @@ void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, else if (eob <= 34) // non-zero coeff only in upper-left 8x8 vpx_idct32x32_34_add(input, dest, stride); + else if (eob <= 135) + // non-zero coeff only in upper-left 16x16 + vpx_idct32x32_135_add(input, dest, stride); else vpx_idct32x32_1024_add(input, dest, stride); } diff --git a/libvpx/vp9/common/vp9_loopfilter.c b/libvpx/vp9/common/vp9_loopfilter.c index b8a113223..183dec4e7 100644 --- a/libvpx/vp9/common/vp9_loopfilter.c +++ b/libvpx/vp9/common/vp9_loopfilter.c @@ -232,9 +232,9 @@ static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) { } static uint8_t get_filter_level(const loop_filter_info_n *lfi_n, - const MB_MODE_INFO *mbmi) { - return lfi_n->lvl[mbmi->segment_id][mbmi->ref_frame[0]] - [mode_lf_lut[mbmi->mode]]; + const MODE_INFO *mi) { + return lfi_n->lvl[mi->segment_id][mi->ref_frame[0]] + [mode_lf_lut[mi->mode]]; } void vp9_loop_filter_init(VP9_COMMON *cm) { @@ -298,200 +298,168 @@ void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) { static void filter_selectively_vert_row2(int subsampling_factor, uint8_t *s, int pitch, - unsigned int 
mask_16x16_l, - unsigned int mask_8x8_l, - unsigned int mask_4x4_l, - unsigned int mask_4x4_int_l, - const loop_filter_info_n *lfi_n, + unsigned int mask_16x16, + unsigned int mask_8x8, + unsigned int mask_4x4, + unsigned int mask_4x4_int, + const loop_filter_thresh *lfthr, const uint8_t *lfl) { - const int mask_shift = subsampling_factor ? 4 : 8; - const int mask_cutoff = subsampling_factor ? 0xf : 0xff; + const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff; const int lfl_forward = subsampling_factor ? 4 : 8; - - unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff; - unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff; - unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff; - unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff; - unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff; - unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff; - unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff; - unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff; + const unsigned int dual_one = 1 | (1 << lfl_forward); unsigned int mask; - - for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 | - mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1; - mask; mask >>= 1) { - const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl; - const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward); - - // TODO(yunqingwang): count in loopfilter functions should be removed. 
- if (mask & 1) { - if ((mask_16x16_0 | mask_16x16_1) & 1) { - if ((mask_16x16_0 & mask_16x16_1) & 1) { - vpx_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr); - } else if (mask_16x16_0 & 1) { - vpx_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr); + uint8_t *ss[2]; + ss[0] = s; + + for (mask = + (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff; + mask; mask = (mask & ~dual_one) >> 1) { + if (mask & dual_one) { + const loop_filter_thresh *lfis[2]; + lfis[0] = lfthr + *lfl; + lfis[1] = lfthr + *(lfl + lfl_forward); + ss[1] = ss[0] + 8 * pitch; + + if (mask_16x16 & dual_one) { + if ((mask_16x16 & dual_one) == dual_one) { + vpx_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, + lfis[0]->hev_thr); } else { - vpx_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr); + const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)]; + vpx_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim, + lfi->lim, lfi->hev_thr); } } - if ((mask_8x8_0 | mask_8x8_1) & 1) { - if ((mask_8x8_0 & mask_8x8_1) & 1) { - vpx_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, lfi1->lim, - lfi1->hev_thr); - } else if (mask_8x8_0 & 1) { - vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, - 1); + if (mask_8x8 & dual_one) { + if ((mask_8x8 & dual_one) == dual_one) { + vpx_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, + lfis[0]->hev_thr, lfis[1]->mblim, + lfis[1]->lim, lfis[1]->hev_thr); } else { - vpx_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim, - lfi1->hev_thr, 1); + const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)]; + vpx_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); } } - if ((mask_4x4_0 | mask_4x4_1) & 1) { - if ((mask_4x4_0 & mask_4x4_1) & 1) { - vpx_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, lfi1->lim, - lfi1->hev_thr); - } else if 
(mask_4x4_0 & 1) { - vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, - 1); + if (mask_4x4 & dual_one) { + if ((mask_4x4 & dual_one) == dual_one) { + vpx_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, + lfis[0]->hev_thr, lfis[1]->mblim, + lfis[1]->lim, lfis[1]->hev_thr); } else { - vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim, - lfi1->hev_thr, 1); + const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)]; + vpx_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); } } - if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) { - if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) { - vpx_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, lfi1->lim, - lfi1->hev_thr); - } else if (mask_4x4_int_0 & 1) { - vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, 1); + if (mask_4x4_int & dual_one) { + if ((mask_4x4_int & dual_one) == dual_one) { + vpx_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim, + lfis[0]->lim, lfis[0]->hev_thr, + lfis[1]->mblim, lfis[1]->lim, + lfis[1]->hev_thr); } else { - vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim, - lfi1->hev_thr, 1); + const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)]; + vpx_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr); } } } - s += 8; + ss[0] += 8; lfl += 1; - mask_16x16_0 >>= 1; - mask_8x8_0 >>= 1; - mask_4x4_0 >>= 1; - mask_4x4_int_0 >>= 1; - mask_16x16_1 >>= 1; - mask_8x8_1 >>= 1; - mask_4x4_1 >>= 1; - mask_4x4_int_1 >>= 1; + mask_16x16 >>= 1; + mask_8x8 >>= 1; + mask_4x4 >>= 1; + mask_4x4_int >>= 1; } } #if CONFIG_VP9_HIGHBITDEPTH static void highbd_filter_selectively_vert_row2(int subsampling_factor, uint16_t *s, int pitch, - unsigned int mask_16x16_l, - unsigned int mask_8x8_l, - unsigned int mask_4x4_l, - unsigned int mask_4x4_int_l, - const loop_filter_info_n *lfi_n, + unsigned int mask_16x16, + unsigned int mask_8x8, + 
unsigned int mask_4x4, + unsigned int mask_4x4_int, + const loop_filter_thresh *lfthr, const uint8_t *lfl, int bd) { - const int mask_shift = subsampling_factor ? 4 : 8; - const int mask_cutoff = subsampling_factor ? 0xf : 0xff; + const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff; const int lfl_forward = subsampling_factor ? 4 : 8; - - unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff; - unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff; - unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff; - unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff; - unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff; - unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff; - unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff; - unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff; + const unsigned int dual_one = 1 | (1 << lfl_forward); unsigned int mask; - - for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 | - mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1; - mask; mask >>= 1) { - const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl; - const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward); - - // TODO(yunqingwang): count in loopfilter functions should be removed. 
- if (mask & 1) { - if ((mask_16x16_0 | mask_16x16_1) & 1) { - if ((mask_16x16_0 & mask_16x16_1) & 1) { - vpx_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, bd); - } else if (mask_16x16_0 & 1) { - vpx_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, bd); + uint16_t *ss[2]; + ss[0] = s; + + for (mask = + (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff; + mask; mask = (mask & ~dual_one) >> 1) { + if (mask & dual_one) { + const loop_filter_thresh *lfis[2]; + lfis[0] = lfthr + *lfl; + lfis[1] = lfthr + *(lfl + lfl_forward); + ss[1] = ss[0] + 8 * pitch; + + if (mask_16x16 & dual_one) { + if ((mask_16x16 & dual_one) == dual_one) { + vpx_highbd_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim, + lfis[0]->lim, lfis[0]->hev_thr, bd); } else { - vpx_highbd_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, bd); + const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)]; + vpx_highbd_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); } } - if ((mask_8x8_0 | mask_8x8_1) & 1) { - if ((mask_8x8_0 & mask_8x8_1) & 1) { - vpx_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, lfi1->lim, - lfi1->hev_thr, bd); - } else if (mask_8x8_0 & 1) { - vpx_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, 1, bd); + if (mask_8x8 & dual_one) { + if ((mask_8x8 & dual_one) == dual_one) { + vpx_highbd_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim, + lfis[0]->lim, lfis[0]->hev_thr, + lfis[1]->mblim, lfis[1]->lim, + lfis[1]->hev_thr, bd); } else { - vpx_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, 1, bd); + const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)]; + vpx_highbd_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); } } - if ((mask_4x4_0 | mask_4x4_1) & 1) { - if ((mask_4x4_0 & mask_4x4_1) & 1) { - 
vpx_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, lfi1->lim, - lfi1->hev_thr, bd); - } else if (mask_4x4_0 & 1) { - vpx_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, 1, bd); + if (mask_4x4 & dual_one) { + if ((mask_4x4 & dual_one) == dual_one) { + vpx_highbd_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim, + lfis[0]->lim, lfis[0]->hev_thr, + lfis[1]->mblim, lfis[1]->lim, + lfis[1]->hev_thr, bd); } else { - vpx_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, 1, bd); + const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)]; + vpx_highbd_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); } } - if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) { - if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) { - vpx_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, lfi1->lim, - lfi1->hev_thr, bd); - } else if (mask_4x4_int_0 & 1) { - vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, 1, bd); + if (mask_4x4_int & dual_one) { + if ((mask_4x4_int & dual_one) == dual_one) { + vpx_highbd_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim, + lfis[0]->lim, lfis[0]->hev_thr, + lfis[1]->mblim, lfis[1]->lim, + lfis[1]->hev_thr, bd); } else { - vpx_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, 1, bd); + const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)]; + vpx_highbd_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch, + lfi->mblim, lfi->lim, lfi->hev_thr, bd); } } } - s += 8; + ss[0] += 8; lfl += 1; - mask_16x16_0 >>= 1; - mask_8x8_0 >>= 1; - mask_4x4_0 >>= 1; - mask_4x4_int_0 >>= 1; - mask_16x16_1 >>= 1; - mask_8x8_1 >>= 1; - mask_4x4_1 >>= 1; - mask_4x4_int_1 >>= 1; + mask_16x16 >>= 1; + mask_8x8 >>= 1; + mask_4x4 >>= 1; + mask_4x4_int >>= 1; } } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -501,30 +469,30 @@ static void 
filter_selectively_horiz(uint8_t *s, int pitch, unsigned int mask_8x8, unsigned int mask_4x4, unsigned int mask_4x4_int, - const loop_filter_info_n *lfi_n, + const loop_filter_thresh *lfthr, const uint8_t *lfl) { unsigned int mask; int count; for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask; mask >>= count) { - const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl; - count = 1; if (mask & 1) { + const loop_filter_thresh *lfi = lfthr + *lfl; + if (mask_16x16 & 1) { if ((mask_16x16 & 3) == 3) { - vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 2); + vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); count = 2; } else { - vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + vpx_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); } } else if (mask_8x8 & 1) { if ((mask_8x8 & 3) == 3) { // Next block's thresholds. - const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); + const loop_filter_thresh *lfin = lfthr + *(lfl + 1); vpx_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, @@ -537,23 +505,23 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, } else { if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); else if (mask_4x4_int & 2) vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr, 1); + lfin->lim, lfin->hev_thr); } count = 2; } else { - vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); } } else if (mask_4x4 & 1) { if ((mask_4x4 & 3) == 3) { // Next block's thresholds. 
- const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); + const loop_filter_thresh *lfin = lfthr + *(lfl + 1); vpx_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, @@ -565,22 +533,22 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, } else { if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); else if (mask_4x4_int & 2) vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr, 1); + lfin->lim, lfin->hev_thr); } count = 2; } else { - vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); } - } else if (mask_4x4_int & 1) { + } else { vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); } } s += 8 * count; @@ -598,30 +566,30 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, unsigned int mask_8x8, unsigned int mask_4x4, unsigned int mask_4x4_int, - const loop_filter_info_n *lfi_n, + const loop_filter_thresh *lfthr, const uint8_t *lfl, int bd) { unsigned int mask; int count; for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask; mask >>= count) { - const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl; - count = 1; if (mask & 1) { + const loop_filter_thresh *lfi = lfthr + *lfl; + if (mask_16x16 & 1) { if ((mask_16x16 & 3) == 3) { - vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 2, bd); + vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); count = 2; } else { - vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + vpx_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); } } else if (mask_8x8 & 1) { if 
((mask_8x8 & 3) == 3) { // Next block's thresholds. - const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); + const loop_filter_thresh *lfin = lfthr + *(lfl + 1); vpx_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, @@ -635,26 +603,26 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, } else { if (mask_4x4_int & 1) { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, 1, bd); + lfi->lim, lfi->hev_thr, bd); } else if (mask_4x4_int & 2) { vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr, 1, bd); + lfin->lim, lfin->hev_thr, bd); } } count = 2; } else { vpx_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + lfi->hev_thr, bd); if (mask_4x4_int & 1) { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, 1, bd); + lfi->lim, lfi->hev_thr, bd); } } } else if (mask_4x4 & 1) { if ((mask_4x4 & 3) == 3) { // Next block's thresholds. 
- const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); + const loop_filter_thresh *lfin = lfthr + *(lfl + 1); vpx_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, @@ -667,25 +635,25 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, } else { if (mask_4x4_int & 1) { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, 1, bd); + lfi->lim, lfi->hev_thr, bd); } else if (mask_4x4_int & 2) { vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr, 1, bd); + lfin->lim, lfin->hev_thr, bd); } } count = 2; } else { vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + lfi->hev_thr, bd); if (mask_4x4_int & 1) { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, 1, bd); + lfi->lim, lfi->hev_thr, bd); } } - } else if (mask_4x4_int & 1) { + } else { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + lfi->hev_thr, bd); } } s += 8 * count; @@ -704,16 +672,14 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, // whether there were any coefficients encoded, and the loop filter strength // block we are currently looking at. Shift is used to position the // 1's we produce. -// TODO(JBB) Need another function for different resolution color.. 
static void build_masks(const loop_filter_info_n *const lfi_n, const MODE_INFO *mi, const int shift_y, const int shift_uv, LOOP_FILTER_MASK *lfm) { - const MB_MODE_INFO *mbmi = &mi->mbmi; - const BLOCK_SIZE block_size = mbmi->sb_type; - const TX_SIZE tx_size_y = mbmi->tx_size; + const BLOCK_SIZE block_size = mi->sb_type; + const TX_SIZE tx_size_y = mi->tx_size; const TX_SIZE tx_size_uv = get_uv_tx_size_impl(tx_size_y, block_size, 1, 1); - const int filter_level = get_filter_level(lfi_n, mbmi); + const int filter_level = get_filter_level(lfi_n, mi); uint64_t *const left_y = &lfm->left_y[tx_size_y]; uint64_t *const above_y = &lfm->above_y[tx_size_y]; uint64_t *const int_4x4_y = &lfm->int_4x4_y; @@ -754,7 +720,7 @@ static void build_masks(const loop_filter_info_n *const lfi_n, // If the block has no coefficients and is not intra we skip applying // the loop filter on block edges. - if (mbmi->skip && is_inter_block(mbmi)) + if (mi->skip && is_inter_block(mi)) return; // Here we are adding a mask for the transform size. 
The transform @@ -788,10 +754,9 @@ static void build_masks(const loop_filter_info_n *const lfi_n, static void build_y_mask(const loop_filter_info_n *const lfi_n, const MODE_INFO *mi, const int shift_y, LOOP_FILTER_MASK *lfm) { - const MB_MODE_INFO *mbmi = &mi->mbmi; - const BLOCK_SIZE block_size = mbmi->sb_type; - const TX_SIZE tx_size_y = mbmi->tx_size; - const int filter_level = get_filter_level(lfi_n, mbmi); + const BLOCK_SIZE block_size = mi->sb_type; + const TX_SIZE tx_size_y = mi->tx_size; + const int filter_level = get_filter_level(lfi_n, mi); uint64_t *const left_y = &lfm->left_y[tx_size_y]; uint64_t *const above_y = &lfm->above_y[tx_size_y]; uint64_t *const int_4x4_y = &lfm->int_4x4_y; @@ -812,7 +777,7 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n, *above_y |= above_prediction_mask[block_size] << shift_y; *left_y |= left_prediction_mask[block_size] << shift_y; - if (mbmi->skip && is_inter_block(mbmi)) + if (mi->skip && is_inter_block(mi)) return; *above_y |= (size_mask[block_size] & @@ -941,7 +906,6 @@ void vp9_adjust_mask(VP9_COMMON *const cm, const int mi_row, // This function sets up the bit masks for the entire 64x64 region represented // by mi_row, mi_col. -// TODO(JBB): This function only works for yv12. void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, MODE_INFO **mi, const int mode_info_stride, LOOP_FILTER_MASK *lfm) { @@ -977,10 +941,7 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, vp9_zero(*lfm); assert(mip[0] != NULL); - // TODO(jimbankoski): Try moving most of the following code into decode - // loop and storing lfm in the mbmi structure so that we don't have to go - // through the recursive loop structure multiple times. 
- switch (mip[0]->mbmi.sb_type) { + switch (mip[0]->sb_type) { case BLOCK_64X64: build_masks(lfi_n, mip[0] , 0, 0, lfm); break; @@ -1006,7 +967,7 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, const int mi_32_row_offset = ((idx_32 >> 1) << 2); if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows) continue; - switch (mip[0]->mbmi.sb_type) { + switch (mip[0]->sb_type) { case BLOCK_32X32: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); break; @@ -1036,7 +997,7 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, if (mi_16_col_offset >= max_cols || mi_16_row_offset >= max_rows) continue; - switch (mip[0]->mbmi.sb_type) { + switch (mip[0]->sb_type) { case BLOCK_16X16: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); break; @@ -1083,8 +1044,6 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, } break; } - - vp9_adjust_mask(cm, mi_row, mi_col, lfm); } static void filter_selectively_vert(uint8_t *s, int pitch, @@ -1092,25 +1051,25 @@ static void filter_selectively_vert(uint8_t *s, int pitch, unsigned int mask_8x8, unsigned int mask_4x4, unsigned int mask_4x4_int, - const loop_filter_info_n *lfi_n, + const loop_filter_thresh *lfthr, const uint8_t *lfl) { unsigned int mask; for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask; mask >>= 1) { - const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl; + const loop_filter_thresh *lfi = lfthr + *lfl; if (mask & 1) { if (mask_16x16 & 1) { vpx_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } else if (mask_8x8 & 1) { - vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } else if (mask_4x4 & 1) { - vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } } if (mask_4x4_int & 1) - vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, 
lfi->hev_thr, 1); + vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); s += 8; lfl += 1; mask_16x16 >>= 1; @@ -1126,13 +1085,13 @@ static void highbd_filter_selectively_vert(uint16_t *s, int pitch, unsigned int mask_8x8, unsigned int mask_4x4, unsigned int mask_4x4_int, - const loop_filter_info_n *lfi_n, + const loop_filter_thresh *lfthr, const uint8_t *lfl, int bd) { unsigned int mask; for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask; mask >>= 1) { - const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl; + const loop_filter_thresh *lfi = lfthr + *lfl; if (mask & 1) { if (mask_16x16 & 1) { @@ -1140,15 +1099,15 @@ static void highbd_filter_selectively_vert(uint16_t *s, int pitch, lfi->hev_thr, bd); } else if (mask_8x8 & 1) { vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + lfi->hev_thr, bd); } else if (mask_4x4 & 1) { vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + lfi->hev_thr, bd); } } if (mask_4x4_int & 1) vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + lfi->hev_thr, bd); s += 8; lfl += 1; mask_16x16 >>= 1; @@ -1186,8 +1145,8 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm, // Determine the vertical edges that need filtering for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) { const MODE_INFO *mi = mi_8x8[c]; - const BLOCK_SIZE sb_type = mi[0].mbmi.sb_type; - const int skip_this = mi[0].mbmi.skip && is_inter_block(&mi[0].mbmi); + const BLOCK_SIZE sb_type = mi[0].sb_type; + const int skip_this = mi[0].skip && is_inter_block(mi); // left edge of current unit is block/partition edge -> no skip const int block_edge_left = (num_4x4_blocks_wide_lookup[sb_type] > 1) ? !(c & (num_8x8_blocks_wide_lookup[sb_type] - 1)) : 1; @@ -1196,13 +1155,13 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm, const int block_edge_above = (num_4x4_blocks_high_lookup[sb_type] > 1) ? 
!(r & (num_8x8_blocks_high_lookup[sb_type] - 1)) : 1; const int skip_this_r = skip_this && !block_edge_above; - const TX_SIZE tx_size = get_uv_tx_size(&mi[0].mbmi, plane); + const TX_SIZE tx_size = get_uv_tx_size(mi, plane); const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1; const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1; // Filter level can vary per MI if (!(lfl[(r << 3) + (c >> ss_x)] = - get_filter_level(&cm->lf_info, &mi[0].mbmi))) + get_filter_level(&cm->lf_info, mi))) continue; // Build masks based on the transform size of each block @@ -1263,23 +1222,18 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm, mask_8x8_c & border_mask, mask_4x4_c & border_mask, mask_4x4_int[r], - &cm->lf_info, &lfl[r << 3], + cm->lf_info.lfthr, &lfl[r << 3], (int)cm->bit_depth); } else { +#endif // CONFIG_VP9_HIGHBITDEPTH filter_selectively_vert(dst->buf, dst->stride, mask_16x16_c & border_mask, mask_8x8_c & border_mask, mask_4x4_c & border_mask, mask_4x4_int[r], - &cm->lf_info, &lfl[r << 3]); + cm->lf_info.lfthr, &lfl[r << 3]); +#if CONFIG_VP9_HIGHBITDEPTH } -#else - filter_selectively_vert(dst->buf, dst->stride, - mask_16x16_c & border_mask, - mask_8x8_c & border_mask, - mask_4x4_c & border_mask, - mask_4x4_int[r], - &cm->lf_info, &lfl[r << 3]); #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += 8 * dst->stride; mi_8x8 += row_step_stride; @@ -1312,23 +1266,18 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm, mask_8x8_r, mask_4x4_r, mask_4x4_int_r, - &cm->lf_info, &lfl[r << 3], + cm->lf_info.lfthr, &lfl[r << 3], (int)cm->bit_depth); } else { +#endif // CONFIG_VP9_HIGHBITDEPTH filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, mask_4x4_r, mask_4x4_int_r, - &cm->lf_info, &lfl[r << 3]); + cm->lf_info.lfthr, &lfl[r << 3]); +#if CONFIG_VP9_HIGHBITDEPTH } -#else - filter_selectively_horiz(dst->buf, dst->stride, - mask_16x16_r, - mask_8x8_r, - mask_4x4_r, - mask_4x4_int_r, - &cm->lf_info, &lfl[r << 3]); #endif // 
CONFIG_VP9_HIGHBITDEPTH dst->buf += 8 * dst->stride; } @@ -1350,27 +1299,29 @@ void vp9_filter_block_plane_ss00(VP9_COMMON *const cm, // Vertical pass: do 2 rows at one time for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { - unsigned int mask_16x16_l = mask_16x16 & 0xffff; - unsigned int mask_8x8_l = mask_8x8 & 0xffff; - unsigned int mask_4x4_l = mask_4x4 & 0xffff; - unsigned int mask_4x4_int_l = mask_4x4_int & 0xffff; - -// Disable filtering on the leftmost column. + // Disable filtering on the leftmost column. #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { - highbd_filter_selectively_vert_row2( - plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, - mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, - &lfm->lfl_y[r << 3], (int)cm->bit_depth); + highbd_filter_selectively_vert_row2(plane->subsampling_x, + CONVERT_TO_SHORTPTR(dst->buf), + dst->stride, + (unsigned int)mask_16x16, + (unsigned int)mask_8x8, + (unsigned int)mask_4x4, + (unsigned int)mask_4x4_int, + cm->lf_info.lfthr, + &lfm->lfl_y[r << 3], + (int)cm->bit_depth); } else { - filter_selectively_vert_row2( - plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l, - mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]); +#endif // CONFIG_VP9_HIGHBITDEPTH + filter_selectively_vert_row2(plane->subsampling_x, dst->buf, dst->stride, + (unsigned int)mask_16x16, + (unsigned int)mask_8x8, + (unsigned int)mask_4x4, + (unsigned int)mask_4x4_int, + cm->lf_info.lfthr, &lfm->lfl_y[r << 3]); +#if CONFIG_VP9_HIGHBITDEPTH } -#else - filter_selectively_vert_row2( - plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l, - mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]); #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += 16 * dst->stride; mask_16x16 >>= 16; @@ -1403,19 +1354,18 @@ void vp9_filter_block_plane_ss00(VP9_COMMON *const cm, #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { - 
highbd_filter_selectively_horiz( - CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r << 3], - (int)cm->bit_depth); + highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), + dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int & 0xff, + cm->lf_info.lfthr, &lfm->lfl_y[r << 3], + (int)cm->bit_depth); } else { +#endif // CONFIG_VP9_HIGHBITDEPTH filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, - &lfm->lfl_y[r << 3]); + mask_4x4_r, mask_4x4_int & 0xff, + cm->lf_info.lfthr, &lfm->lfl_y[r << 3]); +#if CONFIG_VP9_HIGHBITDEPTH } -#else - filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, - &lfm->lfl_y[r << 3]); #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += 8 * dst->stride; @@ -1449,38 +1399,35 @@ void vp9_filter_block_plane_ss11(VP9_COMMON *const cm, lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)]; } - { - unsigned int mask_16x16_l = mask_16x16 & 0xff; - unsigned int mask_8x8_l = mask_8x8 & 0xff; - unsigned int mask_4x4_l = mask_4x4 & 0xff; - unsigned int mask_4x4_int_l = mask_4x4_int & 0xff; - -// Disable filtering on the leftmost column. + // Disable filtering on the leftmost column. 
#if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - highbd_filter_selectively_vert_row2( - plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, - mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, - &lfl_uv[r << 1], (int)cm->bit_depth); - } else { - filter_selectively_vert_row2( - plane->subsampling_x, dst->buf, dst->stride, - mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, - &lfl_uv[r << 1]); - } -#else - filter_selectively_vert_row2( - plane->subsampling_x, dst->buf, dst->stride, - mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, - &lfl_uv[r << 1]); + if (cm->use_highbitdepth) { + highbd_filter_selectively_vert_row2(plane->subsampling_x, + CONVERT_TO_SHORTPTR(dst->buf), + dst->stride, + (unsigned int)mask_16x16, + (unsigned int)mask_8x8, + (unsigned int)mask_4x4, + (unsigned int)mask_4x4_int, + cm->lf_info.lfthr, &lfl_uv[r << 1], + (int)cm->bit_depth); + } else { #endif // CONFIG_VP9_HIGHBITDEPTH - - dst->buf += 16 * dst->stride; - mask_16x16 >>= 8; - mask_8x8 >>= 8; - mask_4x4 >>= 8; - mask_4x4_int >>= 8; + filter_selectively_vert_row2(plane->subsampling_x, dst->buf, dst->stride, + (unsigned int)mask_16x16, + (unsigned int)mask_8x8, + (unsigned int)mask_4x4, + (unsigned int)mask_4x4_int, + cm->lf_info.lfthr, &lfl_uv[r << 1]); +#if CONFIG_VP9_HIGHBITDEPTH } +#endif // CONFIG_VP9_HIGHBITDEPTH + + dst->buf += 16 * dst->stride; + mask_16x16 >>= 8; + mask_8x8 >>= 8; + mask_4x4 >>= 8; + mask_4x4_int >>= 8; } // Horizontal pass @@ -1512,17 +1459,16 @@ void vp9_filter_block_plane_ss11(VP9_COMMON *const cm, if (cm->use_highbitdepth) { highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int_r, &cm->lf_info, - &lfl_uv[r << 1], (int)cm->bit_depth); + mask_4x4_r, mask_4x4_int_r, + cm->lf_info.lfthr, &lfl_uv[r << 1], + (int)cm->bit_depth); } else { +#endif // CONFIG_VP9_HIGHBITDEPTH filter_selectively_horiz(dst->buf, 
dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int_r, &cm->lf_info, + mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr, &lfl_uv[r << 1]); +#if CONFIG_VP9_HIGHBITDEPTH } -#else - filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int_r, &cm->lf_info, - &lfl_uv[r << 1]); #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += 8 * dst->stride; @@ -1558,7 +1504,7 @@ static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, VP9_COMMON *cm, vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); - // TODO(JBB): Make setup_mask work for non 420. + // TODO(jimbankoski): For 444 only need to do y mask. vp9_adjust_mask(cm, mi_row, mi_col, lfm); vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, lfm); @@ -1598,6 +1544,8 @@ void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame, } // Used by the encoder to build the loopfilter masks. +// TODO(slavarnway): Do the encoder the same way the decoder does it and +// build the masks in line as part of the encode process. void vp9_build_mask_frame(VP9_COMMON *cm, int frame_filter_level, int partial_frame) { int start_mi_row, end_mi_row, mi_rows_to_filter; @@ -1640,12 +1588,12 @@ static const uint8_t first_block_in_16x16[8][8] = { // This function sets up the bit masks for a block represented // by mi_row, mi_col in a 64x64 region. // TODO(SJL): This function only works for yv12. 
-void vp9_build_mask(VP9_COMMON *cm, const MB_MODE_INFO *mbmi, int mi_row, +void vp9_build_mask(VP9_COMMON *cm, const MODE_INFO *mi, int mi_row, int mi_col, int bw, int bh) { - const BLOCK_SIZE block_size = mbmi->sb_type; - const TX_SIZE tx_size_y = mbmi->tx_size; + const BLOCK_SIZE block_size = mi->sb_type; + const TX_SIZE tx_size_y = mi->tx_size; const loop_filter_info_n *const lfi_n = &cm->lf_info; - const int filter_level = get_filter_level(lfi_n, mbmi); + const int filter_level = get_filter_level(lfi_n, mi); const TX_SIZE tx_size_uv = get_uv_tx_size_impl(tx_size_y, block_size, 1, 1); LOOP_FILTER_MASK *const lfm = get_lfm(&cm->lf, mi_row, mi_col); uint64_t *const left_y = &lfm->left_y[tx_size_y]; @@ -1693,7 +1641,7 @@ void vp9_build_mask(VP9_COMMON *cm, const MB_MODE_INFO *mbmi, int mi_row, // If the block has no coefficients and is not intra we skip applying // the loop filter on block edges. - if (mbmi->skip && is_inter_block(mbmi)) + if (mi->skip && is_inter_block(mi)) return; // Add a mask for the transform size. 
The transform size mask is set to diff --git a/libvpx/vp9/common/vp9_loopfilter.h b/libvpx/vp9/common/vp9_loopfilter.h index 7f943ea09..fca8830fa 100644 --- a/libvpx/vp9/common/vp9_loopfilter.h +++ b/libvpx/vp9/common/vp9_loopfilter.h @@ -69,6 +69,7 @@ typedef struct { struct loopfilter { int filter_level; + int last_filt_level; int sharpness_level; int last_sharpness_level; @@ -134,7 +135,7 @@ static INLINE LOOP_FILTER_MASK *get_lfm(const struct loopfilter *lf, return &lf->lfm[(mi_col >> 3) + ((mi_row >> 3) * lf->lfm_stride)]; } -void vp9_build_mask(struct VP9Common *cm, const MB_MODE_INFO *mbmi, int mi_row, +void vp9_build_mask(struct VP9Common *cm, const MODE_INFO *mi, int mi_row, int mi_col, int bw, int bh); void vp9_adjust_mask(struct VP9Common *const cm, const int mi_row, const int mi_col, LOOP_FILTER_MASK *lfm); diff --git a/libvpx/vp9/common/vp9_mfqe.c b/libvpx/vp9/common/vp9_mfqe.c index 6d560f438..f5264665b 100644 --- a/libvpx/vp9/common/vp9_mfqe.c +++ b/libvpx/vp9/common/vp9_mfqe.c @@ -203,12 +203,12 @@ static void mfqe_block(BLOCK_SIZE bs, const uint8_t *y, const uint8_t *u, static int mfqe_decision(MODE_INFO *mi, BLOCK_SIZE cur_bs) { // Check the motion in current block(for inter frame), // or check the motion in the correlated block in last frame (for keyframe). 
- const int mv_len_square = mi->mbmi.mv[0].as_mv.row * - mi->mbmi.mv[0].as_mv.row + - mi->mbmi.mv[0].as_mv.col * - mi->mbmi.mv[0].as_mv.col; + const int mv_len_square = mi->mv[0].as_mv.row * + mi->mv[0].as_mv.row + + mi->mv[0].as_mv.col * + mi->mv[0].as_mv.col; const int mv_threshold = 100; - return mi->mbmi.mode >= NEARESTMV && // Not an intra block + return mi->mode >= NEARESTMV && // Not an intra block cur_bs >= BLOCK_16X16 && mv_len_square <= mv_threshold; } @@ -220,7 +220,7 @@ static void mfqe_partition(VP9_COMMON *cm, MODE_INFO *mi, BLOCK_SIZE bs, uint8_t *yd, uint8_t *ud, uint8_t *vd, int yd_stride, int uvd_stride) { int mi_offset, y_offset, uv_offset; - const BLOCK_SIZE cur_bs = mi->mbmi.sb_type; + const BLOCK_SIZE cur_bs = mi->sb_type; const int qdiff = cm->base_qindex - cm->postproc_state.last_base_qindex; const int bsl = b_width_log2_lookup[bs]; PARTITION_TYPE partition = partition_lookup[bsl][cur_bs]; diff --git a/libvpx/vp9/common/vp9_mvref_common.c b/libvpx/vp9/common/vp9_mvref_common.c index 77d1ff459..0eb01a51b 100644 --- a/libvpx/vp9/common/vp9_mvref_common.c +++ b/libvpx/vp9/common/vp9_mvref_common.c @@ -11,20 +11,19 @@ #include "vp9/common/vp9_mvref_common.h" -// This function searches the neighbourhood of a given MB/SB +// This function searches the neighborhood of a given MB/SB // to try and find candidate reference vectors. static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, int_mv *mv_ref_list, int block, int mi_row, int mi_col, - find_mv_refs_sync sync, void *const data, uint8_t *mode_context) { const int *ref_sign_bias = cm->ref_frame_sign_bias; int i, refmv_count = 0; - const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type]; + const POSITION *const mv_ref_search = mv_ref_blocks[mi->sb_type]; int different_ref_found = 0; int context_counter = 0; - const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ? 
+ const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ? cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL; const TileInfo *const tile = &xd->tile; @@ -39,15 +38,14 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; - const MB_MODE_INFO *const candidate = &candidate_mi->mbmi; // Keep counts for entropy encoding. - context_counter += mode_2_counter[candidate->mode]; + context_counter += mode_2_counter[candidate_mi->mode]; different_ref_found = 1; - if (candidate->ref_frame[0] == ref_frame) + if (candidate_mi->ref_frame[0] == ref_frame) ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, block), refmv_count, mv_ref_list, Done); - else if (candidate->ref_frame[1] == ref_frame) + else if (candidate_mi->ref_frame[1] == ref_frame) ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 1, mv_ref->col, block), refmv_count, mv_ref_list, Done); } @@ -59,34 +57,19 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, for (; i < MVREF_NEIGHBOURS; ++i) { const POSITION *const mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row * - xd->mi_stride]->mbmi; + const MODE_INFO *const candidate_mi = + xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; different_ref_found = 1; - if (candidate->ref_frame[0] == ref_frame) - ADD_MV_REF_LIST(candidate->mv[0], refmv_count, mv_ref_list, Done); - else if (candidate->ref_frame[1] == ref_frame) - ADD_MV_REF_LIST(candidate->mv[1], refmv_count, mv_ref_list, Done); + if (candidate_mi->ref_frame[0] == ref_frame) + ADD_MV_REF_LIST(candidate_mi->mv[0], refmv_count, mv_ref_list, Done); + else if (candidate_mi->ref_frame[1] == ref_frame) + ADD_MV_REF_LIST(candidate_mi->mv[1], refmv_count, mv_ref_list, Done); } } - // TODO(hkuang): 
Remove this sync after fixing pthread_cond_broadcast - // on windows platform. The sync here is unncessary if use_perv_frame_mvs - // is 0. But after removing it, there will be hang in the unit test on windows - // due to several threads waiting for a thread's signal. -#if defined(_WIN32) && !HAVE_PTHREAD_H - if (cm->frame_parallel_decode && sync != NULL) { - sync(data, mi_row); - } -#endif - // Check the last frame's mode and mv info. if (cm->use_prev_frame_mvs) { - // Synchronize here for frame parallel decode if sync function is provided. - if (cm->frame_parallel_decode && sync != NULL) { - sync(data, mi_row); - } - if (prev_frame_mvs->ref_frame[0] == ref_frame) { ADD_MV_REF_LIST(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done); } else if (prev_frame_mvs->ref_frame[1] == ref_frame) { @@ -101,11 +84,11 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, for (i = 0; i < MVREF_NEIGHBOURS; ++i) { const POSITION *mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row - * xd->mi_stride]->mbmi; + const MODE_INFO *const candidate_mi = + xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; // If the candidate is INTRA we don't want to consider its mv. - IF_DIFF_REF_FRAME_ADD_MV(candidate, ref_frame, ref_sign_bias, + IF_DIFF_REF_FRAME_ADD_MV(candidate_mi, ref_frame, ref_sign_bias, refmv_count, mv_ref_list, Done); } } @@ -150,20 +133,9 @@ void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, int_mv *mv_ref_list, int mi_row, int mi_col, - find_mv_refs_sync sync, void *const data, uint8_t *mode_context) { find_mv_refs_idx(cm, xd, mi, ref_frame, mv_ref_list, -1, - mi_row, mi_col, sync, data, mode_context); -} - -static void lower_mv_precision(MV *mv, int allow_hp) { - const int use_hp = allow_hp && vp9_use_mv_hp(mv); - if (!use_hp) { - if (mv->row & 1) - mv->row += (mv->row > 0 ? 
-1 : 1); - if (mv->col & 1) - mv->col += (mv->col > 0 ? -1 : 1); - } + mi_row, mi_col, mode_context); } void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp, @@ -190,8 +162,8 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, assert(MAX_MV_REF_CANDIDATES == 2); - find_mv_refs_idx(cm, xd, mi, mi->mbmi.ref_frame[ref], mv_list, block, - mi_row, mi_col, NULL, NULL, mode_context); + find_mv_refs_idx(cm, xd, mi, mi->ref_frame[ref], mv_list, block, + mi_row, mi_col, mode_context); near_mv->as_int = 0; switch (block) { diff --git a/libvpx/vp9/common/vp9_mvref_common.h b/libvpx/vp9/common/vp9_mvref_common.h index bd216d433..4380843e2 100644 --- a/libvpx/vp9/common/vp9_mvref_common.h +++ b/libvpx/vp9/common/vp9_mvref_common.h @@ -136,19 +136,19 @@ static INLINE void clamp_mv_ref(MV *mv, const MACROBLOCKD *xd) { // on whether the block_size < 8x8 and we have check_sub_blocks set. static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate, int which_mv, int search_col, int block_idx) { - return block_idx >= 0 && candidate->mbmi.sb_type < BLOCK_8X8 + return block_idx >= 0 && candidate->sb_type < BLOCK_8X8 ? candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]] .as_mv[which_mv] - : candidate->mbmi.mv[which_mv]; + : candidate->mv[which_mv]; } // Performs mv sign inversion if indicated by the reference frame combination. -static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref, +static INLINE int_mv scale_mv(const MODE_INFO *mi, int ref, const MV_REFERENCE_FRAME this_ref_frame, const int *ref_sign_bias) { - int_mv mv = mbmi->mv[ref]; - if (ref_sign_bias[mbmi->ref_frame[ref]] != ref_sign_bias[this_ref_frame]) { + int_mv mv = mi->mv[ref]; + if (ref_sign_bias[mi->ref_frame[ref]] != ref_sign_bias[this_ref_frame]) { mv.as_mv.row *= -1; mv.as_mv.col *= -1; } @@ -157,7 +157,7 @@ static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref, // This macro is used to add a motion vector mv_ref list if it isn't // already in the list. 
If it's the second motion vector it will also -// skip all additional processing and jump to done! +// skip all additional processing and jump to Done! #define ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done) \ do { \ if (refmv_count) { \ @@ -207,11 +207,20 @@ static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) { xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN); } +static INLINE void lower_mv_precision(MV *mv, int allow_hp) { + const int use_hp = allow_hp && use_mv_hp(mv); + if (!use_hp) { + if (mv->row & 1) + mv->row += (mv->row > 0 ? -1 : 1); + if (mv->col & 1) + mv->col += (mv->col > 0 ? -1 : 1); + } +} + typedef void (*find_mv_refs_sync)(void *const data, int mi_row); void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, int_mv *mv_ref_list, int mi_row, int mi_col, - find_mv_refs_sync sync, void *const data, uint8_t *mode_context); // check a list of motion vectors by sad score using a number rows of pixels diff --git a/libvpx/vp9/common/vp9_onyxc_int.h b/libvpx/vp9/common/vp9_onyxc_int.h index ceffdedf9..3fd935e62 100644 --- a/libvpx/vp9/common/vp9_onyxc_int.h +++ b/libvpx/vp9/common/vp9_onyxc_int.h @@ -404,25 +404,8 @@ static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile, xd->mb_to_right_edge = ((mi_cols - bw - mi_col) * MI_SIZE) * 8; // Are edges available for intra prediction? - xd->up_available = (mi_row != 0); - xd->left_available = (mi_col > tile->mi_col_start); - if (xd->up_available) { - xd->above_mi = xd->mi[-xd->mi_stride]; - // above_mi may be NULL in VP9 encoder's first pass. - xd->above_mbmi = xd->above_mi ? &xd->above_mi->mbmi : NULL; - } else { - xd->above_mi = NULL; - xd->above_mbmi = NULL; - } - - if (xd->left_available) { - xd->left_mi = xd->mi[-1]; - // left_mi may be NULL in VP9 encoder's first pass. - xd->left_mbmi = xd->left_mi ? &xd->left_mi->mbmi : NULL; - } else { - xd->left_mi = NULL; - xd->left_mbmi = NULL; - } + xd->above_mi = (mi_row != 0) ? 
xd->mi[-xd->mi_stride] : NULL; + xd->left_mi = (mi_col > tile->mi_col_start) ? xd->mi[-1] : NULL; } static INLINE void update_partition_context(MACROBLOCKD *xd, diff --git a/libvpx/vp9/common/vp9_postproc.c b/libvpx/vp9/common/vp9_postproc.c index b685d813b..c04cc8f05 100644 --- a/libvpx/vp9/common/vp9_postproc.c +++ b/libvpx/vp9/common/vp9_postproc.c @@ -12,6 +12,7 @@ #include <stdlib.h> #include <stdio.h> +#include "./vpx_dsp_rtcd.h" #include "./vpx_config.h" #include "./vpx_scale_rtcd.h" #include "./vp9_rtcd.h" @@ -587,32 +588,6 @@ static void fillrd(struct postproc_state *state, int q, int a) { state->last_noise = a; } -void vp9_plane_add_noise_c(uint8_t *start, char *noise, - char blackclamp[16], - char whiteclamp[16], - char bothclamp[16], - unsigned int width, unsigned int height, int pitch) { - unsigned int i, j; - - // TODO(jbb): why does simd code use both but c doesn't, normalize and - // fix.. - (void) bothclamp; - for (i = 0; i < height; i++) { - uint8_t *pos = start + i * pitch; - char *ref = (char *)(noise + (rand() & 0xff)); // NOLINT - - for (j = 0; j < width; j++) { - if (pos[j] < blackclamp[0]) - pos[j] = blackclamp[0]; - - if (pos[j] > 255 + whiteclamp[0]) - pos[j] = 255 + whiteclamp[0]; - - pos[j] += ref[j]; - } - } -} - static void swap_mi_and_prev_mi(VP9_COMMON *cm) { // Current mip will be the prev_mip for the next frame. 
MODE_INFO *temp = cm->postproc_state.prev_mip; @@ -726,8 +701,7 @@ int vp9_post_proc_frame(struct VP9Common *cm, ppstate->last_noise != noise_level) { fillrd(ppstate, 63 - q, noise_level); } - - vp9_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp, + vpx_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp, ppstate->whiteclamp, ppstate->bothclamp, ppbuf->y_width, ppbuf->y_height, ppbuf->y_stride); } diff --git a/libvpx/vp9/common/vp9_pred_common.c b/libvpx/vp9/common/vp9_pred_common.c index 1f1632573..8f90e70e7 100644 --- a/libvpx/vp9/common/vp9_pred_common.c +++ b/libvpx/vp9/common/vp9_pred_common.c @@ -17,82 +17,57 @@ int vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd) { // Note: // The mode info data structure has a one element border above and to the - // left of the entries correpsonding to real macroblocks. - // The prediction flags in these dummy entries are initialised to 0. - const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; - const int left_type = xd->left_available && is_inter_block(left_mbmi) ? - left_mbmi->interp_filter : SWITCHABLE_FILTERS; - const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; - const int above_type = xd->up_available && is_inter_block(above_mbmi) ? - above_mbmi->interp_filter : SWITCHABLE_FILTERS; + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. + const MODE_INFO *const left_mi = xd->left_mi; + const int left_type = left_mi && is_inter_block(left_mi) ? + left_mi->interp_filter : SWITCHABLE_FILTERS; + const MODE_INFO *const above_mi = xd->above_mi; + const int above_type = above_mi && is_inter_block(above_mi) ? 
+ above_mi->interp_filter : SWITCHABLE_FILTERS; if (left_type == above_type) return left_type; - else if (left_type == SWITCHABLE_FILTERS && above_type != SWITCHABLE_FILTERS) + else if (left_type == SWITCHABLE_FILTERS) return above_type; - else if (left_type != SWITCHABLE_FILTERS && above_type == SWITCHABLE_FILTERS) + else if (above_type == SWITCHABLE_FILTERS) return left_type; else return SWITCHABLE_FILTERS; } -// The mode info data structure has a one element border above and to the -// left of the entries corresponding to real macroblocks. -// The prediction flags in these dummy entries are initialized to 0. -// 0 - inter/inter, inter/--, --/inter, --/-- -// 1 - intra/inter, inter/intra -// 2 - intra/--, --/intra -// 3 - intra/intra -int vp9_get_intra_inter_context(const MACROBLOCKD *xd) { - const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; - const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; - const int has_above = xd->up_available; - const int has_left = xd->left_available; - - if (has_above && has_left) { // both edges available - const int above_intra = !is_inter_block(above_mbmi); - const int left_intra = !is_inter_block(left_mbmi); - return left_intra && above_intra ? 3 - : left_intra || above_intra; - } else if (has_above || has_left) { // one edge available - return 2 * !is_inter_block(has_above ? above_mbmi : left_mbmi); - } else { - return 0; - } -} - int vp9_get_reference_mode_context(const VP9_COMMON *cm, const MACROBLOCKD *xd) { int ctx; - const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; - const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; - const int has_above = xd->up_available; - const int has_left = xd->left_available; + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int has_above = !!above_mi; + const int has_left = !!left_mi; // Note: // The mode info data structure has a one element border above and to the - // left of the entries correpsonding to real macroblocks. 
- // The prediction flags in these dummy entries are initialised to 0. + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. if (has_above && has_left) { // both edges available - if (!has_second_ref(above_mbmi) && !has_second_ref(left_mbmi)) + if (!has_second_ref(above_mi) && !has_second_ref(left_mi)) // neither edge uses comp pred (0/1) - ctx = (above_mbmi->ref_frame[0] == cm->comp_fixed_ref) ^ - (left_mbmi->ref_frame[0] == cm->comp_fixed_ref); - else if (!has_second_ref(above_mbmi)) + ctx = (above_mi->ref_frame[0] == cm->comp_fixed_ref) ^ + (left_mi->ref_frame[0] == cm->comp_fixed_ref); + else if (!has_second_ref(above_mi)) // one of two edges uses comp pred (2/3) - ctx = 2 + (above_mbmi->ref_frame[0] == cm->comp_fixed_ref || - !is_inter_block(above_mbmi)); - else if (!has_second_ref(left_mbmi)) + ctx = 2 + (above_mi->ref_frame[0] == cm->comp_fixed_ref || + !is_inter_block(above_mi)); + else if (!has_second_ref(left_mi)) // one of two edges uses comp pred (2/3) - ctx = 2 + (left_mbmi->ref_frame[0] == cm->comp_fixed_ref || - !is_inter_block(left_mbmi)); + ctx = 2 + (left_mi->ref_frame[0] == cm->comp_fixed_ref || + !is_inter_block(left_mi)); else // both edges use comp pred (4) ctx = 4; } else if (has_above || has_left) { // one edge available - const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi; + const MODE_INFO *edge_mi = has_above ? 
above_mi : left_mi; - if (!has_second_ref(edge_mbmi)) + if (!has_second_ref(edge_mi)) // edge does not use comp pred (0/1) - ctx = edge_mbmi->ref_frame[0] == cm->comp_fixed_ref; + ctx = edge_mi->ref_frame[0] == cm->comp_fixed_ref; else // edge uses comp pred (3) ctx = 3; @@ -107,39 +82,39 @@ int vp9_get_reference_mode_context(const VP9_COMMON *cm, int vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm, const MACROBLOCKD *xd) { int pred_context; - const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; - const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; - const int above_in_image = xd->up_available; - const int left_in_image = xd->left_available; + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int above_in_image = !!above_mi; + const int left_in_image = !!left_mi; // Note: // The mode info data structure has a one element border above and to the - // left of the entries correpsonding to real macroblocks. - // The prediction flags in these dummy entries are initialised to 0. + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. const int fix_ref_idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; const int var_ref_idx = !fix_ref_idx; if (above_in_image && left_in_image) { // both edges available - const int above_intra = !is_inter_block(above_mbmi); - const int left_intra = !is_inter_block(left_mbmi); + const int above_intra = !is_inter_block(above_mi); + const int left_intra = !is_inter_block(left_mi); if (above_intra && left_intra) { // intra/intra (2) pred_context = 2; } else if (above_intra || left_intra) { // intra/inter - const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi; + const MODE_INFO *edge_mi = above_intra ? 
left_mi : above_mi; - if (!has_second_ref(edge_mbmi)) // single pred (1/3) - pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] != cm->comp_var_ref[1]); + if (!has_second_ref(edge_mi)) // single pred (1/3) + pred_context = 1 + 2 * (edge_mi->ref_frame[0] != cm->comp_var_ref[1]); else // comp pred (1/3) - pred_context = 1 + 2 * (edge_mbmi->ref_frame[var_ref_idx] + pred_context = 1 + 2 * (edge_mi->ref_frame[var_ref_idx] != cm->comp_var_ref[1]); } else { // inter/inter - const int l_sg = !has_second_ref(left_mbmi); - const int a_sg = !has_second_ref(above_mbmi); - const MV_REFERENCE_FRAME vrfa = a_sg ? above_mbmi->ref_frame[0] - : above_mbmi->ref_frame[var_ref_idx]; - const MV_REFERENCE_FRAME vrfl = l_sg ? left_mbmi->ref_frame[0] - : left_mbmi->ref_frame[var_ref_idx]; + const int l_sg = !has_second_ref(left_mi); + const int a_sg = !has_second_ref(above_mi); + const MV_REFERENCE_FRAME vrfa = a_sg ? above_mi->ref_frame[0] + : above_mi->ref_frame[var_ref_idx]; + const MV_REFERENCE_FRAME vrfl = l_sg ? left_mi->ref_frame[0] + : left_mi->ref_frame[var_ref_idx]; if (vrfa == vrfl && cm->comp_var_ref[1] == vrfa) { pred_context = 0; @@ -167,16 +142,16 @@ int vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm, } } } else if (above_in_image || left_in_image) { // one edge available - const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi; + const MODE_INFO *edge_mi = above_in_image ? 
above_mi : left_mi; - if (!is_inter_block(edge_mbmi)) { + if (!is_inter_block(edge_mi)) { pred_context = 2; } else { - if (has_second_ref(edge_mbmi)) - pred_context = 4 * (edge_mbmi->ref_frame[var_ref_idx] + if (has_second_ref(edge_mi)) + pred_context = 4 * (edge_mi->ref_frame[var_ref_idx] != cm->comp_var_ref[1]); else - pred_context = 3 * (edge_mbmi->ref_frame[0] != cm->comp_var_ref[1]); + pred_context = 3 * (edge_mi->ref_frame[0] != cm->comp_var_ref[1]); } } else { // no edges available (2) pred_context = 2; @@ -188,34 +163,34 @@ int vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm, int vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) { int pred_context; - const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; - const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; - const int has_above = xd->up_available; - const int has_left = xd->left_available; + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int has_above = !!above_mi; + const int has_left = !!left_mi; // Note: // The mode info data structure has a one element border above and to the - // left of the entries correpsonding to real macroblocks. - // The prediction flags in these dummy entries are initialised to 0. + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. if (has_above && has_left) { // both edges available - const int above_intra = !is_inter_block(above_mbmi); - const int left_intra = !is_inter_block(left_mbmi); + const int above_intra = !is_inter_block(above_mi); + const int left_intra = !is_inter_block(left_mi); if (above_intra && left_intra) { // intra/intra pred_context = 2; } else if (above_intra || left_intra) { // intra/inter or inter/intra - const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi; - if (!has_second_ref(edge_mbmi)) - pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST_FRAME); + const MODE_INFO *edge_mi = above_intra ? 
left_mi : above_mi; + if (!has_second_ref(edge_mi)) + pred_context = 4 * (edge_mi->ref_frame[0] == LAST_FRAME); else - pred_context = 1 + (edge_mbmi->ref_frame[0] == LAST_FRAME || - edge_mbmi->ref_frame[1] == LAST_FRAME); + pred_context = 1 + (edge_mi->ref_frame[0] == LAST_FRAME || + edge_mi->ref_frame[1] == LAST_FRAME); } else { // inter/inter - const int above_has_second = has_second_ref(above_mbmi); - const int left_has_second = has_second_ref(left_mbmi); - const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0]; - const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1]; - const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0]; - const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1]; + const int above_has_second = has_second_ref(above_mi); + const int left_has_second = has_second_ref(left_mi); + const MV_REFERENCE_FRAME above0 = above_mi->ref_frame[0]; + const MV_REFERENCE_FRAME above1 = above_mi->ref_frame[1]; + const MV_REFERENCE_FRAME left0 = left_mi->ref_frame[0]; + const MV_REFERENCE_FRAME left1 = left_mi->ref_frame[1]; if (above_has_second && left_has_second) { pred_context = 1 + (above0 == LAST_FRAME || above1 == LAST_FRAME || @@ -234,15 +209,15 @@ int vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) { } } } else if (has_above || has_left) { // one edge available - const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi; - if (!is_inter_block(edge_mbmi)) { // intra + const MODE_INFO *edge_mi = has_above ? 
above_mi : left_mi; + if (!is_inter_block(edge_mi)) { // intra pred_context = 2; } else { // inter - if (!has_second_ref(edge_mbmi)) - pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST_FRAME); + if (!has_second_ref(edge_mi)) + pred_context = 4 * (edge_mi->ref_frame[0] == LAST_FRAME); else - pred_context = 1 + (edge_mbmi->ref_frame[0] == LAST_FRAME || - edge_mbmi->ref_frame[1] == LAST_FRAME); + pred_context = 1 + (edge_mi->ref_frame[0] == LAST_FRAME || + edge_mi->ref_frame[1] == LAST_FRAME); } } else { // no edges available pred_context = 2; @@ -254,39 +229,39 @@ int vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) { int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) { int pred_context; - const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; - const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; - const int has_above = xd->up_available; - const int has_left = xd->left_available; + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int has_above = !!above_mi; + const int has_left = !!left_mi; // Note: // The mode info data structure has a one element border above and to the - // left of the entries correpsonding to real macroblocks. - // The prediction flags in these dummy entries are initialised to 0. + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. if (has_above && has_left) { // both edges available - const int above_intra = !is_inter_block(above_mbmi); - const int left_intra = !is_inter_block(left_mbmi); + const int above_intra = !is_inter_block(above_mi); + const int left_intra = !is_inter_block(left_mi); if (above_intra && left_intra) { // intra/intra pred_context = 2; } else if (above_intra || left_intra) { // intra/inter or inter/intra - const MB_MODE_INFO *edge_mbmi = above_intra ? 
left_mbmi : above_mbmi; - if (!has_second_ref(edge_mbmi)) { - if (edge_mbmi->ref_frame[0] == LAST_FRAME) + const MODE_INFO *edge_mi = above_intra ? left_mi : above_mi; + if (!has_second_ref(edge_mi)) { + if (edge_mi->ref_frame[0] == LAST_FRAME) pred_context = 3; else - pred_context = 4 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME); + pred_context = 4 * (edge_mi->ref_frame[0] == GOLDEN_FRAME); } else { - pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME || - edge_mbmi->ref_frame[1] == GOLDEN_FRAME); + pred_context = 1 + 2 * (edge_mi->ref_frame[0] == GOLDEN_FRAME || + edge_mi->ref_frame[1] == GOLDEN_FRAME); } } else { // inter/inter - const int above_has_second = has_second_ref(above_mbmi); - const int left_has_second = has_second_ref(left_mbmi); - const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0]; - const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1]; - const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0]; - const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1]; + const int above_has_second = has_second_ref(above_mi); + const int left_has_second = has_second_ref(left_mi); + const MV_REFERENCE_FRAME above0 = above_mi->ref_frame[0]; + const MV_REFERENCE_FRAME above1 = above_mi->ref_frame[1]; + const MV_REFERENCE_FRAME left0 = left_mi->ref_frame[0]; + const MV_REFERENCE_FRAME left1 = left_mi->ref_frame[1]; if (above_has_second && left_has_second) { if (above0 == left0 && above1 == left1) @@ -321,16 +296,16 @@ int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) { } } } else if (has_above || has_left) { // one edge available - const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi; + const MODE_INFO *edge_mi = has_above ? 
above_mi : left_mi; - if (!is_inter_block(edge_mbmi) || - (edge_mbmi->ref_frame[0] == LAST_FRAME && !has_second_ref(edge_mbmi))) + if (!is_inter_block(edge_mi) || + (edge_mi->ref_frame[0] == LAST_FRAME && !has_second_ref(edge_mi))) pred_context = 2; - else if (!has_second_ref(edge_mbmi)) - pred_context = 4 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME); + else if (!has_second_ref(edge_mi)) + pred_context = 4 * (edge_mi->ref_frame[0] == GOLDEN_FRAME); else - pred_context = 3 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME || - edge_mbmi->ref_frame[1] == GOLDEN_FRAME); + pred_context = 3 * (edge_mi->ref_frame[0] == GOLDEN_FRAME || + edge_mi->ref_frame[1] == GOLDEN_FRAME); } else { // no edges available (2) pred_context = 2; } diff --git a/libvpx/vp9/common/vp9_pred_common.h b/libvpx/vp9/common/vp9_pred_common.h index 6f7af4a50..f3c676e95 100644 --- a/libvpx/vp9/common/vp9_pred_common.h +++ b/libvpx/vp9/common/vp9_pred_common.h @@ -42,8 +42,8 @@ static INLINE int vp9_get_pred_context_seg_id(const MACROBLOCKD *xd) { const MODE_INFO *const above_mi = xd->above_mi; const MODE_INFO *const left_mi = xd->left_mi; const int above_sip = (above_mi != NULL) ? - above_mi->mbmi.seg_id_predicted : 0; - const int left_sip = (left_mi != NULL) ? left_mi->mbmi.seg_id_predicted : 0; + above_mi->seg_id_predicted : 0; + const int left_sip = (left_mi != NULL) ? left_mi->seg_id_predicted : 0; return above_sip + left_sip; } @@ -56,8 +56,8 @@ static INLINE vpx_prob vp9_get_pred_prob_seg_id(const struct segmentation *seg, static INLINE int vp9_get_skip_context(const MACROBLOCKD *xd) { const MODE_INFO *const above_mi = xd->above_mi; const MODE_INFO *const left_mi = xd->left_mi; - const int above_skip = (above_mi != NULL) ? above_mi->mbmi.skip : 0; - const int left_skip = (left_mi != NULL) ? left_mi->mbmi.skip : 0; + const int above_skip = (above_mi != NULL) ? above_mi->skip : 0; + const int left_skip = (left_mi != NULL) ? 
left_mi->skip : 0; return above_skip + left_skip; } @@ -68,11 +68,32 @@ static INLINE vpx_prob vp9_get_skip_prob(const VP9_COMMON *cm, int vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd); -int vp9_get_intra_inter_context(const MACROBLOCKD *xd); +// The mode info data structure has a one element border above and to the +// left of the entries corresponding to real macroblocks. +// The prediction flags in these dummy entries are initialized to 0. +// 0 - inter/inter, inter/--, --/inter, --/-- +// 1 - intra/inter, inter/intra +// 2 - intra/--, --/intra +// 3 - intra/intra +static INLINE int get_intra_inter_context(const MACROBLOCKD *xd) { + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int has_above = !!above_mi; + const int has_left = !!left_mi; + + if (has_above && has_left) { // both edges available + const int above_intra = !is_inter_block(above_mi); + const int left_intra = !is_inter_block(left_mi); + return left_intra && above_intra ? 3 : left_intra || above_intra; + } else if (has_above || has_left) { // one edge available + return 2 * !is_inter_block(has_above ? above_mi : left_mi); + } + return 0; +} static INLINE vpx_prob vp9_get_intra_inter_prob(const VP9_COMMON *cm, const MACROBLOCKD *xd) { - return cm->fc->intra_inter_prob[vp9_get_intra_inter_context(xd)]; + return cm->fc->intra_inter_prob[get_intra_inter_context(xd)]; } int vp9_get_reference_mode_context(const VP9_COMMON *cm, const MACROBLOCKD *xd); @@ -110,15 +131,15 @@ static INLINE vpx_prob vp9_get_pred_prob_single_ref_p2(const VP9_COMMON *cm, // left of the entries corresponding to real blocks. // The prediction flags in these dummy entries are initialized to 0. 
static INLINE int get_tx_size_context(const MACROBLOCKD *xd) { - const int max_tx_size = max_txsize_lookup[xd->mi[0]->mbmi.sb_type]; - const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; - const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; - const int has_above = xd->up_available; - const int has_left = xd->left_available; - int above_ctx = (has_above && !above_mbmi->skip) ? (int)above_mbmi->tx_size - : max_tx_size; - int left_ctx = (has_left && !left_mbmi->skip) ? (int)left_mbmi->tx_size - : max_tx_size; + const int max_tx_size = max_txsize_lookup[xd->mi[0]->sb_type]; + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int has_above = !!above_mi; + const int has_left = !!left_mi; + int above_ctx = (has_above && !above_mi->skip) ? (int)above_mi->tx_size + : max_tx_size; + int left_ctx = (has_left && !left_mi->skip) ? (int)left_mi->tx_size + : max_tx_size; if (!has_left) left_ctx = above_ctx; diff --git a/libvpx/vp9/common/vp9_reconinter.c b/libvpx/vp9/common/vp9_reconinter.c index d8c14ecc8..84718e970 100644 --- a/libvpx/vp9/common/vp9_reconinter.c +++ b/libvpx/vp9/common/vp9_reconinter.c @@ -20,19 +20,6 @@ #include "vp9/common/vp9_reconintra.h" #if CONFIG_VP9_HIGHBITDEPTH -void high_inter_predictor(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int subpel_x, - const int subpel_y, - const struct scale_factors *sf, - int w, int h, int ref, - const InterpKernel *kernel, - int xs, int ys, int bd) { - sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref]( - src, src_stride, dst, dst_stride, - kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd); -} - void vp9_highbd_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const MV *src_mv, @@ -50,8 +37,9 @@ void vp9_highbd_build_inter_predictor(const uint8_t *src, int src_stride, src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS); - high_inter_predictor(src, src_stride, dst, 
dst_stride, subpel_x, subpel_y, - sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4, bd); + highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, + sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4, + bd); } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -159,8 +147,8 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, int mi_x, int mi_y) { struct macroblockd_plane *const pd = &xd->plane[plane]; const MODE_INFO *mi = xd->mi[0]; - const int is_compound = has_second_ref(&mi->mbmi); - const InterpKernel *kernel = vp9_filter_kernels[mi->mbmi.interp_filter]; + const int is_compound = has_second_ref(mi); + const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter]; int ref; for (ref = 0; ref < 1 + is_compound; ++ref) { @@ -168,9 +156,9 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, struct buf_2d *const pre_buf = &pd->pre[ref]; struct buf_2d *const dst_buf = &pd->dst; uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x; - const MV mv = mi->mbmi.sb_type < BLOCK_8X8 + const MV mv = mi->sb_type < BLOCK_8X8 ? average_split_mvs(pd, mi, ref, block) - : mi->mbmi.mv[ref].as_mv; + : mi->mv[ref].as_mv; // TODO(jkoleszar): This clamping is done in the incorrect place for the // scaling case. It needs to be done on the scaled MV, not the pre-scaling @@ -190,6 +178,12 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, // Co-ordinate of containing block to pixel precision. 
const int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)); const int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)); +#if CONFIG_BETTER_HW_COMPATIBILITY + assert(xd->mi[0]->sb_type != BLOCK_4X8 && + xd->mi[0]->sb_type != BLOCK_8X4); + assert(mv_q4.row == mv.row * (1 << (1 - pd->subsampling_y)) && + mv_q4.col == mv.col * (1 << (1 - pd->subsampling_x))); +#endif if (plane == 0) pre_buf->buf = xd->block_refs[ref]->buf->y_buffer; else if (plane == 1) @@ -216,9 +210,9 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - high_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, - subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys, - xd->bd); + highbd_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, + subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys, + xd->bd); } else { inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); @@ -244,7 +238,7 @@ static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize, const int bw = 4 * num_4x4_w; const int bh = 4 * num_4x4_h; - if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) { + if (xd->mi[0]->sb_type < BLOCK_8X8) { int i = 0, x, y; assert(bsize == BLOCK_8X8); for (y = 0; y < num_4x4_h; ++y) diff --git a/libvpx/vp9/common/vp9_reconinter.h b/libvpx/vp9/common/vp9_reconinter.h index 7d907748e..07745e3aa 100644 --- a/libvpx/vp9/common/vp9_reconinter.h +++ b/libvpx/vp9/common/vp9_reconinter.h @@ -34,14 +34,18 @@ static INLINE void inter_predictor(const uint8_t *src, int src_stride, } #if CONFIG_VP9_HIGHBITDEPTH -void high_inter_predictor(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int subpel_x, - const int subpel_y, - const struct scale_factors *sf, - int w, int h, int ref, - const InterpKernel *kernel, - int xs, int ys, int bd); +static INLINE void highbd_inter_predictor(const uint8_t *src, int 
src_stride, + uint8_t *dst, int dst_stride, + const int subpel_x, + const int subpel_y, + const struct scale_factors *sf, + int w, int h, int ref, + const InterpKernel *kernel, + int xs, int ys, int bd) { + sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref]( + src, src_stride, dst, dst_stride, + kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd); +} #endif // CONFIG_VP9_HIGHBITDEPTH MV average_split_mvs(const struct macroblockd_plane *pd, const MODE_INFO *mi, diff --git a/libvpx/vp9/common/vp9_reconintra.c b/libvpx/vp9/common/vp9_reconintra.c index 3d84a2883..445785835 100644 --- a/libvpx/vp9/common/vp9_reconintra.c +++ b/libvpx/vp9/common/vp9_reconintra.c @@ -133,12 +133,16 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd, int frame_width, frame_height; int x0, y0; const struct macroblockd_plane *const pd = &xd->plane[plane]; + const int need_left = extend_modes[mode] & NEED_LEFT; + const int need_above = extend_modes[mode] & NEED_ABOVE; + const int need_aboveright = extend_modes[mode] & NEED_ABOVERIGHT; int base = 128 << (bd - 8); // 127 127 127 .. 127 127 127 127 127 127 // 129 A B .. Y Z // 129 C D .. W X // 129 E F .. U V // 129 G H .. S T T T T T + // For 10 bit and 12 bit, 127 and 129 are replaced by base -1 and base + 1. // Get current frame pointer, width and height. 
if (plane == 0) { @@ -153,79 +157,106 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd, x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; - // left - if (left_available) { - if (xd->mb_to_bottom_edge < 0) { - /* slower path if the block needs border extension */ - if (y0 + bs <= frame_height) { - for (i = 0; i < bs; ++i) - left_col[i] = ref[i * ref_stride - 1]; + // NEED_LEFT + if (need_left) { + if (left_available) { + if (xd->mb_to_bottom_edge < 0) { + /* slower path if the block needs border extension */ + if (y0 + bs <= frame_height) { + for (i = 0; i < bs; ++i) + left_col[i] = ref[i * ref_stride - 1]; + } else { + const int extend_bottom = frame_height - y0; + for (i = 0; i < extend_bottom; ++i) + left_col[i] = ref[i * ref_stride - 1]; + for (; i < bs; ++i) + left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1]; + } } else { - const int extend_bottom = frame_height - y0; - for (i = 0; i < extend_bottom; ++i) + /* faster path if the block does not need extension */ + for (i = 0; i < bs; ++i) left_col[i] = ref[i * ref_stride - 1]; - for (; i < bs; ++i) - left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1]; } } else { - /* faster path if the block does not need extension */ - for (i = 0; i < bs; ++i) - left_col[i] = ref[i * ref_stride - 1]; + vpx_memset16(left_col, base + 1, bs); } - } else { - // TODO(Peter): this value should probably change for high bitdepth - vpx_memset16(left_col, base + 1, bs); } - // TODO(hkuang) do not extend 2*bs pixels for all modes. 
- // above - if (up_available) { - const uint16_t *above_ref = ref - ref_stride; - if (xd->mb_to_right_edge < 0) { - /* slower path if the block needs border extension */ - if (x0 + 2 * bs <= frame_width) { - if (right_available && bs == 4) { - memcpy(above_row, above_ref, 2 * bs * sizeof(above_row[0])); + // NEED_ABOVE + if (need_above) { + if (up_available) { + const uint16_t *above_ref = ref - ref_stride; + if (xd->mb_to_right_edge < 0) { + /* slower path if the block needs border extension */ + if (x0 + bs <= frame_width) { + memcpy(above_row, above_ref, bs * sizeof(above_row[0])); + } else if (x0 <= frame_width) { + const int r = frame_width - x0; + memcpy(above_row, above_ref, r * sizeof(above_row[0])); + vpx_memset16(above_row + r, above_row[r - 1], x0 + bs - frame_width); + } + } else { + /* faster path if the block does not need extension */ + if (bs == 4 && right_available && left_available) { + const_above_row = above_ref; } else { memcpy(above_row, above_ref, bs * sizeof(above_row[0])); - vpx_memset16(above_row + bs, above_row[bs - 1], bs); } - } else if (x0 + bs <= frame_width) { - const int r = frame_width - x0; - if (right_available && bs == 4) { + } + above_row[-1] = left_available ? 
above_ref[-1] : (base + 1); + } else { + vpx_memset16(above_row, base - 1, bs); + above_row[-1] = base - 1; + } + } + + // NEED_ABOVERIGHT + if (need_aboveright) { + if (up_available) { + const uint16_t *above_ref = ref - ref_stride; + if (xd->mb_to_right_edge < 0) { + /* slower path if the block needs border extension */ + if (x0 + 2 * bs <= frame_width) { + if (right_available && bs == 4) { + memcpy(above_row, above_ref, 2 * bs * sizeof(above_row[0])); + } else { + memcpy(above_row, above_ref, bs * sizeof(above_row[0])); + vpx_memset16(above_row + bs, above_row[bs - 1], bs); + } + } else if (x0 + bs <= frame_width) { + const int r = frame_width - x0; + if (right_available && bs == 4) { + memcpy(above_row, above_ref, r * sizeof(above_row[0])); + vpx_memset16(above_row + r, above_row[r - 1], + x0 + 2 * bs - frame_width); + } else { + memcpy(above_row, above_ref, bs * sizeof(above_row[0])); + vpx_memset16(above_row + bs, above_row[bs - 1], bs); + } + } else if (x0 <= frame_width) { + const int r = frame_width - x0; memcpy(above_row, above_ref, r * sizeof(above_row[0])); vpx_memset16(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width); + } + above_row[-1] = left_available ? above_ref[-1] : (base + 1); + } else { + /* faster path if the block does not need extension */ + if (bs == 4 && right_available && left_available) { + const_above_row = above_ref; } else { memcpy(above_row, above_ref, bs * sizeof(above_row[0])); - vpx_memset16(above_row + bs, above_row[bs - 1], bs); + if (bs == 4 && right_available) + memcpy(above_row + bs, above_ref + bs, bs * sizeof(above_row[0])); + else + vpx_memset16(above_row + bs, above_row[bs - 1], bs); + above_row[-1] = left_available ? 
above_ref[-1] : (base + 1); } - } else if (x0 <= frame_width) { - const int r = frame_width - x0; - memcpy(above_row, above_ref, r * sizeof(above_row[0])); - vpx_memset16(above_row + r, above_row[r - 1], - x0 + 2 * bs - frame_width); } - // TODO(Peter) this value should probably change for high bitdepth - above_row[-1] = left_available ? above_ref[-1] : (base+1); } else { - /* faster path if the block does not need extension */ - if (bs == 4 && right_available && left_available) { - const_above_row = above_ref; - } else { - memcpy(above_row, above_ref, bs * sizeof(above_row[0])); - if (bs == 4 && right_available) - memcpy(above_row + bs, above_ref + bs, bs * sizeof(above_row[0])); - else - vpx_memset16(above_row + bs, above_row[bs - 1], bs); - // TODO(Peter): this value should probably change for high bitdepth - above_row[-1] = left_available ? above_ref[-1] : (base+1); - } + vpx_memset16(above_row, base - 1, bs * 2); + above_row[-1] = base - 1; } - } else { - vpx_memset16(above_row, base - 1, bs * 2); - // TODO(Peter): this value should probably change for high bitdepth - above_row[-1] = base - 1; } // predict @@ -391,8 +422,8 @@ void vp9_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, int aoff, int loff, int plane) { const int bw = (1 << bwl_in); const int txw = (1 << tx_size); - const int have_top = loff || xd->up_available; - const int have_left = aoff || xd->left_available; + const int have_top = loff || (xd->above_mi != NULL); + const int have_left = aoff || (xd->left_mi != NULL); const int have_right = (aoff + txw) < bw; const int x = aoff * 4; const int y = loff * 4; diff --git a/libvpx/vp9/common/vp9_rtcd_defs.pl b/libvpx/vp9/common/vp9_rtcd_defs.pl index 5bf71ef9f..846133674 100644 --- a/libvpx/vp9/common/vp9_rtcd_defs.pl +++ b/libvpx/vp9/common/vp9_rtcd_defs.pl @@ -70,10 +70,6 @@ add_proto qw/void vp9_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8 specialize qw/vp9_post_proc_down_and_across sse2/; 
$vp9_post_proc_down_and_across_sse2=vp9_post_proc_down_and_across_xmm; -add_proto qw/void vp9_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch"; -specialize qw/vp9_plane_add_noise sse2/; -$vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt; - add_proto qw/void vp9_filter_by_weight16x16/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight"; specialize qw/vp9_filter_by_weight16x16 sse2 msa/; @@ -169,9 +165,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vp9_highbd_post_proc_down_and_across/, "const uint16_t *src_ptr, uint16_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit"; specialize qw/vp9_highbd_post_proc_down_and_across/; - - add_proto qw/void vp9_highbd_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch"; - specialize qw/vp9_highbd_plane_add_noise/; } # @@ -194,42 +187,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") { -add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p"; -specialize qw/vp9_avg_8x8 sse2 neon msa/; - -add_proto qw/unsigned int vp9_avg_4x4/, "const uint8_t *, int p"; -specialize qw/vp9_avg_4x4 sse2 msa/; - -add_proto qw/void vp9_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max"; -specialize qw/vp9_minmax_8x8 sse2/; - -add_proto qw/void vp9_hadamard_8x8/, "int16_t const *src_diff, int src_stride, int16_t *coeff"; -specialize qw/vp9_hadamard_8x8 sse2/, "$ssse3_x86_64_x86inc"; - -add_proto qw/void vp9_hadamard_16x16/, "int16_t const *src_diff, int src_stride, int16_t *coeff"; -specialize qw/vp9_hadamard_16x16 sse2/; - -add_proto qw/int16_t vp9_satd/, "const int16_t *coeff, int length"; -specialize qw/vp9_satd sse2/; - 
-add_proto qw/void vp9_int_pro_row/, "int16_t *hbuf, uint8_t const *ref, const int ref_stride, const int height"; -specialize qw/vp9_int_pro_row sse2 neon/; - -add_proto qw/int16_t vp9_int_pro_col/, "uint8_t const *ref, const int width"; -specialize qw/vp9_int_pro_col sse2 neon/; - -add_proto qw/int vp9_vector_var/, "int16_t const *ref, int16_t const *src, const int bwl"; -specialize qw/vp9_vector_var neon sse2/; - -if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { - add_proto qw/unsigned int vp9_highbd_avg_8x8/, "const uint8_t *, int p"; - specialize qw/vp9_highbd_avg_8x8/; - add_proto qw/unsigned int vp9_highbd_avg_4x4/, "const uint8_t *, int p"; - specialize qw/vp9_highbd_avg_4x4/; - add_proto qw/void vp9_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max"; - specialize qw/vp9_highbd_minmax_8x8/; -} - # ENCODEMB INVOKE # @@ -288,7 +245,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vp9_fht16x16 sse2/; add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride"; - specialize qw/vp9_fwht4x4/, "$mmx_x86inc"; + specialize qw/vp9_fwht4x4/, "$sse2_x86inc"; } else { add_proto qw/void vp9_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; specialize qw/vp9_fht4x4 sse2 msa/; @@ -300,7 +257,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vp9_fht16x16 sse2 msa/; add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride"; - specialize qw/vp9_fwht4x4 msa/, "$mmx_x86inc"; + specialize qw/vp9_fwht4x4 msa/, "$sse2_x86inc"; } # @@ -312,10 +269,7 @@ $vp9_full_search_sad_sse3=vp9_full_search_sadx3; $vp9_full_search_sad_sse4_1=vp9_full_search_sadx8; add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv 
*center_mv"; -specialize qw/vp9_diamond_search_sad/; - -add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv"; -specialize qw/vp9_full_range_search/; +specialize qw/vp9_diamond_search_sad avx/; add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"; specialize qw/vp9_temporal_filter_apply sse2 msa/; @@ -349,6 +303,15 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { } # End vp9_high encoder functions +# +# frame based scale +# +if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { +} else { + add_proto qw/void vp9_scale_and_extend_frame/, "const struct yv12_buffer_config *src, struct yv12_buffer_config *dst"; + specialize qw/vp9_scale_and_extend_frame ssse3/; +} + } # end encoder functions 1; diff --git a/libvpx/vp9/common/vp9_scan.c b/libvpx/vp9/common/vp9_scan.c index d6fb8b2d7..8b8b09f4a 100644 --- a/libvpx/vp9/common/vp9_scan.c +++ b/libvpx/vp9/common/vp9_scan.c @@ -229,10 +229,8 @@ DECLARE_ALIGNED(16, static const int16_t, default_scan_32x32[1024]) = { 990, 959, 1022, 991, 1023, }; -// Neighborhood 5-tuples for various scans and blocksizes, -// in {top, left, topleft, topright, bottomleft} order -// for each position in raster scan order. -// -1 indicates the neighbor does not exist. +// Neighborhood 2-tuples for various scans and blocksizes, +// in {top, left} order for each position in corresponding scan order. 
DECLARE_ALIGNED(16, static const int16_t, default_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { 0, 0, 0, 0, 0, 0, 1, 4, 4, 4, 1, 1, 8, 8, 5, 8, 2, 2, 2, 5, 9, 12, 6, 9, diff --git a/libvpx/vp9/common/vp9_scan.h b/libvpx/vp9/common/vp9_scan.h index 1d86b5cfe..4c1ee8107 100644 --- a/libvpx/vp9/common/vp9_scan.h +++ b/libvpx/vp9/common/vp9_scan.h @@ -42,7 +42,7 @@ static INLINE const scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size, PLANE_TYPE type, int block_idx) { const MODE_INFO *const mi = xd->mi[0]; - if (is_inter_block(&mi->mbmi) || type != PLANE_TYPE_Y || xd->lossless) { + if (is_inter_block(mi) || type != PLANE_TYPE_Y || xd->lossless) { return &vp9_default_scan_orders[tx_size]; } else { const PREDICTION_MODE mode = get_y_mode(mi, block_idx); diff --git a/libvpx/vp9/common/vp9_seg_common.c b/libvpx/vp9/common/vp9_seg_common.c index c8ef618b7..7af61629a 100644 --- a/libvpx/vp9/common/vp9_seg_common.c +++ b/libvpx/vp9/common/vp9_seg_common.c @@ -28,6 +28,7 @@ static const int seg_feature_data_max[SEG_LVL_MAX] = { void vp9_clearall_segfeatures(struct segmentation *seg) { vp9_zero(seg->feature_data); vp9_zero(seg->feature_mask); + seg->aq_av_offset = 0; } void vp9_enable_segfeature(struct segmentation *seg, int segment_id, diff --git a/libvpx/vp9/common/vp9_seg_common.h b/libvpx/vp9/common/vp9_seg_common.h index 5b75d8d4e..99a9440c1 100644 --- a/libvpx/vp9/common/vp9_seg_common.h +++ b/libvpx/vp9/common/vp9_seg_common.h @@ -46,7 +46,8 @@ struct segmentation { vpx_prob pred_probs[PREDICTION_PROBS]; int16_t feature_data[MAX_SEGMENTS][SEG_LVL_MAX]; - unsigned int feature_mask[MAX_SEGMENTS]; + uint32_t feature_mask[MAX_SEGMENTS]; + int aq_av_offset; }; static INLINE int segfeature_active(const struct segmentation *seg, diff --git a/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c b/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c index 8d312d03f..1c77b57ff 100644 --- a/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c +++ b/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c @@ 
-8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include "./vp9_rtcd.h" #include "vpx_dsp/x86/inv_txfm_sse2.h" #include "vpx_dsp/x86/txfm_common_sse2.h" #include "vpx_ports/mem.h" diff --git a/libvpx/vp9/common/x86/vp9_postproc_sse2.asm b/libvpx/vp9/common/x86/vp9_postproc_sse2.asm index ec8bfdb18..430762815 100644 --- a/libvpx/vp9/common/x86/vp9_postproc_sse2.asm +++ b/libvpx/vp9/common/x86/vp9_postproc_sse2.asm @@ -624,68 +624,6 @@ sym(vp9_mbpost_proc_across_ip_xmm): %undef flimit4 -;void vp9_plane_add_noise_wmt (unsigned char *start, unsigned char *noise, -; unsigned char blackclamp[16], -; unsigned char whiteclamp[16], -; unsigned char bothclamp[16], -; unsigned int width, unsigned int height, int pitch) -global sym(vp9_plane_add_noise_wmt) PRIVATE -sym(vp9_plane_add_noise_wmt): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 8 - GET_GOT rbx - push rsi - push rdi - ; end prolog - -.addnoise_loop: - call sym(LIBVPX_RAND) WRT_PLT - mov rcx, arg(1) ;noise - and rax, 0xff - add rcx, rax - - ; we rely on the fact that the clamping vectors are stored contiguously - ; in black/white/both order. 
Note that we have to reload this here because - ; rdx could be trashed by rand() - mov rdx, arg(2) ; blackclamp - - - mov rdi, rcx - movsxd rcx, dword arg(5) ;[Width] - mov rsi, arg(0) ;Pos - xor rax,rax - -.addnoise_nextset: - movdqu xmm1,[rsi+rax] ; get the source - - psubusb xmm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise - paddusb xmm1, [rdx+32] ;bothclamp - psubusb xmm1, [rdx+16] ;whiteclamp - - movdqu xmm2,[rdi+rax] ; get the noise for this line - paddb xmm1,xmm2 ; add it in - movdqu [rsi+rax],xmm1 ; store the result - - add rax,16 ; move to the next line - - cmp rax, rcx - jl .addnoise_nextset - - movsxd rax, dword arg(7) ; Pitch - add arg(0), rax ; Start += Pitch - sub dword arg(6), 1 ; Height -= 1 - jg .addnoise_loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - SECTION_RODATA align 16 rd42: diff --git a/libvpx/vp9/decoder/vp9_decodeframe.c b/libvpx/vp9/decoder/vp9_decodeframe.c index f1916639b..d63912932 100644 --- a/libvpx/vp9/decoder/vp9_decodeframe.c +++ b/libvpx/vp9/decoder/vp9_decodeframe.c @@ -189,54 +189,31 @@ static void inverse_transform_block_inter(MACROBLOCKD* xd, int plane, uint8_t *dst, int stride, int eob) { struct macroblockd_plane *const pd = &xd->plane[plane]; - if (eob > 0) { - tran_low_t *const dqcoeff = pd->dqcoeff; + tran_low_t *const dqcoeff = pd->dqcoeff; + assert(eob > 0); #if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - if (xd->lossless) { - vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd); - } else { - switch (tx_size) { - case TX_4X4: - vp9_highbd_idct4x4_add(dqcoeff, dst, stride, eob, xd->bd); - break; - case TX_8X8: - vp9_highbd_idct8x8_add(dqcoeff, dst, stride, eob, xd->bd); - break; - case TX_16X16: - vp9_highbd_idct16x16_add(dqcoeff, dst, stride, eob, xd->bd); - break; - case TX_32X32: - vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd); - break; - default: - assert(0 && "Invalid transform size"); - } 
- } + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + if (xd->lossless) { + vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd); } else { - if (xd->lossless) { - vp9_iwht4x4_add(dqcoeff, dst, stride, eob); - } else { - switch (tx_size) { - case TX_4X4: - vp9_idct4x4_add(dqcoeff, dst, stride, eob); - break; - case TX_8X8: - vp9_idct8x8_add(dqcoeff, dst, stride, eob); - break; - case TX_16X16: - vp9_idct16x16_add(dqcoeff, dst, stride, eob); - break; - case TX_32X32: - vp9_idct32x32_add(dqcoeff, dst, stride, eob); - break; - default: - assert(0 && "Invalid transform size"); - return; - } + switch (tx_size) { + case TX_4X4: + vp9_highbd_idct4x4_add(dqcoeff, dst, stride, eob, xd->bd); + break; + case TX_8X8: + vp9_highbd_idct8x8_add(dqcoeff, dst, stride, eob, xd->bd); + break; + case TX_16X16: + vp9_highbd_idct16x16_add(dqcoeff, dst, stride, eob, xd->bd); + break; + case TX_32X32: + vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd); + break; + default: + assert(0 && "Invalid transform size"); } } -#else + } else { if (xd->lossless) { vp9_iwht4x4_add(dqcoeff, dst, stride, eob); } else { @@ -258,18 +235,40 @@ static void inverse_transform_block_inter(MACROBLOCKD* xd, int plane, return; } } + } +#else + if (xd->lossless) { + vp9_iwht4x4_add(dqcoeff, dst, stride, eob); + } else { + switch (tx_size) { + case TX_4X4: + vp9_idct4x4_add(dqcoeff, dst, stride, eob); + break; + case TX_8X8: + vp9_idct8x8_add(dqcoeff, dst, stride, eob); + break; + case TX_16X16: + vp9_idct16x16_add(dqcoeff, dst, stride, eob); + break; + case TX_32X32: + vp9_idct32x32_add(dqcoeff, dst, stride, eob); + break; + default: + assert(0 && "Invalid transform size"); + return; + } + } #endif // CONFIG_VP9_HIGHBITDEPTH - if (eob == 1) { - dqcoeff[0] = 0; - } else { - if (tx_size <= TX_16X16 && eob <= 10) - memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0])); - else if (tx_size == TX_32X32 && eob <= 34) - memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0])); - else - memset(dqcoeff, 0, 
(16 << (tx_size << 1)) * sizeof(dqcoeff[0])); - } + if (eob == 1) { + dqcoeff[0] = 0; + } else { + if (tx_size <= TX_16X16 && eob <= 10) + memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0])); + else if (tx_size == TX_32X32 && eob <= 34) + memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0])); + else + memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0])); } } @@ -279,54 +278,31 @@ static void inverse_transform_block_intra(MACROBLOCKD* xd, int plane, uint8_t *dst, int stride, int eob) { struct macroblockd_plane *const pd = &xd->plane[plane]; - if (eob > 0) { - tran_low_t *const dqcoeff = pd->dqcoeff; + tran_low_t *const dqcoeff = pd->dqcoeff; + assert(eob > 0); #if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - if (xd->lossless) { - vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd); - } else { - switch (tx_size) { - case TX_4X4: - vp9_highbd_iht4x4_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); - break; - case TX_8X8: - vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); - break; - case TX_16X16: - vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); - break; - case TX_32X32: - vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd); - break; - default: - assert(0 && "Invalid transform size"); - } - } + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + if (xd->lossless) { + vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd); } else { - if (xd->lossless) { - vp9_iwht4x4_add(dqcoeff, dst, stride, eob); - } else { - switch (tx_size) { - case TX_4X4: - vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob); - break; - case TX_8X8: - vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob); - break; - case TX_16X16: - vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob); - break; - case TX_32X32: - vp9_idct32x32_add(dqcoeff, dst, stride, eob); - break; - default: - assert(0 && "Invalid transform size"); - return; - } + switch (tx_size) { + case TX_4X4: + vp9_highbd_iht4x4_add(tx_type, 
dqcoeff, dst, stride, eob, xd->bd); + break; + case TX_8X8: + vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); + break; + case TX_16X16: + vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); + break; + case TX_32X32: + vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd); + break; + default: + assert(0 && "Invalid transform size"); } } -#else + } else { if (xd->lossless) { vp9_iwht4x4_add(dqcoeff, dst, stride, eob); } else { @@ -348,33 +324,55 @@ static void inverse_transform_block_intra(MACROBLOCKD* xd, int plane, return; } } + } +#else + if (xd->lossless) { + vp9_iwht4x4_add(dqcoeff, dst, stride, eob); + } else { + switch (tx_size) { + case TX_4X4: + vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob); + break; + case TX_8X8: + vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob); + break; + case TX_16X16: + vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob); + break; + case TX_32X32: + vp9_idct32x32_add(dqcoeff, dst, stride, eob); + break; + default: + assert(0 && "Invalid transform size"); + return; + } + } #endif // CONFIG_VP9_HIGHBITDEPTH - if (eob == 1) { - dqcoeff[0] = 0; - } else { - if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10) - memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0])); - else if (tx_size == TX_32X32 && eob <= 34) - memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0])); - else - memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0])); - } + if (eob == 1) { + dqcoeff[0] = 0; + } else { + if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10) + memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0])); + else if (tx_size == TX_32X32 && eob <= 34) + memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0])); + else + memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0])); } } static void predict_and_reconstruct_intra_block(MACROBLOCKD *const xd, vpx_reader *r, - MB_MODE_INFO *const mbmi, + MODE_INFO *const mi, int plane, int row, int col, TX_SIZE tx_size) { struct macroblockd_plane 
*const pd = &xd->plane[plane]; - PREDICTION_MODE mode = (plane == 0) ? mbmi->mode : mbmi->uv_mode; + PREDICTION_MODE mode = (plane == 0) ? mi->mode : mi->uv_mode; uint8_t *dst; dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col]; - if (mbmi->sb_type < BLOCK_8X8) + if (mi->sb_type < BLOCK_8X8) if (plane == 0) mode = xd->mi[0]->bmi[(row << 1) + col].as_mode; @@ -382,29 +380,33 @@ static void predict_and_reconstruct_intra_block(MACROBLOCKD *const xd, dst, pd->dst.stride, dst, pd->dst.stride, col, row, plane); - if (!mbmi->skip) { + if (!mi->skip) { const TX_TYPE tx_type = (plane || xd->lossless) ? DCT_DCT : intra_mode_to_tx_type_lookup[mode]; const scan_order *sc = (plane || xd->lossless) ? &vp9_default_scan_orders[tx_size] : &vp9_scan_orders[tx_size][tx_type]; const int eob = vp9_decode_block_tokens(xd, plane, sc, col, row, tx_size, - r, mbmi->segment_id); - inverse_transform_block_intra(xd, plane, tx_type, tx_size, - dst, pd->dst.stride, eob); + r, mi->segment_id); + if (eob > 0) { + inverse_transform_block_intra(xd, plane, tx_type, tx_size, + dst, pd->dst.stride, eob); + } } } static int reconstruct_inter_block(MACROBLOCKD *const xd, vpx_reader *r, - MB_MODE_INFO *const mbmi, int plane, + MODE_INFO *const mi, int plane, int row, int col, TX_SIZE tx_size) { struct macroblockd_plane *const pd = &xd->plane[plane]; const scan_order *sc = &vp9_default_scan_orders[tx_size]; const int eob = vp9_decode_block_tokens(xd, plane, sc, col, row, tx_size, r, - mbmi->segment_id); + mi->segment_id); - inverse_transform_block_inter(xd, plane, tx_size, - &pd->dst.buf[4 * row * pd->dst.stride + 4 * col], - pd->dst.stride, eob); + if (eob > 0) { + inverse_transform_block_inter( + xd, plane, tx_size, &pd->dst.buf[4 * row * pd->dst.stride + 4 * col], + pd->dst.stride, eob); + } return eob; } @@ -523,8 +525,8 @@ static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, } if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - high_inter_predictor(buf_ptr, b_w, dst, 
dst_buf_stride, subpel_x, - subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd); + highbd_inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x, + subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd); } else { inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); @@ -552,7 +554,7 @@ static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, } #endif // CONFIG_VP9_HIGHBITDEPTH -static void dec_build_inter_predictors(VP9Decoder *const pbi, MACROBLOCKD *xd, +static void dec_build_inter_predictors(VPxWorker *const worker, MACROBLOCKD *xd, int plane, int bw, int bh, int x, int y, int w, int h, int mi_x, int mi_y, const InterpKernel *kernel, @@ -587,7 +589,12 @@ static void dec_build_inter_predictors(VP9Decoder *const pbi, MACROBLOCKD *xd, // Co-ordinate of containing block to pixel precision. int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)); int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)); - +#if CONFIG_BETTER_HW_COMPATIBILITY + assert(xd->mi[0]->sb_type != BLOCK_4X8 && + xd->mi[0]->sb_type != BLOCK_8X4); + assert(mv_q4.row == mv->row * (1 << (1 - pd->subsampling_y)) && + mv_q4.col == mv->col * (1 << (1 - pd->subsampling_x))); +#endif // Co-ordinate of the block to 1/16th pixel precision. x0_16 = (x_start + x) << SUBPEL_BITS; y0_16 = (y_start + y) << SUBPEL_BITS; @@ -657,8 +664,8 @@ static void dec_build_inter_predictors(VP9Decoder *const pbi, MACROBLOCKD *xd, // Wait until reference block is ready. Pad 7 more pixels as last 7 // pixels of each superblock row can be changed by next superblock row. - if (pbi->frame_parallel_decode) - vp9_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf, + if (worker != NULL) + vp9_frameworker_wait(worker, ref_frame_buf, VPXMAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1)); // Skip border extension if block is inside the frame. 
@@ -684,16 +691,16 @@ static void dec_build_inter_predictors(VP9Decoder *const pbi, MACROBLOCKD *xd, } else { // Wait until reference block is ready. Pad 7 more pixels as last 7 // pixels of each superblock row can be changed by next superblock row. - if (pbi->frame_parallel_decode) { - const int y1 = (y0_16 + (h - 1) * ys) >> SUBPEL_BITS; - vp9_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf, - VPXMAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1)); - } + if (worker != NULL) { + const int y1 = (y0_16 + (h - 1) * ys) >> SUBPEL_BITS; + vp9_frameworker_wait(worker, ref_frame_buf, + VPXMAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1)); + } } #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - high_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, - subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd); + highbd_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, + subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd); } else { inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); @@ -711,55 +718,75 @@ static void dec_build_inter_predictors_sb(VP9Decoder *const pbi, const int mi_x = mi_col * MI_SIZE; const int mi_y = mi_row * MI_SIZE; const MODE_INFO *mi = xd->mi[0]; - const InterpKernel *kernel = vp9_filter_kernels[mi->mbmi.interp_filter]; - const BLOCK_SIZE sb_type = mi->mbmi.sb_type; - const int is_compound = has_second_ref(&mi->mbmi); - - for (plane = 0; plane < MAX_MB_PLANE; ++plane) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - struct buf_2d *const dst_buf = &pd->dst; - const int num_4x4_w = pd->n4_w; - const int num_4x4_h = pd->n4_h; - - const int n4w_x4 = 4 * num_4x4_w; - const int n4h_x4 = 4 * num_4x4_h; - int ref; - - for (ref = 0; ref < 1 + is_compound; ++ref) { - const struct scale_factors *const sf = &xd->block_refs[ref]->sf; - struct buf_2d *const pre_buf = &pd->pre[ref]; - const int idx = xd->block_refs[ref]->idx; - BufferPool *const pool 
= pbi->common.buffer_pool; - RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx]; - const int is_scaled = vp9_is_scaled(sf); - - if (sb_type < BLOCK_8X8) { + const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter]; + const BLOCK_SIZE sb_type = mi->sb_type; + const int is_compound = has_second_ref(mi); + int ref; + int is_scaled; + VPxWorker *const fwo = pbi->frame_parallel_decode ? + pbi->frame_worker_owner : NULL; + + for (ref = 0; ref < 1 + is_compound; ++ref) { + const MV_REFERENCE_FRAME frame = mi->ref_frame[ref]; + RefBuffer *ref_buf = &pbi->common.frame_refs[frame - LAST_FRAME]; + const struct scale_factors *const sf = &ref_buf->sf; + const int idx = ref_buf->idx; + BufferPool *const pool = pbi->common.buffer_pool; + RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx]; + + if (!vp9_is_valid_scale(sf)) + vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM, + "Reference frame has invalid dimensions"); + + is_scaled = vp9_is_scaled(sf); + vp9_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col, + is_scaled ? 
sf : NULL); + xd->block_refs[ref] = ref_buf; + + if (sb_type < BLOCK_8X8) { + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + struct buf_2d *const dst_buf = &pd->dst; + const int num_4x4_w = pd->n4_w; + const int num_4x4_h = pd->n4_h; + const int n4w_x4 = 4 * num_4x4_w; + const int n4h_x4 = 4 * num_4x4_h; + struct buf_2d *const pre_buf = &pd->pre[ref]; int i = 0, x, y; for (y = 0; y < num_4x4_h; ++y) { for (x = 0; x < num_4x4_w; ++x) { const MV mv = average_split_mvs(pd, mi, ref, i++); - dec_build_inter_predictors(pbi, xd, plane, n4w_x4, n4h_x4, + dec_build_inter_predictors(fwo, xd, plane, n4w_x4, n4h_x4, 4 * x, 4 * y, 4, 4, mi_x, mi_y, kernel, sf, pre_buf, dst_buf, &mv, ref_frame_buf, is_scaled, ref); } } - } else { - const MV mv = mi->mbmi.mv[ref].as_mv; - dec_build_inter_predictors(pbi, xd, plane, n4w_x4, n4h_x4, + } + } else { + const MV mv = mi->mv[ref].as_mv; + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + struct buf_2d *const dst_buf = &pd->dst; + const int num_4x4_w = pd->n4_w; + const int num_4x4_h = pd->n4_h; + const int n4w_x4 = 4 * num_4x4_w; + const int n4h_x4 = 4 * num_4x4_h; + struct buf_2d *const pre_buf = &pd->pre[ref]; + dec_build_inter_predictors(fwo, xd, plane, n4w_x4, n4h_x4, 0, 0, n4w_x4, n4h_x4, mi_x, mi_y, kernel, - sf, pre_buf, dst_buf, &mv, ref_frame_buf, - is_scaled, ref); + sf, pre_buf, dst_buf, &mv, + ref_frame_buf, is_scaled, ref); } } } } -static INLINE TX_SIZE dec_get_uv_tx_size(const MB_MODE_INFO *mbmi, +static INLINE TX_SIZE dec_get_uv_tx_size(const MODE_INFO *mi, int n4_wl, int n4_hl) { // get minimum log2 num4x4s dimension const int x = VPXMIN(n4_wl, n4_hl); - return VPXMIN(mbmi->tx_size, x); + return VPXMIN(mi->tx_size, x); } static INLINE void dec_reset_skip_context(MACROBLOCKD *xd) { @@ -782,10 +809,10 @@ static void set_plane_n4(MACROBLOCKD *const xd, int bw, int bh, int bwl, } } -static MB_MODE_INFO 
*set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, - BLOCK_SIZE bsize, int mi_row, int mi_col, - int bw, int bh, int x_mis, int y_mis, - int bwl, int bhl) { +static MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, + BLOCK_SIZE bsize, int mi_row, int mi_col, + int bw, int bh, int x_mis, int y_mis, + int bwl, int bhl) { const int offset = mi_row * cm->mi_stride + mi_col; int x, y; const TileInfo *const tile = &xd->tile; @@ -794,7 +821,7 @@ static MB_MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, xd->mi[0] = &cm->mi[offset]; // TODO(slavarnway): Generate sb_type based on bwl and bhl, instead of // passing bsize from decode_partition(). - xd->mi[0]->mbmi.sb_type = bsize; + xd->mi[0]->sb_type = bsize; for (y = 0; y < y_mis; ++y) for (x = !y; x < x_mis; ++x) { xd->mi[y * cm->mi_stride + x] = xd->mi[0]; @@ -809,7 +836,7 @@ static MB_MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); - return &xd->mi[0]->mbmi; + return xd->mi[0]; } static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd, @@ -823,8 +850,8 @@ static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd, const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row); - MB_MODE_INFO *mbmi = set_offsets(cm, xd, bsize, mi_row, mi_col, - bw, bh, x_mis, y_mis, bwl, bhl); + MODE_INFO *mi = set_offsets(cm, xd, bsize, mi_row, mi_col, + bw, bh, x_mis, y_mis, bwl, bhl); if (bsize >= BLOCK_8X8 && (cm->subsampling_x || cm->subsampling_y)) { const BLOCK_SIZE uv_subsize = @@ -834,19 +861,19 @@ static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd, VPX_CODEC_CORRUPT_FRAME, "Invalid block size."); } - vpx_read_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis); + vp9_read_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis); - if 
(mbmi->skip) { + if (mi->skip) { dec_reset_skip_context(xd); } - if (!is_inter_block(mbmi)) { + if (!is_inter_block(mi)) { int plane; for (plane = 0; plane < MAX_MB_PLANE; ++plane) { const struct macroblockd_plane *const pd = &xd->plane[plane]; const TX_SIZE tx_size = - plane ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl) - : mbmi->tx_size; + plane ? dec_get_uv_tx_size(mi, pd->n4_wl, pd->n4_hl) + : mi->tx_size; const int num_4x4_w = pd->n4_w; const int num_4x4_h = pd->n4_h; const int step = (1 << tx_size); @@ -856,9 +883,12 @@ static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd, const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); + xd->max_blocks_wide = xd->mb_to_right_edge >= 0 ? 0 : max_blocks_wide; + xd->max_blocks_high = xd->mb_to_bottom_edge >= 0 ? 0 : max_blocks_high; + for (row = 0; row < max_blocks_high; row += step) for (col = 0; col < max_blocks_wide; col += step) - predict_and_reconstruct_intra_block(xd, r, mbmi, plane, + predict_and_reconstruct_intra_block(xd, r, mi, plane, row, col, tx_size); } } else { @@ -866,15 +896,15 @@ static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd, dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col); // Reconstruction - if (!mbmi->skip) { + if (!mi->skip) { int eobtotal = 0; int plane; for (plane = 0; plane < MAX_MB_PLANE; ++plane) { const struct macroblockd_plane *const pd = &xd->plane[plane]; const TX_SIZE tx_size = - plane ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl) - : mbmi->tx_size; + plane ? dec_get_uv_tx_size(mi, pd->n4_wl, pd->n4_hl) + : mi->tx_size; const int num_4x4_w = pd->n4_w; const int num_4x4_h = pd->n4_h; const int step = (1 << tx_size); @@ -884,21 +914,24 @@ static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd, const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 
0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); + xd->max_blocks_wide = xd->mb_to_right_edge >= 0 ? 0 : max_blocks_wide; + xd->max_blocks_high = xd->mb_to_bottom_edge >= 0 ? 0 : max_blocks_high; + for (row = 0; row < max_blocks_high; row += step) for (col = 0; col < max_blocks_wide; col += step) - eobtotal += reconstruct_inter_block(xd, r, mbmi, plane, row, col, + eobtotal += reconstruct_inter_block(xd, r, mi, plane, row, col, tx_size); } if (!less8x8 && eobtotal == 0) - mbmi->skip = 1; // skip loopfilter + mi->skip = 1; // skip loopfilter } } xd->corrupted |= vpx_reader_has_error(r); if (cm->lf.filter_level) { - vp9_build_mask(cm, mbmi, mi_row, mi_col, bw, bh); + vp9_build_mask(cm, mi, mi_row, mi_col, bw, bh); } } @@ -1196,8 +1229,9 @@ static void resize_mv_buffer(VP9_COMMON *cm) { vpx_free(cm->cur_frame->mvs); cm->cur_frame->mi_rows = cm->mi_rows; cm->cur_frame->mi_cols = cm->mi_cols; - cm->cur_frame->mvs = (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols, - sizeof(*cm->cur_frame->mvs)); + CHECK_MEM_ERROR(cm, cm->cur_frame->mvs, + (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols, + sizeof(*cm->cur_frame->mvs))); } static void resize_context_buffers(VP9_COMMON *cm, int width, int height) { @@ -1281,11 +1315,16 @@ static void setup_frame_size_with_refs(VP9_COMMON *cm, BufferPool *const pool = cm->buffer_pool; for (i = 0; i < REFS_PER_FRAME; ++i) { if (vpx_rb_read_bit(rb)) { - YV12_BUFFER_CONFIG *const buf = cm->frame_refs[i].buf; - width = buf->y_crop_width; - height = buf->y_crop_height; - found = 1; - break; + if (cm->frame_refs[i].idx != INVALID_IDX) { + YV12_BUFFER_CONFIG *const buf = cm->frame_refs[i].buf; + width = buf->y_crop_width; + height = buf->y_crop_height; + found = 1; + break; + } else { + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Failed to decode frame size"); + } } } @@ -1300,22 +1339,23 @@ static void setup_frame_size_with_refs(VP9_COMMON *cm, // has valid dimensions. 
for (i = 0; i < REFS_PER_FRAME; ++i) { RefBuffer *const ref_frame = &cm->frame_refs[i]; - has_valid_ref_frame |= valid_ref_frame_size(ref_frame->buf->y_crop_width, - ref_frame->buf->y_crop_height, - width, height); + has_valid_ref_frame |= (ref_frame->idx != INVALID_IDX && + valid_ref_frame_size(ref_frame->buf->y_crop_width, + ref_frame->buf->y_crop_height, + width, height)); } if (!has_valid_ref_frame) vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Referenced frame has invalid size"); for (i = 0; i < REFS_PER_FRAME; ++i) { RefBuffer *const ref_frame = &cm->frame_refs[i]; - if (!valid_ref_frame_img_fmt( - ref_frame->buf->bit_depth, - ref_frame->buf->subsampling_x, - ref_frame->buf->subsampling_y, - cm->bit_depth, - cm->subsampling_x, - cm->subsampling_y)) + if (ref_frame->idx == INVALID_IDX || + !valid_ref_frame_img_fmt(ref_frame->buf->bit_depth, + ref_frame->buf->subsampling_x, + ref_frame->buf->subsampling_y, + cm->bit_depth, + cm->subsampling_x, + cm->subsampling_y)) vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Referenced frame has incompatible color format"); } @@ -1434,7 +1474,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, TileBuffer tile_buffers[4][1 << 6]; int tile_row, tile_col; int mi_row, mi_col; - TileData *tile_data = NULL; + TileWorkerData *tile_data = NULL; if (cm->lf.filter_level && !cm->skip_loop_filter && pbi->lf_worker.data1 == NULL) { @@ -1470,28 +1510,17 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers); - if (pbi->tile_data == NULL || - (tile_cols * tile_rows) != pbi->total_tiles) { - vpx_free(pbi->tile_data); - CHECK_MEM_ERROR( - cm, - pbi->tile_data, - vpx_memalign(32, tile_cols * tile_rows * (sizeof(*pbi->tile_data)))); - pbi->total_tiles = tile_rows * tile_cols; - } - // Load all tile information into tile_data. 
for (tile_row = 0; tile_row < tile_rows; ++tile_row) { for (tile_col = 0; tile_col < tile_cols; ++tile_col) { const TileBuffer *const buf = &tile_buffers[tile_row][tile_col]; - tile_data = pbi->tile_data + tile_cols * tile_row + tile_col; - tile_data->cm = cm; + tile_data = pbi->tile_worker_data + tile_cols * tile_row + tile_col; tile_data->xd = pbi->mb; tile_data->xd.corrupted = 0; - tile_data->xd.counts = cm->frame_parallel_decoding_mode ? - NULL : &cm->counts; + tile_data->xd.counts = + cm->frame_parallel_decoding_mode ? NULL : &cm->counts; vp9_zero(tile_data->dqcoeff); - vp9_tile_init(&tile_data->xd.tile, tile_data->cm, tile_row, tile_col); + vp9_tile_init(&tile_data->xd.tile, cm, tile_row, tile_col); setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &tile_data->bit_reader, pbi->decrypt_cb, pbi->decrypt_state); @@ -1507,8 +1536,8 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, for (tile_col = 0; tile_col < tile_cols; ++tile_col) { const int col = pbi->inv_tile_order ? tile_cols - tile_col - 1 : tile_col; - tile_data = pbi->tile_data + tile_cols * tile_row + col; - vp9_tile_set_col(&tile, tile_data->cm, col); + tile_data = pbi->tile_worker_data + tile_cols * tile_row + col; + vp9_tile_set_col(&tile, cm, col); vp9_zero(tile_data->xd.left_context); vp9_zero(tile_data->xd.left_seg_context); for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end; @@ -1560,7 +1589,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, } // Get last tile data. - tile_data = pbi->tile_data + tile_cols * tile_rows - 1; + tile_data = pbi->tile_worker_data + tile_cols * tile_rows - 1; if (pbi->frame_parallel_decode) vp9_frameworker_broadcast(pbi->cur_buf, INT_MAX); @@ -1645,12 +1674,6 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const int num_threads = pbi->max_threads; CHECK_MEM_ERROR(cm, pbi->tile_workers, vpx_malloc(num_threads * sizeof(*pbi->tile_workers))); - // Ensure tile data offsets will be properly aligned. 
This may fail on - // platforms without DECLARE_ALIGNED(). - assert((sizeof(*pbi->tile_worker_data) % 16) == 0); - CHECK_MEM_ERROR(cm, pbi->tile_worker_data, - vpx_memalign(32, num_threads * - sizeof(*pbi->tile_worker_data))); for (n = 0; n < num_threads; ++n) { VPxWorker *const worker = &pbi->tile_workers[n]; ++pbi->num_tile_workers; @@ -1666,7 +1689,8 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, // Reset tile decoding hook for (n = 0; n < num_workers; ++n) { VPxWorker *const worker = &pbi->tile_workers[n]; - TileWorkerData *const tile_data = &pbi->tile_worker_data[n]; + TileWorkerData *const tile_data = + &pbi->tile_worker_data[n + pbi->total_tiles]; winterface->sync(worker); tile_data->xd = pbi->mb; tile_data->xd.counts = @@ -1979,6 +2003,8 @@ static size_t read_uncompressed_header(VP9Decoder *pbi, if (!cm->error_resilient_mode) { cm->refresh_frame_context = vpx_rb_read_bit(rb); cm->frame_parallel_decoding_mode = vpx_rb_read_bit(rb); + if (!cm->frame_parallel_decoding_mode) + vp9_zero(cm->counts); } else { cm->refresh_frame_context = 0; cm->frame_parallel_decoding_mode = 1; @@ -2082,43 +2108,6 @@ static int read_compressed_header(VP9Decoder *pbi, const uint8_t *data, return vpx_reader_has_error(&r); } -#ifdef NDEBUG -#define debug_check_frame_counts(cm) (void)0 -#else // !NDEBUG -// Counts should only be incremented when frame_parallel_decoding_mode and -// error_resilient_mode are disabled. 
-static void debug_check_frame_counts(const VP9_COMMON *const cm) { - FRAME_COUNTS zero_counts; - vp9_zero(zero_counts); - assert(cm->frame_parallel_decoding_mode || cm->error_resilient_mode); - assert(!memcmp(cm->counts.y_mode, zero_counts.y_mode, - sizeof(cm->counts.y_mode))); - assert(!memcmp(cm->counts.uv_mode, zero_counts.uv_mode, - sizeof(cm->counts.uv_mode))); - assert(!memcmp(cm->counts.partition, zero_counts.partition, - sizeof(cm->counts.partition))); - assert(!memcmp(cm->counts.coef, zero_counts.coef, - sizeof(cm->counts.coef))); - assert(!memcmp(cm->counts.eob_branch, zero_counts.eob_branch, - sizeof(cm->counts.eob_branch))); - assert(!memcmp(cm->counts.switchable_interp, zero_counts.switchable_interp, - sizeof(cm->counts.switchable_interp))); - assert(!memcmp(cm->counts.inter_mode, zero_counts.inter_mode, - sizeof(cm->counts.inter_mode))); - assert(!memcmp(cm->counts.intra_inter, zero_counts.intra_inter, - sizeof(cm->counts.intra_inter))); - assert(!memcmp(cm->counts.comp_inter, zero_counts.comp_inter, - sizeof(cm->counts.comp_inter))); - assert(!memcmp(cm->counts.single_ref, zero_counts.single_ref, - sizeof(cm->counts.single_ref))); - assert(!memcmp(cm->counts.comp_ref, zero_counts.comp_ref, - sizeof(cm->counts.comp_ref))); - assert(!memcmp(&cm->counts.tx, &zero_counts.tx, sizeof(cm->counts.tx))); - assert(!memcmp(cm->counts.skip, zero_counts.skip, sizeof(cm->counts.skip))); - assert(!memcmp(&cm->counts.mv, &zero_counts.mv, sizeof(cm->counts.mv))); -} -#endif // NDEBUG - static struct vpx_read_bit_buffer *init_read_bit_buffer( VP9Decoder *pbi, struct vpx_read_bit_buffer *rb, @@ -2202,8 +2191,6 @@ void vp9_decode_frame(VP9Decoder *pbi, vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Uninitialized entropy context."); - vp9_zero(cm->counts); - xd->corrupted = 0; new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size); if (new_fb->corrupted) @@ -2232,6 +2219,19 @@ void vp9_decode_frame(VP9Decoder *pbi, 
vp9_frameworker_unlock_stats(worker); } + if (pbi->tile_worker_data == NULL || + (tile_cols * tile_rows) != pbi->total_tiles) { + const int num_tile_workers = tile_cols * tile_rows + + ((pbi->max_threads > 1) ? pbi->max_threads : 0); + const size_t twd_size = num_tile_workers * sizeof(*pbi->tile_worker_data); + // Ensure tile data offsets will be properly aligned. This may fail on + // platforms without DECLARE_ALIGNED(). + assert((sizeof(*pbi->tile_worker_data) % 16) == 0); + vpx_free(pbi->tile_worker_data); + CHECK_MEM_ERROR(cm, pbi->tile_worker_data, vpx_memalign(32, twd_size)); + pbi->total_tiles = tile_rows * tile_cols; + } + if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1) { // Multi-threaded tile decoder *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end); @@ -2259,8 +2259,6 @@ void vp9_decode_frame(VP9Decoder *pbi, vp9_adapt_mode_probs(cm); vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv); } - } else { - debug_check_frame_counts(cm); } } else { vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, diff --git a/libvpx/vp9/decoder/vp9_decodemv.c b/libvpx/vp9/decoder/vp9_decodemv.c index d3ca7b3fe..ffc6839ad 100644 --- a/libvpx/vp9/decoder/vp9_decodemv.c +++ b/libvpx/vp9/decoder/vp9_decodemv.c @@ -81,10 +81,10 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, return (TX_SIZE)tx_size; } -static TX_SIZE read_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, - int allow_select, vpx_reader *r) { +static INLINE TX_SIZE read_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, + int allow_select, vpx_reader *r) { TX_MODE tx_mode = cm->tx_mode; - BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; + BLOCK_SIZE bsize = xd->mi[0]->sb_type; const TX_SIZE max_tx_size = max_txsize_lookup[bsize]; if (allow_select && tx_mode == TX_MODE_SELECT && bsize >= BLOCK_8X8) return read_selected_tx_size(cm, xd, max_tx_size, r); @@ -149,17 +149,12 @@ static int read_intra_segment_id(VP9_COMMON *const cm, int mi_offset, } static int 
read_inter_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, - int mi_row, int mi_col, vpx_reader *r) { + int mi_row, int mi_col, vpx_reader *r, + int x_mis, int y_mis) { struct segmentation *const seg = &cm->seg; - MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + MODE_INFO *const mi = xd->mi[0]; int predicted_segment_id, segment_id; const int mi_offset = mi_row * cm->mi_cols + mi_col; - const int bw = xd->plane[0].n4_w >> 1; - const int bh = xd->plane[0].n4_h >> 1; - - // TODO(slavarnway): move x_mis, y_mis into xd ????? - const int x_mis = VPXMIN(cm->mi_cols - mi_col, bw); - const int y_mis = VPXMIN(cm->mi_rows - mi_row, bh); if (!seg->enabled) return 0; // Default for disabled segmentation @@ -176,9 +171,9 @@ static int read_inter_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, if (seg->temporal_update) { const vpx_prob pred_prob = vp9_get_pred_prob_seg_id(seg, xd); - mbmi->seg_id_predicted = vpx_read(r, pred_prob); - segment_id = mbmi->seg_id_predicted ? predicted_segment_id - : read_segment_id(r, seg); + mi->seg_id_predicted = vpx_read(r, pred_prob); + segment_id = mi->seg_id_predicted ? predicted_segment_id + : read_segment_id(r, seg); } else { segment_id = read_segment_id(r, seg); } @@ -202,52 +197,46 @@ static int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd, static void read_intra_frame_mode_info(VP9_COMMON *const cm, MACROBLOCKD *const xd, - int mi_row, int mi_col, vpx_reader *r) { + int mi_row, int mi_col, vpx_reader *r, + int x_mis, int y_mis) { MODE_INFO *const mi = xd->mi[0]; - MB_MODE_INFO *const mbmi = &mi->mbmi; const MODE_INFO *above_mi = xd->above_mi; const MODE_INFO *left_mi = xd->left_mi; - const BLOCK_SIZE bsize = mbmi->sb_type; + const BLOCK_SIZE bsize = mi->sb_type; int i; const int mi_offset = mi_row * cm->mi_cols + mi_col; - const int bw = xd->plane[0].n4_w >> 1; - const int bh = xd->plane[0].n4_h >> 1; - // TODO(slavarnway): move x_mis, y_mis into xd ????? 
- const int x_mis = VPXMIN(cm->mi_cols - mi_col, bw); - const int y_mis = VPXMIN(cm->mi_rows - mi_row, bh); - - mbmi->segment_id = read_intra_segment_id(cm, mi_offset, x_mis, y_mis, r); - mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r); - mbmi->tx_size = read_tx_size(cm, xd, 1, r); - mbmi->ref_frame[0] = INTRA_FRAME; - mbmi->ref_frame[1] = NONE; + mi->segment_id = read_intra_segment_id(cm, mi_offset, x_mis, y_mis, r); + mi->skip = read_skip(cm, xd, mi->segment_id, r); + mi->tx_size = read_tx_size(cm, xd, 1, r); + mi->ref_frame[0] = INTRA_FRAME; + mi->ref_frame[1] = NONE; switch (bsize) { case BLOCK_4X4: for (i = 0; i < 4; ++i) mi->bmi[i].as_mode = read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, i)); - mbmi->mode = mi->bmi[3].as_mode; + mi->mode = mi->bmi[3].as_mode; break; case BLOCK_4X8: mi->bmi[0].as_mode = mi->bmi[2].as_mode = read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 0)); - mi->bmi[1].as_mode = mi->bmi[3].as_mode = mbmi->mode = + mi->bmi[1].as_mode = mi->bmi[3].as_mode = mi->mode = read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 1)); break; case BLOCK_8X4: mi->bmi[0].as_mode = mi->bmi[1].as_mode = read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 0)); - mi->bmi[2].as_mode = mi->bmi[3].as_mode = mbmi->mode = + mi->bmi[2].as_mode = mi->bmi[3].as_mode = mi->mode = read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 2)); break; default: - mbmi->mode = read_intra_mode(r, - get_y_mode_probs(mi, above_mi, left_mi, 0)); + mi->mode = read_intra_mode(r, + get_y_mode_probs(mi, above_mi, left_mi, 0)); } - mbmi->uv_mode = read_intra_mode(r, vp9_kf_uv_mode_prob[mbmi->mode]); + mi->uv_mode = read_intra_mode(r, vp9_kf_uv_mode_prob[mi->mode]); } static int read_mv_component(vpx_reader *r, @@ -289,7 +278,7 @@ static INLINE void read_mv(vpx_reader *r, MV *mv, const MV *ref, nmv_context_counts *counts, int allow_hp) { const MV_JOINT_TYPE joint_type = (MV_JOINT_TYPE)vpx_read_tree(r, vp9_mv_joint_tree, ctx->joints); - const 
int use_hp = allow_hp && vp9_use_mv_hp(ref); + const int use_hp = allow_hp && use_mv_hp(ref); MV diff = {0, 0}; if (mv_joint_vertical(joint_type)) @@ -364,11 +353,36 @@ static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd, } } +// TODO(slavarnway): Move this decoder version of +// vp9_get_pred_context_switchable_interp() to vp9_pred_common.h and update the +// encoder. +// +// Returns a context number for the given MB prediction signal +static int dec_get_pred_context_switchable_interp(const MACROBLOCKD *xd) { + // Note: + // The mode info data structure has a one element border above and to the + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. + const MODE_INFO *const left_mi = xd->left_mi; + const int left_type = left_mi ? left_mi->interp_filter : SWITCHABLE_FILTERS; + const MODE_INFO *const above_mi = xd->above_mi; + const int above_type = above_mi ? above_mi->interp_filter + : SWITCHABLE_FILTERS; + + if (left_type == above_type) + return left_type; + else if (left_type == SWITCHABLE_FILTERS) + return above_type; + else if (above_type == SWITCHABLE_FILTERS) + return left_type; + else + return SWITCHABLE_FILTERS; +} static INLINE INTERP_FILTER read_switchable_interp_filter( VP9_COMMON *const cm, MACROBLOCKD *const xd, vpx_reader *r) { - const int ctx = vp9_get_pred_context_switchable_interp(xd); + const int ctx = dec_get_pred_context_switchable_interp(xd); const INTERP_FILTER type = (INTERP_FILTER)vpx_read_tree(r, vp9_switchable_interp_tree, cm->fc->switchable_interp_prob[ctx]); @@ -381,36 +395,39 @@ static INLINE INTERP_FILTER read_switchable_interp_filter( static void read_intra_block_mode_info(VP9_COMMON *const cm, MACROBLOCKD *const xd, MODE_INFO *mi, vpx_reader *r) { - MB_MODE_INFO *const mbmi = &mi->mbmi; - const BLOCK_SIZE bsize = mi->mbmi.sb_type; + const BLOCK_SIZE bsize = mi->sb_type; int i; - mbmi->ref_frame[0] = INTRA_FRAME; - mbmi->ref_frame[1] = NONE; 
- switch (bsize) { case BLOCK_4X4: for (i = 0; i < 4; ++i) mi->bmi[i].as_mode = read_intra_mode_y(cm, xd, r, 0); - mbmi->mode = mi->bmi[3].as_mode; + mi->mode = mi->bmi[3].as_mode; break; case BLOCK_4X8: mi->bmi[0].as_mode = mi->bmi[2].as_mode = read_intra_mode_y(cm, xd, r, 0); - mi->bmi[1].as_mode = mi->bmi[3].as_mode = mbmi->mode = + mi->bmi[1].as_mode = mi->bmi[3].as_mode = mi->mode = read_intra_mode_y(cm, xd, r, 0); break; case BLOCK_8X4: mi->bmi[0].as_mode = mi->bmi[1].as_mode = read_intra_mode_y(cm, xd, r, 0); - mi->bmi[2].as_mode = mi->bmi[3].as_mode = mbmi->mode = + mi->bmi[2].as_mode = mi->bmi[3].as_mode = mi->mode = read_intra_mode_y(cm, xd, r, 0); break; default: - mbmi->mode = read_intra_mode_y(cm, xd, r, size_group_lookup[bsize]); + mi->mode = read_intra_mode_y(cm, xd, r, size_group_lookup[bsize]); } - mbmi->uv_mode = read_intra_mode_uv(cm, xd, r, mbmi->mode); + mi->uv_mode = read_intra_mode_uv(cm, xd, r, mi->mode); + + // Initialize interp_filter here so we do not have to check for inter block + // modes in dec_get_pred_context_switchable_interp() + mi->interp_filter = SWITCHABLE_FILTERS; + + mi->ref_frame[0] = INTRA_FRAME; + mi->ref_frame[1] = NONE; } static INLINE int is_mv_valid(const MV *mv) { @@ -418,10 +435,18 @@ static INLINE int is_mv_valid(const MV *mv) { mv->col > MV_LOW && mv->col < MV_UPP; } +static INLINE void copy_mv_pair(int_mv *dst, const int_mv *src) { + memcpy(dst, src, sizeof(*dst) * 2); +} + +static INLINE void zero_mv_pair(int_mv *dst) { + memset(dst, 0, sizeof(*dst) * 2); +} + static INLINE int assign_mv(VP9_COMMON *cm, MACROBLOCKD *xd, PREDICTION_MODE mode, int_mv mv[2], int_mv ref_mv[2], - int_mv nearest_mv[2], int_mv near_mv[2], + int_mv near_nearest_mv[2], int is_compound, int allow_hp, vpx_reader *r) { int i; int ret = 1; @@ -437,22 +462,13 @@ static INLINE int assign_mv(VP9_COMMON *cm, MACROBLOCKD *xd, } break; } + case NEARMV: case NEARESTMV: { - mv[0].as_int = nearest_mv[0].as_int; - if (is_compound) - mv[1].as_int = 
nearest_mv[1].as_int; - break; - } - case NEARMV: { - mv[0].as_int = near_mv[0].as_int; - if (is_compound) - mv[1].as_int = near_mv[1].as_int; + copy_mv_pair(mv, near_nearest_mv); break; } case ZEROMV: { - mv[0].as_int = 0; - if (is_compound) - mv[1].as_int = 0; + zero_mv_pair(mv); break; } default: { @@ -467,7 +483,7 @@ static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { return get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME) != INTRA_FRAME; } else { - const int ctx = vp9_get_intra_inter_context(xd); + const int ctx = get_intra_inter_context(xd); const int is_inter = vpx_read(r, cm->fc->intra_inter_prob[ctx]); FRAME_COUNTS *counts = xd->counts; if (counts) @@ -476,44 +492,295 @@ static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, } } +static void dec_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *best_mv, + int refmv_count) { + int i; + + // Make sure all the candidates are properly clamped etc + for (i = 0; i < refmv_count; ++i) { + lower_mv_precision(&mvlist[i].as_mv, allow_hp); + *best_mv = mvlist[i]; + } +} + static void fpm_sync(void *const data, int mi_row) { VP9Decoder *const pbi = (VP9Decoder *)data; vp9_frameworker_wait(pbi->frame_worker_owner, pbi->common.prev_frame, mi_row << MI_BLOCK_SIZE_LOG2); } +// This macro is used to add a motion vector mv_ref list if it isn't +// already in the list. If it's the second motion vector or early_break +// it will also skip all additional processing and jump to Done! 
+#define ADD_MV_REF_LIST_EB(mv, refmv_count, mv_ref_list, Done) \ + do { \ + if (refmv_count) { \ + if ((mv).as_int != (mv_ref_list)[0].as_int) { \ + (mv_ref_list)[(refmv_count)] = (mv); \ + refmv_count++; \ + goto Done; \ + } \ + } else { \ + (mv_ref_list)[(refmv_count)++] = (mv); \ + if (early_break) \ + goto Done; \ + } \ + } while (0) + +// If either reference frame is different, not INTRA, and they +// are different from each other scale and add the mv to our list. +#define IF_DIFF_REF_FRAME_ADD_MV_EB(mbmi, ref_frame, ref_sign_bias, \ + refmv_count, mv_ref_list, Done) \ + do { \ + if (is_inter_block(mbmi)) { \ + if ((mbmi)->ref_frame[0] != ref_frame) \ + ADD_MV_REF_LIST_EB(scale_mv((mbmi), 0, ref_frame, ref_sign_bias), \ + refmv_count, mv_ref_list, Done); \ + if (has_second_ref(mbmi) && \ + (mbmi)->ref_frame[1] != ref_frame && \ + (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \ + ADD_MV_REF_LIST_EB(scale_mv((mbmi), 1, ref_frame, ref_sign_bias), \ + refmv_count, mv_ref_list, Done); \ + } \ + } while (0) + +// This function searches the neighborhood of a given MB/SB +// to try and find candidate reference vectors. +static int dec_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, + PREDICTION_MODE mode, MV_REFERENCE_FRAME ref_frame, + const POSITION *const mv_ref_search, + int_mv *mv_ref_list, + int mi_row, int mi_col, int block, int is_sub8x8, + find_mv_refs_sync sync, void *const data) { + const int *ref_sign_bias = cm->ref_frame_sign_bias; + int i, refmv_count = 0; + int different_ref_found = 0; + const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ? + cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL; + const TileInfo *const tile = &xd->tile; + // If mode is nearestmv or newmv (uses nearestmv as a reference) then stop + // searching after the first mv is found. 
+ const int early_break = (mode != NEARMV); + + // Blank the reference vector list + memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES); + + i = 0; + if (is_sub8x8) { + // If the size < 8x8 we get the mv from the bmi substructure for the + // nearest two blocks. + for (i = 0; i < 2; ++i) { + const POSITION *const mv_ref = &mv_ref_search[i]; + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { + const MODE_INFO *const candidate_mi = + xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; + different_ref_found = 1; + + if (candidate_mi->ref_frame[0] == ref_frame) + ADD_MV_REF_LIST_EB( + get_sub_block_mv(candidate_mi, 0, mv_ref->col, block), + refmv_count, mv_ref_list, Done); + else if (candidate_mi->ref_frame[1] == ref_frame) + ADD_MV_REF_LIST_EB( + get_sub_block_mv(candidate_mi, 1, mv_ref->col, block), + refmv_count, mv_ref_list, Done); + } + } + } + + // Check the rest of the neighbors in much the same way + // as before except we don't need to keep track of sub blocks or + // mode counts. + for (; i < MVREF_NEIGHBOURS; ++i) { + const POSITION *const mv_ref = &mv_ref_search[i]; + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { + const MODE_INFO *const candidate = + xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; + different_ref_found = 1; + + if (candidate->ref_frame[0] == ref_frame) + ADD_MV_REF_LIST_EB(candidate->mv[0], refmv_count, mv_ref_list, Done); + else if (candidate->ref_frame[1] == ref_frame) + ADD_MV_REF_LIST_EB(candidate->mv[1], refmv_count, mv_ref_list, Done); + } + } + + // TODO(hkuang): Remove this sync after fixing pthread_cond_broadcast + // on windows platform. The sync here is unnecessary if use_prev_frame_mvs + // is 0. But after removing it, there will be hang in the unit test on windows + // due to several threads waiting for a thread's signal. 
+#if defined(_WIN32) && !HAVE_PTHREAD_H + if (cm->frame_parallel_decode && sync != NULL) { + sync(data, mi_row); + } +#endif + + // Check the last frame's mode and mv info. + if (prev_frame_mvs) { + // Synchronize here for frame parallel decode if sync function is provided. + if (cm->frame_parallel_decode && sync != NULL) { + sync(data, mi_row); + } + + if (prev_frame_mvs->ref_frame[0] == ref_frame) { + ADD_MV_REF_LIST_EB(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done); + } else if (prev_frame_mvs->ref_frame[1] == ref_frame) { + ADD_MV_REF_LIST_EB(prev_frame_mvs->mv[1], refmv_count, mv_ref_list, Done); + } + } + + // Since we couldn't find 2 mvs from the same reference frame + // go back through the neighbors and find motion vectors from + // different reference frames. + if (different_ref_found) { + for (i = 0; i < MVREF_NEIGHBOURS; ++i) { + const POSITION *mv_ref = &mv_ref_search[i]; + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { + const MODE_INFO *const candidate = + xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; + + // If the candidate is INTRA we don't want to consider its mv. + IF_DIFF_REF_FRAME_ADD_MV_EB(candidate, ref_frame, ref_sign_bias, + refmv_count, mv_ref_list, Done); + } + } + } + + // Since we still don't have a candidate we'll try the last frame. 
+ if (prev_frame_mvs) { + if (prev_frame_mvs->ref_frame[0] != ref_frame && + prev_frame_mvs->ref_frame[0] > INTRA_FRAME) { + int_mv mv = prev_frame_mvs->mv[0]; + if (ref_sign_bias[prev_frame_mvs->ref_frame[0]] != + ref_sign_bias[ref_frame]) { + mv.as_mv.row *= -1; + mv.as_mv.col *= -1; + } + ADD_MV_REF_LIST_EB(mv, refmv_count, mv_ref_list, Done); + } + + if (prev_frame_mvs->ref_frame[1] > INTRA_FRAME && + prev_frame_mvs->ref_frame[1] != ref_frame && + prev_frame_mvs->mv[1].as_int != prev_frame_mvs->mv[0].as_int) { + int_mv mv = prev_frame_mvs->mv[1]; + if (ref_sign_bias[prev_frame_mvs->ref_frame[1]] != + ref_sign_bias[ref_frame]) { + mv.as_mv.row *= -1; + mv.as_mv.col *= -1; + } + ADD_MV_REF_LIST_EB(mv, refmv_count, mv_ref_list, Done); + } + } + + if (mode == NEARMV) + refmv_count = MAX_MV_REF_CANDIDATES; + else + // we only care about the nearestmv for the remaining modes + refmv_count = 1; + + Done: + // Clamp vectors + for (i = 0; i < refmv_count; ++i) + clamp_mv_ref(&mv_ref_list[i].as_mv, xd); + + return refmv_count; +} + +static void append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, + const POSITION *const mv_ref_search, + PREDICTION_MODE b_mode, int block, + int ref, int mi_row, int mi_col, + int_mv *best_sub8x8) { + int_mv mv_list[MAX_MV_REF_CANDIDATES]; + MODE_INFO *const mi = xd->mi[0]; + b_mode_info *bmi = mi->bmi; + int n; + int refmv_count; + + assert(MAX_MV_REF_CANDIDATES == 2); + + refmv_count = dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref], + mv_ref_search, mv_list, mi_row, mi_col, block, + 1, NULL, NULL); + + switch (block) { + case 0: + best_sub8x8->as_int = mv_list[refmv_count - 1].as_int; + break; + case 1: + case 2: + if (b_mode == NEARESTMV) { + best_sub8x8->as_int = bmi[0].as_mv[ref].as_int; + } else { + best_sub8x8->as_int = 0; + for (n = 0; n < refmv_count; ++n) + if (bmi[0].as_mv[ref].as_int != mv_list[n].as_int) { + best_sub8x8->as_int = mv_list[n].as_int; + break; + } + } + break; + case 3: + if (b_mode == NEARESTMV) { + 
best_sub8x8->as_int = bmi[2].as_mv[ref].as_int; + } else { + int_mv candidates[2 + MAX_MV_REF_CANDIDATES]; + candidates[0] = bmi[1].as_mv[ref]; + candidates[1] = bmi[0].as_mv[ref]; + candidates[2] = mv_list[0]; + candidates[3] = mv_list[1]; + best_sub8x8->as_int = 0; + for (n = 0; n < 2 + MAX_MV_REF_CANDIDATES; ++n) + if (bmi[2].as_mv[ref].as_int != candidates[n].as_int) { + best_sub8x8->as_int = candidates[n].as_int; + break; + } + } + break; + default: + assert(0 && "Invalid block index."); + } +} + +static uint8_t get_mode_context(const VP9_COMMON *cm, const MACROBLOCKD *xd, + const POSITION *const mv_ref_search, + int mi_row, int mi_col) { + int i; + int context_counter = 0; + const TileInfo *const tile = &xd->tile; + + // Get mode count from nearest 2 blocks + for (i = 0; i < 2; ++i) { + const POSITION *const mv_ref = &mv_ref_search[i]; + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { + const MODE_INFO *const candidate = + xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; + // Keep counts for entropy encoding. 
+ context_counter += mode_2_counter[candidate->mode]; + } + } + + return counter_to_context[context_counter]; +} + static void read_inter_block_mode_info(VP9Decoder *const pbi, MACROBLOCKD *const xd, MODE_INFO *const mi, int mi_row, int mi_col, vpx_reader *r) { VP9_COMMON *const cm = &pbi->common; - MB_MODE_INFO *const mbmi = &mi->mbmi; - const BLOCK_SIZE bsize = mbmi->sb_type; + const BLOCK_SIZE bsize = mi->sb_type; const int allow_hp = cm->allow_high_precision_mv; - int_mv nearestmv[2], nearmv[2]; - int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; + int_mv best_ref_mvs[2]; int ref, is_compound; - uint8_t inter_mode_ctx[MAX_REF_FRAMES]; - - read_ref_frames(cm, xd, r, mbmi->segment_id, mbmi->ref_frame); - is_compound = has_second_ref(mbmi); - - for (ref = 0; ref < 1 + is_compound; ++ref) { - const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref]; - RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME]; - - xd->block_refs[ref] = ref_buf; - if ((!vp9_is_valid_scale(&ref_buf->sf))) - vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM, - "Reference frame has invalid dimensions"); - vp9_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col, - &ref_buf->sf); - vp9_find_mv_refs(cm, xd, mi, frame, ref_mvs[frame], - mi_row, mi_col, fpm_sync, (void *)pbi, inter_mode_ctx); - } + uint8_t inter_mode_ctx; + const POSITION *const mv_ref_search = mv_ref_blocks[bsize]; + + read_ref_frames(cm, xd, r, mi->segment_id, mi->ref_frame); + is_compound = has_second_ref(mi); + inter_mode_ctx = get_mode_context(cm, xd, mv_ref_search, mi_row, mi_col); - if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { - mbmi->mode = ZEROMV; + if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) { + mi->mode = ZEROMV; if (bsize < BLOCK_8X8) { vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM, "Invalid usage of segement feature on small blocks"); @@ -521,18 +788,31 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi, } } else { if (bsize >= 
BLOCK_8X8) - mbmi->mode = read_inter_mode(cm, xd, r, - inter_mode_ctx[mbmi->ref_frame[0]]); - } - - if (bsize < BLOCK_8X8 || mbmi->mode != ZEROMV) { - for (ref = 0; ref < 1 + is_compound; ++ref) { - vp9_find_best_ref_mvs(xd, allow_hp, ref_mvs[mbmi->ref_frame[ref]], - &nearestmv[ref], &nearmv[ref]); + mi->mode = read_inter_mode(cm, xd, r, inter_mode_ctx); + else + // Sub 8x8 blocks use the nearestmv as a ref_mv if the b_mode is NEWMV. + // Setting mode to NEARESTMV forces the search to stop after the nearestmv + // has been found. After b_modes have been read, mode will be overwritten + // by the last b_mode. + mi->mode = NEARESTMV; + + if (mi->mode != ZEROMV) { + for (ref = 0; ref < 1 + is_compound; ++ref) { + int_mv tmp_mvs[MAX_MV_REF_CANDIDATES]; + const MV_REFERENCE_FRAME frame = mi->ref_frame[ref]; + int refmv_count; + + refmv_count = dec_find_mv_refs(cm, xd, mi->mode, frame, mv_ref_search, + tmp_mvs, mi_row, mi_col, -1, 0, + fpm_sync, (void *)pbi); + + dec_find_best_ref_mvs(allow_hp, tmp_mvs, &best_ref_mvs[ref], + refmv_count); + } } } - mbmi->interp_filter = (cm->interp_filter == SWITCHABLE) + mi->interp_filter = (cm->interp_filter == SWITCHABLE) ? 
read_switchable_interp_filter(cm, xd, r) : cm->interp_filter; @@ -541,33 +821,24 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi, const int num_4x4_h = 1 << xd->bmode_blocks_hl; int idx, idy; PREDICTION_MODE b_mode; - int_mv nearest_sub8x8[2], near_sub8x8[2]; + int_mv best_sub8x8[2]; for (idy = 0; idy < 2; idy += num_4x4_h) { for (idx = 0; idx < 2; idx += num_4x4_w) { - int_mv block[2]; const int j = idy * 2 + idx; - b_mode = read_inter_mode(cm, xd, r, inter_mode_ctx[mbmi->ref_frame[0]]); + b_mode = read_inter_mode(cm, xd, r, inter_mode_ctx); if (b_mode == NEARESTMV || b_mode == NEARMV) { - uint8_t dummy_mode_ctx[MAX_REF_FRAMES]; for (ref = 0; ref < 1 + is_compound; ++ref) - vp9_append_sub8x8_mvs_for_idx(cm, xd, j, ref, mi_row, mi_col, - &nearest_sub8x8[ref], - &near_sub8x8[ref], - dummy_mode_ctx); + append_sub8x8_mvs_for_idx(cm, xd, mv_ref_search, b_mode, j, ref, + mi_row, mi_col, &best_sub8x8[ref]); } - if (!assign_mv(cm, xd, b_mode, block, nearestmv, - nearest_sub8x8, near_sub8x8, - is_compound, allow_hp, r)) { + if (!assign_mv(cm, xd, b_mode, mi->bmi[j].as_mv, best_ref_mvs, + best_sub8x8, is_compound, allow_hp, r)) { xd->corrupted |= 1; break; } - mi->bmi[j].as_mv[0].as_int = block[0].as_int; - if (is_compound) - mi->bmi[j].as_mv[1].as_int = block[1].as_int; - if (num_4x4_h == 2) mi->bmi[j + 2] = mi->bmi[j]; if (num_4x4_w == 2) @@ -575,30 +846,28 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi, } } - mi->mbmi.mode = b_mode; + mi->mode = b_mode; - mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int; - mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int; + copy_mv_pair(mi->mv, mi->bmi[3].as_mv); } else { - xd->corrupted |= !assign_mv(cm, xd, mbmi->mode, mbmi->mv, nearestmv, - nearestmv, nearmv, is_compound, allow_hp, r); + xd->corrupted |= !assign_mv(cm, xd, mi->mode, mi->mv, best_ref_mvs, + best_ref_mvs, is_compound, allow_hp, r); } } static void read_inter_frame_mode_info(VP9Decoder *const pbi, MACROBLOCKD *const xd, - int mi_row, int 
mi_col, vpx_reader *r) { + int mi_row, int mi_col, vpx_reader *r, + int x_mis, int y_mis) { VP9_COMMON *const cm = &pbi->common; MODE_INFO *const mi = xd->mi[0]; - MB_MODE_INFO *const mbmi = &mi->mbmi; int inter_block; - mbmi->mv[0].as_int = 0; - mbmi->mv[1].as_int = 0; - mbmi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r); - mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r); - inter_block = read_is_inter_block(cm, xd, mbmi->segment_id, r); - mbmi->tx_size = read_tx_size(cm, xd, !mbmi->skip || !inter_block, r); + mi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r, x_mis, + y_mis); + mi->skip = read_skip(cm, xd, mi->segment_id, r); + inter_block = read_is_inter_block(cm, xd, mi->segment_id, r); + mi->tx_size = read_tx_size(cm, xd, !mi->skip || !inter_block, r); if (inter_block) read_inter_block_mode_info(pbi, xd, mi, mi_row, mi_col, r); @@ -606,7 +875,12 @@ static void read_inter_frame_mode_info(VP9Decoder *const pbi, read_intra_block_mode_info(cm, xd, mi, r); } -void vpx_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd, +static INLINE void copy_ref_frame_pair(MV_REFERENCE_FRAME *dst, + const MV_REFERENCE_FRAME *src) { + memcpy(dst, src, sizeof(*dst) * 2); +} + +void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd, int mi_row, int mi_col, vpx_reader *r, int x_mis, int y_mis) { VP9_COMMON *const cm = &pbi->common; @@ -615,19 +889,23 @@ void vpx_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd, int w, h; if (frame_is_intra_only(cm)) { - read_intra_frame_mode_info(cm, xd, mi_row, mi_col, r); + read_intra_frame_mode_info(cm, xd, mi_row, mi_col, r, x_mis, y_mis); } else { - read_inter_frame_mode_info(pbi, xd, mi_row, mi_col, r); + read_inter_frame_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis); for (h = 0; h < y_mis; ++h) { - MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols; for (w = 0; w < x_mis; ++w) { - MV_REF *const mv = frame_mv + w; - mv->ref_frame[0] = mi->mbmi.ref_frame[0]; - mv->ref_frame[1] = 
mi->mbmi.ref_frame[1]; - mv->mv[0].as_int = mi->mbmi.mv[0].as_int; - mv->mv[1].as_int = mi->mbmi.mv[1].as_int; + MV_REF *const mv = frame_mvs + w; + copy_ref_frame_pair(mv->ref_frame, mi->ref_frame); + copy_mv_pair(mv->mv, mi->mv); } + frame_mvs += cm->mi_cols; } } +#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH + if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && + (xd->above_mi == NULL || xd->left_mi == NULL) && + !is_inter_block(mi) && need_top_left[mi->uv_mode]) + assert(0); +#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH } diff --git a/libvpx/vp9/decoder/vp9_decodemv.h b/libvpx/vp9/decoder/vp9_decodemv.h index 75f568cf1..45569ec81 100644 --- a/libvpx/vp9/decoder/vp9_decodemv.h +++ b/libvpx/vp9/decoder/vp9_decodemv.h @@ -19,7 +19,7 @@ extern "C" { #endif -void vpx_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd, +void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd, int mi_row, int mi_col, vpx_reader *r, int x_mis, int y_mis); diff --git a/libvpx/vp9/decoder/vp9_decoder.c b/libvpx/vp9/decoder/vp9_decoder.c index 4e88819b1..935c04f3a 100644 --- a/libvpx/vp9/decoder/vp9_decoder.c +++ b/libvpx/vp9/decoder/vp9_decoder.c @@ -131,11 +131,12 @@ void vp9_decoder_remove(VP9Decoder *pbi) { vpx_get_worker_interface()->end(&pbi->lf_worker); vpx_free(pbi->lf_worker.data1); - vpx_free(pbi->tile_data); + for (i = 0; i < pbi->num_tile_workers; ++i) { VPxWorker *const worker = &pbi->tile_workers[i]; vpx_get_worker_interface()->end(worker); } + vpx_free(pbi->tile_worker_data); vpx_free(pbi->tile_workers); @@ -213,8 +214,11 @@ vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, // Find an empty frame buffer. 
const int free_fb = get_free_fb(cm); - if (cm->new_fb_idx == INVALID_IDX) - return VPX_CODEC_MEM_ERROR; + if (cm->new_fb_idx == INVALID_IDX) { + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Unable to find free frame buffer"); + return cm->error.error_code; + } // Decrease ref_count since it will be increased again in // ref_cnt_fb() below. @@ -243,7 +247,7 @@ static void swap_frame_buffers(VP9Decoder *pbi) { decrease_ref_count(old_idx, frame_bufs, pool); // Release the reference frame in reference map. - if ((mask & 1) && old_idx >= 0) { + if (mask & 1) { decrease_ref_count(old_idx, frame_bufs, pool); } cm->ref_frame_map[ref_index] = cm->next_ref_frame_map[ref_index]; @@ -305,8 +309,11 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, &frame_bufs[cm->new_fb_idx].raw_frame_buffer); // Find a free frame buffer. Return error if can not find any. cm->new_fb_idx = get_free_fb(cm); - if (cm->new_fb_idx == INVALID_IDX) - return VPX_CODEC_MEM_ERROR; + if (cm->new_fb_idx == INVALID_IDX) { + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Unable to find free frame buffer"); + return cm->error.error_code; + } // Assign a MV array to the frame buffer. cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx]; @@ -350,7 +357,7 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, decrease_ref_count(old_idx, frame_bufs, pool); // Release the reference frame in reference map. 
- if ((mask & 1) && old_idx >= 0) { + if (mask & 1) { decrease_ref_count(old_idx, frame_bufs, pool); } ++ref_index; @@ -501,7 +508,7 @@ vpx_codec_err_t vp9_parse_superframe_index(const uint8_t *data, uint32_t this_sz = 0; for (j = 0; j < mag; ++j) - this_sz |= (*x++) << (j * 8); + this_sz |= ((uint32_t)(*x++)) << (j * 8); sizes[i] = this_sz; } *count = frames; diff --git a/libvpx/vp9/decoder/vp9_decoder.h b/libvpx/vp9/decoder/vp9_decoder.h index 4a5188f8f..7111a36d3 100644 --- a/libvpx/vp9/decoder/vp9_decoder.h +++ b/libvpx/vp9/decoder/vp9_decoder.h @@ -27,15 +27,6 @@ extern "C" { #endif -// TODO(hkuang): combine this with TileWorkerData. -typedef struct TileData { - VP9_COMMON *cm; - vpx_reader bit_reader; - DECLARE_ALIGNED(16, MACROBLOCKD, xd); - /* dqcoeff are shared by all the planes. So planes must be decoded serially */ - DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]); -} TileData; - typedef struct TileBuffer { const uint8_t *data; size_t size; @@ -74,8 +65,6 @@ typedef struct VP9Decoder { TileWorkerData *tile_worker_data; TileBuffer tile_buffers[64]; int num_tile_workers; - - TileData *tile_data; int total_tiles; VP9LfSync lf_row_sync; @@ -128,7 +117,7 @@ void vp9_decoder_remove(struct VP9Decoder *pbi); static INLINE void decrease_ref_count(int idx, RefCntBuffer *const frame_bufs, BufferPool *const pool) { - if (idx >= 0) { + if (idx >= 0 && frame_bufs[idx].ref_count > 0) { --frame_bufs[idx].ref_count; // A worker may only get a free framebuffer index when calling get_free_fb. // But the private buffer is not set up until finish decoding header. 
diff --git a/libvpx/vp9/decoder/vp9_detokenize.c b/libvpx/vp9/decoder/vp9_detokenize.c index 591236530..47dc107fe 100644 --- a/libvpx/vp9/decoder/vp9_detokenize.c +++ b/libvpx/vp9/decoder/vp9_detokenize.c @@ -23,14 +23,6 @@ #define EOB_CONTEXT_NODE 0 #define ZERO_CONTEXT_NODE 1 #define ONE_CONTEXT_NODE 2 -#define LOW_VAL_CONTEXT_NODE 0 -#define TWO_CONTEXT_NODE 1 -#define THREE_CONTEXT_NODE 2 -#define HIGH_LOW_CONTEXT_NODE 3 -#define CAT_ONE_CONTEXT_NODE 4 -#define CAT_THREEFOUR_CONTEXT_NODE 5 -#define CAT_THREE_CONTEXT_NODE 6 -#define CAT_FIVE_CONTEXT_NODE 7 #define INCREMENT_COUNT(token) \ do { \ @@ -53,7 +45,7 @@ static int decode_coefs(const MACROBLOCKD *xd, FRAME_COUNTS *counts = xd->counts; const int max_eob = 16 << (tx_size << 1); const FRAME_CONTEXT *const fc = xd->fc; - const int ref = is_inter_block(&xd->mi[0]->mbmi); + const int ref = is_inter_block(xd->mi[0]); int band, c = 0; const vpx_prob (*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] = fc->coef_probs[tx_size][type][ref]; @@ -65,52 +57,24 @@ static int decode_coefs(const MACROBLOCKD *xd, const int dq_shift = (tx_size == TX_32X32); int v, token; int16_t dqv = dq[0]; - const uint8_t *cat1_prob; - const uint8_t *cat2_prob; - const uint8_t *cat3_prob; - const uint8_t *cat4_prob; - const uint8_t *cat5_prob; - const uint8_t *cat6_prob; + const uint8_t *const cat6_prob = +#if CONFIG_VP9_HIGHBITDEPTH + (xd->bd == VPX_BITS_12) ? vp9_cat6_prob_high12 : + (xd->bd == VPX_BITS_10) ? vp9_cat6_prob_high12 + 2 : +#endif // CONFIG_VP9_HIGHBITDEPTH + vp9_cat6_prob; + const int cat6_bits = +#if CONFIG_VP9_HIGHBITDEPTH + (xd->bd == VPX_BITS_12) ? 18 : + (xd->bd == VPX_BITS_10) ? 
16 : +#endif // CONFIG_VP9_HIGHBITDEPTH + 14; if (counts) { coef_counts = counts->coef[tx_size][type][ref]; eob_branch_count = counts->eob_branch[tx_size][type][ref]; } -#if CONFIG_VP9_HIGHBITDEPTH - if (xd->bd > VPX_BITS_8) { - if (xd->bd == VPX_BITS_10) { - cat1_prob = vp9_cat1_prob_high10; - cat2_prob = vp9_cat2_prob_high10; - cat3_prob = vp9_cat3_prob_high10; - cat4_prob = vp9_cat4_prob_high10; - cat5_prob = vp9_cat5_prob_high10; - cat6_prob = vp9_cat6_prob_high10; - } else { - cat1_prob = vp9_cat1_prob_high12; - cat2_prob = vp9_cat2_prob_high12; - cat3_prob = vp9_cat3_prob_high12; - cat4_prob = vp9_cat4_prob_high12; - cat5_prob = vp9_cat5_prob_high12; - cat6_prob = vp9_cat6_prob_high12; - } - } else { - cat1_prob = vp9_cat1_prob; - cat2_prob = vp9_cat2_prob; - cat3_prob = vp9_cat3_prob; - cat4_prob = vp9_cat4_prob; - cat5_prob = vp9_cat5_prob; - cat6_prob = vp9_cat6_prob; - } -#else - cat1_prob = vp9_cat1_prob; - cat2_prob = vp9_cat2_prob; - cat3_prob = vp9_cat3_prob; - cat4_prob = vp9_cat4_prob; - cat5_prob = vp9_cat5_prob; - cat6_prob = vp9_cat6_prob; -#endif - while (c < max_eob) { int val = -1; band = *band_translate++; @@ -149,39 +113,22 @@ static int decode_coefs(const MACROBLOCKD *xd, val = token; break; case CATEGORY1_TOKEN: - val = CAT1_MIN_VAL + read_coeff(cat1_prob, 1, r); + val = CAT1_MIN_VAL + read_coeff(vp9_cat1_prob, 1, r); break; case CATEGORY2_TOKEN: - val = CAT2_MIN_VAL + read_coeff(cat2_prob, 2, r); + val = CAT2_MIN_VAL + read_coeff(vp9_cat2_prob, 2, r); break; case CATEGORY3_TOKEN: - val = CAT3_MIN_VAL + read_coeff(cat3_prob, 3, r); + val = CAT3_MIN_VAL + read_coeff(vp9_cat3_prob, 3, r); break; case CATEGORY4_TOKEN: - val = CAT4_MIN_VAL + read_coeff(cat4_prob, 4, r); + val = CAT4_MIN_VAL + read_coeff(vp9_cat4_prob, 4, r); break; case CATEGORY5_TOKEN: - val = CAT5_MIN_VAL + read_coeff(cat5_prob, 5, r); + val = CAT5_MIN_VAL + read_coeff(vp9_cat5_prob, 5, r); break; case CATEGORY6_TOKEN: -#if CONFIG_VP9_HIGHBITDEPTH - switch (xd->bd) { - case 
VPX_BITS_8: - val = CAT6_MIN_VAL + read_coeff(cat6_prob, 14, r); - break; - case VPX_BITS_10: - val = CAT6_MIN_VAL + read_coeff(cat6_prob, 16, r); - break; - case VPX_BITS_12: - val = CAT6_MIN_VAL + read_coeff(cat6_prob, 18, r); - break; - default: - assert(0); - return -1; - } -#else - val = CAT6_MIN_VAL + read_coeff(cat6_prob, 14, r); -#endif + val = CAT6_MIN_VAL + read_coeff(cat6_prob, cat6_bits, r); break; } } @@ -205,65 +152,73 @@ static int decode_coefs(const MACROBLOCKD *xd, return c; } -// TODO(slavarnway): Decode version of vp9_set_context. Modify vp9_set_context -// after testing is complete, then delete this version. -static -void dec_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, - TX_SIZE tx_size, int has_eob, - int aoff, int loff) { - ENTROPY_CONTEXT *const a = pd->above_context + aoff; - ENTROPY_CONTEXT *const l = pd->left_context + loff; - const int tx_size_in_blocks = 1 << tx_size; - - // above - if (has_eob && xd->mb_to_right_edge < 0) { - int i; - const int blocks_wide = pd->n4_w + - (xd->mb_to_right_edge >> (5 + pd->subsampling_x)); - int above_contexts = tx_size_in_blocks; - if (above_contexts + aoff > blocks_wide) - above_contexts = blocks_wide - aoff; - - for (i = 0; i < above_contexts; ++i) - a[i] = has_eob; - for (i = above_contexts; i < tx_size_in_blocks; ++i) - a[i] = 0; - } else { - memset(a, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks); +static void get_ctx_shift(MACROBLOCKD *xd, int *ctx_shift_a, int *ctx_shift_l, + int x, int y, unsigned int tx_size_in_blocks) { + if (xd->max_blocks_wide) { + if (tx_size_in_blocks + x > xd->max_blocks_wide) + *ctx_shift_a = (tx_size_in_blocks - (xd->max_blocks_wide - x)) * 8; } - - // left - if (has_eob && xd->mb_to_bottom_edge < 0) { - int i; - const int blocks_high = pd->n4_h + - (xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); - int left_contexts = tx_size_in_blocks; - if (left_contexts + loff > blocks_high) - left_contexts = blocks_high - loff; - - for (i = 0; i < 
left_contexts; ++i) - l[i] = has_eob; - for (i = left_contexts; i < tx_size_in_blocks; ++i) - l[i] = 0; - } else { - memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks); + if (xd->max_blocks_high) { + if (tx_size_in_blocks + y > xd->max_blocks_high) + *ctx_shift_l = (tx_size_in_blocks - (xd->max_blocks_high - y)) * 8; } } -int vp9_decode_block_tokens(MACROBLOCKD *xd, - int plane, const scan_order *sc, - int x, int y, - TX_SIZE tx_size, vpx_reader *r, +int vp9_decode_block_tokens(MACROBLOCKD *xd, int plane, const scan_order *sc, + int x, int y, TX_SIZE tx_size, vpx_reader *r, int seg_id) { struct macroblockd_plane *const pd = &xd->plane[plane]; const int16_t *const dequant = pd->seg_dequant[seg_id]; - const int ctx = get_entropy_context(tx_size, pd->above_context + x, - pd->left_context + y); - const int eob = decode_coefs(xd, get_plane_type(plane), - pd->dqcoeff, tx_size, - dequant, ctx, sc->scan, sc->neighbors, r); - dec_set_contexts(xd, pd, tx_size, eob > 0, x, y); + int eob; + ENTROPY_CONTEXT *a = pd->above_context + x; + ENTROPY_CONTEXT *l = pd->left_context + y; + int ctx; + int ctx_shift_a = 0; + int ctx_shift_l = 0; + + switch (tx_size) { + case TX_4X4: + ctx = a[0] != 0; + ctx += l[0] != 0; + eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, + dequant, ctx, sc->scan, sc->neighbors, r); + a[0] = l[0] = (eob > 0); + break; + case TX_8X8: + get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_8X8); + ctx = !!*(const uint16_t *)a; + ctx += !!*(const uint16_t *)l; + eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, + dequant, ctx, sc->scan, sc->neighbors, r); + *(uint16_t *)a = ((eob > 0) * 0x0101) >> ctx_shift_a; + *(uint16_t *)l = ((eob > 0) * 0x0101) >> ctx_shift_l; + break; + case TX_16X16: + get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_16X16); + ctx = !!*(const uint32_t *)a; + ctx += !!*(const uint32_t *)l; + eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, + dequant, 
ctx, sc->scan, sc->neighbors, r); + *(uint32_t *)a = ((eob > 0) * 0x01010101) >> ctx_shift_a; + *(uint32_t *)l = ((eob > 0) * 0x01010101) >> ctx_shift_l; + break; + case TX_32X32: + get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_32X32); + // NOTE: casting to uint64_t here is safe because the default memory + // alignment is at least 8 bytes and the TX_32X32 is aligned on 8 byte + // boundaries. + ctx = !!*(const uint64_t *)a; + ctx += !!*(const uint64_t *)l; + eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, + dequant, ctx, sc->scan, sc->neighbors, r); + *(uint64_t *)a = ((eob > 0) * 0x0101010101010101ULL) >> ctx_shift_a; + *(uint64_t *)l = ((eob > 0) * 0x0101010101010101ULL) >> ctx_shift_l; + break; + default: + assert(0 && "Invalid transform size."); + eob = 0; + break; + } + return eob; } - - diff --git a/libvpx/vp9/decoder/vp9_dsubexp.c b/libvpx/vp9/decoder/vp9_dsubexp.c index 4fbc6db47..05b38538a 100644 --- a/libvpx/vp9/decoder/vp9_dsubexp.c +++ b/libvpx/vp9/decoder/vp9_dsubexp.c @@ -29,7 +29,7 @@ static int decode_uniform(vpx_reader *r) { } static int inv_remap_prob(int v, int m) { - static int inv_map_table[MAX_PROB] = { + static uint8_t inv_map_table[MAX_PROB] = { 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176, 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27, diff --git a/libvpx/vp9/encoder/vp9_aq_360.c b/libvpx/vp9/encoder/vp9_aq_360.c new file mode 100644 index 000000000..7d411f65d --- /dev/null +++ b/libvpx/vp9/encoder/vp9_aq_360.c @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <math.h> + +#include "vpx_ports/mem.h" +#include "vpx_ports/system_state.h" + +#include "vp9/encoder/vp9_aq_360.h" +#include "vp9/encoder/vp9_aq_variance.h" + +#include "vp9/common/vp9_seg_common.h" + +#include "vp9/encoder/vp9_ratectrl.h" +#include "vp9/encoder/vp9_rd.h" +#include "vp9/encoder/vp9_segmentation.h" + +static const double rate_ratio[MAX_SEGMENTS] = + {1.0, 0.75, 0.6, 0.5, 0.4, 0.3, 0.25}; + +// Sets segment id 0 for the equatorial region, 1 for temperate region +// and 2 for the polar regions +unsigned int vp9_360aq_segment_id(int mi_row, int mi_rows) { + if (mi_row < mi_rows / 8 || mi_row > mi_rows - mi_rows / 8) + return 2; + else if (mi_row < mi_rows / 4 || mi_row > mi_rows - mi_rows / 4) + return 1; + else + return 0; +} + +void vp9_360aq_frame_setup(VP9_COMP *cpi) { + VP9_COMMON *cm = &cpi->common; + struct segmentation *seg = &cm->seg; + int i; + + if (frame_is_intra_only(cm) || cm->error_resilient_mode) { + vp9_enable_segmentation(seg); + vp9_clearall_segfeatures(seg); + + seg->abs_delta = SEGMENT_DELTADATA; + + vpx_clear_system_state(); + + for (i = 0; i < MAX_SEGMENTS; ++i) { + int qindex_delta = + vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, cm->base_qindex, + rate_ratio[i], cm->bit_depth); + + // We don't allow qindex 0 in a segment if the base value is not 0. + // Q index 0 (lossless) implies 4x4 encoding only and in AQ mode a segment + // Q delta is sometimes applied without going back around the rd loop. + // This could lead to an illegal combination of partition size and q. + if ((cm->base_qindex != 0) && ((cm->base_qindex + qindex_delta) == 0)) { + qindex_delta = -cm->base_qindex + 1; + } + + // No need to enable SEG_LVL_ALT_Q for this segment. 
+ if (rate_ratio[i] == 1.0) { + continue; + } + + vp9_set_segdata(seg, i, SEG_LVL_ALT_Q, qindex_delta); + vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q); + } + } +} diff --git a/libvpx/vp9/encoder/vp9_aq_360.h b/libvpx/vp9/encoder/vp9_aq_360.h new file mode 100644 index 000000000..fb861cb05 --- /dev/null +++ b/libvpx/vp9/encoder/vp9_aq_360.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP9_ENCODER_VP9_AQ_360_H_ +#define VP9_ENCODER_VP9_AQ_360_H_ + +#include "vp9/encoder/vp9_encoder.h" + +#ifdef __cplusplus +extern "C" { +#endif + +unsigned int vp9_360aq_segment_id(int mi_row, int mi_rows); +void vp9_360aq_frame_setup(VP9_COMP *cpi); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_ENCODER_VP9_AQ_VARIANCE_H_ diff --git a/libvpx/vp9/encoder/vp9_aq_complexity.c b/libvpx/vp9/encoder/vp9_aq_complexity.c index 30ec19112..2d979ec70 100644 --- a/libvpx/vp9/encoder/vp9_aq_complexity.c +++ b/libvpx/vp9/encoder/vp9_aq_complexity.c @@ -35,9 +35,6 @@ static const double aq_c_var_thresholds[AQ_C_STRENGTHS][AQ_C_SEGMENTS] = {-3.5, -2.5, -1.5, 100.00, 100.0}, {-3.0, -2.0, -1.0, 100.00, 100.0} }; -#define DEFAULT_COMPLEXITY 64 - - static int get_aq_c_strength(int q_index, vpx_bit_depth_t bit_depth) { // Approximate base quatizer (truncated to int) const int base_quant = vp9_ac_quant(q_index, 0, bit_depth) / 4; @@ -51,7 +48,7 @@ void vp9_setup_in_frame_q_adj(VP9_COMP *cpi) { // Make SURE use of floating point in this function is safe. 
vpx_clear_system_state(); - if (cm->frame_type == KEY_FRAME || + if (frame_is_intra_only(cm) || cm->error_resilient_mode || cpi->refresh_alt_ref_frame || (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { int segment; @@ -107,7 +104,6 @@ void vp9_setup_in_frame_q_adj(VP9_COMP *cpi) { #define DEFAULT_LV_THRESH 10.0 #define MIN_DEFAULT_LV_THRESH 8.0 -#define VAR_STRENGTH_STEP 0.25 // Select a segment for the current block. // The choice of segment for a block depends on the ratio of the projected // bits for the block vs a target average and its spatial complexity. diff --git a/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c index 2cd89c0d4..3e1a0a522 100644 --- a/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c +++ b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c @@ -23,73 +23,42 @@ CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) { size_t last_coded_q_map_size; - size_t consec_zero_mv_size; CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr)); if (cr == NULL) return NULL; cr->map = vpx_calloc(mi_rows * mi_cols, sizeof(*cr->map)); if (cr->map == NULL) { - vpx_free(cr); + vp9_cyclic_refresh_free(cr); return NULL; } last_coded_q_map_size = mi_rows * mi_cols * sizeof(*cr->last_coded_q_map); cr->last_coded_q_map = vpx_malloc(last_coded_q_map_size); if (cr->last_coded_q_map == NULL) { - vpx_free(cr); + vp9_cyclic_refresh_free(cr); return NULL; } assert(MAXQ <= 255); memset(cr->last_coded_q_map, MAXQ, last_coded_q_map_size); - - consec_zero_mv_size = mi_rows * mi_cols * sizeof(*cr->consec_zero_mv); - cr->consec_zero_mv = vpx_malloc(consec_zero_mv_size); - if (cr->consec_zero_mv == NULL) { - vpx_free(cr); - return NULL; - } - memset(cr->consec_zero_mv, 0, consec_zero_mv_size); return cr; } void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr) { vpx_free(cr->map); vpx_free(cr->last_coded_q_map); - vpx_free(cr->consec_zero_mv); vpx_free(cr); } -// Check if we should turn off cyclic refresh based on bitrate condition. 
-static int apply_cyclic_refresh_bitrate(const VP9_COMMON *cm, - const RATE_CONTROL *rc) { - // Turn off cyclic refresh if bits available per frame is not sufficiently - // larger than bit cost of segmentation. Segment map bit cost should scale - // with number of seg blocks, so compare available bits to number of blocks. - // Average bits available per frame = avg_frame_bandwidth - // Number of (8x8) blocks in frame = mi_rows * mi_cols; - const float factor = 0.25; - const int number_blocks = cm->mi_rows * cm->mi_cols; - // The condition below corresponds to turning off at target bitrates: - // (at 30fps), ~12kbps for CIF, 36kbps for VGA, 100kps for HD/720p. - // Also turn off at very small frame sizes, to avoid too large fraction of - // superblocks to be refreshed per frame. Threshold below is less than QCIF. - if (rc->avg_frame_bandwidth < factor * number_blocks || - number_blocks / 64 < 5) - return 0; - else - return 1; -} - // Check if this coding block, of size bsize, should be considered for refresh // (lower-qp coding). Decision can be based on various factors, such as // size of the coding block (i.e., below min_block size rejected), coding // mode, and rate/distortion. 
static int candidate_refresh_aq(const CYCLIC_REFRESH *cr, - const MB_MODE_INFO *mbmi, + const MODE_INFO *mi, int64_t rate, int64_t dist, int bsize) { - MV mv = mbmi->mv[0].as_mv; + MV mv = mi->mv[0].as_mv; // Reject the block for lower-qp coding if projected distortion // is above the threshold, and any of the following is true: // 1) mode uses large mv @@ -98,12 +67,12 @@ static int candidate_refresh_aq(const CYCLIC_REFRESH *cr, if (dist > cr->thresh_dist_sb && (mv.row > cr->motion_thresh || mv.row < -cr->motion_thresh || mv.col > cr->motion_thresh || mv.col < -cr->motion_thresh || - !is_inter_block(mbmi))) + !is_inter_block(mi))) return CR_SEGMENT_ID_BASE; else if (bsize >= BLOCK_16X16 && rate < cr->thresh_rate_sb && - is_inter_block(mbmi) && - mbmi->mv[0].as_int == 0 && + is_inter_block(mi) && + mi->mv[0].as_int == 0 && cr->rate_boost_fac > 10) // More aggressive delta-q for bigger blocks with zero motion. return CR_SEGMENT_ID_BOOST2; @@ -186,12 +155,13 @@ int vp9_cyclic_refresh_rc_bits_per_mb(const VP9_COMP *cpi, int i, // check if we should reset the segment_id, and update the cyclic_refresh map // and segmentation map. void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi, - MB_MODE_INFO *const mbmi, + MODE_INFO *const mi, int mi_row, int mi_col, BLOCK_SIZE bsize, int64_t rate, int64_t dist, - int skip) { + int skip, + struct macroblock_plane *const p) { const VP9_COMMON *const cm = &cpi->common; CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; const int bw = num_8x8_blocks_wide_lookup[bsize]; @@ -199,26 +169,44 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi, const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); const int block_index = mi_row * cm->mi_cols + mi_col; - const int refresh_this_block = candidate_refresh_aq(cr, mbmi, rate, dist, - bsize); + int refresh_this_block = candidate_refresh_aq(cr, mi, rate, dist, bsize); // Default is to not update the refresh map. 
int new_map_value = cr->map[block_index]; int x = 0; int y = 0; + int is_skin = 0; + if (refresh_this_block == 0 && + bsize <= BLOCK_16X16 && + cpi->use_skin_detection) { + is_skin = vp9_compute_skin_block(p[0].src.buf, + p[1].src.buf, + p[2].src.buf, + p[0].src.stride, + p[1].src.stride, + bsize, + 0, + 0); + if (is_skin) + refresh_this_block = 1; + } + + if (cpi->oxcf.rc_mode == VPX_VBR && mi->ref_frame[0] == GOLDEN_FRAME) + refresh_this_block = 0; + // If this block is labeled for refresh, check if we should reset the // segment_id. - if (cyclic_refresh_segment_id_boosted(mbmi->segment_id)) { - mbmi->segment_id = refresh_this_block; + if (cyclic_refresh_segment_id_boosted(mi->segment_id)) { + mi->segment_id = refresh_this_block; // Reset segment_id if it will be skipped. if (skip) - mbmi->segment_id = CR_SEGMENT_ID_BASE; + mi->segment_id = CR_SEGMENT_ID_BASE; } // Update the cyclic refresh map, to be used for setting segmentation map // for the next frame. If the block will be refreshed this frame, mark it // as clean. The magnitude of the -ve influences how long before we consider // it for refresh again. 
- if (cyclic_refresh_segment_id_boosted(mbmi->segment_id)) { + if (cyclic_refresh_segment_id_boosted(mi->segment_id)) { new_map_value = -cr->time_for_refresh; } else if (refresh_this_block) { // Else if it is accepted as candidate for refresh, and has not already @@ -237,17 +225,16 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi, for (x = 0; x < xmis; x++) { int map_offset = block_index + y * cm->mi_cols + x; cr->map[map_offset] = new_map_value; - cpi->segmentation_map[map_offset] = mbmi->segment_id; + cpi->segmentation_map[map_offset] = mi->segment_id; } } void vp9_cyclic_refresh_update_sb_postencode(VP9_COMP *const cpi, - const MB_MODE_INFO *const mbmi, + const MODE_INFO *const mi, int mi_row, int mi_col, BLOCK_SIZE bsize) { const VP9_COMMON *const cm = &cpi->common; CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; - MV mv = mbmi->mv[0].as_mv; const int bw = num_8x8_blocks_wide_lookup[bsize]; const int bh = num_8x8_blocks_high_lookup[bsize]; const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); @@ -261,25 +248,18 @@ void vp9_cyclic_refresh_update_sb_postencode(VP9_COMP *const cpi, // don't update the map for them. For cases where motion is non-zero or // the reference frame isn't the previous frame, the previous value in // the map for this spatial location is not entirely correct. 
- if ((!is_inter_block(mbmi) || !mbmi->skip) && - mbmi->segment_id <= CR_SEGMENT_ID_BOOST2) { + if ((!is_inter_block(mi) || !mi->skip) && + mi->segment_id <= CR_SEGMENT_ID_BOOST2) { cr->last_coded_q_map[map_offset] = clamp( - cm->base_qindex + cr->qindex_delta[mbmi->segment_id], 0, MAXQ); - } else if (is_inter_block(mbmi) && mbmi->skip && - mbmi->segment_id <= CR_SEGMENT_ID_BOOST2) { + cm->base_qindex + cr->qindex_delta[mi->segment_id], 0, MAXQ); + } else if (is_inter_block(mi) && mi->skip && + mi->segment_id <= CR_SEGMENT_ID_BOOST2) { cr->last_coded_q_map[map_offset] = VPXMIN( - clamp(cm->base_qindex + cr->qindex_delta[mbmi->segment_id], + clamp(cm->base_qindex + cr->qindex_delta[mi->segment_id], 0, MAXQ), cr->last_coded_q_map[map_offset]); - // Update the consecutive zero/low_mv count. - if (is_inter_block(mbmi) && (abs(mv.row) < 8 && abs(mv.col) < 8)) { - if (cr->consec_zero_mv[map_offset] < 255) - cr->consec_zero_mv[map_offset]++; - } else { - cr->consec_zero_mv[map_offset] = 0; } } - } } // Update the actual number of blocks that were applied the segment delta q. @@ -305,13 +285,15 @@ void vp9_cyclic_refresh_postencode(VP9_COMP *const cpi) { void vp9_cyclic_refresh_set_golden_update(VP9_COMP *const cpi) { RATE_CONTROL *const rc = &cpi->rc; CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; - // Set minimum gf_interval for GF update to a multiple (== 2) of refresh - // period. Depending on past encoding stats, GF flag may be reset and update - // may not occur until next baseline_gf_interval. + // Set minimum gf_interval for GF update to a multiple of the refresh period, + // with some max limit. Depending on past encoding stats, GF flag may be + // reset and update may not occur until next baseline_gf_interval. 
if (cr->percent_refresh > 0) - rc->baseline_gf_interval = 4 * (100 / cr->percent_refresh); + rc->baseline_gf_interval = VPXMIN(4 * (100 / cr->percent_refresh), 40); else rc->baseline_gf_interval = 40; + if (cpi->oxcf.rc_mode == VPX_VBR) + rc->baseline_gf_interval = 20; } // Update some encoding stats (from the just encoded frame). If this frame's @@ -324,42 +306,40 @@ void vp9_cyclic_refresh_check_golden_update(VP9_COMP *const cpi) { int mi_row, mi_col; double fraction_low = 0.0; int low_content_frame = 0; - MODE_INFO **mi = cm->mi_grid_visible; RATE_CONTROL *const rc = &cpi->rc; const int rows = cm->mi_rows, cols = cm->mi_cols; int cnt1 = 0, cnt2 = 0; int force_gf_refresh = 0; - + int flag_force_gf_high_motion = 0; for (mi_row = 0; mi_row < rows; mi_row++) { for (mi_col = 0; mi_col < cols; mi_col++) { - int16_t abs_mvr = mi[0]->mbmi.mv[0].as_mv.row >= 0 ? - mi[0]->mbmi.mv[0].as_mv.row : -1 * mi[0]->mbmi.mv[0].as_mv.row; - int16_t abs_mvc = mi[0]->mbmi.mv[0].as_mv.col >= 0 ? - mi[0]->mbmi.mv[0].as_mv.col : -1 * mi[0]->mbmi.mv[0].as_mv.col; - - // Calculate the motion of the background. - if (abs_mvr <= 16 && abs_mvc <= 16) { - cnt1++; - if (abs_mvr == 0 && abs_mvc == 0) - cnt2++; + if (flag_force_gf_high_motion == 1) { + int16_t abs_mvr = mi[0]->mv[0].as_mv.row >= 0 ? + mi[0]->mv[0].as_mv.row : -1 * mi[0]->mv[0].as_mv.row; + int16_t abs_mvc = mi[0]->mv[0].as_mv.col >= 0 ? + mi[0]->mv[0].as_mv.col : -1 * mi[0]->mv[0].as_mv.col; + // Calculate the motion of the background. + if (abs_mvr <= 16 && abs_mvc <= 16) { + cnt1++; + if (abs_mvr == 0 && abs_mvc == 0) + cnt2++; + } } mi++; - // Accumulate low_content_frame. if (cr->map[mi_row * cols + mi_col] < 1) low_content_frame++; } mi += 8; } - // For video conference clips, if the background has high motion in current // frame because of the camera movement, set this frame as the golden frame. // Use 70% and 5% as the thresholds for golden frame refreshing. 
// Also, force this frame as a golden update frame if this frame will change // the resolution (resize_pending != 0). if (cpi->resize_pending != 0 || - (cnt1 * 10 > (70 * rows * cols) && cnt2 * 20 < cnt1)) { + (cnt1 * 100 > (70 * rows * cols) && cnt2 * 20 < cnt1)) { vp9_cyclic_refresh_set_golden_update(cpi); rc->frames_till_gf_update_due = rc->baseline_gf_interval; @@ -368,7 +348,6 @@ void vp9_cyclic_refresh_check_golden_update(VP9_COMP *const cpi) { cpi->refresh_golden_frame = 1; force_gf_refresh = 1; } - fraction_low = (double)low_content_frame / (rows * cols); // Update average. @@ -412,12 +391,20 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) { assert(cr->sb_index < sbs_in_frame); i = cr->sb_index; cr->target_num_seg_blocks = 0; - if (cpi->oxcf.content != VP9E_CONTENT_SCREEN) + if (cpi->oxcf.content != VP9E_CONTENT_SCREEN) { consec_zero_mv_thresh = 100; + } qindex_thresh = cpi->oxcf.content == VP9E_CONTENT_SCREEN ? vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex) : vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST1, cm->base_qindex); + // More aggressive settings for noisy content. + if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium) { + consec_zero_mv_thresh = 80; + qindex_thresh = + VPXMAX(vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST1, cm->base_qindex), + 7 * cm->base_qindex >> 3); + } do { int sum_map = 0; // Get the mi_row/mi_col corresponding to superblock index i. 
@@ -442,7 +429,7 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) { if (cr->map[bl_index2] == 0) { count_tot++; if (cr->last_coded_q_map[bl_index2] > qindex_thresh || - cr->consec_zero_mv[bl_index2] < consec_zero_mv_thresh) { + cpi->consec_zero_mv[bl_index2] < consec_zero_mv_thresh) { sum_map++; count_sel++; } @@ -481,29 +468,46 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) { cr->percent_refresh = 5; cr->max_qdelta_perc = 50; cr->time_for_refresh = 0; + cr->motion_thresh = 32; + cr->rate_boost_fac = 15; // Use larger delta-qp (increase rate_ratio_qdelta) for first few (~4) // periods of the refresh cycle, after a key frame. // Account for larger interval on base layer for temporal layers. if (cr->percent_refresh > 0 && rc->frames_since_key < (4 * cpi->svc.number_temporal_layers) * - (100 / cr->percent_refresh)) + (100 / cr->percent_refresh)) { cr->rate_ratio_qdelta = 3.0; - else + } else { cr->rate_ratio_qdelta = 2.0; + if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium) { + // Reduce the delta-qp if the estimated source noise is above threshold. + cr->rate_ratio_qdelta = 1.7; + cr->rate_boost_fac = 13; + } + } // Adjust some parameters for low resolutions at low bitrates. if (cm->width <= 352 && cm->height <= 288 && rc->avg_frame_bandwidth < 3400) { cr->motion_thresh = 4; cr->rate_boost_fac = 10; - } else { - cr->motion_thresh = 32; - cr->rate_boost_fac = 15; } if (cpi->svc.spatial_layer_id > 0) { cr->motion_thresh = 4; cr->rate_boost_fac = 12; } + if (cpi->oxcf.rc_mode == VPX_VBR) { + // To be adjusted for VBR mode, e.g., based on gf period and boost. + // For now use smaller qp-delta (than CBR), no second boosted seg, and + // turn-off (no refresh) on golden refresh (since it's already boosted). 
+ cr->percent_refresh = 10; + cr->rate_ratio_qdelta = 1.5; + cr->rate_boost_fac = 10; + if (cpi->refresh_golden_frame == 1) { + cr->percent_refresh = 0; + cr->rate_ratio_qdelta = 1.0; + } + } } // Setup cyclic background refresh: set delta q and segmentation map. @@ -512,7 +516,10 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { const RATE_CONTROL *const rc = &cpi->rc; CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; struct segmentation *const seg = &cm->seg; - const int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cm, rc); + // TODO(marpan): Look into whether we should reduce the amount/delta-qp + // instead of completely shutting off at low bitrates. For now keep it on. + // const int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cm, rc); + const int apply_cyclic_refresh = 1; if (cm->current_video_frame == 0) cr->low_content_avg = 0.0; // Don't apply refresh on key frame or temporal enhancement layer frames. @@ -526,8 +533,6 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { if (cm->frame_type == KEY_FRAME) { memset(cr->last_coded_q_map, MAXQ, cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map)); - memset(cr->consec_zero_mv, 0, - cm->mi_rows * cm->mi_cols * sizeof(*cr->consec_zero_mv)); cr->sb_index = 0; } return; @@ -602,7 +607,7 @@ void vp9_cyclic_refresh_reset_resize(VP9_COMP *const cpi) { CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; memset(cr->map, 0, cm->mi_rows * cm->mi_cols); memset(cr->last_coded_q_map, MAXQ, cm->mi_rows * cm->mi_cols); - memset(cr->consec_zero_mv, 0, cm->mi_rows * cm->mi_cols); cr->sb_index = 0; cpi->refresh_golden_frame = 1; + cpi->refresh_alt_ref_frame = 1; } diff --git a/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h index a5b38138b..35eea182f 100644 --- a/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h +++ b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h @@ -14,6 +14,8 @@ #include "vpx/vpx_integer.h" #include "vp9/common/vp9_blockd.h" +#include "vp9/encoder/vp9_block.h" 
+#include "vp9/encoder/vp9_skin_detection.h" #ifdef __cplusplus extern "C" { @@ -51,8 +53,6 @@ struct CYCLIC_REFRESH { signed char *map; // Map of the last q a block was coded at. uint8_t *last_coded_q_map; - // Count on how many consecutive times a block uses ZER0MV for encoding. - uint8_t *consec_zero_mv; // Thresholds applied to the projected rate/distortion of the coding block, // when deciding whether block should be refreshed. int64_t thresh_rate_sb; @@ -91,12 +91,13 @@ int vp9_cyclic_refresh_rc_bits_per_mb(const struct VP9_COMP *cpi, int i, // check if we should reset the segment_id, and update the cyclic_refresh map // and segmentation map. void vp9_cyclic_refresh_update_segment(struct VP9_COMP *const cpi, - MB_MODE_INFO *const mbmi, + MODE_INFO *const mi, int mi_row, int mi_col, BLOCK_SIZE bsize, - int64_t rate, int64_t dist, int skip); + int64_t rate, int64_t dist, int skip, + struct macroblock_plane *const p); void vp9_cyclic_refresh_update_sb_postencode(struct VP9_COMP *const cpi, - const MB_MODE_INFO *const mbmi, + const MODE_INFO *const mi, int mi_row, int mi_col, BLOCK_SIZE bsize); diff --git a/libvpx/vp9/encoder/vp9_aq_variance.c b/libvpx/vp9/encoder/vp9_aq_variance.c index 1c99105d1..59ef5faa6 100644 --- a/libvpx/vp9/encoder/vp9_aq_variance.c +++ b/libvpx/vp9/encoder/vp9_aq_variance.c @@ -48,7 +48,7 @@ void vp9_vaq_frame_setup(VP9_COMP *cpi) { struct segmentation *seg = &cm->seg; int i; - if (cm->frame_type == KEY_FRAME || + if (frame_is_intra_only(cm) || cm->error_resilient_mode || cpi->refresh_alt_ref_frame || (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { vp9_enable_segmentation(seg); @@ -167,7 +167,7 @@ static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x, vp9_64_zeros, 0, bw, bh, &sse, &avg); #endif // CONFIG_VP9_HIGHBITDEPTH var = sse - (((int64_t)avg * avg) / (bw * bh)); - return (256 * var) / (bw * bh); + return (unsigned int)(((uint64_t)256 * var) / (bw * bh)); } else { #if CONFIG_VP9_HIGHBITDEPTH if 
(xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { @@ -185,7 +185,7 @@ static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x, x->plane[0].src.stride, vp9_64_zeros, 0, &sse); #endif // CONFIG_VP9_HIGHBITDEPTH - return (256 * var) >> num_pels_log2_lookup[bs]; + return (unsigned int)(((uint64_t)256 * var) >> num_pels_log2_lookup[bs]); } } diff --git a/libvpx/vp9/encoder/vp9_bitstream.c b/libvpx/vp9/encoder/vp9_bitstream.c index 461555438..73a2db09a 100644 --- a/libvpx/vp9/encoder/vp9_bitstream.c +++ b/libvpx/vp9/encoder/vp9_bitstream.c @@ -79,8 +79,8 @@ static void prob_diff_update(const vpx_tree_index *tree, static void write_selected_tx_size(const VP9_COMMON *cm, const MACROBLOCKD *xd, vpx_writer *w) { - TX_SIZE tx_size = xd->mi[0]->mbmi.tx_size; - BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; + TX_SIZE tx_size = xd->mi[0]->tx_size; + BLOCK_SIZE bsize = xd->mi[0]->sb_type; const TX_SIZE max_tx_size = max_txsize_lookup[bsize]; const vpx_prob *const tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs); @@ -97,7 +97,7 @@ static int write_skip(const VP9_COMMON *cm, const MACROBLOCKD *xd, if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) { return 1; } else { - const int skip = mi->mbmi.skip; + const int skip = mi->skip; vpx_write(w, skip, vp9_get_skip_prob(cm, xd)); return skip; } @@ -123,72 +123,66 @@ static void update_switchable_interp_probs(VP9_COMMON *cm, vpx_writer *w, static void pack_mb_tokens(vpx_writer *w, TOKENEXTRA **tp, const TOKENEXTRA *const stop, vpx_bit_depth_t bit_depth) { - TOKENEXTRA *p = *tp; - - while (p < stop && p->token != EOSB_TOKEN) { - const int t = p->token; - const struct vp9_token *const a = &vp9_coef_encodings[t]; - int i = 0; - int v = a->value; - int n = a->len; + const TOKENEXTRA *p; + const vp9_extra_bit *const extra_bits = #if CONFIG_VP9_HIGHBITDEPTH - const vp9_extra_bit *b; - if (bit_depth == VPX_BITS_12) - b = &vp9_extra_bits_high12[t]; - else if (bit_depth == VPX_BITS_10) - b = &vp9_extra_bits_high10[t]; - 
else - b = &vp9_extra_bits[t]; + (bit_depth == VPX_BITS_12) ? vp9_extra_bits_high12 : + (bit_depth == VPX_BITS_10) ? vp9_extra_bits_high10 : + vp9_extra_bits; #else - const vp9_extra_bit *const b = &vp9_extra_bits[t]; + vp9_extra_bits; (void) bit_depth; #endif // CONFIG_VP9_HIGHBITDEPTH - /* skip one or two nodes */ - if (p->skip_eob_node) { - n -= p->skip_eob_node; - i = 2 * p->skip_eob_node; + for (p = *tp; p < stop && p->token != EOSB_TOKEN; ++p) { + if (p->token == EOB_TOKEN) { + vpx_write(w, 0, p->context_tree[0]); + continue; } - - // TODO(jbb): expanding this can lead to big gains. It allows - // much better branch prediction and would enable us to avoid numerous - // lookups and compares. - - // If we have a token that's in the constrained set, the coefficient tree - // is split into two treed writes. The first treed write takes care of the - // unconstrained nodes. The second treed write takes care of the - // constrained nodes. - if (t >= TWO_TOKEN && t < EOB_TOKEN) { - int len = UNCONSTRAINED_NODES - p->skip_eob_node; - int bits = v >> (n - len); - vp9_write_tree(w, vp9_coef_tree, p->context_tree, bits, len, i); - vp9_write_tree(w, vp9_coef_con_tree, - vp9_pareto8_full[p->context_tree[PIVOT_NODE] - 1], - v, n - len, 0); - } else { - vp9_write_tree(w, vp9_coef_tree, p->context_tree, v, n, i); + vpx_write(w, 1, p->context_tree[0]); + while (p->token == ZERO_TOKEN) { + vpx_write(w, 0, p->context_tree[1]); + ++p; + if (p == stop || p->token == EOSB_TOKEN) { + *tp = (TOKENEXTRA*)(uintptr_t)p + (p->token == EOSB_TOKEN); + return; + } } - if (b->base_val) { - const int e = p->extra, l = b->len; - - if (l) { - const unsigned char *pb = b->prob; - int v = e >> 1; - int n = l; /* number of bits in v, assumed nonzero */ - - do { - const int bb = (v >> --n) & 1; - vpx_write(w, bb, *pb++); - } while (n); + { + const int t = p->token; + const vpx_prob *const context_tree = p->context_tree; + assert(t != ZERO_TOKEN); + assert(t != EOB_TOKEN); + assert(t != EOSB_TOKEN); 
+ vpx_write(w, 1, context_tree[1]); + if (t == ONE_TOKEN) { + vpx_write(w, 0, context_tree[2]); + vpx_write_bit(w, p->extra & 1); + } else { // t >= TWO_TOKEN && t < EOB_TOKEN + const struct vp9_token *const a = &vp9_coef_encodings[t]; + const int v = a->value; + const int n = a->len; + const int e = p->extra; + vpx_write(w, 1, context_tree[2]); + vp9_write_tree(w, vp9_coef_con_tree, + vp9_pareto8_full[context_tree[PIVOT_NODE] - 1], v, + n - UNCONSTRAINED_NODES, 0); + if (t >= CATEGORY1_TOKEN) { + const vp9_extra_bit *const b = &extra_bits[t]; + const unsigned char *pb = b->prob; + int v = e >> 1; + int n = b->len; // number of bits in v, assumed nonzero + do { + const int bb = (v >> --n) & 1; + vpx_write(w, bb, *pb++); + } while (n); + } + vpx_write_bit(w, e & 1); } - - vpx_write_bit(w, e & 1); } - ++p; } - - *tp = p + (p->token == EOSB_TOKEN); + *tp = (TOKENEXTRA*)(uintptr_t)p + (p->token == EOSB_TOKEN); } static void write_segment_id(vpx_writer *w, const struct segmentation *seg, @@ -200,15 +194,15 @@ static void write_segment_id(vpx_writer *w, const struct segmentation *seg, // This function encodes the reference frame static void write_ref_frames(const VP9_COMMON *cm, const MACROBLOCKD *xd, vpx_writer *w) { - const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; - const int is_compound = has_second_ref(mbmi); - const int segment_id = mbmi->segment_id; + const MODE_INFO *const mi = xd->mi[0]; + const int is_compound = has_second_ref(mi); + const int segment_id = mi->segment_id; // If segment level coding of this signal is disabled... 
// or the segment allows multiple reference frame options if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { assert(!is_compound); - assert(mbmi->ref_frame[0] == + assert(mi->ref_frame[0] == get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME)); } else { // does the feature use compound prediction or not @@ -220,13 +214,13 @@ static void write_ref_frames(const VP9_COMMON *cm, const MACROBLOCKD *xd, } if (is_compound) { - vpx_write(w, mbmi->ref_frame[0] == GOLDEN_FRAME, + vpx_write(w, mi->ref_frame[0] == GOLDEN_FRAME, vp9_get_pred_prob_comp_ref_p(cm, xd)); } else { - const int bit0 = mbmi->ref_frame[0] != LAST_FRAME; + const int bit0 = mi->ref_frame[0] != LAST_FRAME; vpx_write(w, bit0, vp9_get_pred_prob_single_ref_p1(cm, xd)); if (bit0) { - const int bit1 = mbmi->ref_frame[0] != GOLDEN_FRAME; + const int bit1 = mi->ref_frame[0] != GOLDEN_FRAME; vpx_write(w, bit1, vp9_get_pred_prob_single_ref_p2(cm, xd)); } } @@ -240,19 +234,18 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi, const MACROBLOCK *const x = &cpi->td.mb; const MACROBLOCKD *const xd = &x->e_mbd; const struct segmentation *const seg = &cm->seg; - const MB_MODE_INFO *const mbmi = &mi->mbmi; const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; - const PREDICTION_MODE mode = mbmi->mode; - const int segment_id = mbmi->segment_id; - const BLOCK_SIZE bsize = mbmi->sb_type; + const PREDICTION_MODE mode = mi->mode; + const int segment_id = mi->segment_id; + const BLOCK_SIZE bsize = mi->sb_type; const int allow_hp = cm->allow_high_precision_mv; - const int is_inter = is_inter_block(mbmi); - const int is_compound = has_second_ref(mbmi); + const int is_inter = is_inter_block(mi); + const int is_compound = has_second_ref(mi); int skip, ref; if (seg->update_map) { if (seg->temporal_update) { - const int pred_flag = mbmi->seg_id_predicted; + const int pred_flag = mi->seg_id_predicted; vpx_prob pred_prob = vp9_get_pred_prob_seg_id(seg, xd); vpx_write(w, pred_flag, pred_prob); if 
(!pred_flag) @@ -286,9 +279,9 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi, } } } - write_intra_mode(w, mbmi->uv_mode, cm->fc->uv_mode_prob[mode]); + write_intra_mode(w, mi->uv_mode, cm->fc->uv_mode_prob[mode]); } else { - const int mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]]; + const int mode_ctx = mbmi_ext->mode_context[mi->ref_frame[0]]; const vpx_prob *const inter_probs = cm->fc->inter_mode_probs[mode_ctx]; write_ref_frames(cm, xd, w); @@ -303,10 +296,10 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi, const int ctx = vp9_get_pred_context_switchable_interp(xd); vp9_write_token(w, vp9_switchable_interp_tree, cm->fc->switchable_interp_prob[ctx], - &switchable_interp_encodings[mbmi->interp_filter]); - ++cpi->interp_filter_selected[0][mbmi->interp_filter]; + &switchable_interp_encodings[mi->interp_filter]); + ++cpi->interp_filter_selected[0][mi->interp_filter]; } else { - assert(mbmi->interp_filter == cm->interp_filter); + assert(mi->interp_filter == cm->interp_filter); } if (bsize < BLOCK_8X8) { @@ -321,7 +314,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi, if (b_mode == NEWMV) { for (ref = 0; ref < 1 + is_compound; ++ref) vp9_encode_mv(cpi, w, &mi->bmi[j].as_mv[ref].as_mv, - &mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0].as_mv, + &mbmi_ext->ref_mvs[mi->ref_frame[ref]][0].as_mv, nmvc, allow_hp); } } @@ -329,8 +322,8 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi, } else { if (mode == NEWMV) { for (ref = 0; ref < 1 + is_compound; ++ref) - vp9_encode_mv(cpi, w, &mbmi->mv[ref].as_mv, - &mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0].as_mv, nmvc, + vp9_encode_mv(cpi, w, &mi->mv[ref].as_mv, + &mbmi_ext->ref_mvs[mi->ref_frame[ref]][0].as_mv, nmvc, allow_hp); } } @@ -343,19 +336,18 @@ static void write_mb_modes_kf(const VP9_COMMON *cm, const MACROBLOCKD *xd, const MODE_INFO *const mi = mi_8x8[0]; const MODE_INFO *const above_mi = xd->above_mi; const MODE_INFO *const left_mi 
= xd->left_mi; - const MB_MODE_INFO *const mbmi = &mi->mbmi; - const BLOCK_SIZE bsize = mbmi->sb_type; + const BLOCK_SIZE bsize = mi->sb_type; if (seg->update_map) - write_segment_id(w, seg, mbmi->segment_id); + write_segment_id(w, seg, mi->segment_id); - write_skip(cm, xd, mbmi->segment_id, mi, w); + write_skip(cm, xd, mi->segment_id, mi, w); if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT) write_selected_tx_size(cm, xd, w); if (bsize >= BLOCK_8X8) { - write_intra_mode(w, mbmi->mode, get_y_mode_probs(mi, above_mi, left_mi, 0)); + write_intra_mode(w, mi->mode, get_y_mode_probs(mi, above_mi, left_mi, 0)); } else { const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; @@ -370,7 +362,7 @@ static void write_mb_modes_kf(const VP9_COMMON *cm, const MACROBLOCKD *xd, } } - write_intra_mode(w, mbmi->uv_mode, vp9_kf_uv_mode_prob[mbmi->mode]); + write_intra_mode(w, mi->uv_mode, vp9_kf_uv_mode_prob[mi->mode]); } static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile, @@ -388,8 +380,8 @@ static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile, (mi_row * cm->mi_cols + mi_col); set_mi_row_col(xd, tile, - mi_row, num_8x8_blocks_high_lookup[m->mbmi.sb_type], - mi_col, num_8x8_blocks_wide_lookup[m->mbmi.sb_type], + mi_row, num_8x8_blocks_high_lookup[m->sb_type], + mi_col, num_8x8_blocks_wide_lookup[m->sb_type], cm->mi_rows, cm->mi_cols); if (frame_is_intra_only(cm)) { write_mb_modes_kf(cm, xd, xd->mi, w); @@ -441,7 +433,7 @@ static void write_modes_sb(VP9_COMP *cpi, m = cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]; - partition = partition_lookup[bsl][m->mbmi.sb_type]; + partition = partition_lookup[bsl][m->sb_type]; write_partition(cm, xd, bs, mi_row, mi_col, partition, bsize, w); subsize = get_subsize(bsize, partition); if (subsize < BLOCK_8X8) { @@ -553,8 +545,8 @@ static void update_coef_probs_common(vpx_writer* const bc, VP9_COMP *cpi, int u = 0; if (t == PIVOT_NODE) s = 
vp9_prob_diff_update_savings_search_model( - frame_branch_ct[i][j][k][l][0], - old_coef_probs[i][j][k][l], &newp, upd, stepsize); + frame_branch_ct[i][j][k][l][0], oldp, &newp, upd, + stepsize); else s = vp9_prob_diff_update_savings_search( frame_branch_ct[i][j][k][l][t], oldp, &newp, upd); @@ -592,7 +584,7 @@ static void update_coef_probs_common(vpx_writer* const bc, VP9_COMP *cpi, if (t == PIVOT_NODE) s = vp9_prob_diff_update_savings_search_model( frame_branch_ct[i][j][k][l][0], - old_coef_probs[i][j][k][l], &newp, upd, stepsize); + *oldp, &newp, upd, stepsize); else s = vp9_prob_diff_update_savings_search( frame_branch_ct[i][j][k][l][t], @@ -630,7 +622,7 @@ static void update_coef_probs_common(vpx_writer* const bc, VP9_COMP *cpi, if (t == PIVOT_NODE) { s = vp9_prob_diff_update_savings_search_model( frame_branch_ct[i][j][k][l][0], - old_coef_probs[i][j][k][l], &newp, upd, stepsize); + *oldp, &newp, upd, stepsize); } else { s = vp9_prob_diff_update_savings_search( frame_branch_ct[i][j][k][l][t], @@ -899,7 +891,7 @@ static void write_tile_info(const VP9_COMMON *const cm, vpx_wb_write_bit(wb, cm->log2_tile_rows != 1); } -static int get_refresh_mask(VP9_COMP *cpi) { +int vp9_get_refresh_mask(VP9_COMP *cpi) { if (vp9_preserve_existing_gf(cpi)) { // We have decided to preserve the previously existing golden frame as our // new ARF frame. 
However, in the short term we leave it in the GF slot and, @@ -1115,11 +1107,11 @@ static void write_uncompressed_header(VP9_COMP *cpi, write_bitdepth_colorspace_sampling(cm, wb); } - vpx_wb_write_literal(wb, get_refresh_mask(cpi), REF_FRAMES); + vpx_wb_write_literal(wb, vp9_get_refresh_mask(cpi), REF_FRAMES); write_frame_size(cm, wb); } else { MV_REFERENCE_FRAME ref_frame; - vpx_wb_write_literal(wb, get_refresh_mask(cpi), REF_FRAMES); + vpx_wb_write_literal(wb, vp9_get_refresh_mask(cpi), REF_FRAMES); for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { assert(get_ref_frame_map_idx(cpi, ref_frame) != INVALID_IDX); vpx_wb_write_literal(wb, get_ref_frame_map_idx(cpi, ref_frame), diff --git a/libvpx/vp9/encoder/vp9_bitstream.h b/libvpx/vp9/encoder/vp9_bitstream.h index da6b41464..f24d20f31 100644 --- a/libvpx/vp9/encoder/vp9_bitstream.h +++ b/libvpx/vp9/encoder/vp9_bitstream.h @@ -18,6 +18,8 @@ extern "C" { #include "vp9/encoder/vp9_encoder.h" +int vp9_get_refresh_mask(VP9_COMP *cpi); + void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size); static INLINE int vp9_preserve_existing_gf(VP9_COMP *cpi) { diff --git a/libvpx/vp9/encoder/vp9_block.h b/libvpx/vp9/encoder/vp9_block.h index fc3478626..069c33564 100644 --- a/libvpx/vp9/encoder/vp9_block.h +++ b/libvpx/vp9/encoder/vp9_block.h @@ -65,12 +65,20 @@ struct macroblock { int skip_optimize; int q_index; + // The equivalent error at the current rdmult of one whole bit (not one + // bitcost unit). int errorperbit; + // The equivalend SAD error of one (whole) bit at the current quantizer + // for large blocks. int sadperbit16; + // The equivalend SAD error of one (whole) bit at the current quantizer + // for sub-8x8 blocks. int sadperbit4; int rddiv; int rdmult; int mb_energy; + int * m_search_count_ptr; + int * ex_search_count_ptr; // These are set to their default values at the beginning, and then adjusted // further in the encoding process. 
@@ -135,6 +143,13 @@ struct macroblock { // the visual quality at the boundary of moving color objects. uint8_t color_sensitivity[2]; + uint8_t sb_is_skin; + + // Used to save the status of whether a block has a low variance in + // choose_partitioning. 0 for 64x64, 1~2 for 64x32, 3~4 for 32x64, 5~8 for + // 32x32, 9~24 for 16x16. + uint8_t variance_low[25]; + void (*fwd_txm4x4)(const int16_t *input, tran_low_t *output, int stride); void (*itxm_add)(const tran_low_t *input, uint8_t *dest, int stride, int eob); #if CONFIG_VP9_HIGHBITDEPTH diff --git a/libvpx/vp9/encoder/vp9_context_tree.h b/libvpx/vp9/encoder/vp9_context_tree.h index 8e365ce33..86ba03d69 100644 --- a/libvpx/vp9/encoder/vp9_context_tree.h +++ b/libvpx/vp9/encoder/vp9_context_tree.h @@ -60,6 +60,7 @@ typedef struct { #if CONFIG_VP9_TEMPORAL_DENOISING unsigned int newmv_sse; unsigned int zeromv_sse; + unsigned int zeromv_lastref_sse; PREDICTION_MODE best_sse_inter_mode; int_mv best_sse_mv; MV_REFERENCE_FRAME best_reference_frame; diff --git a/libvpx/vp9/encoder/vp9_cost.c b/libvpx/vp9/encoder/vp9_cost.c index e2fbb34aa..5d14742bc 100644 --- a/libvpx/vp9/encoder/vp9_cost.c +++ b/libvpx/vp9/encoder/vp9_cost.c @@ -11,35 +11,38 @@ #include "vp9/encoder/vp9_cost.h" -const unsigned int vp9_prob_cost[256] = { - 2047, 2047, 1791, 1641, 1535, 1452, 1385, 1328, 1279, 1235, 1196, 1161, - 1129, 1099, 1072, 1046, 1023, 1000, 979, 959, 940, 922, 905, 889, - 873, 858, 843, 829, 816, 803, 790, 778, 767, 755, 744, 733, - 723, 713, 703, 693, 684, 675, 666, 657, 649, 641, 633, 625, - 617, 609, 602, 594, 587, 580, 573, 567, 560, 553, 547, 541, - 534, 528, 522, 516, 511, 505, 499, 494, 488, 483, 477, 472, - 467, 462, 457, 452, 447, 442, 437, 433, 428, 424, 419, 415, - 410, 406, 401, 397, 393, 389, 385, 381, 377, 373, 369, 365, - 361, 357, 353, 349, 346, 342, 338, 335, 331, 328, 324, 321, - 317, 314, 311, 307, 304, 301, 297, 294, 291, 288, 285, 281, - 278, 275, 272, 269, 266, 263, 260, 257, 255, 252, 249, 246, - 243, 240, 
238, 235, 232, 229, 227, 224, 221, 219, 216, 214, - 211, 208, 206, 203, 201, 198, 196, 194, 191, 189, 186, 184, - 181, 179, 177, 174, 172, 170, 168, 165, 163, 161, 159, 156, - 154, 152, 150, 148, 145, 143, 141, 139, 137, 135, 133, 131, - 129, 127, 125, 123, 121, 119, 117, 115, 113, 111, 109, 107, - 105, 103, 101, 99, 97, 95, 93, 92, 90, 88, 86, 84, - 82, 81, 79, 77, 75, 73, 72, 70, 68, 66, 65, 63, - 61, 60, 58, 56, 55, 53, 51, 50, 48, 46, 45, 43, - 41, 40, 38, 37, 35, 33, 32, 30, 29, 27, 25, 24, - 22, 21, 19, 18, 16, 15, 13, 12, 10, 9, 7, 6, - 4, 3, 1, 1}; +/* round(-log2(i/256.) * (1 << VP9_PROB_COST_SHIFT)) + Begins with a bogus entry for simpler addressing. */ +const uint16_t vp9_prob_cost[256] = { + 4096, 4096, 3584, 3284, 3072, 2907, 2772, 2659, 2560, 2473, 2395, 2325, + 2260, 2201, 2147, 2096, 2048, 2003, 1961, 1921, 1883, 1847, 1813, 1780, + 1748, 1718, 1689, 1661, 1635, 1609, 1584, 1559, 1536, 1513, 1491, 1470, + 1449, 1429, 1409, 1390, 1371, 1353, 1335, 1318, 1301, 1284, 1268, 1252, + 1236, 1221, 1206, 1192, 1177, 1163, 1149, 1136, 1123, 1110, 1097, 1084, + 1072, 1059, 1047, 1036, 1024, 1013, 1001, 990, 979, 968, 958, 947, + 937, 927, 917, 907, 897, 887, 878, 868, 859, 850, 841, 832, + 823, 814, 806, 797, 789, 780, 772, 764, 756, 748, 740, 732, + 724, 717, 709, 702, 694, 687, 680, 673, 665, 658, 651, 644, + 637, 631, 624, 617, 611, 604, 598, 591, 585, 578, 572, 566, + 560, 554, 547, 541, 535, 530, 524, 518, 512, 506, 501, 495, + 489, 484, 478, 473, 467, 462, 456, 451, 446, 441, 435, 430, + 425, 420, 415, 410, 405, 400, 395, 390, 385, 380, 375, 371, + 366, 361, 356, 352, 347, 343, 338, 333, 329, 324, 320, 316, + 311, 307, 302, 298, 294, 289, 285, 281, 277, 273, 268, 264, + 260, 256, 252, 248, 244, 240, 236, 232, 228, 224, 220, 216, + 212, 209, 205, 201, 197, 194, 190, 186, 182, 179, 175, 171, + 168, 164, 161, 157, 153, 150, 146, 143, 139, 136, 132, 129, + 125, 122, 119, 115, 112, 109, 105, 102, 99, 95, 92, 89, + 86, 82, 79, 76, 73, 70, 66, 63, 60, 57, 54, 
51, + 48, 45, 42, 38, 35, 32, 29, 26, 23, 20, 18, 15, + 12, 9, 6, 3}; static void cost(int *costs, vpx_tree tree, const vpx_prob *probs, int i, int c) { const vpx_prob prob = probs[i / 2]; int b; + assert(prob != 0); for (b = 0; b <= 1; ++b) { const int cc = c + vp9_cost_bit(prob, b); const vpx_tree_index ii = tree[i + b]; diff --git a/libvpx/vp9/encoder/vp9_cost.h b/libvpx/vp9/encoder/vp9_cost.h index eac74c40b..0c70b7826 100644 --- a/libvpx/vp9/encoder/vp9_cost.h +++ b/libvpx/vp9/encoder/vp9_cost.h @@ -12,18 +12,22 @@ #define VP9_ENCODER_VP9_COST_H_ #include "vpx_dsp/prob.h" +#include "vpx/vpx_integer.h" #ifdef __cplusplus extern "C" { #endif -extern const unsigned int vp9_prob_cost[256]; +extern const uint16_t vp9_prob_cost[256]; + +// The factor to scale from cost in bits to cost in vp9_prob_cost units. +#define VP9_PROB_COST_SHIFT 9 #define vp9_cost_zero(prob) (vp9_prob_cost[prob]) -#define vp9_cost_one(prob) vp9_cost_zero(vpx_complement(prob)) +#define vp9_cost_one(prob) vp9_cost_zero(256 - (prob)) -#define vp9_cost_bit(prob, bit) vp9_cost_zero((bit) ? vpx_complement(prob) \ +#define vp9_cost_bit(prob, bit) vp9_cost_zero((bit) ? 256 - (prob) \ : (prob)) static INLINE unsigned int cost_branch256(const unsigned int ct[2], diff --git a/libvpx/vp9/encoder/vp9_denoiser.c b/libvpx/vp9/encoder/vp9_denoiser.c index 8623b4225..42d456e89 100644 --- a/libvpx/vp9/encoder/vp9_denoiser.c +++ b/libvpx/vp9/encoder/vp9_denoiser.c @@ -21,12 +21,6 @@ #include "vp9/encoder/vp9_denoiser.h" #include "vp9/encoder/vp9_encoder.h" -/* The VP9 denoiser is similar to that of the VP8 denoiser. While - * choosing the motion vectors / reference frames, the denoiser is run, and if - * it did not modify the signal to much, the denoised block is copied to the - * signal. 
- */ - #ifdef OUTPUT_YUV_DENOISED static void make_grayscale(YV12_BUFFER_CONFIG *yuv); #endif @@ -49,16 +43,19 @@ static int noise_motion_thresh(BLOCK_SIZE bs, int increase_denoising) { } static unsigned int sse_thresh(BLOCK_SIZE bs, int increase_denoising) { - return (1 << num_pels_log2_lookup[bs]) * (increase_denoising ? 60 : 40); + return (1 << num_pels_log2_lookup[bs]) * (increase_denoising ? 80 : 40); } static int sse_diff_thresh(BLOCK_SIZE bs, int increase_denoising, int motion_magnitude) { if (motion_magnitude > noise_motion_thresh(bs, increase_denoising)) { - return 0; + if (increase_denoising) + return (1 << num_pels_log2_lookup[bs]) << 2; + else + return 0; } else { - return (1 << num_pels_log2_lookup[bs]) * 20; + return (1 << num_pels_log2_lookup[bs]) << 4; } } @@ -183,7 +180,7 @@ int vp9_denoiser_filter_c(const uint8_t *sig, int sig_stride, static uint8_t *block_start(uint8_t *framebuf, int stride, int mi_row, int mi_col) { - return framebuf + (stride * mi_row * 8) + (mi_col * 8); + return framebuf + (stride * mi_row << 3) + (mi_col << 3); } static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser, @@ -193,90 +190,101 @@ static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser, int mi_row, int mi_col, PICK_MODE_CONTEXT *ctx, - int *motion_magnitude, - int is_skin) { - int mv_col, mv_row; + int motion_magnitude, + int is_skin, + int *zeromv_filter, + int consec_zeromv) { int sse_diff = ctx->zeromv_sse - ctx->newmv_sse; MV_REFERENCE_FRAME frame; MACROBLOCKD *filter_mbd = &mb->e_mbd; - MB_MODE_INFO *mbmi = &filter_mbd->mi[0]->mbmi; - MB_MODE_INFO saved_mbmi; - int i, j; + MODE_INFO *mi = filter_mbd->mi[0]; + MODE_INFO saved_mi; + int i; struct buf_2d saved_dst[MAX_MB_PLANE]; - struct buf_2d saved_pre[MAX_MB_PLANE][2]; // 2 pre buffers + struct buf_2d saved_pre[MAX_MB_PLANE]; - mv_col = ctx->best_sse_mv.as_mv.col; - mv_row = ctx->best_sse_mv.as_mv.row; - *motion_magnitude = mv_row * mv_row + mv_col * mv_col; 
frame = ctx->best_reference_frame; + saved_mi = *mi; - saved_mbmi = *mbmi; + if (is_skin && (motion_magnitude > 0 || consec_zeromv < 4)) + return COPY_BLOCK; - if (is_skin && *motion_magnitude > 16) + // Avoid denoising for small block (unless motion is small). + // Small blocks are selected in variance partition (before encoding) and + // will typically lie on moving areas. + if (denoiser->denoising_level < kDenHigh && + motion_magnitude > 16 && bs <= BLOCK_8X8) return COPY_BLOCK; // If the best reference frame uses inter-prediction and there is enough of a // difference in sum-squared-error, use it. if (frame != INTRA_FRAME && - sse_diff > sse_diff_thresh(bs, increase_denoising, *motion_magnitude)) { - mbmi->ref_frame[0] = ctx->best_reference_frame; - mbmi->mode = ctx->best_sse_inter_mode; - mbmi->mv[0] = ctx->best_sse_mv; + ctx->newmv_sse != UINT_MAX && + sse_diff > sse_diff_thresh(bs, increase_denoising, motion_magnitude)) { + mi->ref_frame[0] = ctx->best_reference_frame; + mi->mode = ctx->best_sse_inter_mode; + mi->mv[0] = ctx->best_sse_mv; } else { // Otherwise, use the zero reference frame. frame = ctx->best_zeromv_reference_frame; - - mbmi->ref_frame[0] = ctx->best_zeromv_reference_frame; - mbmi->mode = ZEROMV; - mbmi->mv[0].as_int = 0; - + ctx->newmv_sse = ctx->zeromv_sse; + // Bias to last reference. 
+ if (frame != LAST_FRAME && + ((ctx->zeromv_lastref_sse < (5 * ctx->zeromv_sse) >> 2) || + denoiser->denoising_level >= kDenHigh)) { + frame = LAST_FRAME; + ctx->newmv_sse = ctx->zeromv_lastref_sse; + } + mi->ref_frame[0] = frame; + mi->mode = ZEROMV; + mi->mv[0].as_int = 0; ctx->best_sse_inter_mode = ZEROMV; ctx->best_sse_mv.as_int = 0; - ctx->newmv_sse = ctx->zeromv_sse; + *zeromv_filter = 1; + if (denoiser->denoising_level > kDenMedium) { + motion_magnitude = 0; + } } if (ctx->newmv_sse > sse_thresh(bs, increase_denoising)) { // Restore everything to its original state - *mbmi = saved_mbmi; + *mi = saved_mi; return COPY_BLOCK; } - if (*motion_magnitude > + if (motion_magnitude > (noise_motion_thresh(bs, increase_denoising) << 3)) { // Restore everything to its original state - *mbmi = saved_mbmi; + *mi = saved_mi; return COPY_BLOCK; } // We will restore these after motion compensation. for (i = 0; i < MAX_MB_PLANE; ++i) { - for (j = 0; j < 2; ++j) { - saved_pre[i][j] = filter_mbd->plane[i].pre[j]; - } + saved_pre[i] = filter_mbd->plane[i].pre[0]; saved_dst[i] = filter_mbd->plane[i].dst; } // Set the pointers in the MACROBLOCKD to point to the buffers in the denoiser // struct. 
- for (j = 0; j < 2; ++j) { - filter_mbd->plane[0].pre[j].buf = - block_start(denoiser->running_avg_y[frame].y_buffer, - denoiser->running_avg_y[frame].y_stride, - mi_row, mi_col); - filter_mbd->plane[0].pre[j].stride = - denoiser->running_avg_y[frame].y_stride; - filter_mbd->plane[1].pre[j].buf = - block_start(denoiser->running_avg_y[frame].u_buffer, - denoiser->running_avg_y[frame].uv_stride, - mi_row, mi_col); - filter_mbd->plane[1].pre[j].stride = - denoiser->running_avg_y[frame].uv_stride; - filter_mbd->plane[2].pre[j].buf = - block_start(denoiser->running_avg_y[frame].v_buffer, - denoiser->running_avg_y[frame].uv_stride, - mi_row, mi_col); - filter_mbd->plane[2].pre[j].stride = - denoiser->running_avg_y[frame].uv_stride; - } + filter_mbd->plane[0].pre[0].buf = + block_start(denoiser->running_avg_y[frame].y_buffer, + denoiser->running_avg_y[frame].y_stride, + mi_row, mi_col); + filter_mbd->plane[0].pre[0].stride = + denoiser->running_avg_y[frame].y_stride; + filter_mbd->plane[1].pre[0].buf = + block_start(denoiser->running_avg_y[frame].u_buffer, + denoiser->running_avg_y[frame].uv_stride, + mi_row, mi_col); + filter_mbd->plane[1].pre[0].stride = + denoiser->running_avg_y[frame].uv_stride; + filter_mbd->plane[2].pre[0].buf = + block_start(denoiser->running_avg_y[frame].v_buffer, + denoiser->running_avg_y[frame].uv_stride, + mi_row, mi_col); + filter_mbd->plane[2].pre[0].stride = + denoiser->running_avg_y[frame].uv_stride; + filter_mbd->plane[0].dst.buf = block_start(denoiser->mc_running_avg_y.y_buffer, denoiser->mc_running_avg_y.y_stride, @@ -293,27 +301,26 @@ static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser, mi_row, mi_col); filter_mbd->plane[2].dst.stride = denoiser->mc_running_avg_y.uv_stride; - vp9_build_inter_predictors_sby(filter_mbd, mv_row, mv_col, bs); + vp9_build_inter_predictors_sby(filter_mbd, mi_row, mi_col, bs); // Restore everything to its original state - *mbmi = saved_mbmi; + *mi = saved_mi; for (i = 0; i < 
MAX_MB_PLANE; ++i) { - for (j = 0; j < 2; ++j) { - filter_mbd->plane[i].pre[j] = saved_pre[i][j]; - } + filter_mbd->plane[i].pre[0] = saved_pre[i]; filter_mbd->plane[i].dst = saved_dst[i]; } - mv_row = ctx->best_sse_mv.as_mv.row; - mv_col = ctx->best_sse_mv.as_mv.col; - return FILTER_BLOCK; } -void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb, +void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col, BLOCK_SIZE bs, - PICK_MODE_CONTEXT *ctx) { + PICK_MODE_CONTEXT *ctx, + VP9_DENOISER_DECISION *denoiser_decision) { + int mv_col, mv_row; int motion_magnitude = 0; + int zeromv_filter = 0; + VP9_DENOISER *denoiser = &cpi->denoiser; VP9_DENOISER_DECISION decision = COPY_BLOCK; YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME]; YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y; @@ -322,36 +329,75 @@ void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb, mi_row, mi_col); struct buf_2d src = mb->plane[0].src; int is_skin = 0; - - if (bs <= BLOCK_16X16 && denoiser->denoising_on) { - // Take center pixel in block to determine is_skin. 
- const int y_width_shift = (4 << b_width_log2_lookup[bs]) >> 1; - const int y_height_shift = (4 << b_height_log2_lookup[bs]) >> 1; - const int uv_width_shift = y_width_shift >> 1; - const int uv_height_shift = y_height_shift >> 1; - const int stride = mb->plane[0].src.stride; - const int strideuv = mb->plane[1].src.stride; - const uint8_t ysource = - mb->plane[0].src.buf[y_height_shift * stride + y_width_shift]; - const uint8_t usource = - mb->plane[1].src.buf[uv_height_shift * strideuv + uv_width_shift]; - const uint8_t vsource = - mb->plane[2].src.buf[uv_height_shift * strideuv + uv_width_shift]; - is_skin = vp9_skin_pixel(ysource, usource, vsource); + int consec_zeromv = 0; + mv_col = ctx->best_sse_mv.as_mv.col; + mv_row = ctx->best_sse_mv.as_mv.row; + motion_magnitude = mv_row * mv_row + mv_col * mv_col; + + if (cpi->use_skin_detection && + bs <= BLOCK_32X32 && + denoiser->denoising_level < kDenHigh) { + int motion_level = (motion_magnitude < 16) ? 0 : 1; + // If motion for current block is small/zero, compute consec_zeromv for + // skin detection (early exit in skin detection is done for large + // consec_zeromv when current block has small/zero motion). + consec_zeromv = 0; + if (motion_level == 0) { + VP9_COMMON * const cm = &cpi->common; + int j, i; + // Loop through the 8x8 sub-blocks. + const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; + const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; + const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); + const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); + const int block_index = mi_row * cm->mi_cols + mi_col; + consec_zeromv = 100; + for (i = 0; i < ymis; i++) { + for (j = 0; j < xmis; j++) { + int bl_index = block_index + i * cm->mi_cols + j; + consec_zeromv = VPXMIN(cpi->consec_zero_mv[bl_index], consec_zeromv); + // No need to keep checking 8x8 blocks if any of the sub-blocks + // has small consec_zeromv (since threshold for no_skin based on + // zero/small motion in skin detection is high, i.e, > 4). 
+ if (consec_zeromv < 4) { + i = ymis; + j = xmis; + } + } + } + } + // TODO(marpan): Compute skin detection over sub-blocks. + is_skin = vp9_compute_skin_block(mb->plane[0].src.buf, + mb->plane[1].src.buf, + mb->plane[2].src.buf, + mb->plane[0].src.stride, + mb->plane[1].src.stride, + bs, + consec_zeromv, + motion_level); + } + if (!is_skin && + denoiser->denoising_level == kDenHigh) { + denoiser->increase_denoising = 1; + } else { + denoiser->increase_denoising = 0; } - if (denoiser->denoising_on) + if (denoiser->denoising_level >= kDenLow) decision = perform_motion_compensation(denoiser, mb, bs, denoiser->increase_denoising, mi_row, mi_col, ctx, - &motion_magnitude, - is_skin); + motion_magnitude, + is_skin, + &zeromv_filter, + consec_zeromv); if (decision == FILTER_BLOCK) { decision = vp9_denoiser_filter(src.buf, src.stride, mc_avg_start, mc_avg.y_stride, avg_start, avg.y_stride, - 0, bs, motion_magnitude); + denoiser->increase_denoising, + bs, motion_magnitude); } if (decision == FILTER_BLOCK) { @@ -365,6 +411,9 @@ void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb, num_4x4_blocks_wide_lookup[bs] << 2, num_4x4_blocks_high_lookup[bs] << 2); } + *denoiser_decision = decision; + if (decision == FILTER_BLOCK && zeromv_filter == 1) + *denoiser_decision = FILTER_ZEROMV_BLOCK; } static void copy_frame(YV12_BUFFER_CONFIG * const dest, @@ -401,11 +450,12 @@ void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser, int resized) { // Copy source into denoised reference buffers on KEY_FRAME or // if the just encoded frame was resized. 
- if (frame_type == KEY_FRAME || resized != 0) { + if (frame_type == KEY_FRAME || resized != 0 || denoiser->reset) { int i; // Start at 1 so as not to overwrite the INTRA_FRAME for (i = 1; i < MAX_REF_FRAMES; ++i) copy_frame(&denoiser->running_avg_y[i], &src); + denoiser->reset = 0; return; } @@ -443,22 +493,25 @@ void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser, void vp9_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx) { ctx->zeromv_sse = UINT_MAX; ctx->newmv_sse = UINT_MAX; + ctx->zeromv_lastref_sse = UINT_MAX; + ctx->best_sse_mv.as_int = 0; } -void vp9_denoiser_update_frame_stats(MB_MODE_INFO *mbmi, unsigned int sse, +void vp9_denoiser_update_frame_stats(MODE_INFO *mi, unsigned int sse, PREDICTION_MODE mode, PICK_MODE_CONTEXT *ctx) { - // TODO(tkopp): Use both MVs if possible - if (mbmi->mv[0].as_int == 0 && sse < ctx->zeromv_sse) { + if (mi->mv[0].as_int == 0 && sse < ctx->zeromv_sse) { ctx->zeromv_sse = sse; - ctx->best_zeromv_reference_frame = mbmi->ref_frame[0]; + ctx->best_zeromv_reference_frame = mi->ref_frame[0]; + if (mi->ref_frame[0] == LAST_FRAME) + ctx->zeromv_lastref_sse = sse; } - if (mbmi->mv[0].as_int != 0 && sse < ctx->newmv_sse) { + if (mi->mv[0].as_int != 0 && sse < ctx->newmv_sse) { ctx->newmv_sse = sse; ctx->best_sse_inter_mode = mode; - ctx->best_sse_mv = mbmi->mv[0]; - ctx->best_reference_frame = mbmi->ref_frame[0]; + ctx->best_sse_mv = mi->mv[0]; + ctx->best_reference_frame = mi->ref_frame[0]; } } @@ -514,27 +567,12 @@ int vp9_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height, #endif denoiser->increase_denoising = 0; denoiser->frame_buffer_initialized = 1; - vp9_denoiser_init_noise_estimate(denoiser, width, height); + denoiser->denoising_level = kDenLow; + denoiser->prev_denoising_level = kDenLow; + denoiser->reset = 0; return 0; } -void vp9_denoiser_init_noise_estimate(VP9_DENOISER *denoiser, - int width, - int height) { - // Denoiser is off by default, i.e., no denoising is performed. 
- // Noise level is measured periodically, and if observed to be above - // thresh_noise_estimate, then denoising is performed, i.e., denoising_on = 1. - denoiser->denoising_on = 0; - denoiser->noise_estimate = 0; - denoiser->noise_estimate_count = 0; - denoiser->thresh_noise_estimate = 20; - if (width * height >= 1920 * 1080) { - denoiser->thresh_noise_estimate = 70; - } else if (width * height >= 1280 * 720) { - denoiser->thresh_noise_estimate = 40; - } -} - void vp9_denoiser_free(VP9_DENOISER *denoiser) { int i; denoiser->frame_buffer_initialized = 0; @@ -548,117 +586,15 @@ void vp9_denoiser_free(VP9_DENOISER *denoiser) { vpx_free_frame_buffer(&denoiser->last_source); } -void vp9_denoiser_update_noise_estimate(VP9_COMP *const cpi) { - const VP9_COMMON *const cm = &cpi->common; - CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; - int frame_period = 10; - int thresh_consec_zeromv = 8; - unsigned int thresh_sum_diff = 128; - int num_frames_estimate = 20; - int min_blocks_estimate = cm->mi_rows * cm->mi_cols >> 7; - // Estimate of noise level every frame_period frames. - // Estimate is between current source and last source. - if (cm->current_video_frame % frame_period != 0 || - cpi->denoiser.last_source.y_buffer == NULL) { - copy_frame(&cpi->denoiser.last_source, cpi->Source); - return; - } else { - int num_samples = 0; - uint64_t avg_est = 0; - int bsize = BLOCK_16X16; - static const unsigned char const_source[16] = { - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128}; - // Loop over sub-sample of 16x16 blocks of frame, and for blocks that have - // been encoded as zero/small mv at least x consecutive frames, compute - // the variance to update estimate of noise in the source. 
- const uint8_t *src_y = cpi->Source->y_buffer; - const int src_ystride = cpi->Source->y_stride; - const uint8_t *last_src_y = cpi->denoiser.last_source.y_buffer; - const int last_src_ystride = cpi->denoiser.last_source.y_stride; - const uint8_t *src_u = cpi->Source->u_buffer; - const uint8_t *src_v = cpi->Source->v_buffer; - const int src_uvstride = cpi->Source->uv_stride; - const int y_width_shift = (4 << b_width_log2_lookup[bsize]) >> 1; - const int y_height_shift = (4 << b_height_log2_lookup[bsize]) >> 1; - const int uv_width_shift = y_width_shift >> 1; - const int uv_height_shift = y_height_shift >> 1; - int mi_row, mi_col; - for (mi_row = 0; mi_row < cm->mi_rows; mi_row ++) { - for (mi_col = 0; mi_col < cm->mi_cols; mi_col ++) { - // 16x16 blocks, 1/4 sample of frame. - if (mi_row % 4 == 0 && mi_col % 4 == 0) { - int bl_index = mi_row * cm->mi_cols + mi_col; - int bl_index1 = bl_index + 1; - int bl_index2 = bl_index + cm->mi_cols; - int bl_index3 = bl_index2 + 1; - // Only consider blocks that are likely steady background. i.e, have - // been encoded as zero/low motion x (= thresh_consec_zeromv) frames - // in a row. consec_zero_mv[] defined for 8x8 blocks, so consider all - // 4 sub-blocks for 16x16 block. Also, avoid skin blocks. - const uint8_t ysource = - src_y[y_height_shift * src_ystride + y_width_shift]; - const uint8_t usource = - src_u[uv_height_shift * src_uvstride + uv_width_shift]; - const uint8_t vsource = - src_v[uv_height_shift * src_uvstride + uv_width_shift]; - int is_skin = vp9_skin_pixel(ysource, usource, vsource); - if (cr->consec_zero_mv[bl_index] > thresh_consec_zeromv && - cr->consec_zero_mv[bl_index1] > thresh_consec_zeromv && - cr->consec_zero_mv[bl_index2] > thresh_consec_zeromv && - cr->consec_zero_mv[bl_index3] > thresh_consec_zeromv && - !is_skin) { - // Compute variance. 
- unsigned int sse; - unsigned int variance = cpi->fn_ptr[bsize].vf(src_y, - src_ystride, - last_src_y, - last_src_ystride, - &sse); - // Only consider this block as valid for noise measurement if the - // average term (sse - variance = N * avg^{2}, N = 16X16) of the - // temporal residual is small (avoid effects from lighting change). - if ((sse - variance) < thresh_sum_diff) { - unsigned int sse2; - const unsigned int spatial_variance = - cpi->fn_ptr[bsize].vf(src_y, src_ystride, const_source, - 0, &sse2); - avg_est += variance / (10 + spatial_variance); - num_samples++; - } - } - } - src_y += 8; - last_src_y += 8; - src_u += 4; - src_v += 4; - } - src_y += (src_ystride << 3) - (cm->mi_cols << 3); - last_src_y += (last_src_ystride << 3) - (cm->mi_cols << 3); - src_u += (src_uvstride << 2) - (cm->mi_cols << 2); - src_v += (src_uvstride << 2) - (cm->mi_cols << 2); - } - // Update noise estimate if we have at a minimum number of block samples, - // and avg_est > 0 (avg_est == 0 can happen if the application inputs - // duplicate frames). - if (num_samples > min_blocks_estimate && avg_est > 0) { - // Normalize. - avg_est = (avg_est << 8) / num_samples; - // Update noise estimate. - cpi->denoiser.noise_estimate = (3 * cpi->denoiser.noise_estimate + - avg_est) >> 2; - cpi->denoiser.noise_estimate_count++; - if (cpi->denoiser.noise_estimate_count == num_frames_estimate) { - // Reset counter and check noise level condition. 
- cpi->denoiser.noise_estimate_count = 0; - if (cpi->denoiser.noise_estimate > cpi->denoiser.thresh_noise_estimate) - cpi->denoiser.denoising_on = 1; - else - cpi->denoiser.denoising_on = 0; - } - } - } - copy_frame(&cpi->denoiser.last_source, cpi->Source); +void vp9_denoiser_set_noise_level(VP9_DENOISER *denoiser, + int noise_level) { + denoiser->denoising_level = noise_level; + if (denoiser->denoising_level > kDenLowLow && + denoiser->prev_denoising_level == kDenLowLow) + denoiser->reset = 1; + else + denoiser->reset = 0; + denoiser->prev_denoising_level = denoiser->denoising_level; } #ifdef OUTPUT_YUV_DENOISED diff --git a/libvpx/vp9/encoder/vp9_denoiser.h b/libvpx/vp9/encoder/vp9_denoiser.h index f8ad4acd6..9c86e5a93 100644 --- a/libvpx/vp9/encoder/vp9_denoiser.h +++ b/libvpx/vp9/encoder/vp9_denoiser.h @@ -23,21 +23,40 @@ extern "C" { typedef enum vp9_denoiser_decision { COPY_BLOCK, - FILTER_BLOCK + FILTER_BLOCK, + FILTER_ZEROMV_BLOCK } VP9_DENOISER_DECISION; +typedef enum vp9_denoiser_level { + kDenLowLow, + kDenLow, + kDenMedium, + kDenHigh +} VP9_DENOISER_LEVEL; + typedef struct vp9_denoiser { YV12_BUFFER_CONFIG running_avg_y[MAX_REF_FRAMES]; YV12_BUFFER_CONFIG mc_running_avg_y; YV12_BUFFER_CONFIG last_source; int increase_denoising; int frame_buffer_initialized; - int denoising_on; - int noise_estimate; - int thresh_noise_estimate; - int noise_estimate_count; + int reset; + VP9_DENOISER_LEVEL denoising_level; + VP9_DENOISER_LEVEL prev_denoising_level; } VP9_DENOISER; +typedef struct { + int64_t zero_last_cost_orig; + int *ref_frame_cost; + int_mv (*frame_mv)[MAX_REF_FRAMES]; + int reuse_inter_pred; + TX_SIZE best_tx_size; + PREDICTION_MODE best_mode; + MV_REFERENCE_FRAME best_ref_frame; + INTERP_FILTER best_pred_filter; + uint8_t best_mode_skip_txfm; +} VP9_PICKMODE_CTX_DEN; + struct VP9_COMP; void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser, @@ -48,13 +67,14 @@ void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser, int refresh_last_frame, 
int resized); -void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb, +void vp9_denoiser_denoise(struct VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col, BLOCK_SIZE bs, - PICK_MODE_CONTEXT *ctx); + PICK_MODE_CONTEXT *ctx , + VP9_DENOISER_DECISION *denoiser_decision); void vp9_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx); -void vp9_denoiser_update_frame_stats(MB_MODE_INFO *mbmi, +void vp9_denoiser_update_frame_stats(MODE_INFO *mi, unsigned int sse, PREDICTION_MODE mode, PICK_MODE_CONTEXT *ctx); @@ -69,18 +89,16 @@ int vp9_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height, // This function is used by both c and sse2 denoiser implementations. // Define it as a static function within the scope where vp9_denoiser.h // is referenced. -static int total_adj_strong_thresh(BLOCK_SIZE bs, int increase_denoising) { +static INLINE int total_adj_strong_thresh(BLOCK_SIZE bs, + int increase_denoising) { return (1 << num_pels_log2_lookup[bs]) * (increase_denoising ? 3 : 2); } #endif void vp9_denoiser_free(VP9_DENOISER *denoiser); -void vp9_denoiser_init_noise_estimate(VP9_DENOISER *denoiser, - int width, - int height); - -void vp9_denoiser_update_noise_estimate(struct VP9_COMP *const cpi); +void vp9_denoiser_set_noise_level(VP9_DENOISER *denoiser, + int noise_level); #ifdef __cplusplus } // extern "C" diff --git a/libvpx/vp9/encoder/vp9_encodeframe.c b/libvpx/vp9/encoder/vp9_encodeframe.c index 2333a1391..f66ed9ed3 100644 --- a/libvpx/vp9/encoder/vp9_encodeframe.c +++ b/libvpx/vp9/encoder/vp9_encodeframe.c @@ -33,6 +33,7 @@ #include "vp9/common/vp9_seg_common.h" #include "vp9/common/vp9_tile_common.h" +#include "vp9/encoder/vp9_aq_360.h" #include "vp9/encoder/vp9_aq_complexity.h" #include "vp9/encoder/vp9_aq_cyclicrefresh.h" #include "vp9/encoder/vp9_aq_variance.h" @@ -133,7 +134,7 @@ unsigned int vp9_high_get_sby_perpixel_variance( 0, &sse); break; } - return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]); + return 
ROUND_POWER_OF_TWO((int64_t)var, num_pels_log2_lookup[bs]); } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -186,7 +187,7 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *mbmi; + MODE_INFO *mi; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; const int mi_height = num_8x8_blocks_high_lookup[bsize]; const struct segmentation *const seg = &cm->seg; @@ -195,7 +196,7 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, set_mode_info_offsets(cm, x, xd, mi_row, mi_col); - mbmi = &xd->mi[0]->mbmi; + mi = xd->mi[0]; // Set up destination pointers. vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); @@ -221,16 +222,17 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, // Setup segment ID. if (seg->enabled) { - if (cpi->oxcf.aq_mode != VARIANCE_AQ) { + if (cpi->oxcf.aq_mode != VARIANCE_AQ && + cpi->oxcf.aq_mode != EQUATOR360_AQ) { const uint8_t *const map = seg->update_map ? 
cpi->segmentation_map : cm->last_frame_seg_map; - mbmi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); + mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); } vp9_init_plane_quantizers(cpi, x); - x->encode_breakout = cpi->segment_encode_breakout[mbmi->segment_id]; + x->encode_breakout = cpi->segment_encode_breakout[mi->segment_id]; } else { - mbmi->segment_id = 0; + mi->segment_id = 0; x->encode_breakout = cpi->encode_breakout; } @@ -241,14 +243,16 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, static void duplicate_mode_info_in_sb(VP9_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE bsize) { - const int block_width = num_8x8_blocks_wide_lookup[bsize]; - const int block_height = num_8x8_blocks_high_lookup[bsize]; + const int block_width = VPXMIN(num_8x8_blocks_wide_lookup[bsize], + cm->mi_cols - mi_col); + const int block_height = VPXMIN(num_8x8_blocks_high_lookup[bsize], + cm->mi_rows - mi_row); + const int mi_stride = xd->mi_stride; + MODE_INFO *const src_mi = xd->mi[0]; int i, j; for (j = 0; j < block_height; ++j) - for (i = 0; i < block_width; ++i) { - if (mi_row + j < cm->mi_rows && mi_col + i < cm->mi_cols) - xd->mi[j * xd->mi_stride + i] = xd->mi[0]; - } + for (i = 0; i < block_width; ++i) + xd->mi[j * mi_stride + i] = src_mi; } static void set_block_size(VP9_COMP * const cpi, @@ -258,7 +262,7 @@ static void set_block_size(VP9_COMP * const cpi, BLOCK_SIZE bsize) { if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) { set_mode_info_offsets(&cpi->common, x, xd, mi_row, mi_col); - xd->mi[0]->mbmi.sb_type = bsize; + xd->mi[0]->sb_type = bsize; } } @@ -401,7 +405,6 @@ static int set_vt_partitioning(VP9_COMP *cpi, variance_node vt; const int block_width = num_8x8_blocks_wide_lookup[bsize]; const int block_height = num_8x8_blocks_high_lookup[bsize]; - const int low_res = (cm->width <= 352 && cm->height <= 288); assert(block_height == block_width); tree_to_node(data, bsize, &vt); @@ 
-414,7 +417,7 @@ static int set_vt_partitioning(VP9_COMP *cpi, // No check for vert/horiz split as too few samples for variance. if (bsize == bsize_min) { // Variance already computed to set the force_split. - if (low_res || cm->frame_type == KEY_FRAME) + if (cm->frame_type == KEY_FRAME) get_variance(&vt.part_variances->none); if (mi_col + block_width / 2 < cm->mi_cols && mi_row + block_height / 2 < cm->mi_rows && @@ -425,7 +428,7 @@ static int set_vt_partitioning(VP9_COMP *cpi, return 0; } else if (bsize > bsize_min) { // Variance already computed to set the force_split. - if (low_res || cm->frame_type == KEY_FRAME) + if (cm->frame_type == KEY_FRAME) get_variance(&vt.part_variances->none); // For key frame: take split for bsize above 32X32 or very high variance. if (cm->frame_type == KEY_FRAME && @@ -481,7 +484,7 @@ static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q) { VP9_COMMON *const cm = &cpi->common; const int is_key_frame = (cm->frame_type == KEY_FRAME); const int threshold_multiplier = is_key_frame ? 20 : 1; - const int64_t threshold_base = (int64_t)(threshold_multiplier * + int64_t threshold_base = (int64_t)(threshold_multiplier * cpi->y_dequant[q][1]); if (is_key_frame) { thresholds[0] = threshold_base; @@ -489,9 +492,20 @@ static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q) { thresholds[2] = threshold_base >> 2; thresholds[3] = threshold_base << 2; } else { - thresholds[1] = threshold_base; + // Increase base variance threshold based on estimated noise level. 
+ if (cpi->noise_estimate.enabled) { + NOISE_LEVEL noise_level = vp9_noise_estimate_extract_level( + &cpi->noise_estimate); + if (noise_level == kHigh) + threshold_base = 3 * threshold_base; + else if (noise_level == kMedium) + threshold_base = threshold_base << 1; + else if (noise_level < kLow) + threshold_base = (7 * threshold_base) >> 3; + } if (cm->width <= 352 && cm->height <= 288) { - thresholds[0] = threshold_base >> 2; + thresholds[0] = threshold_base >> 3; + thresholds[1] = threshold_base >> 1; thresholds[2] = threshold_base << 3; } else { thresholds[0] = threshold_base; @@ -518,7 +532,7 @@ void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q) { cpi->vbp_bsize_min = BLOCK_8X8; } else { if (cm->width <= 352 && cm->height <= 288) - cpi->vbp_threshold_sad = 100; + cpi->vbp_threshold_sad = 10; else cpi->vbp_threshold_sad = (cpi->y_dequant[q][1] << 1) > 1000 ? (cpi->y_dequant[q][1] << 1) : 1000; @@ -548,16 +562,16 @@ static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d, if (x8_idx < pixels_wide && y8_idx < pixels_high) { #if CONFIG_VP9_HIGHBITDEPTH if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { - vp9_highbd_minmax_8x8(s + y8_idx * sp + x8_idx, sp, + vpx_highbd_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx, dp, &min, &max); } else { - vp9_minmax_8x8(s + y8_idx * sp + x8_idx, sp, + vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx, dp, &min, &max); } #else - vp9_minmax_8x8(s + y8_idx * sp + x8_idx, sp, + vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx, dp, &min, &max); #endif @@ -589,18 +603,18 @@ static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d, int d_avg = 128; #if CONFIG_VP9_HIGHBITDEPTH if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { - s_avg = vp9_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp); + s_avg = vpx_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp); if (!is_key_frame) - d_avg = vp9_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp); + d_avg = 
vpx_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp); } else { - s_avg = vp9_avg_4x4(s + y4_idx * sp + x4_idx, sp); + s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp); if (!is_key_frame) - d_avg = vp9_avg_4x4(d + y4_idx * dp + x4_idx, dp); + d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp); } #else - s_avg = vp9_avg_4x4(s + y4_idx * sp + x4_idx, sp); + s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp); if (!is_key_frame) - d_avg = vp9_avg_4x4(d + y4_idx * dp + x4_idx, dp); + d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp); #endif sum = s_avg - d_avg; sse = sum * sum; @@ -628,18 +642,18 @@ static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d, int d_avg = 128; #if CONFIG_VP9_HIGHBITDEPTH if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { - s_avg = vp9_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp); + s_avg = vpx_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp); if (!is_key_frame) - d_avg = vp9_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp); + d_avg = vpx_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp); } else { - s_avg = vp9_avg_8x8(s + y8_idx * sp + x8_idx, sp); + s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp); if (!is_key_frame) - d_avg = vp9_avg_8x8(d + y8_idx * dp + x8_idx, dp); + d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp); } #else - s_avg = vp9_avg_8x8(s + y8_idx * sp + x8_idx, sp); + s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp); if (!is_key_frame) - d_avg = vp9_avg_8x8(d + y8_idx * dp + x8_idx, dp); + d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp); #endif sum = s_avg - d_avg; sse = sum * sum; @@ -648,45 +662,177 @@ static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d, } } +#if !CONFIG_VP9_HIGHBITDEPTH +// Check if most of the superblock is skin content, and if so, force split to +// 32x32, and set x->sb_is_skin for use in mode selection. 
+static int skin_sb_split(VP9_COMP *cpi, MACROBLOCK *x, const int low_res, + int mi_row, int mi_col, int *force_split) { + VP9_COMMON * const cm = &cpi->common; + // Avoid checking superblocks on/near boundary and avoid low resolutions. + // Note superblock may still pick 64X64 if y_sad is very small + // (i.e., y_sad < cpi->vbp_threshold_sad) below. For now leave this as is. + if (!low_res && (mi_col >= 8 && mi_col + 8 < cm->mi_cols && mi_row >= 8 && + mi_row + 8 < cm->mi_rows)) { + int num_16x16_skin = 0; + int num_16x16_nonskin = 0; + uint8_t *ysignal = x->plane[0].src.buf; + uint8_t *usignal = x->plane[1].src.buf; + uint8_t *vsignal = x->plane[2].src.buf; + int sp = x->plane[0].src.stride; + int spuv = x->plane[1].src.stride; + const int block_index = mi_row * cm->mi_cols + mi_col; + const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; + const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; + const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); + const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); + // Loop through the 16x16 sub-blocks. + int i, j; + for (i = 0; i < ymis; i+=2) { + for (j = 0; j < xmis; j+=2) { + int bl_index = block_index + i * cm->mi_cols + j; + int bl_index1 = bl_index + 1; + int bl_index2 = bl_index + cm->mi_cols; + int bl_index3 = bl_index2 + 1; + int consec_zeromv = VPXMIN(cpi->consec_zero_mv[bl_index], + VPXMIN(cpi->consec_zero_mv[bl_index1], + VPXMIN(cpi->consec_zero_mv[bl_index2], + cpi->consec_zero_mv[bl_index3]))); + int is_skin = vp9_compute_skin_block(ysignal, + usignal, + vsignal, + sp, + spuv, + BLOCK_16X16, + consec_zeromv, + 0); + num_16x16_skin += is_skin; + num_16x16_nonskin += (1 - is_skin); + if (num_16x16_nonskin > 3) { + // Exit loop if at least 4 of the 16x16 blocks are not skin. 
+ i = ymis; + break; + } + ysignal += 16; + usignal += 8; + vsignal += 8; + } + ysignal += (sp << 4) - 64; + usignal += (spuv << 3) - 32; + vsignal += (spuv << 3) - 32; + } + if (num_16x16_skin > 12) { + *force_split = 1; + return 1; + } + } + return 0; +} +#endif + +static void set_low_temp_var_flag(VP9_COMP *cpi, MACROBLOCK *x, + MACROBLOCKD *xd, v64x64 *vt, + int force_split[], int64_t thresholds[], + MV_REFERENCE_FRAME ref_frame_partition, + int mi_col, int mi_row) { + int i, j; + VP9_COMMON * const cm = &cpi->common; + const int mv_thr = cm->width > 640 ? 8 : 4; + // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected and + // int_pro mv is small. If the temporal variance is small set the flag + // variance_low for the block. The variance threshold can be adjusted, the + // higher the more aggressive. + if (ref_frame_partition == LAST_FRAME && + (cpi->sf.short_circuit_low_temp_var == 1 || + (xd->mi[0]->mv[0].as_mv.col < mv_thr && + xd->mi[0]->mv[0].as_mv.col > -mv_thr && + xd->mi[0]->mv[0].as_mv.row < mv_thr && + xd->mi[0]->mv[0].as_mv.row > -mv_thr))) { + if (xd->mi[0]->sb_type == BLOCK_64X64 && + (vt->part_variances).none.variance < (thresholds[0] >> 1)) { + x->variance_low[0] = 1; + } else if (xd->mi[0]->sb_type == BLOCK_64X32) { + for (i = 0; i < 2; i++) { + if (vt->part_variances.horz[i].variance < (thresholds[0] >> 2)) + x->variance_low[i + 1] = 1; + } + } else if (xd->mi[0]->sb_type == BLOCK_32X64) { + for (i = 0; i < 2; i++) { + if (vt->part_variances.vert[i].variance < (thresholds[0] >> 2)) + x->variance_low[i + 3] = 1; + } + } else { + for (i = 0; i < 4; i++) { + if (!force_split[i + 1]) { + // 32x32 + if (vt->split[i].part_variances.none.variance < + (thresholds[1] >> 1)) + x->variance_low[i + 5] = 1; + } else if (cpi->sf.short_circuit_low_temp_var == 2) { + int idx[4] = {0, 4, xd->mi_stride << 2, (xd->mi_stride << 2) + 4}; + const int idx_str = cm->mi_stride * mi_row + mi_col + idx[i]; + MODE_INFO **this_mi = cm->mi_grid_visible 
+ idx_str; + // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block + // inside. + if ((*this_mi)->sb_type == BLOCK_16X16 || + (*this_mi)->sb_type == BLOCK_32X16 || + (*this_mi)->sb_type == BLOCK_16X32) { + for (j = 0; j < 4; j++) { + if (vt->split[i].split[j].part_variances.none.variance < + (thresholds[2] >> 8)) + x->variance_low[(i << 2) + j + 9] = 1; + } + } + } + } + } + } +} + // This function chooses partitioning based on the variance between source and // reconstructed last, where variance is computed for down-sampled inputs. static int choose_partitioning(VP9_COMP *cpi, - const TileInfo *const tile, - MACROBLOCK *x, - int mi_row, int mi_col) { + const TileInfo *const tile, + MACROBLOCK *x, + int mi_row, int mi_col) { VP9_COMMON * const cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; int i, j, k, m; v64x64 vt; v16x16 vt2[16]; int force_split[21]; + int avg_32x32; + int avg_16x16[4]; uint8_t *s; const uint8_t *d; int sp; int dp; + // Ref frame used in partitioning. + MV_REFERENCE_FRAME ref_frame_partition = LAST_FRAME; int pixels_wide = 64, pixels_high = 64; int64_t thresholds[4] = {cpi->vbp_thresholds[0], cpi->vbp_thresholds[1], cpi->vbp_thresholds[2], cpi->vbp_thresholds[3]}; + // For the variance computation under SVC mode, we treat the frame as key if + // the reference (base layer frame) is key frame (i.e., is_key_frame == 1). + const int is_key_frame = (cm->frame_type == KEY_FRAME || + (is_one_pass_cbr_svc(cpi) && + cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)); // Always use 4x4 partition for key frame. 
- const int is_key_frame = (cm->frame_type == KEY_FRAME); - const int use_4x4_partition = is_key_frame; + const int use_4x4_partition = cm->frame_type == KEY_FRAME; const int low_res = (cm->width <= 352 && cm->height <= 288); int variance4x4downsample[16]; + int segment_id; - int segment_id = CR_SEGMENT_ID_BASE; + set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64); + segment_id = xd->mi[0]->segment_id; if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) { - const uint8_t *const map = cm->seg.update_map ? cpi->segmentation_map : - cm->last_frame_seg_map; - segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col); - if (cyclic_refresh_segment_id_boosted(segment_id)) { int q = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex); set_vbp_thresholds(cpi, thresholds, q); } } - set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64); + memset(x->variance_low, 0, sizeof(x->variance_low)); if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3); @@ -696,17 +842,20 @@ static int choose_partitioning(VP9_COMP *cpi, s = x->plane[0].src.buf; sp = x->plane[0].src.stride; - if (!is_key_frame && !(is_one_pass_cbr_svc(cpi) && - cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) { + // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks, + // 5-20 for the 16x16 blocks. + force_split[0] = 0; + + if (!is_key_frame) { // In the case of spatial/temporal scalable coding, the assumption here is // that the temporal reference frame will always be of type LAST_FRAME. // TODO(marpan): If that assumption is broken, we need to revisit this code. 
- MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + MODE_INFO *mi = xd->mi[0]; unsigned int uv_sad; const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); const YV12_BUFFER_CONFIG *yv12_g = NULL; - unsigned int y_sad, y_sad_g; + unsigned int y_sad, y_sad_g, y_sad_thr; const BLOCK_SIZE bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 + (mi_row + 4 < cm->mi_rows); @@ -732,25 +881,38 @@ static int choose_partitioning(VP9_COMP *cpi, vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, &cm->frame_refs[LAST_FRAME - 1].sf); - mbmi->ref_frame[0] = LAST_FRAME; - mbmi->ref_frame[1] = NONE; - mbmi->sb_type = BLOCK_64X64; - mbmi->mv[0].as_int = 0; - mbmi->interp_filter = BILINEAR; + mi->ref_frame[0] = LAST_FRAME; + mi->ref_frame[1] = NONE; + mi->sb_type = BLOCK_64X64; + mi->mv[0].as_int = 0; + mi->interp_filter = BILINEAR; y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col); - if (y_sad_g < y_sad) { + // Pick ref frame for partitioning, bias last frame when y_sad_g and y_sad + // are close if short_circuit_low_temp_var is on. + y_sad_thr = cpi->sf.short_circuit_low_temp_var ? 
(y_sad * 7) >> 3 : y_sad; + if (y_sad_g < y_sad_thr) { vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, &cm->frame_refs[GOLDEN_FRAME - 1].sf); - mbmi->ref_frame[0] = GOLDEN_FRAME; - mbmi->mv[0].as_int = 0; + mi->ref_frame[0] = GOLDEN_FRAME; + mi->mv[0].as_int = 0; y_sad = y_sad_g; + ref_frame_partition = GOLDEN_FRAME; } else { - x->pred_mv[LAST_FRAME] = mbmi->mv[0].as_mv; + x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv; + ref_frame_partition = LAST_FRAME; } + set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64); + x->sb_is_skin = 0; +#if !CONFIG_VP9_HIGHBITDEPTH + if (cpi->use_skin_detection) + x->sb_is_skin = skin_sb_split(cpi, x, low_res, mi_row, mi_col, + &force_split[0]); +#endif + for (i = 1; i <= 2; ++i) { struct macroblock_plane *p = &x->plane[i]; struct macroblockd_plane *pd = &xd->plane[i]; @@ -762,7 +924,9 @@ static int choose_partitioning(VP9_COMP *cpi, uv_sad = cpi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride); - x->color_sensitivity[i - 1] = uv_sad > (y_sad >> 2); + // TODO(marpan): Investigate if we should lower this threshold if + // superblock is detected as skin. + x->color_sensitivity[i - 1] = uv_sad > (y_sad >> 2); } d = xd->plane[0].dst.buf; @@ -801,9 +965,6 @@ static int choose_partitioning(VP9_COMP *cpi, #endif // CONFIG_VP9_HIGHBITDEPTH } - // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks, - // 5-20 for the 16x16 blocks. - force_split[0] = 0; // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances // for splits. 
for (i = 0; i < 4; i++) { @@ -811,6 +972,7 @@ static int choose_partitioning(VP9_COMP *cpi, const int y32_idx = ((i >> 1) << 5); const int i2 = i << 2; force_split[i + 1] = 0; + avg_16x16[i] = 0; for (j = 0; j < 4; j++) { const int x16_idx = x32_idx + ((j & 1) << 4); const int y16_idx = y32_idx + ((j >> 1) << 4); @@ -828,6 +990,7 @@ static int choose_partitioning(VP9_COMP *cpi, is_key_frame); fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16); get_variance(&vt.split[i].split[j].part_variances.none); + avg_16x16[i] += vt.split[i].split[j].part_variances.none.variance; if (vt.split[i].split[j].part_variances.none.variance > thresholds[2]) { // 16X16 variance is above threshold for split, so force split to 8x8 @@ -835,7 +998,8 @@ static int choose_partitioning(VP9_COMP *cpi, force_split[split_index] = 1; force_split[i + 1] = 1; force_split[0] = 1; - } else if (vt.split[i].split[j].part_variances.none.variance > + } else if (cpi->oxcf.speed < 8 && + vt.split[i].split[j].part_variances.none.variance > thresholds[1] && !cyclic_refresh_segment_id_boosted(segment_id)) { // We have some nominal amount of 16x16 variance (based on average), @@ -853,9 +1017,7 @@ static int choose_partitioning(VP9_COMP *cpi, } } } - // TODO(marpan): There is an issue with variance based on 4x4 average in - // svc mode, don't allow it for now. - if (is_key_frame || (low_res && !cpi->use_svc && + if (is_key_frame || (low_res && vt.split[i].split[j].part_variances.none.variance > (thresholds[1] << 1))) { force_split[split_index] = 0; @@ -877,8 +1039,8 @@ static int choose_partitioning(VP9_COMP *cpi, } } } - // Fill the rest of the variance tree by summing split partition values. 
+ avg_32x32 = 0; for (i = 0; i < 4; i++) { const int i2 = i << 2; for (j = 0; j < 4; j++) { @@ -888,22 +1050,41 @@ static int choose_partitioning(VP9_COMP *cpi, for (m = 0; m < 4; m++) fill_variance_tree(&vtemp->split[m], BLOCK_8X8); fill_variance_tree(vtemp, BLOCK_16X16); + // If variance of this 16x16 block is above the threshold, force block + // to split. This also forces a split on the upper levels. + get_variance(&vtemp->part_variances.none); + if (vtemp->part_variances.none.variance > thresholds[2]) { + force_split[5 + i2 + j] = 1; + force_split[i + 1] = 1; + force_split[0] = 1; + } } } fill_variance_tree(&vt.split[i], BLOCK_32X32); - // If variance of this 32x32 block is above the threshold, force the block - // to split. This also forces a split on the upper (64x64) level. + // If variance of this 32x32 block is above the threshold, or if its above + // (some threshold of) the average variance over the sub-16x16 blocks, then + // force this block to split. This also forces a split on the upper + // (64x64) level. if (!force_split[i + 1]) { get_variance(&vt.split[i].part_variances.none); - if (vt.split[i].part_variances.none.variance > thresholds[1]) { + if (vt.split[i].part_variances.none.variance > thresholds[1] || + (!is_key_frame && + vt.split[i].part_variances.none.variance > (thresholds[1] >> 1) && + vt.split[i].part_variances.none.variance > (avg_16x16[i] >> 1))) { force_split[i + 1] = 1; force_split[0] = 1; } + avg_32x32 += vt.split[i].part_variances.none.variance; } } if (!force_split[0]) { fill_variance_tree(&vt, BLOCK_64X64); get_variance(&vt.part_variances.none); + // If variance of this 64x64 block is above (some threshold of) the average + // variance over the sub-32x32 blocks, then force this block to split. 
+ if (!is_key_frame && + vt.part_variances.none.variance > (5 * avg_32x32) >> 4) + force_split[0] = 1; } // Now go through the entire structure, splitting every block size until @@ -960,6 +1141,11 @@ static int choose_partitioning(VP9_COMP *cpi, } } } + + if (cpi->sf.short_circuit_low_temp_var) { + set_low_temp_var_flag(cpi, x, xd, &vt, force_split, thresholds, + ref_frame_partition, mi_col, mi_row); + } return 0; } @@ -975,11 +1161,11 @@ static void update_state(VP9_COMP *cpi, ThreadData *td, struct macroblock_plane *const p = x->plane; struct macroblockd_plane *const pd = xd->plane; MODE_INFO *mi = &ctx->mic; - MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + MODE_INFO *const xdmi = xd->mi[0]; MODE_INFO *mi_addr = xd->mi[0]; const struct segmentation *const seg = &cm->seg; - const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type]; - const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type]; + const int bw = num_8x8_blocks_wide_lookup[mi->sb_type]; + const int bh = num_8x8_blocks_high_lookup[mi->sb_type]; const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row); MV_REF *const frame_mvs = @@ -991,7 +1177,7 @@ static void update_state(VP9_COMP *cpi, ThreadData *td, const int mi_height = num_8x8_blocks_high_lookup[bsize]; int max_plane; - assert(mi->mbmi.sb_type == bsize); + assert(mi->sb_type == bsize); *mi_addr = *mi; *x->mbmi_ext = ctx->mbmi_ext; @@ -1002,19 +1188,19 @@ static void update_state(VP9_COMP *cpi, ThreadData *td, if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { const uint8_t *const map = seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; - mi_addr->mbmi.segment_id = + mi_addr->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); } // Else for cyclic refresh mode update the segment map, set the segment id // and then update the quantizer. 
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { - vp9_cyclic_refresh_update_segment(cpi, &xd->mi[0]->mbmi, mi_row, + vp9_cyclic_refresh_update_segment(cpi, xd->mi[0], mi_row, mi_col, bsize, ctx->rate, ctx->dist, - x->skip); + x->skip, p); } } - max_plane = is_inter_block(mbmi) ? MAX_MB_PLANE : 1; + max_plane = is_inter_block(xdmi) ? MAX_MB_PLANE : 1; for (i = 0; i < max_plane; ++i) { p[i].coeff = ctx->coeff_pbuf[i][1]; p[i].qcoeff = ctx->qcoeff_pbuf[i][1]; @@ -1038,16 +1224,16 @@ static void update_state(VP9_COMP *cpi, ThreadData *td, xd->mi[x_idx + y * mis] = mi_addr; } - if (cpi->oxcf.aq_mode) + if (cpi->oxcf.aq_mode != NO_AQ) vp9_init_plane_quantizers(cpi, x); - if (is_inter_block(mbmi) && mbmi->sb_type < BLOCK_8X8) { - mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int; - mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int; + if (is_inter_block(xdmi) && xdmi->sb_type < BLOCK_8X8) { + xdmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int; + xdmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int; } x->skip = ctx->skip; - memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk, + memcpy(x->zcoeff_blk[xdmi->tx_size], ctx->zcoeff_blk, sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk); if (!output_enabled) @@ -1067,19 +1253,19 @@ static void update_state(VP9_COMP *cpi, ThreadData *td, THR_D63_PRED /*D63_PRED*/, THR_TM /*TM_PRED*/, }; - ++cpi->mode_chosen_counts[kf_mode_index[mbmi->mode]]; + ++cpi->mode_chosen_counts[kf_mode_index[xdmi->mode]]; } else { // Note how often each mode chosen as best ++cpi->mode_chosen_counts[ctx->best_mode_index]; } #endif if (!frame_is_intra_only(cm)) { - if (is_inter_block(mbmi)) { + if (is_inter_block(xdmi)) { vp9_update_mv_count(td); if (cm->interp_filter == SWITCHABLE) { const int ctx = vp9_get_pred_context_switchable_interp(xd); - ++td->counts->switchable_interp[ctx][mbmi->interp_filter]; + ++td->counts->switchable_interp[ctx][xdmi->interp_filter]; } } @@ -1095,10 +1281,10 @@ static void update_state(VP9_COMP *cpi, ThreadData *td, MV_REF *const frame_mv = 
frame_mvs + h * cm->mi_cols; for (w = 0; w < x_mis; ++w) { MV_REF *const mv = frame_mv + w; - mv->ref_frame[0] = mi->mbmi.ref_frame[0]; - mv->ref_frame[1] = mi->mbmi.ref_frame[1]; - mv->mv[0].as_int = mi->mbmi.mv[0].as_int; - mv->mv[1].as_int = mi->mbmi.mv[1].as_int; + mv->ref_frame[0] = mi->ref_frame[0]; + mv->ref_frame[1] = mi->ref_frame[1]; + mv->mv[0].as_int = mi->mv[0].as_int; + mv->mv[1].as_int = mi->mv[1].as_int; } } } @@ -1121,26 +1307,23 @@ void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src, static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode, RD_COST *rd_cost, BLOCK_SIZE bsize) { MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + MODE_INFO *const mi = xd->mi[0]; INTERP_FILTER filter_ref; - if (xd->up_available) - filter_ref = xd->mi[-xd->mi_stride]->mbmi.interp_filter; - else if (xd->left_available) - filter_ref = xd->mi[-1]->mbmi.interp_filter; - else + filter_ref = vp9_get_pred_context_switchable_interp(xd); + if (filter_ref == SWITCHABLE_FILTERS) filter_ref = EIGHTTAP; - mbmi->sb_type = bsize; - mbmi->mode = ZEROMV; - mbmi->tx_size = + mi->sb_type = bsize; + mi->mode = ZEROMV; + mi->tx_size = VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[tx_mode]); - mbmi->skip = 1; - mbmi->uv_mode = DC_PRED; - mbmi->ref_frame[0] = LAST_FRAME; - mbmi->ref_frame[1] = NONE; - mbmi->mv[0].as_int = 0; - mbmi->interp_filter = filter_ref; + mi->skip = 1; + mi->uv_mode = DC_PRED; + mi->ref_frame[0] = LAST_FRAME; + mi->ref_frame[1] = NONE; + mi->mv[0].as_int = 0; + mi->interp_filter = filter_ref; xd->mi[0]->bmi[0].as_mv[0].as_int = 0; x->skip = 1; @@ -1155,8 +1338,7 @@ static int set_segment_rdmult(VP9_COMP *const cpi, VP9_COMMON *const cm = &cpi->common; vp9_init_plane_quantizers(cpi, x); vpx_clear_system_state(); - segment_qindex = vp9_get_qindex(&cm->seg, segment_id, - cm->base_qindex); + segment_qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex); return vp9_compute_rd_mult(cpi, 
segment_qindex + cm->y_dc_delta_q); } @@ -1169,7 +1351,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, VP9_COMMON *const cm = &cpi->common; TileInfo *const tile_info = &tile_data->tile_info; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *mbmi; + MODE_INFO *mi; struct macroblock_plane *const p = x->plane; struct macroblockd_plane *const pd = xd->plane; const AQ_MODE aq_mode = cpi->oxcf.aq_mode; @@ -1181,8 +1363,8 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, x->use_lp32x32fdct = 1; set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); - mbmi = &xd->mi[0]->mbmi; - mbmi->sb_type = bsize; + mi = xd->mi[0]; + mi->sb_type = bsize; for (i = 0; i < MAX_MB_PLANE; ++i) { p[i].coeff = ctx->coeff_pbuf[i][0]; @@ -1196,7 +1378,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, x->skip_recode = 0; // Set to zero to make sure we do not use the previous encoded frame stats - mbmi->skip = 0; + mi->skip = 0; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { @@ -1221,15 +1403,24 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, if (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame || (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { - mbmi->segment_id = vp9_vaq_segment_id(energy); + mi->segment_id = vp9_vaq_segment_id(energy); } else { const uint8_t *const map = cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map; - mbmi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); + mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); } - x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id); + x->rdmult = set_segment_rdmult(cpi, x, mi->segment_id); + } else if (aq_mode == EQUATOR360_AQ) { + if (cm->frame_type == KEY_FRAME) { + mi->segment_id = vp9_360aq_segment_id(mi_row, cm->mi_rows); + } else { + const uint8_t *const map = cm->seg.update_map ? 
cpi->segmentation_map + : cm->last_frame_seg_map; + mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); + } + x->rdmult = set_segment_rdmult(cpi, x, mi->segment_id); } else if (aq_mode == COMPLEXITY_AQ) { - x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id); + x->rdmult = set_segment_rdmult(cpi, x, mi->segment_id); } else if (aq_mode == CYCLIC_REFRESH_AQ) { const uint8_t *const map = cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map; @@ -1245,7 +1436,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd); } else { if (bsize >= BLOCK_8X8) { - if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) + if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) vp9_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, rd_cost, bsize, ctx, best_rd); else @@ -1282,27 +1473,26 @@ static void update_stats(VP9_COMMON *cm, ThreadData *td) { const MACROBLOCK *x = &td->mb; const MACROBLOCKD *const xd = &x->e_mbd; const MODE_INFO *const mi = xd->mi[0]; - const MB_MODE_INFO *const mbmi = &mi->mbmi; const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; - const BLOCK_SIZE bsize = mbmi->sb_type; + const BLOCK_SIZE bsize = mi->sb_type; if (!frame_is_intra_only(cm)) { FRAME_COUNTS *const counts = td->counts; - const int inter_block = is_inter_block(mbmi); - const int seg_ref_active = segfeature_active(&cm->seg, mbmi->segment_id, + const int inter_block = is_inter_block(mi); + const int seg_ref_active = segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_REF_FRAME); if (!seg_ref_active) { - counts->intra_inter[vp9_get_intra_inter_context(xd)][inter_block]++; + counts->intra_inter[get_intra_inter_context(xd)][inter_block]++; // If the segment reference feature is enabled we have only a single // reference frame allowed for the segment so exclude it from // the reference frame counts used to work out probabilities. 
if (inter_block) { - const MV_REFERENCE_FRAME ref0 = mbmi->ref_frame[0]; + const MV_REFERENCE_FRAME ref0 = mi->ref_frame[0]; if (cm->reference_mode == REFERENCE_MODE_SELECT) counts->comp_inter[vp9_get_reference_mode_context(cm, xd)] - [has_second_ref(mbmi)]++; + [has_second_ref(mi)]++; - if (has_second_ref(mbmi)) { + if (has_second_ref(mi)) { counts->comp_ref[vp9_get_pred_context_comp_ref_p(cm, xd)] [ref0 == GOLDEN_FRAME]++; } else { @@ -1315,10 +1505,10 @@ static void update_stats(VP9_COMMON *cm, ThreadData *td) { } } if (inter_block && - !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { - const int mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]]; + !segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) { + const int mode_ctx = mbmi_ext->mode_context[mi->ref_frame[0]]; if (bsize >= BLOCK_8X8) { - const PREDICTION_MODE mode = mbmi->mode; + const PREDICTION_MODE mode = mi->mode; ++counts->inter_mode[mode_ctx][INTER_OFFSET(mode)]; } else { const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; @@ -1520,7 +1710,7 @@ static void set_partial_b64x64_partition(MODE_INFO *mi, int mis, for (c = 0; c < MI_BLOCK_SIZE; c += bw) { const int index = r * mis + c; mi_8x8[index] = mi + index; - mi_8x8[index]->mbmi.sb_type = find_partition_size(bsize, + mi_8x8[index]->sb_type = find_partition_size(bsize, row8x8_remaining - r, col8x8_remaining - c, &bh, &bw); } } @@ -1552,7 +1742,7 @@ static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile, for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) { int index = block_row * mis + block_col; mi_8x8[index] = mi_upper_left + index; - mi_8x8[index]->mbmi.sb_type = bsize; + mi_8x8[index]->sb_type = bsize; } } } else { @@ -1617,7 +1807,7 @@ static void set_source_var_based_partition(VP9_COMP *cpi, index = b_mi_row * mis + b_mi_col; mi_8x8[index] = mi_upper_left + index; - mi_8x8[index]->mbmi.sb_type = BLOCK_16X16; + mi_8x8[index]->sb_type = BLOCK_16X16; // TODO(yunqingwang): If 
d16[j].var is very large, use 8x8 partition // size to further improve quality. @@ -1639,7 +1829,7 @@ static void set_source_var_based_partition(VP9_COMP *cpi, index = coord_lookup[i*4].row * mis + coord_lookup[i*4].col; mi_8x8[index] = mi_upper_left + index; - mi_8x8[index]->mbmi.sb_type = BLOCK_32X32; + mi_8x8[index]->sb_type = BLOCK_32X32; } } @@ -1651,7 +1841,7 @@ static void set_source_var_based_partition(VP9_COMP *cpi, // Use 64x64 partition if (is_larger_better) { mi_8x8[0] = mi_upper_left; - mi_8x8[0]->mbmi.sb_type = BLOCK_64X64; + mi_8x8[0]->sb_type = BLOCK_64X64; } } } else { // partial in-image SB64 @@ -1669,46 +1859,47 @@ static void update_state_rt(VP9_COMP *cpi, ThreadData *td, MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mi = xd->mi[0]; - MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + struct macroblock_plane *const p = x->plane; const struct segmentation *const seg = &cm->seg; - const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type]; - const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type]; + const int bw = num_8x8_blocks_wide_lookup[mi->sb_type]; + const int bh = num_8x8_blocks_high_lookup[mi->sb_type]; const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row); *(xd->mi[0]) = ctx->mic; *(x->mbmi_ext) = ctx->mbmi_ext; - if (seg->enabled && cpi->oxcf.aq_mode) { + if (seg->enabled && cpi->oxcf.aq_mode != NO_AQ) { // For in frame complexity AQ or variance AQ, copy segment_id from // segmentation_map. - if (cpi->oxcf.aq_mode == COMPLEXITY_AQ || - cpi->oxcf.aq_mode == VARIANCE_AQ ) { + if (cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ) { const uint8_t *const map = seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; - mbmi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); + mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); } else { // Setting segmentation map for cyclic_refresh. 
- vp9_cyclic_refresh_update_segment(cpi, mbmi, mi_row, mi_col, bsize, - ctx->rate, ctx->dist, x->skip); + vp9_cyclic_refresh_update_segment(cpi, mi, mi_row, mi_col, bsize, + ctx->rate, ctx->dist, x->skip, p); } vp9_init_plane_quantizers(cpi, x); } - if (is_inter_block(mbmi)) { + if (is_inter_block(mi)) { vp9_update_mv_count(td); if (cm->interp_filter == SWITCHABLE) { const int pred_ctx = vp9_get_pred_context_switchable_interp(xd); - ++td->counts->switchable_interp[pred_ctx][mbmi->interp_filter]; + ++td->counts->switchable_interp[pred_ctx][mi->interp_filter]; } - if (mbmi->sb_type < BLOCK_8X8) { - mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int; - mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int; + if (mi->sb_type < BLOCK_8X8) { + mi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int; + mi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int; } } - if (cm->use_prev_frame_mvs) { + if (cm->use_prev_frame_mvs || + (cpi->svc.use_base_mv && cpi->svc.number_spatial_layers > 1 + && cpi->svc.spatial_layer_id != cpi->svc.number_spatial_layers - 1)) { MV_REF *const frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col; int w, h; @@ -1717,16 +1908,16 @@ static void update_state_rt(VP9_COMP *cpi, ThreadData *td, MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols; for (w = 0; w < x_mis; ++w) { MV_REF *const mv = frame_mv + w; - mv->ref_frame[0] = mi->mbmi.ref_frame[0]; - mv->ref_frame[1] = mi->mbmi.ref_frame[1]; - mv->mv[0].as_int = mi->mbmi.mv[0].as_int; - mv->mv[1].as_int = mi->mbmi.mv[1].as_int; + mv->ref_frame[0] = mi->ref_frame[0]; + mv->ref_frame[1] = mi->ref_frame[1]; + mv->mv[0].as_int = mi->mv[0].as_int; + mv->mv[1].as_int = mi->mv[1].as_int; } } } x->skip = ctx->skip; - x->skip_txfm[0] = mbmi->segment_id ? 0 : ctx->skip_txfm[0]; + x->skip_txfm[0] = mi->segment_id ? 
0 : ctx->skip_txfm[0]; } static void encode_b_rt(VP9_COMP *cpi, ThreadData *td, @@ -1738,16 +1929,6 @@ static void encode_b_rt(VP9_COMP *cpi, ThreadData *td, set_offsets(cpi, tile, x, mi_row, mi_col, bsize); update_state_rt(cpi, td, ctx, mi_row, mi_col, bsize); -#if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0 && - output_enabled && - cpi->common.frame_type != KEY_FRAME && - cpi->resize_pending == 0) { - vp9_denoiser_denoise(&cpi->denoiser, x, mi_row, mi_col, - VPXMAX(BLOCK_8X8, bsize), ctx); - } -#endif - encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx); update_stats(&cpi->common, td); @@ -1776,7 +1957,7 @@ static void encode_sb_rt(VP9_COMP *cpi, ThreadData *td, const int idx_str = xd->mi_stride * mi_row + mi_col; MODE_INFO ** mi_8x8 = cm->mi_grid_visible + idx_str; ctx = partition_plane_context(xd, mi_row, mi_col, bsize); - subsize = mi_8x8[0]->mbmi.sb_type; + subsize = mi_8x8[0]->sb_type; } else { ctx = 0; subsize = BLOCK_4X4; @@ -1851,7 +2032,7 @@ static void rd_use_partition(VP9_COMP *cpi, RD_COST last_part_rdc, none_rdc, chosen_rdc; BLOCK_SIZE sub_subsize = BLOCK_4X4; int splits_below = 0; - BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type; + BLOCK_SIZE bs_type = mi_8x8[0]->sb_type; int do_partition_search = 1; PICK_MODE_CONTEXT *ctx = &pc_tree->none; @@ -1871,7 +2052,7 @@ static void rd_use_partition(VP9_COMP *cpi, pc_tree->partitioning = partition; save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); - if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode) { + if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode != NO_AQ) { set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); x->mb_energy = vp9_block_energy(cpi, x, bsize); } @@ -1886,7 +2067,7 @@ static void rd_use_partition(VP9_COMP *cpi, for (i = 0; i < 4; i++) { int jj = i >> 1, ii = i & 0x01; MODE_INFO *this_mi = mi_8x8[jj * bss * mis + ii * bss]; - if (this_mi && this_mi->mbmi.sb_type >= sub_subsize) { + if (this_mi && this_mi->sb_type >= sub_subsize) { splits_below = 
0; } } @@ -1910,7 +2091,7 @@ static void rd_use_partition(VP9_COMP *cpi, } restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); - mi_8x8[0]->mbmi.sb_type = bs_type; + mi_8x8[0]->sb_type = bs_type; pc_tree->partitioning = partition; } } @@ -2068,7 +2249,7 @@ static void rd_use_partition(VP9_COMP *cpi, // If last_part is better set the partitioning to that. if (last_part_rdc.rdcost < chosen_rdc.rdcost) { - mi_8x8[0]->mbmi.sb_type = bsize; + mi_8x8[0]->sb_type = bsize; if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition; chosen_rdc = last_part_rdc; @@ -2134,7 +2315,7 @@ static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO **mi_8x8, for (i = 0; i < sb_height_in_blocks; ++i) { for (j = 0; j < sb_width_in_blocks; ++j) { MODE_INFO *mi = mi_8x8[index+j]; - BLOCK_SIZE sb_type = mi ? mi->mbmi.sb_type : 0; + BLOCK_SIZE sb_type = mi ? mi->sb_type : 0; bs_hist[sb_type]++; *min_block_size = VPXMIN(*min_block_size, sb_type); *max_block_size = VPXMAX(*max_block_size, sb_type); @@ -2161,8 +2342,8 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, BLOCK_SIZE *max_block_size) { VP9_COMMON *const cm = &cpi->common; MODE_INFO **mi = xd->mi; - const int left_in_image = xd->left_available && mi[-1]; - const int above_in_image = xd->up_available && mi[-xd->mi_stride]; + const int left_in_image = !!xd->left_mi; + const int above_in_image = !!xd->above_mi; const int row8x8_remaining = tile->mi_row_end - mi_row; const int col8x8_remaining = tile->mi_col_end - mi_col; int bh, bw; @@ -2250,26 +2431,26 @@ static void set_partition_range(VP9_COMMON *cm, MACROBLOCKD *xd, for (idy = 0; idy < mi_height; ++idy) { for (idx = 0; idx < mi_width; ++idx) { mi = prev_mi[idy * cm->mi_stride + idx]; - bs = mi ? mi->mbmi.sb_type : bsize; + bs = mi ? 
mi->sb_type : bsize; min_size = VPXMIN(min_size, bs); max_size = VPXMAX(max_size, bs); } } } - if (xd->left_available) { + if (xd->left_mi) { for (idy = 0; idy < mi_height; ++idy) { mi = xd->mi[idy * cm->mi_stride - 1]; - bs = mi ? mi->mbmi.sb_type : bsize; + bs = mi ? mi->sb_type : bsize; min_size = VPXMIN(min_size, bs); max_size = VPXMAX(max_size, bs); } } - if (xd->up_available) { + if (xd->above_mi) { for (idx = 0; idx < mi_width; ++idx) { mi = xd->mi[idx - cm->mi_stride]; - bs = mi ? mi->mbmi.sb_type : bsize; + bs = mi ? mi->sb_type : bsize; min_size = VPXMIN(min_size, bs); max_size = VPXMAX(max_size, bs); } @@ -2354,7 +2535,8 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, PARTITION_CONTEXT sl[8], sa[8]; TOKENEXTRA *tp_orig = *tp; PICK_MODE_CONTEXT *ctx = &pc_tree->none; - int i, pl; + int i; + const int pl = partition_plane_context(xd, mi_row, mi_col, bsize); BLOCK_SIZE subsize; RD_COST this_rdc, sum_rdc, best_rdc; int do_split = bsize >= BLOCK_8X8; @@ -2400,7 +2582,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); - if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode) + if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode != NO_AQ) x->mb_energy = vp9_block_energy(cpi, x, bsize); if (cpi->sf.cb_partition_search && bsize == BLOCK_16X16) { @@ -2424,8 +2606,15 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, if (cpi->sf.use_square_partition_only && bsize > cpi->sf.use_square_only_threshold) { + if (cpi->use_svc) { + if (!vp9_active_h_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless) + partition_horz_allowed &= force_horz_split; + if (!vp9_active_v_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless) + partition_vert_allowed &= force_vert_split; + } else { partition_horz_allowed &= force_horz_split; partition_vert_allowed &= force_vert_split; + } } save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); @@ -2495,7 +2684,6 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData 
*td, &this_rdc, bsize, ctx, best_rdc.rdcost); if (this_rdc.rate != INT_MAX) { if (bsize >= BLOCK_8X8) { - pl = partition_plane_context(xd, mi_row, mi_col, bsize); this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist); @@ -2549,6 +2737,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, break; } } + if (skip) { if (src_diff_var == UINT_MAX) { set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); @@ -2580,15 +2769,16 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, i = 4; if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed) pc_tree->leaf_split[0]->pred_interp_filter = - ctx->mic.mbmi.interp_filter; + ctx->mic.interp_filter; rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, pc_tree->leaf_split[0], best_rdc.rdcost); + if (sum_rdc.rate == INT_MAX) sum_rdc.rdcost = INT64_MAX; } else { for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) { - const int x_idx = (i & 1) * mi_step; - const int y_idx = (i >> 1) * mi_step; + const int x_idx = (i & 1) * mi_step; + const int y_idx = (i >> 1) * mi_step; if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) continue; @@ -2614,7 +2804,6 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, } if (sum_rdc.rdcost < best_rdc.rdcost && i == 4) { - pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT]; sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); @@ -2651,7 +2840,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) pc_tree->horizontal[0].pred_interp_filter = - ctx->mic.mbmi.interp_filter; + ctx->mic.interp_filter; rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, &pc_tree->horizontal[0], best_rdc.rdcost); @@ -2666,7 +2855,7 @@ static void 
rd_pick_partition(VP9_COMP *cpi, ThreadData *td, if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) pc_tree->horizontal[1].pred_interp_filter = - ctx->mic.mbmi.interp_filter; + ctx->mic.interp_filter; rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc, subsize, &pc_tree->horizontal[1], best_rdc.rdcost - sum_rdc.rdcost); @@ -2680,7 +2869,6 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, } if (sum_rdc.rdcost < best_rdc.rdcost) { - pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ]; sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); if (sum_rdc.rdcost < best_rdc.rdcost) { @@ -2694,6 +2882,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, } restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); } + // PARTITION_VERT if (partition_vert_allowed && (do_rect || vp9_active_v_edge(cpi, mi_col, mi_step))) { @@ -2704,7 +2893,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) pc_tree->vertical[0].pred_interp_filter = - ctx->mic.mbmi.interp_filter; + ctx->mic.interp_filter; rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, &pc_tree->vertical[0], best_rdc.rdcost); if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + mi_step < cm->mi_cols && @@ -2718,7 +2907,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) pc_tree->vertical[1].pred_interp_filter = - ctx->mic.mbmi.interp_filter; + ctx->mic.interp_filter; rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc, subsize, &pc_tree->vertical[1], best_rdc.rdcost - sum_rdc.rdcost); @@ -2732,7 +2921,6 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, } if (sum_rdc.rdcost < best_rdc.rdcost) { - pl = 
partition_plane_context(xd, mi_row, mi_col, bsize); sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT]; sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); @@ -2777,6 +2965,8 @@ static void encode_rd_sb_row(VP9_COMP *cpi, MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; SPEED_FEATURES *const sf = &cpi->sf; + const int mi_col_start = tile_info->mi_col_start; + const int mi_col_end = tile_info->mi_col_end; int mi_col; // Initialize the left context for the new SB row @@ -2784,8 +2974,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context)); // Code each SB in the row - for (mi_col = tile_info->mi_col_start; mi_col < tile_info->mi_col_end; - mi_col += MI_BLOCK_SIZE) { + for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += MI_BLOCK_SIZE) { const struct segmentation *const seg = &cm->seg; int dummy_rate; int64_t dummy_dist; @@ -2890,8 +3079,8 @@ static void reset_skip_tx_size(VP9_COMMON *cm, TX_SIZE max_tx_size) { for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row, mi_ptr += mis) { for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) { - if (mi_ptr[mi_col]->mbmi.tx_size > max_tx_size) - mi_ptr[mi_col]->mbmi.tx_size = max_tx_size; + if (mi_ptr[mi_col]->tx_size > max_tx_size) + mi_ptr[mi_col]->tx_size = max_tx_size; } } } @@ -2938,18 +3127,32 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi, VP9_COMMON *const cm = &cpi->common; TileInfo *const tile_info = &tile_data->tile_info; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *mbmi; + MODE_INFO *mi; + ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; + BLOCK_SIZE bs = VPXMAX(bsize, BLOCK_8X8); // processing unit block size + const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bs]; + const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bs]; + int plane; + set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); - mbmi = &xd->mi[0]->mbmi; - mbmi->sb_type = bsize; + mi = xd->mi[0]; + mi->sb_type = bsize; + + 
for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + struct macroblockd_plane *pd = &xd->plane[plane]; + memcpy(a + num_4x4_blocks_wide * plane, pd->above_context, + (sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x); + memcpy(l + num_4x4_blocks_high * plane, pd->left_context, + (sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y); + } if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) - if (cyclic_refresh_segment_id_boosted(mbmi->segment_id)) + if (cyclic_refresh_segment_id_boosted(mi->segment_id)) x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); if (cm->frame_type == KEY_FRAME) hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx); - else if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) + else if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) set_mode_info_seg_skip(x, cm->tx_mode, rd_cost, bsize); else if (bsize >= BLOCK_8X8) vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, @@ -2960,6 +3163,14 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi, duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + struct macroblockd_plane *pd = &xd->plane[plane]; + memcpy(pd->above_context, a + num_4x4_blocks_wide * plane, + (sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x); + memcpy(pd->left_context, l + num_4x4_blocks_high * plane, + (sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y); + } + if (rd_cost->rate == INT_MAX) vp9_rd_cost_reset(rd_cost); @@ -3109,7 +3320,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, if (partition_none_allowed) { nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, ctx); - ctx->mic.mbmi = xd->mi[0]->mbmi; + ctx->mic = *xd->mi[0]; ctx->mbmi_ext = *x->mbmi_ext; ctx->skip_txfm[0] = x->skip_txfm[0]; ctx->skip = x->skip; @@ -3192,7 +3403,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, 
subsize, &pc_tree->horizontal[0]); - pc_tree->horizontal[0].mic.mbmi = xd->mi[0]->mbmi; + pc_tree->horizontal[0].mic = *xd->mi[0]; pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext; pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[0].skip = x->skip; @@ -3204,7 +3415,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, &this_rdc, subsize, &pc_tree->horizontal[1]); - pc_tree->horizontal[1].mic.mbmi = xd->mi[0]->mbmi; + pc_tree->horizontal[1].mic = *xd->mi[0]; pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext; pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[1].skip = x->skip; @@ -3237,7 +3448,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, pc_tree->vertical[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, &pc_tree->vertical[0]); - pc_tree->vertical[0].mic.mbmi = xd->mi[0]->mbmi; + pc_tree->vertical[0].mic = *xd->mi[0]; pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext; pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[0].skip = x->skip; @@ -3248,7 +3459,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + ms, &this_rdc, subsize, &pc_tree->vertical[1]); - pc_tree->vertical[1].mic.mbmi = xd->mi[0]->mbmi; + pc_tree->vertical[1].mic = *xd->mi[0]; pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext; pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[1].skip = x->skip; @@ -3320,7 +3531,7 @@ static void nonrd_select_partition(VP9_COMP *cpi, if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - subsize = (bsize >= BLOCK_8X8) ? mi[0]->mbmi.sb_type : BLOCK_4X4; + subsize = (bsize >= BLOCK_8X8) ? 
mi[0]->sb_type : BLOCK_4X4; partition = partition_lookup[bsl][subsize]; if (bsize == BLOCK_32X32 && subsize == BLOCK_32X32) { @@ -3345,7 +3556,7 @@ static void nonrd_select_partition(VP9_COMP *cpi, pc_tree->none.pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize, &pc_tree->none); - pc_tree->none.mic.mbmi = xd->mi[0]->mbmi; + pc_tree->none.mic = *xd->mi[0]; pc_tree->none.mbmi_ext = *x->mbmi_ext; pc_tree->none.skip_txfm[0] = x->skip_txfm[0]; pc_tree->none.skip = x->skip; @@ -3354,7 +3565,7 @@ static void nonrd_select_partition(VP9_COMP *cpi, pc_tree->vertical[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize, &pc_tree->vertical[0]); - pc_tree->vertical[0].mic.mbmi = xd->mi[0]->mbmi; + pc_tree->vertical[0].mic = *xd->mi[0]; pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext; pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[0].skip = x->skip; @@ -3362,7 +3573,7 @@ static void nonrd_select_partition(VP9_COMP *cpi, pc_tree->vertical[1].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &this_rdc, subsize, &pc_tree->vertical[1]); - pc_tree->vertical[1].mic.mbmi = xd->mi[0]->mbmi; + pc_tree->vertical[1].mic = *xd->mi[0]; pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext; pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[1].skip = x->skip; @@ -3377,7 +3588,7 @@ static void nonrd_select_partition(VP9_COMP *cpi, pc_tree->horizontal[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize, &pc_tree->horizontal[0]); - pc_tree->horizontal[0].mic.mbmi = xd->mi[0]->mbmi; + pc_tree->horizontal[0].mic = *xd->mi[0]; pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext; pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[0].skip = x->skip; @@ -3385,7 +3596,7 @@ static void nonrd_select_partition(VP9_COMP *cpi, pc_tree->horizontal[1].pred_pixel_ready = 1; 
nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &this_rdc, subsize, &pc_tree->horizontal[1]); - pc_tree->horizontal[1].mic.mbmi = xd->mi[0]->mbmi; + pc_tree->horizontal[1].mic = *xd->mi[0]; pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext; pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[1].skip = x->skip; @@ -3457,7 +3668,7 @@ static void nonrd_use_partition(VP9_COMP *cpi, if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - subsize = (bsize >= BLOCK_8X8) ? mi[0]->mbmi.sb_type : BLOCK_4X4; + subsize = (bsize >= BLOCK_8X8) ? mi[0]->sb_type : BLOCK_4X4; partition = partition_lookup[bsl][subsize]; if (output_enabled && bsize != BLOCK_4X4) { @@ -3470,7 +3681,7 @@ static void nonrd_use_partition(VP9_COMP *cpi, pc_tree->none.pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost, subsize, &pc_tree->none); - pc_tree->none.mic.mbmi = xd->mi[0]->mbmi; + pc_tree->none.mic = *xd->mi[0]; pc_tree->none.mbmi_ext = *x->mbmi_ext; pc_tree->none.skip_txfm[0] = x->skip_txfm[0]; pc_tree->none.skip = x->skip; @@ -3481,7 +3692,7 @@ static void nonrd_use_partition(VP9_COMP *cpi, pc_tree->vertical[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost, subsize, &pc_tree->vertical[0]); - pc_tree->vertical[0].mic.mbmi = xd->mi[0]->mbmi; + pc_tree->vertical[0].mic = *xd->mi[0]; pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext; pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[0].skip = x->skip; @@ -3491,7 +3702,7 @@ static void nonrd_use_partition(VP9_COMP *cpi, pc_tree->vertical[1].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, dummy_cost, subsize, &pc_tree->vertical[1]); - pc_tree->vertical[1].mic.mbmi = xd->mi[0]->mbmi; + pc_tree->vertical[1].mic = *xd->mi[0]; pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext; pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[1].skip = x->skip; @@ -3503,7 +3714,7 @@ static 
void nonrd_use_partition(VP9_COMP *cpi, pc_tree->horizontal[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost, subsize, &pc_tree->horizontal[0]); - pc_tree->horizontal[0].mic.mbmi = xd->mi[0]->mbmi; + pc_tree->horizontal[0].mic = *xd->mi[0]; pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext; pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[0].skip = x->skip; @@ -3514,7 +3725,7 @@ static void nonrd_use_partition(VP9_COMP *cpi, pc_tree->horizontal[1].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, dummy_cost, subsize, &pc_tree->horizontal[1]); - pc_tree->horizontal[1].mic.mbmi = xd->mi[0]->mbmi; + pc_tree->horizontal[1].mic = *xd->mi[0]; pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext; pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[1].skip = x->skip; @@ -3563,6 +3774,8 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, TileInfo *const tile_info = &tile_data->tile_info; MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; + const int mi_col_start = tile_info->mi_col_start; + const int mi_col_end = tile_info->mi_col_end; int mi_col; // Initialize the left context for the new SB row @@ -3570,8 +3783,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context)); // Code each SB in the row - for (mi_col = tile_info->mi_col_start; mi_col < tile_info->mi_col_end; - mi_col += MI_BLOCK_SIZE) { + for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += MI_BLOCK_SIZE) { const struct segmentation *const seg = &cm->seg; RD_COST dummy_rdc; const int idx_str = cm->mi_stride * mi_row + mi_col; @@ -3584,6 +3796,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, vp9_rd_cost_init(&dummy_rdc); x->color_sensitivity[0] = 0; x->color_sensitivity[1] = 0; + x->sb_is_skin = 0; if (seg->enabled) { const uint8_t *const map = seg->update_map ? 
cpi->segmentation_map @@ -3620,8 +3833,14 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, break; case REFERENCE_PARTITION: set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled && - xd->mi[0]->mbmi.segment_id) { + // Use nonrd_pick_partition on scene-cut for VBR, or on qp-segment + // if cyclic_refresh is enabled. + // nonrd_pick_partition does not support 4x4 partition, so avoid it + // on key frame for now. + if ((cpi->oxcf.rc_mode == VPX_VBR && cpi->rc.high_source_sad && + cm->frame_type != KEY_FRAME) || + (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled && + xd->mi[0]->segment_id)) { // Use lower max_partition_size for low resoultions. if (cm->width <= 352 && cm->height <= 288) x->max_partition_size = BLOCK_32X32; @@ -3775,8 +3994,7 @@ static int get_skip_encode_frame(const VP9_COMMON *cm, ThreadData *const td) { } return (intra_count << 2) < inter_count && - cm->frame_type != KEY_FRAME && - cm->show_frame; + cm->frame_type != KEY_FRAME && cm->show_frame; } void vp9_init_tile_data(VP9_COMP *cpi) { @@ -3829,10 +4047,15 @@ void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td, &cpi->tile_data[tile_row * tile_cols + tile_col]; const TileInfo * const tile_info = &this_tile->tile_info; TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col]; + const int mi_row_start = tile_info->mi_row_start; + const int mi_row_end = tile_info->mi_row_end; int mi_row; - for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end; - mi_row += MI_BLOCK_SIZE) { + // Set up pointers to per thread motion search counters. 
+ td->mb.m_search_count_ptr = &td->rd_counts.m_search_count; + td->mb.ex_search_count_ptr = &td->rd_counts.ex_search_count; + + for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE) { if (cpi->sf.use_nonrd_pick_mode) encode_nonrd_sb_row(cpi, td, this_tile, mi_row, &tok); else @@ -3887,6 +4110,8 @@ static void encode_frame_internal(VP9_COMP *cpi) { vp9_zero(rdc->coef_counts); vp9_zero(rdc->comp_pred_diff); vp9_zero(rdc->filter_diff); + rdc->m_search_count = 0; // Count of motion search hits. + rdc->ex_search_count = 0; // Exhaustive mesh search hits. xd->lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0 && @@ -3957,10 +4182,10 @@ static void encode_frame_internal(VP9_COMP *cpi) { vpx_usec_timer_start(&emr_timer); #if CONFIG_FP_MB_STATS - if (cpi->use_fp_mb_stats) { - input_fpmb_stats(&cpi->twopass.firstpass_mb_stats, cm, - &cpi->twopass.this_frame_mb_stats); - } + if (cpi->use_fp_mb_stats) { + input_fpmb_stats(&cpi->twopass.firstpass_mb_stats, cm, + &cpi->twopass.this_frame_mb_stats); + } #endif // If allowed, encoding tiles in parallel with one thread handling one tile. 
@@ -3999,6 +4224,31 @@ static INTERP_FILTER get_interp_filter( } } +static int compute_frame_aq_offset(struct VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + MODE_INFO **mi_8x8_ptr = cm->mi_grid_visible; + struct segmentation *const seg = &cm->seg; + + int mi_row, mi_col; + int sum_delta = 0; + int map_index = 0; + int qdelta_index; + int segment_id; + + for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) { + MODE_INFO **mi_8x8 = mi_8x8_ptr; + for (mi_col = 0; mi_col < cm->mi_cols; mi_col++, mi_8x8++) { + segment_id = mi_8x8[0]->segment_id; + qdelta_index = get_segdata(seg, segment_id, SEG_LVL_ALT_Q); + sum_delta += qdelta_index; + map_index++; + } + mi_8x8_ptr += cm->mi_stride; + } + + return sum_delta / (cm->mi_rows * cm->mi_cols); +} + void vp9_encode_frame(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; @@ -4121,12 +4371,17 @@ void vp9_encode_frame(VP9_COMP *cpi) { cm->reference_mode = SINGLE_REFERENCE; encode_frame_internal(cpi); } -} + // If segmentated AQ is enabled compute the average AQ weighting. 
+ if (cm->seg.enabled && (cpi->oxcf.aq_mode != NO_AQ) && + (cm->seg.update_map || cm->seg.update_data)) { + cm->seg.aq_av_offset = compute_frame_aq_offset(cpi); + } +} static void sum_intra_stats(FRAME_COUNTS *counts, const MODE_INFO *mi) { - const PREDICTION_MODE y_mode = mi->mbmi.mode; - const PREDICTION_MODE uv_mode = mi->mbmi.uv_mode; - const BLOCK_SIZE bsize = mi->mbmi.sb_type; + const PREDICTION_MODE y_mode = mi->mode; + const PREDICTION_MODE uv_mode = mi->uv_mode; + const BLOCK_SIZE bsize = mi->sb_type; if (bsize < BLOCK_8X8) { int idx, idy; @@ -4142,6 +4397,32 @@ static void sum_intra_stats(FRAME_COUNTS *counts, const MODE_INFO *mi) { ++counts->uv_mode[y_mode][uv_mode]; } +static void update_zeromv_cnt(VP9_COMP *const cpi, + const MODE_INFO *const mi, + int mi_row, int mi_col, + BLOCK_SIZE bsize) { + const VP9_COMMON *const cm = &cpi->common; + MV mv = mi->mv[0].as_mv; + const int bw = num_8x8_blocks_wide_lookup[bsize]; + const int bh = num_8x8_blocks_high_lookup[bsize]; + const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); + const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); + const int block_index = mi_row * cm->mi_cols + mi_col; + int x, y; + for (y = 0; y < ymis; y++) + for (x = 0; x < xmis; x++) { + int map_offset = block_index + y * cm->mi_cols + x; + if (is_inter_block(mi) && mi->segment_id <= CR_SEGMENT_ID_BOOST2) { + if (abs(mv.row) < 8 && abs(mv.col) < 8) { + if (cpi->consec_zero_mv[map_offset] < 255) + cpi->consec_zero_mv[map_offset]++; + } else { + cpi->consec_zero_mv[map_offset] = 0; + } + } + } +} + static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t, int output_enabled, int mi_row, int mi_col, BLOCK_SIZE bsize, @@ -4149,16 +4430,11 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; - MODE_INFO **mi_8x8 = xd->mi; - MODE_INFO *mi = mi_8x8[0]; - MB_MODE_INFO *mbmi = &mi->mbmi; - const int seg_skip = 
segfeature_active(&cm->seg, mbmi->segment_id, + MODE_INFO *mi = xd->mi[0]; + const int seg_skip = segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP); - const int mis = cm->mi_stride; - const int mi_width = num_8x8_blocks_wide_lookup[bsize]; - const int mi_height = num_8x8_blocks_high_lookup[bsize]; - x->skip_recode = !x->select_tx_size && mbmi->sb_type >= BLOCK_8X8 && + x->skip_recode = !x->select_tx_size && mi->sb_type >= BLOCK_8X8 && cpi->oxcf.aq_mode != COMPLEXITY_AQ && cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ && cpi->sf.allow_skip_recode; @@ -4175,21 +4451,28 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, if (x->skip_encode) return; - if (!is_inter_block(mbmi)) { + if (!is_inter_block(mi)) { int plane; - mbmi->skip = 1; +#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH + if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && + (xd->above_mi == NULL || xd->left_mi == NULL) && + need_top_left[mi->uv_mode]) + assert(0); +#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH + mi->skip = 1; for (plane = 0; plane < MAX_MB_PLANE; ++plane) - vp9_encode_intra_block_plane(x, VPXMAX(bsize, BLOCK_8X8), plane); + vp9_encode_intra_block_plane(x, VPXMAX(bsize, BLOCK_8X8), plane, 1); if (output_enabled) sum_intra_stats(td->counts, mi); - vp9_tokenize_sb(cpi, td, t, !output_enabled, VPXMAX(bsize, BLOCK_8X8)); + vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip, + VPXMAX(bsize, BLOCK_8X8)); } else { int ref; - const int is_compound = has_second_ref(mbmi); - set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); + const int is_compound = has_second_ref(mi); + set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); for (ref = 0; ref < 1 + is_compound; ++ref) { YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, - mbmi->ref_frame[ref]); + mi->ref_frame[ref]); assert(cfg != NULL); vp9_setup_pre_planes(xd, ref, cfg, mi_row, mi_col, &xd->block_refs[ref]->sf); @@ -4202,34 +4485,31 @@ static void encode_superblock(VP9_COMP 
*cpi, ThreadData *td, VPXMAX(bsize, BLOCK_8X8)); vp9_encode_sb(x, VPXMAX(bsize, BLOCK_8X8)); - vp9_tokenize_sb(cpi, td, t, !output_enabled, VPXMAX(bsize, BLOCK_8X8)); + vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip, + VPXMAX(bsize, BLOCK_8X8)); } if (output_enabled) { if (cm->tx_mode == TX_MODE_SELECT && - mbmi->sb_type >= BLOCK_8X8 && - !(is_inter_block(mbmi) && (mbmi->skip || seg_skip))) { + mi->sb_type >= BLOCK_8X8 && + !(is_inter_block(mi) && (mi->skip || seg_skip))) { ++get_tx_counts(max_txsize_lookup[bsize], get_tx_size_context(xd), - &td->counts->tx)[mbmi->tx_size]; + &td->counts->tx)[mi->tx_size]; } else { - int x, y; - TX_SIZE tx_size; // The new intra coding scheme requires no change of transform size - if (is_inter_block(&mi->mbmi)) { - tx_size = VPXMIN(tx_mode_to_biggest_tx_size[cm->tx_mode], - max_txsize_lookup[bsize]); + if (is_inter_block(mi)) { + mi->tx_size = VPXMIN(tx_mode_to_biggest_tx_size[cm->tx_mode], + max_txsize_lookup[bsize]); } else { - tx_size = (bsize >= BLOCK_8X8) ? mbmi->tx_size : TX_4X4; + mi->tx_size = (bsize >= BLOCK_8X8) ? 
mi->tx_size : TX_4X4; } - - for (y = 0; y < mi_height; y++) - for (x = 0; x < mi_width; x++) - if (mi_col + x < cm->mi_cols && mi_row + y < cm->mi_rows) - mi_8x8[mis * y + x]->mbmi.tx_size = tx_size; } - ++td->counts->tx.tx_totals[mbmi->tx_size]; - ++td->counts->tx.tx_totals[get_uv_tx_size(mbmi, &xd->plane[1])]; + + ++td->counts->tx.tx_totals[mi->tx_size]; + ++td->counts->tx.tx_totals[get_uv_tx_size(mi, &xd->plane[1])]; if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) - vp9_cyclic_refresh_update_sb_postencode(cpi, mbmi, mi_row, mi_col, bsize); + vp9_cyclic_refresh_update_sb_postencode(cpi, mi, mi_row, mi_col, bsize); + if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0) + update_zeromv_cnt(cpi, mi, mi_row, mi_col, bsize); } } diff --git a/libvpx/vp9/encoder/vp9_encodemb.c b/libvpx/vp9/encoder/vp9_encodemb.c index 3c6a9283c..169943c10 100644 --- a/libvpx/vp9/encoder/vp9_encodemb.c +++ b/libvpx/vp9/encoder/vp9_encodemb.c @@ -50,27 +50,21 @@ void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { pd->dst.buf, pd->dst.stride); } -#define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF) - typedef struct vp9_token_state { + int64_t error; int rate; - int error; - int next; + int16_t next; int16_t token; - int16_t qc; + tran_low_t qc; + tran_low_t dqc; } vp9_token_state; -// TODO(jimbankoski): experiment to find optimal RD numbers. 
-static const int plane_rd_mult[PLANE_TYPES] = { 4, 2 }; +static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] ={ {10, 6}, {8, 7}, }; #define UPDATE_RD_COST()\ {\ rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);\ rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);\ - if (rd_cost0 == rd_cost1) {\ - rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);\ - rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);\ - }\ } // This function is a place holder for now but may ultimately need @@ -91,7 +85,7 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, MACROBLOCKD *const xd = &mb->e_mbd; struct macroblock_plane *const p = &mb->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; - const int ref = is_inter_block(&xd->mi[0]->mbmi); + const int ref = is_inter_block(xd->mi[0]); vp9_token_state tokens[1025][2]; unsigned best_index[1025][2]; uint8_t token_cache[1024]; @@ -101,32 +95,32 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, const int eob = p->eobs[block]; const PLANE_TYPE type = get_plane_type(plane); const int default_eob = 16 << (tx_size << 1); - const int mul = 1 + (tx_size == TX_32X32); - const int16_t *dequant_ptr = pd->dequant; - const uint8_t *const band_translate = get_band_translate(tx_size); + const int shift = (tx_size == TX_32X32); + const int16_t* const dequant_ptr = pd->dequant; + const uint8_t* const band_translate = get_band_translate(tx_size); const scan_order *const so = get_scan(xd, tx_size, type, block); const int16_t *const scan = so->scan; const int16_t *const nb = so->neighbors; + const int dq_step[2] = { dequant_ptr[0] >> shift, dequant_ptr[1] >> shift }; int next = eob, sz = 0; - int64_t rdmult = mb->rdmult * plane_rd_mult[type], rddiv = mb->rddiv; + const int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][type]) >> 1; + const int64_t rddiv = mb->rddiv; int64_t rd_cost0, rd_cost1; - int rate0, rate1, error0, error1; + int rate0, rate1; + int64_t error0, error1; int16_t t0, t1; EXTRABIT e0; int best, band, pt, i, 
final_eob; #if CONFIG_VP9_HIGHBITDEPTH - const int16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd); + const int *cat6_high_cost = vp9_get_high_cost_table(xd->bd); #else - const int16_t *cat6_high_cost = vp9_get_high_cost_table(8); + const int *cat6_high_cost = vp9_get_high_cost_table(8); #endif assert((!type && !plane) || (type && plane)); assert(eob <= default_eob); /* Now set up a Viterbi trellis to evaluate alternative roundings. */ - if (!ref) - rdmult = (rdmult * 9) >> 4; - /* Initialize the sentinel node of the trellis. */ tokens[eob][0].rate = 0; tokens[eob][0].error = 0; @@ -165,7 +159,7 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, /* And pick the best. */ best = rd_cost1 < rd_cost0; base_bits = vp9_get_cost(t0, e0, cat6_high_cost); - dx = mul * (dqcoeff[rc] - coeff[rc]); + dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift); #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { dx >>= xd->bd - 8; @@ -177,14 +171,15 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, tokens[i][0].next = next; tokens[i][0].token = t0; tokens[i][0].qc = x; + tokens[i][0].dqc = dqcoeff[rc]; best_index[i][0] = best; /* Evaluate the second possibility for this state. */ rate0 = tokens[next][0].rate; rate1 = tokens[next][1].rate; - if ((abs(x) * dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) && - (abs(x) * dequant_ptr[rc != 0] < abs(coeff[rc]) * mul + + if ((abs(x) * dequant_ptr[rc != 0] > (abs(coeff[rc]) << shift)) && + (abs(x) * dequant_ptr[rc != 0] < (abs(coeff[rc]) << shift) + dequant_ptr[rc != 0])) shortcut = 1; else @@ -193,6 +188,11 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, if (shortcut) { sz = -(x < 0); x -= 2 * sz + 1; + } else { + tokens[i][1] = tokens[i][0]; + best_index[i][1] = best_index[i][0]; + next = i; + continue; } /* Consider both possible successor states. 
*/ @@ -243,6 +243,24 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, tokens[i][1].next = next; tokens[i][1].token = best ? t1 : t0; tokens[i][1].qc = x; + + if (x) { + tran_low_t offset = dq_step[rc != 0]; + // The 32x32 transform coefficient uses half quantization step size. + // Account for the rounding difference in the dequantized coefficeint + // value when the quantization index is dropped from an even number + // to an odd number. + if (shift & x) + offset += (dequant_ptr[rc != 0] & 0x01); + + if (sz == 0) + tokens[i][1].dqc = dqcoeff[rc] - offset; + else + tokens[i][1].dqc = dqcoeff[rc] + offset; + } else { + tokens[i][1].dqc = 0; + } + best_index[i][1] = best; /* Finally, make this the new head of the trellis. */ next = i; @@ -282,18 +300,13 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, UPDATE_RD_COST(); best = rd_cost1 < rd_cost0; final_eob = -1; - memset(qcoeff, 0, sizeof(*qcoeff) * (16 << (tx_size * 2))); - memset(dqcoeff, 0, sizeof(*dqcoeff) * (16 << (tx_size * 2))); + for (i = next; i < eob; i = next) { const int x = tokens[i][best].qc; const int rc = scan[i]; - if (x) { - final_eob = i; - } - + if (x) final_eob = i; qcoeff[rc] = x; - dqcoeff[rc] = (x * dequant_ptr[rc != 0]) / mul; - + dqcoeff[rc] = tokens[i][best].dqc; next = tokens[i][best].next; best = best_index[i][best]; } @@ -736,11 +749,11 @@ void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) { void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { MACROBLOCKD *const xd = &x->e_mbd; struct optimize_ctx ctx; - MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; - struct encode_b_args arg = {x, &ctx, &mbmi->skip}; + MODE_INFO *mi = xd->mi[0]; + struct encode_b_args arg = {x, &ctx, &mi->skip}; int plane; - mbmi->skip = 1; + mi->skip = 1; if (x->skip) return; @@ -751,7 +764,7 @@ void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { const struct macroblockd_plane* const pd = &xd->plane[plane]; - const TX_SIZE 
tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size; + const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) : mi->tx_size; vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]); } @@ -766,7 +779,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, struct encode_b_args* const args = arg; MACROBLOCK *const x = args->x; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + MODE_INFO *mi = xd->mi[0]; struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block); @@ -783,17 +796,26 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, const int src_stride = p->src.stride; const int dst_stride = pd->dst.stride; int i, j; + struct optimize_ctx *const ctx = args->ctx; + ENTROPY_CONTEXT *a = NULL; + ENTROPY_CONTEXT *l = NULL; + int entropy_ctx = 0; txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); dst = &pd->dst.buf[4 * (j * dst_stride + i)]; src = &p->src.buf[4 * (j * src_stride + i)]; src_diff = &p->src_diff[4 * (j * diff_stride + i)]; + if (args->ctx != NULL) { + a = &ctx->ta[plane][i]; + l = &ctx->tl[plane][j]; + entropy_ctx = combine_entropy_contexts(*a, *l); + } if (tx_size == TX_4X4) { tx_type = get_tx_type_4x4(get_plane_type(plane), xd, block); scan_order = &vp9_scan_orders[TX_4X4][tx_type]; - mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode; + mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mi->uv_mode; } else { - mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; + mode = plane == 0 ? 
mi->mode : mi->uv_mode; if (tx_size == TX_32X32) { scan_order = &vp9_default_scan_orders[TX_32X32]; } else { @@ -905,6 +927,9 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, pd->dequant, eob, scan_order->scan, scan_order->iscan); } + if (args->ctx != NULL && !x->skip_recode) { + *a = *l = optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; + } if (!x->skip_encode && *eob) vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob); break; @@ -918,6 +943,9 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, pd->dequant, eob, scan_order->scan, scan_order->iscan); } + if (args->ctx != NULL && !x->skip_recode) { + *a = *l = optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; + } if (!x->skip_encode && *eob) vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob); break; @@ -931,6 +959,9 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, pd->dequant, eob, scan_order->scan, scan_order->iscan); } + if (args->ctx != NULL && !x->skip_recode) { + *a = *l = optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; + } if (!x->skip_encode && *eob) vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob); break; @@ -947,7 +978,9 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, pd->dequant, eob, scan_order->scan, scan_order->iscan); } - + if (args->ctx != NULL && !x->skip_recode) { + *a = *l = optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; + } if (!x->skip_encode && *eob) { if (tx_type == DCT_DCT) // this is like vp9_short_idct4x4 but has a special case around eob<=1 @@ -966,9 +999,20 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, *(args->skip) = 0; } -void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { +void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane, + int enable_optimize_b) { const MACROBLOCKD *const xd = &x->e_mbd; - struct encode_b_args arg = {x, NULL, &xd->mi[0]->mbmi.skip}; + 
struct optimize_ctx ctx; + struct encode_b_args arg = {x, NULL, &xd->mi[0]->skip}; + + if (enable_optimize_b && x->optimize && + (!x->skip_recode || !x->skip_optimize)) { + const struct macroblockd_plane* const pd = &xd->plane[plane]; + const TX_SIZE tx_size = plane ? get_uv_tx_size(xd->mi[0], pd) : + xd->mi[0]->tx_size; + vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]); + arg.ctx = &ctx; + } vp9_foreach_transformed_block_in_plane(xd, bsize, plane, vp9_encode_block_intra, &arg); diff --git a/libvpx/vp9/encoder/vp9_encodemb.h b/libvpx/vp9/encoder/vp9_encodemb.h index 97df8a66b..25b0b23e0 100644 --- a/libvpx/vp9/encoder/vp9_encodemb.h +++ b/libvpx/vp9/encoder/vp9_encodemb.h @@ -37,7 +37,8 @@ void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane); void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg); -void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane); +void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane, + int enable_optimize_b); #ifdef __cplusplus } // extern "C" diff --git a/libvpx/vp9/encoder/vp9_encodemv.c b/libvpx/vp9/encoder/vp9_encodemv.c index e71966343..71f27cc53 100644 --- a/libvpx/vp9/encoder/vp9_encodemv.c +++ b/libvpx/vp9/encoder/vp9_encodemv.c @@ -75,11 +75,12 @@ static void encode_mv_component(vpx_writer* w, int comp, static void build_nmv_component_cost_table(int *mvcost, const nmv_component* const mvcomp, int usehp) { - int i, v; int sign_cost[2], class_cost[MV_CLASSES], class0_cost[CLASS0_SIZE]; int bits_cost[MV_OFFSET_BITS][2]; int class0_fp_cost[CLASS0_SIZE][MV_FP_SIZE], fp_cost[MV_FP_SIZE]; int class0_hp_cost[2], hp_cost[2]; + int i; + int c, o; sign_cost[0] = vp9_cost_zero(mvcomp->sign); sign_cost[1] = vp9_cost_one(mvcomp->sign); @@ -94,51 +95,64 @@ static void build_nmv_component_cost_table(int *mvcost, vp9_cost_tokens(class0_fp_cost[i], mvcomp->class0_fp[i], vp9_mv_fp_tree); vp9_cost_tokens(fp_cost, 
mvcomp->fp, vp9_mv_fp_tree); - if (usehp) { - class0_hp_cost[0] = vp9_cost_zero(mvcomp->class0_hp); - class0_hp_cost[1] = vp9_cost_one(mvcomp->class0_hp); - hp_cost[0] = vp9_cost_zero(mvcomp->hp); - hp_cost[1] = vp9_cost_one(mvcomp->hp); - } + // Always build the hp costs to avoid an uninitialized warning from gcc + class0_hp_cost[0] = vp9_cost_zero(mvcomp->class0_hp); + class0_hp_cost[1] = vp9_cost_one(mvcomp->class0_hp); + hp_cost[0] = vp9_cost_zero(mvcomp->hp); + hp_cost[1] = vp9_cost_one(mvcomp->hp); + mvcost[0] = 0; - for (v = 1; v <= MV_MAX; ++v) { - int z, c, o, d, e, f, cost = 0; - z = v - 1; - c = vp9_get_mv_class(z, &o); - cost += class_cost[c]; + // MV_CLASS_0 + for (o = 0; o < (CLASS0_SIZE << 3); ++o) { + int d, e, f; + int cost = class_cost[MV_CLASS_0]; + int v = o + 1; d = (o >> 3); /* int mv data */ f = (o >> 1) & 3; /* fractional pel mv data */ - e = (o & 1); /* high precision mv data */ - if (c == MV_CLASS_0) { - cost += class0_cost[d]; - } else { - int i, b; - b = c + CLASS0_BITS - 1; /* number of bits */ - for (i = 0; i < b; ++i) - cost += bits_cost[i][((d >> i) & 1)]; - } - if (c == MV_CLASS_0) { - cost += class0_fp_cost[d][f]; - } else { - cost += fp_cost[f]; - } + cost += class0_cost[d]; + cost += class0_fp_cost[d][f]; if (usehp) { - if (c == MV_CLASS_0) { - cost += class0_hp_cost[e]; - } else { - cost += hp_cost[e]; - } + e = (o & 1); /* high precision mv data */ + cost += class0_hp_cost[e]; } mvcost[v] = cost + sign_cost[0]; mvcost[-v] = cost + sign_cost[1]; } + for (c = MV_CLASS_1; c < MV_CLASSES; ++c) { + int d; + for (d = 0; d < (1 << c); ++d) { + int f; + int whole_cost = class_cost[c]; + int b = c + CLASS0_BITS - 1; /* number of bits */ + for (i = 0; i < b; ++i) + whole_cost += bits_cost[i][((d >> i) & 1)]; + for (f = 0; f < 4; ++f) { + int cost = whole_cost + fp_cost[f]; + int v = (CLASS0_SIZE << (c + 2)) + d * 8 + f * 2 /* + e */ + 1; + if (usehp) { + mvcost[v] = cost + hp_cost[0] + sign_cost[0]; + mvcost[-v] = cost + hp_cost[0] + 
sign_cost[1]; + if (v + 1 > MV_MAX) break; + mvcost[v + 1] = cost + hp_cost[1] + sign_cost[0]; + mvcost[-v - 1] = cost + hp_cost[1] + sign_cost[1]; + } else { + mvcost[v] = cost + sign_cost[0]; + mvcost[-v] = cost + sign_cost[1]; + if (v + 1 > MV_MAX) break; + mvcost[v + 1] = cost + sign_cost[0]; + mvcost[-v - 1] = cost + sign_cost[1]; + } + } + } + } } static int update_mv(vpx_writer *w, const unsigned int ct[2], vpx_prob *cur_p, vpx_prob upd_p) { const vpx_prob new_p = get_binary_prob(ct[0], ct[1]) | 1; const int update = cost_branch256(ct, *cur_p) + vp9_cost_zero(upd_p) > - cost_branch256(ct, new_p) + vp9_cost_one(upd_p) + 7 * 256; + cost_branch256(ct, new_p) + vp9_cost_one(upd_p) + + (7 << VP9_PROB_COST_SHIFT); vpx_write(w, update, upd_p); if (update) { *cur_p = new_p; @@ -206,7 +220,7 @@ void vp9_encode_mv(VP9_COMP* cpi, vpx_writer* w, const MV diff = {mv->row - ref->row, mv->col - ref->col}; const MV_JOINT_TYPE j = vp9_get_mv_joint(&diff); - usehp = usehp && vp9_use_mv_hp(ref); + usehp = usehp && use_mv_hp(ref); vp9_write_token(w, vp9_mv_joint_tree, mvctx->joints, &mv_joint_encodings[j]); if (mv_joint_vertical(j)) @@ -230,13 +244,13 @@ void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2], build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], usehp); } -static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext, +static void inc_mvs(const MODE_INFO *mi, const MB_MODE_INFO_EXT *mbmi_ext, const int_mv mvs[2], nmv_context_counts *counts) { int i; - for (i = 0; i < 1 + has_second_ref(mbmi); ++i) { - const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0].as_mv; + for (i = 0; i < 1 + has_second_ref(mi); ++i) { + const MV *ref = &mbmi_ext->ref_mvs[mi->ref_frame[i]][0].as_mv; const MV diff = {mvs[i].as_mv.row - ref->row, mvs[i].as_mv.col - ref->col}; vp9_inc_mv(&diff, counts); @@ -246,24 +260,23 @@ static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext, void vp9_update_mv_count(ThreadData *td) { const 
MACROBLOCKD *xd = &td->mb.e_mbd; const MODE_INFO *mi = xd->mi[0]; - const MB_MODE_INFO *const mbmi = &mi->mbmi; const MB_MODE_INFO_EXT *mbmi_ext = td->mb.mbmi_ext; - if (mbmi->sb_type < BLOCK_8X8) { - const int num_4x4_w = num_4x4_blocks_wide_lookup[mbmi->sb_type]; - const int num_4x4_h = num_4x4_blocks_high_lookup[mbmi->sb_type]; + if (mi->sb_type < BLOCK_8X8) { + const int num_4x4_w = num_4x4_blocks_wide_lookup[mi->sb_type]; + const int num_4x4_h = num_4x4_blocks_high_lookup[mi->sb_type]; int idx, idy; for (idy = 0; idy < 2; idy += num_4x4_h) { for (idx = 0; idx < 2; idx += num_4x4_w) { const int i = idy * 2 + idx; if (mi->bmi[i].as_mode == NEWMV) - inc_mvs(mbmi, mbmi_ext, mi->bmi[i].as_mv, &td->counts->mv); + inc_mvs(mi, mbmi_ext, mi->bmi[i].as_mv, &td->counts->mv); } } } else { - if (mbmi->mode == NEWMV) - inc_mvs(mbmi, mbmi_ext, mbmi->mv, &td->counts->mv); + if (mi->mode == NEWMV) + inc_mvs(mi, mbmi_ext, mi->mv, &td->counts->mv); } } diff --git a/libvpx/vp9/encoder/vp9_encoder.c b/libvpx/vp9/encoder/vp9_encoder.c index 72eafec40..147f97004 100644 --- a/libvpx/vp9/encoder/vp9_encoder.c +++ b/libvpx/vp9/encoder/vp9_encoder.c @@ -36,6 +36,7 @@ #include "vp9/common/vp9_reconintra.h" #include "vp9/common/vp9_tile_common.h" +#include "vp9/encoder/vp9_aq_360.h" #include "vp9/encoder/vp9_aq_complexity.h" #include "vp9/encoder/vp9_aq_cyclicrefresh.h" #include "vp9/encoder/vp9_aq_variance.h" @@ -47,6 +48,7 @@ #include "vp9/encoder/vp9_ethread.h" #include "vp9/encoder/vp9_firstpass.h" #include "vp9/encoder/vp9_mbgraph.h" +#include "vp9/encoder/vp9_noise_estimate.h" #include "vp9/encoder/vp9_picklpf.h" #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rd.h" @@ -60,8 +62,6 @@ #define AM_SEGMENT_ID_INACTIVE 7 #define AM_SEGMENT_ID_ACTIVE 0 -#define SHARP_FILTER_QTHRESH 0 /* Q threshold for 8-tap sharp filter */ - #define ALTREF_HIGH_PRECISION_MV 1 // Whether to use high precision mv // for altref computation. 
#define HIGH_PRECISION_MV_QTHRESH 200 // Q threshold for high precision @@ -86,6 +86,25 @@ FILE *kf_list; FILE *keyfile; #endif +static const Vp9LevelSpec vp9_level_defs[VP9_LEVELS] = { + {LEVEL_1, 829440, 36864, 200, 400, 2, 1, 4, 8}, + {LEVEL_1_1, 2764800, 73728, 800, 1000, 2, 1, 4, 8}, + {LEVEL_2, 4608000, 122880, 1800, 1500, 2, 1, 4, 8}, + {LEVEL_2_1, 9216000, 245760, 3600, 2800, 2, 2, 4, 8}, + {LEVEL_3, 20736000, 552960, 7200, 6000, 2, 4, 4, 8}, + {LEVEL_3_1, 36864000, 983040, 12000, 10000, 2, 4, 4, 8}, + {LEVEL_4, 83558400, 2228224, 18000, 16000, 4, 4, 4, 8}, + {LEVEL_4_1, 160432128, 2228224, 30000, 18000, 4, 4, 5, 6}, + {LEVEL_5, 311951360, 8912896, 60000, 36000, 6, 8, 6, 4}, + {LEVEL_5_1, 588251136, 8912896, 120000, 46000, 8, 8, 10, 4}, + // TODO(huisu): update max_cpb_size for level 5_2 ~ 6_2 when + // they are finalized (currently TBD). + {LEVEL_5_2, 1176502272, 8912896, 180000, 0, 8, 8, 10, 4}, + {LEVEL_6, 1176502272, 35651584, 180000, 0, 8, 16, 10, 4}, + {LEVEL_6_1, 2353004544u, 35651584, 240000, 0, 8, 16, 10, 4}, + {LEVEL_6_2, 4706009088u, 35651584, 480000, 0, 8, 16, 10, 4}, +}; + static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) { switch (mode) { case NORMAL: @@ -116,11 +135,16 @@ static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) { // so memset cannot be used, instead only inactive blocks should be reset. 
static void suppress_active_map(VP9_COMP *cpi) { unsigned char *const seg_map = cpi->segmentation_map; - int i; - if (cpi->active_map.enabled || cpi->active_map.update) - for (i = 0; i < cpi->common.mi_rows * cpi->common.mi_cols; ++i) + + if (cpi->active_map.enabled || cpi->active_map.update) { + const int rows = cpi->common.mi_rows; + const int cols = cpi->common.mi_cols; + int i; + + for (i = 0; i < rows * cols; ++i) if (seg_map[i] == AM_SEGMENT_ID_INACTIVE) seg_map[i] = AM_SEGMENT_ID_ACTIVE; + } } static void apply_active_map(VP9_COMP *cpi) { @@ -159,6 +183,39 @@ static void apply_active_map(VP9_COMP *cpi) { } } +static void init_level_info(Vp9LevelInfo *level_info) { + Vp9LevelStats *const level_stats = &level_info->level_stats; + Vp9LevelSpec *const level_spec = &level_info->level_spec; + + memset(level_stats, 0, sizeof(*level_stats)); + memset(level_spec, 0, sizeof(*level_spec)); + level_spec->level = LEVEL_UNKNOWN; + level_spec->min_altref_distance = INT_MAX; +} + +VP9_LEVEL vp9_get_level(const Vp9LevelSpec * const level_spec) { + int i; + const Vp9LevelSpec *this_level; + + vpx_clear_system_state(); + + for (i = 0; i < VP9_LEVELS; ++i) { + this_level = &vp9_level_defs[i]; + if ((double)level_spec->max_luma_sample_rate * (1 + SAMPLE_RATE_GRACE_P) > + (double)this_level->max_luma_sample_rate || + level_spec->max_luma_picture_size > this_level->max_luma_picture_size || + level_spec->average_bitrate > this_level->average_bitrate || + level_spec->max_cpb_size > this_level->max_cpb_size || + level_spec->compression_ratio < this_level->compression_ratio || + level_spec->max_col_tiles > this_level->max_col_tiles || + level_spec->min_altref_distance < this_level->min_altref_distance || + level_spec->max_ref_frame_buffers > this_level->max_ref_frame_buffers) + continue; + break; + } + return (i == VP9_LEVELS) ? 
LEVEL_UNKNOWN : vp9_level_defs[i].level; +} + int vp9_set_active_map(VP9_COMP* cpi, unsigned char* new_map_16x16, int rows, @@ -375,6 +432,9 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { vpx_free(cpi->active_map.map); cpi->active_map.map = NULL; + vpx_free(cpi->consec_zero_mv); + cpi->consec_zero_mv = NULL; + vp9_free_ref_frame_buffers(cm->buffer_pool); #if CONFIG_VP9_POSTPROC vp9_free_postproc_buffers(cm); @@ -410,6 +470,9 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { memset(&cpi->svc.scaled_frames[0], 0, MAX_LAG_BUFFERS * sizeof(cpi->svc.scaled_frames[0])); + vpx_free_frame_buffer(&cpi->svc.scaled_temp); + memset(&cpi->svc.scaled_temp, 0, sizeof(cpi->svc.scaled_temp)); + vpx_free_frame_buffer(&cpi->svc.empty_frame.img); memset(&cpi->svc.empty_frame, 0, sizeof(cpi->svc.empty_frame)); @@ -607,7 +670,7 @@ static void update_reference_segmentation_map(VP9_COMP *cpi) { MODE_INFO **mi_8x8 = mi_8x8_ptr; uint8_t *cache = cache_ptr; for (col = 0; col < cm->mi_cols; col++, mi_8x8++, cache++) - cache[0] = mi_8x8[0]->mbmi.segment_id; + cache[0] = mi_8x8[0]->segment_id; mi_8x8_ptr += cm->mi_stride; cache_ptr += cm->mi_cols; } @@ -768,7 +831,6 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) { cpi->oxcf = *oxcf; cpi->framerate = oxcf->init_framerate; - cm->profile = oxcf->profile; cm->bit_depth = oxcf->bit_depth; #if CONFIG_VP9_HIGHBITDEPTH @@ -777,6 +839,9 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) { cm->color_space = oxcf->color_space; cm->color_range = oxcf->color_range; + cpi->target_level = oxcf->target_level; + cpi->keep_level_stats = oxcf->target_level != LEVEL_MAX; + cm->width = oxcf->width; cm->height = oxcf->height; alloc_compressor_data(cpi); @@ -805,6 +870,8 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) { cpi->ref_frame_flags = 0; init_buffer_indices(cpi); + + vp9_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height); } static void 
set_rc_buffer_sizes(RATE_CONTROL *rc, @@ -1465,6 +1532,9 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { cm->color_space = oxcf->color_space; cm->color_range = oxcf->color_range; + cpi->target_level = oxcf->target_level; + cpi->keep_level_stats = oxcf->target_level != LEVEL_MAX; + if (cm->profile <= PROFILE_1) assert(cm->bit_depth == VPX_BITS_8); else @@ -1475,7 +1545,11 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { cpi->td.mb.e_mbd.bd = (int)cm->bit_depth; #endif // CONFIG_VP9_HIGHBITDEPTH - rc->baseline_gf_interval = (MIN_GF_INTERVAL + MAX_GF_INTERVAL) / 2; + if ((oxcf->pass == 0) && (oxcf->rc_mode == VPX_Q)) { + rc->baseline_gf_interval = FIXED_GF_INTERVAL; + } else { + rc->baseline_gf_interval = (MIN_GF_INTERVAL + MAX_GF_INTERVAL) / 2; + } cpi->refresh_golden_frame = 0; cpi->refresh_last_frame = 1; @@ -1519,6 +1593,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { if (last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) { cm->width = cpi->oxcf.width; cm->height = cpi->oxcf.height; + cpi->external_resize = 1; } if (cpi->initial_width) { @@ -1530,10 +1605,22 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { alloc_compressor_data(cpi); realloc_segmentation_maps(cpi); cpi->initial_width = cpi->initial_height = 0; + cpi->external_resize = 0; + } else if (cm->mi_alloc_size == new_mi_size && + (cpi->oxcf.width > last_w || cpi->oxcf.height > last_h)) { + vp9_alloc_loop_filter(cm); } } + update_frame_size(cpi); + if (last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) { + memset(cpi->consec_zero_mv, 0, + cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv)); + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + vp9_cyclic_refresh_reset_resize(cpi); + } + if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) || ((cpi->svc.number_temporal_layers > 1 || @@ -1567,7 +1654,30 @@ void vp9_change_config(struct VP9_COMP *cpi, 
const VP9EncoderConfig *oxcf) { #endif #define log2f(x) (log (x) / (float) M_LOG2_E) +/*********************************************************************** + * Read before modifying 'cal_nmvjointsadcost' or 'cal_nmvsadcosts' * + *********************************************************************** + * The following 2 functions ('cal_nmvjointsadcost' and * + * 'cal_nmvsadcosts') are used to calculate cost lookup tables * + * used by 'vp9_diamond_search_sad'. The C implementation of the * + * function is generic, but the AVX intrinsics optimised version * + * relies on the following properties of the computed tables: * + * For cal_nmvjointsadcost: * + * - mvjointsadcost[1] == mvjointsadcost[2] == mvjointsadcost[3] * + * For cal_nmvsadcosts: * + * - For all i: mvsadcost[0][i] == mvsadcost[1][i] * + * (Equal costs for both components) * + * - For all i: mvsadcost[0][i] == mvsadcost[0][-i] * + * (Cost function is even) * + * If these do not hold, then the AVX optimised version of the * + * 'vp9_diamond_search_sad' function cannot be used as it is, in which * + * case you can revert to using the C function instead. 
* + ***********************************************************************/ + static void cal_nmvjointsadcost(int *mvjointsadcost) { + /********************************************************************* + * Warning: Read the comments above before modifying this function * + *********************************************************************/ mvjointsadcost[0] = 600; mvjointsadcost[1] = 300; mvjointsadcost[2] = 300; @@ -1575,6 +1685,9 @@ static void cal_nmvjointsadcost(int *mvjointsadcost) { } static void cal_nmvsadcosts(int *mvsadcost[2]) { + /********************************************************************* + * Warning: Read the comments above before modifying this function * + *********************************************************************/ int i = 1; mvsadcost[0][0] = 0; @@ -1604,7 +1717,6 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) { } while (++i <= MV_MAX); } - VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, BufferPool *const pool) { unsigned int i; @@ -1635,12 +1747,12 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, cpi->use_svc = 0; cpi->resize_state = 0; + cpi->external_resize = 0; cpi->resize_avg_qp = 0; cpi->resize_buffer_underflow = 0; + cpi->use_skin_detection = 0; cpi->common.buffer_pool = pool; - cpi->rc.high_source_sad = 0; - init_config(cpi, oxcf); vp9_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc); @@ -1650,6 +1762,10 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, realloc_segmentation_maps(cpi); + CHECK_MEM_ERROR(cm, cpi->consec_zero_mv, + vpx_calloc(cm->mi_rows * cm->mi_cols, + sizeof(*cpi->consec_zero_mv))); + CHECK_MEM_ERROR(cm, cpi->nmvcosts[0], vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[0]))); CHECK_MEM_ERROR(cm, cpi->nmvcosts[1], @@ -1689,6 +1805,9 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, cpi->multi_arf_last_grp_enabled = 0; cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS; + + init_level_info(&cpi->level_info); + #if CONFIG_INTERNAL_STATS cpi->b_calculate_ssimg = 0; 
cpi->b_calculate_blockiness = 1; @@ -1727,8 +1846,9 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, } if (cpi->b_calculate_consistency) { - cpi->ssim_vars = vpx_malloc(sizeof(*cpi->ssim_vars) * - 4 * cpi->common.mi_rows * cpi->common.mi_cols); + CHECK_MEM_ERROR(cm, cpi->ssim_vars, + vpx_malloc(sizeof(*cpi->ssim_vars) * 4 * + cpi->common.mi_rows * cpi->common.mi_cols)); cpi->worst_consistency = 100.0; } @@ -1736,6 +1856,10 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, cpi->first_time_stamp_ever = INT64_MAX; + /********************************************************************* + * Warning: Read the comments around 'cal_nmvjointsadcost' and * + * 'cal_nmvsadcosts' before modifying how these tables are computed. * + *********************************************************************/ cal_nmvjointsadcost(cpi->td.mb.nmvjointsadcost); cpi->td.mb.nmvcost[0] = &cpi->nmvcosts[0][MV_MAX]; cpi->td.mb.nmvcost[1] = &cpi->nmvcosts[1][MV_MAX]; @@ -1928,11 +2052,14 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, return cpi; } + +#if CONFIG_INTERNAL_STATS #define SNPRINT(H, T) \ snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T)) #define SNPRINT2(H, T, V) \ snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T), (V)) +#endif // CONFIG_INTERNAL_STATS void vp9_remove_compressor(VP9_COMP *cpi) { VP9_COMMON *cm; @@ -1958,6 +2085,8 @@ void vp9_remove_compressor(VP9_COMP *cpi) { const double dr = (double)cpi->bytes * (double) 8 / (double)1000 / time_encoded; const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1); + const double target_rate = (double)cpi->oxcf.target_bandwidth / 1000; + const double rate_err = ((100.0 * (dr - target_rate)) / target_rate); if (cpi->b_calculate_psnr) { const double total_psnr = @@ -2009,8 +2138,9 @@ void vp9_remove_compressor(VP9_COMP *cpi) { SNPRINT2(results, "\t%7.3f", cpi->ssimg.worst); } - fprintf(f, "%s\t Time\n", headings); - fprintf(f, "%s\t%8.0f\n", results, total_encode_time); + fprintf(f, "%s\t 
Time Rc-Err Abs Err\n", headings); + fprintf(f, "%s\t%8.0f %7.2f %7.2f\n", results, + total_encode_time, rate_err, fabs(rate_err)); } fclose(f); @@ -2128,7 +2258,7 @@ static void encoder_variance(const uint8_t *a, int a_stride, static void encoder_highbd_variance64(const uint8_t *a8, int a_stride, const uint8_t *b8, int b_stride, int w, int h, uint64_t *sse, - uint64_t *sum) { + int64_t *sum) { int i, j; uint16_t *a = CONVERT_TO_SHORTPTR(a8); @@ -2152,7 +2282,7 @@ static void encoder_highbd_8_variance(const uint8_t *a8, int a_stride, int w, int h, unsigned int *sse, int *sum) { uint64_t sse_long = 0; - uint64_t sum_long = 0; + int64_t sum_long = 0; encoder_highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); *sse = (unsigned int)sse_long; @@ -2574,10 +2704,6 @@ static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src, #if CONFIG_VP9_HIGHBITDEPTH static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int bd) { -#else -static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst) { -#endif // CONFIG_VP9_HIGHBITDEPTH const int src_w = src->y_crop_width; const int src_h = src->y_crop_height; const int dst_w = dst->y_crop_width; @@ -2589,19 +2715,18 @@ static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src, const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP]; int x, y, i; - for (y = 0; y < dst_h; y += 16) { - for (x = 0; x < dst_w; x += 16) { - for (i = 0; i < MAX_MB_PLANE; ++i) { - const int factor = (i == 0 || i == 3 ? 1 : 2); + for (i = 0; i < MAX_MB_PLANE; ++i) { + const int factor = (i == 0 || i == 3 ? 
1 : 2); + const int src_stride = src_strides[i]; + const int dst_stride = dst_strides[i]; + for (y = 0; y < dst_h; y += 16) { + const int y_q4 = y * (16 / factor) * src_h / dst_h; + for (x = 0; x < dst_w; x += 16) { const int x_q4 = x * (16 / factor) * src_w / dst_w; - const int y_q4 = y * (16 / factor) * src_h / dst_h; - const int src_stride = src_strides[i]; - const int dst_stride = dst_strides[i]; const uint8_t *src_ptr = srcs[i] + (y / factor) * src_h / dst_h * - src_stride + (x / factor) * src_w / dst_w; + src_stride + (x / factor) * src_w / dst_w; uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor); -#if CONFIG_VP9_HIGHBITDEPTH if (src->flags & YV12_FLAG_HIGHBITDEPTH) { vpx_highbd_convolve8(src_ptr, src_stride, dst_ptr, dst_stride, kernel[x_q4 & 0xf], 16 * src_w / dst_w, @@ -2613,18 +2738,49 @@ static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src, kernel[y_q4 & 0xf], 16 * src_h / dst_h, 16 / factor, 16 / factor); } + } + } + } + + vpx_extend_frame_borders(dst); +} #else +void vp9_scale_and_extend_frame_c(const YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst) { + const int src_w = src->y_crop_width; + const int src_h = src->y_crop_height; + const int dst_w = dst->y_crop_width; + const int dst_h = dst->y_crop_height; + const uint8_t *const srcs[3] = {src->y_buffer, src->u_buffer, src->v_buffer}; + const int src_strides[3] = {src->y_stride, src->uv_stride, src->uv_stride}; + uint8_t *const dsts[3] = {dst->y_buffer, dst->u_buffer, dst->v_buffer}; + const int dst_strides[3] = {dst->y_stride, dst->uv_stride, dst->uv_stride}; + const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP]; + int x, y, i; + + for (i = 0; i < MAX_MB_PLANE; ++i) { + const int factor = (i == 0 || i == 3 ? 
1 : 2); + const int src_stride = src_strides[i]; + const int dst_stride = dst_strides[i]; + for (y = 0; y < dst_h; y += 16) { + const int y_q4 = y * (16 / factor) * src_h / dst_h; + for (x = 0; x < dst_w; x += 16) { + const int x_q4 = x * (16 / factor) * src_w / dst_w; + const uint8_t *src_ptr = srcs[i] + (y / factor) * src_h / dst_h * + src_stride + (x / factor) * src_w / dst_w; + uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor); + vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride, kernel[x_q4 & 0xf], 16 * src_w / dst_w, kernel[y_q4 & 0xf], 16 * src_h / dst_h, 16 / factor, 16 / factor); -#endif // CONFIG_VP9_HIGHBITDEPTH } } } vpx_extend_frame_borders(dst); } +#endif // CONFIG_VP9_HIGHBITDEPTH static int scale_down(VP9_COMP *cpi, int q) { RATE_CONTROL *const rc = &cpi->rc; @@ -2641,6 +2797,13 @@ static int scale_down(VP9_COMP *cpi, int q) { return scale; } +static int big_rate_miss(VP9_COMP *cpi, int high_limit, int low_limit) { + const RATE_CONTROL *const rc = &cpi->rc; + + return (rc->projected_frame_size > ((high_limit * 3) / 2)) || + (rc->projected_frame_size < (low_limit / 2)); +} + // Function to test for conditions that indicate we should loop // back and recode a frame. static int recode_loop_test(VP9_COMP *cpi, @@ -2652,6 +2815,7 @@ static int recode_loop_test(VP9_COMP *cpi, int force_recode = 0; if ((rc->projected_frame_size >= rc->max_frame_bandwidth) || + big_rate_miss(cpi, high_limit, low_limit) || (cpi->sf.recode_loop == ALLOW_RECODE) || (frame_is_kfgfarf && (cpi->sf.recode_loop == ALLOW_RECODE_KFARFGF))) { @@ -2693,7 +2857,7 @@ void vp9_update_reference_frames(VP9_COMP *cpi) { } else if (vp9_preserve_existing_gf(cpi)) { // We have decided to preserve the previously existing golden frame as our // new ARF frame. 
However, in the short term in function - // vp9_bitstream.c::get_refresh_mask() we left it in the GF slot and, if + // vp9_get_refresh_mask() we left it in the GF slot and, if // we're updating the GF with the current decoded frame, we save it to the // ARF slot instead. // We now have to update the ARF with the current frame and swap gld_fb_idx @@ -2750,7 +2914,8 @@ void vp9_update_reference_frames(VP9_COMP *cpi) { sizeof(cpi->interp_filter_selected[0])); } #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0) { + if (cpi->oxcf.noise_sensitivity > 0 && + cpi->denoiser.denoising_level > kDenLowLow) { vp9_denoiser_update_frame_info(&cpi->denoiser, *cpi->Source, cpi->common.frame_type, @@ -2760,6 +2925,22 @@ void vp9_update_reference_frames(VP9_COMP *cpi) { cpi->resize_pending); } #endif + if (is_one_pass_cbr_svc(cpi)) { + // Keep track of frame index for each reference frame. + SVC *const svc = &cpi->svc; + if (cm->frame_type == KEY_FRAME) { + svc->ref_frame_index[cpi->lst_fb_idx] = svc->current_superframe; + svc->ref_frame_index[cpi->gld_fb_idx] = svc->current_superframe; + svc->ref_frame_index[cpi->alt_fb_idx] = svc->current_superframe; + } else { + if (cpi->refresh_last_frame) + svc->ref_frame_index[cpi->lst_fb_idx] = svc->current_superframe; + if (cpi->refresh_golden_frame) + svc->ref_frame_index[cpi->gld_fb_idx] = svc->current_superframe; + if (cpi->refresh_alt_ref_frame) + svc->ref_frame_index[cpi->alt_fb_idx] = svc->current_superframe; + } + } } static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { @@ -2768,6 +2949,7 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { if (xd->lossless) { lf->filter_level = 0; + lf->last_filt_level = 0; } else { struct vpx_usec_timer timer; @@ -2775,7 +2957,16 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { vpx_usec_timer_start(&timer); - vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.lpf_pick); + if (!cpi->rc.is_src_frame_alt_ref) { + if ((cpi->common.frame_type == 
KEY_FRAME) && + (!cpi->rc.this_key_frame_forced)) { + lf->last_filt_level = 0; + } + vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.lpf_pick); + lf->last_filt_level = lf->filter_level; + } else { + lf->filter_level = 0; + } vpx_usec_timer_mark(&timer); cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer); @@ -2796,16 +2987,16 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { vpx_extend_frame_inner_borders(cm->frame_to_show); } -static INLINE void alloc_frame_mvs(const VP9_COMMON *cm, +static INLINE void alloc_frame_mvs(VP9_COMMON *const cm, int buffer_idx) { RefCntBuffer *const new_fb_ptr = &cm->buffer_pool->frame_bufs[buffer_idx]; if (new_fb_ptr->mvs == NULL || new_fb_ptr->mi_rows < cm->mi_rows || new_fb_ptr->mi_cols < cm->mi_cols) { vpx_free(new_fb_ptr->mvs); - new_fb_ptr->mvs = - (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols, - sizeof(*new_fb_ptr->mvs)); + CHECK_MEM_ERROR(cm, new_fb_ptr->mvs, + (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols, + sizeof(*new_fb_ptr->mvs))); new_fb_ptr->mi_rows = cm->mi_rows; new_fb_ptr->mi_cols = cm->mi_cols; } @@ -2843,12 +3034,13 @@ void vp9_scale_references(VP9_COMP *cpi) { if (force_scaling || new_fb_ptr->buf.y_crop_width != cm->width || new_fb_ptr->buf.y_crop_height != cm->height) { - vpx_realloc_frame_buffer(&new_fb_ptr->buf, - cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, - cm->use_highbitdepth, - VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment, - NULL, NULL, NULL); + if (vpx_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, + cm->use_highbitdepth, + VP9_ENC_BORDER_IN_PIXELS, + cm->byte_alignment, NULL, NULL, NULL)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate frame buffer"); scale_and_extend_frame(ref, &new_fb_ptr->buf, (int)cm->bit_depth); cpi->scaled_ref_idx[ref_frame - 1] = new_fb; alloc_frame_mvs(cm, new_fb); @@ -2868,19 +3060,31 @@ void vp9_scale_references(VP9_COMP *cpi) { if (force_scaling || 
new_fb_ptr->buf.y_crop_width != cm->width || new_fb_ptr->buf.y_crop_height != cm->height) { - vpx_realloc_frame_buffer(&new_fb_ptr->buf, - cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, - VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment, - NULL, NULL, NULL); - scale_and_extend_frame(ref, &new_fb_ptr->buf); + if (vpx_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, + VP9_ENC_BORDER_IN_PIXELS, + cm->byte_alignment, NULL, NULL, NULL)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate frame buffer"); + vp9_scale_and_extend_frame(ref, &new_fb_ptr->buf); cpi->scaled_ref_idx[ref_frame - 1] = new_fb; alloc_frame_mvs(cm, new_fb); } #endif // CONFIG_VP9_HIGHBITDEPTH } else { - const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame); - RefCntBuffer *const buf = &pool->frame_bufs[buf_idx]; + int buf_idx; + RefCntBuffer *buf = NULL; + if (cpi->oxcf.pass == 0 && !cpi->use_svc) { + // Check for release of scaled reference. + buf_idx = cpi->scaled_ref_idx[ref_frame - 1]; + buf = (buf_idx != INVALID_IDX) ? 
&pool->frame_bufs[buf_idx] : NULL; + if (buf != NULL) { + --buf->ref_count; + cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX; + } + } + buf_idx = get_ref_frame_buf_idx(cpi, ref_frame); + buf = &pool->frame_bufs[buf_idx]; buf->buf.y_crop_width = ref->y_crop_width; buf->buf.y_crop_height = ref->y_crop_height; cpi->scaled_ref_idx[ref_frame - 1] = buf_idx; @@ -2959,18 +3163,49 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) { vpx_clear_system_state(); +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) { + recon_err = vp9_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); + } else { + recon_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); + } +#else recon_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); +#endif // CONFIG_VP9_HIGHBITDEPTH + + + if (cpi->twopass.total_left_stats.coded_error != 0.0) { + double dc_quant_devisor; +#if CONFIG_VP9_HIGHBITDEPTH + switch (cm->bit_depth) { + case VPX_BITS_8: + dc_quant_devisor = 4.0; + break; + case VPX_BITS_10: + dc_quant_devisor = 16.0; + break; + case VPX_BITS_12: + dc_quant_devisor = 64.0; + break; + default: + assert(0 && "bit_depth must be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); + break; + } +#else + dc_quant_devisor = 4.0; +#endif - if (cpi->twopass.total_left_stats.coded_error != 0.0) - fprintf(f, "%10u %dx%d %d %d %10d %10d %10d %10d" + fprintf(f, "%10u %dx%d %10d %10d %d %d %10d %10d %10d %10d" "%10"PRId64" %10"PRId64" %5d %5d %10"PRId64" " "%10"PRId64" %10"PRId64" %10d " "%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf" "%6d %6d %5d %5d %5d " "%10"PRId64" %10.3lf" - "%10lf %8u %10"PRId64" %10d %10d %10d\n", + "%10lf %8u %10"PRId64" %10d %10d %10d %10d %10d\n", cpi->common.current_video_frame, cm->width, cm->height, + cpi->td.rd_counts.m_search_count, + cpi->td.rd_counts.ex_search_count, cpi->rc.source_alt_ref_pending, cpi->rc.source_alt_ref_active, cpi->rc.this_frame_target, @@ -2985,7 +3220,8 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) { 
(cpi->rc.starting_buffer_level - cpi->rc.bits_off_target), cpi->rc.total_actual_bits, cm->base_qindex, vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth), - (double)vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) / 4.0, + (double)vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) / + dc_quant_devisor, vp9_convert_qindex_to_q(cpi->twopass.active_worst_quality, cm->bit_depth), cpi->rc.avg_q, @@ -2998,8 +3234,10 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) { (1 + cpi->twopass.total_left_stats.coded_error), cpi->tot_recode_hits, recon_err, cpi->rc.kf_boost, cpi->twopass.kf_zeromotion_pct, - cpi->twopass.fr_content_type); - + cpi->twopass.fr_content_type, + cm->lf.filter_level, + cm->seg.aq_av_offset); + } fclose(f); if (0) { @@ -3074,7 +3312,7 @@ static void set_size_dependent_vars(VP9_COMP *cpi, int *q, if (oxcf->pass == 2 && cpi->sf.static_segmentation) configure_static_seg_features(cpi); -#if CONFIG_VP9_POSTPROC +#if CONFIG_VP9_POSTPROC && !(CONFIG_VP9_TEMPORAL_DENOISING) if (oxcf->noise_sensitivity > 0) { int l = 0; switch (oxcf->noise_sensitivity) { @@ -3105,12 +3343,14 @@ static void setup_denoiser_buffer(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; if (cpi->oxcf.noise_sensitivity > 0 && !cpi->denoiser.frame_buffer_initialized) { - vp9_denoiser_alloc(&(cpi->denoiser), cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, + if (vp9_denoiser_alloc(&cpi->denoiser, cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth, + cm->use_highbitdepth, #endif - VP9_ENC_BORDER_IN_PIXELS); + VP9_ENC_BORDER_IN_PIXELS)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate denoiser"); } } #endif @@ -3160,6 +3400,7 @@ static void set_frame_size(VP9_COMP *cpi) { // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed. 
set_mv_search_params(cpi); + vp9_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height); #if CONFIG_VP9_TEMPORAL_DENOISING // Reset the denoiser on the resized frame. if (cpi->oxcf.noise_sensitivity > 0) { @@ -3182,14 +3423,15 @@ static void set_frame_size(VP9_COMP *cpi) { alloc_frame_mvs(cm, cm->new_fb_idx); // Reset the frame pointers to the current frame size. - vpx_realloc_frame_buffer(get_frame_new_buffer(cm), - cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, + if (vpx_realloc_frame_buffer(get_frame_new_buffer(cm), cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth, + cm->use_highbitdepth, #endif - VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment, - NULL, NULL, NULL); + VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment, + NULL, NULL, NULL)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate frame buffer"); alloc_util_frame_buffers(cpi); init_motion_estimation(cpi); @@ -3234,43 +3476,70 @@ static void encode_without_recode_loop(VP9_COMP *cpi, set_frame_size(cpi); - cpi->Source = vp9_scale_if_required(cm, - cpi->un_scaled_source, - &cpi->scaled_source, - (cpi->oxcf.pass == 0)); - + if (is_one_pass_cbr_svc(cpi) && + cpi->un_scaled_source->y_width == cm->width << 2 && + cpi->un_scaled_source->y_height == cm->height << 2 && + cpi->svc.scaled_temp.y_width == cm->width << 1 && + cpi->svc.scaled_temp.y_height == cm->height << 1) { + cpi->Source = vp9_svc_twostage_scale(cm, + cpi->un_scaled_source, + &cpi->scaled_source, + &cpi->svc.scaled_temp); + } else { + cpi->Source = vp9_scale_if_required(cm, + cpi->un_scaled_source, + &cpi->scaled_source, + (cpi->oxcf.pass == 0)); + } // Avoid scaling last_source unless its needed. - // Last source is currently only used for screen-content mode, - // or if partition_search_type == SOURCE_VAR_BASED_PARTITION. 
+ // Last source is needed if vp9_avg_source_sad() is used, or if + // partition_search_type == SOURCE_VAR_BASED_PARTITION, or if noise + // estimation is enabled. if (cpi->unscaled_last_source != NULL && (cpi->oxcf.content == VP9E_CONTENT_SCREEN || - cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION)) + (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_VBR && + cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5) || + cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION || + cpi->noise_estimate.enabled)) cpi->Last_Source = vp9_scale_if_required(cm, cpi->unscaled_last_source, &cpi->scaled_last_source, (cpi->oxcf.pass == 0)); -#if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0 && - cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { - vp9_denoiser_update_noise_estimate(cpi); + if (cm->frame_type == KEY_FRAME || cpi->resize_pending != 0) { + memset(cpi->consec_zero_mv, 0, + cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv)); } -#endif + + vp9_update_noise_estimate(cpi); if (cpi->oxcf.pass == 0 && - cpi->oxcf.rc_mode == VPX_CBR && + cpi->oxcf.mode == REALTIME && + cpi->oxcf.speed >= 5 && cpi->resize_state == 0 && cm->frame_type != KEY_FRAME && - cpi->oxcf.content == VP9E_CONTENT_SCREEN) + (cpi->oxcf.content == VP9E_CONTENT_SCREEN || + cpi->oxcf.rc_mode == VPX_VBR)) vp9_avg_source_sad(cpi); - if (frame_is_intra_only(cm) == 0) { + // For 1 pass SVC, since only ZEROMV is allowed for upsampled reference + // frame (i.e, svc->force_zero_mode_spatial_ref = 0), we can avoid this + // frame-level upsampling. 
+ if (frame_is_intra_only(cm) == 0 && !is_one_pass_cbr_svc(cpi)) { vp9_scale_references(cpi); } set_size_independent_vars(cpi); set_size_dependent_vars(cpi, &q, &bottom_index, &top_index); + if (cpi->oxcf.speed >= 5 && + cpi->oxcf.pass == 0 && + cpi->oxcf.rc_mode == VPX_CBR && + cpi->oxcf.content != VP9E_CONTENT_SCREEN && + cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { + cpi->use_skin_detection = 1; + } + vp9_set_quantizer(cm, q); vp9_set_variance_partition_thresholds(cpi, q); @@ -3281,6 +3550,8 @@ static void encode_without_recode_loop(VP9_COMP *cpi, // exclusive. if (cpi->oxcf.aq_mode == VARIANCE_AQ) { vp9_vaq_frame_setup(cpi); + } else if (cpi->oxcf.aq_mode == EQUATOR360_AQ) { + vp9_360aq_frame_setup(cpi); } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { vp9_setup_in_frame_q_adj(cpi); } else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { @@ -3411,6 +3682,8 @@ static void encode_with_recode_loop(VP9_COMP *cpi, // exclusive. if (cpi->oxcf.aq_mode == VARIANCE_AQ) { vp9_vaq_frame_setup(cpi); + } else if (cpi->oxcf.aq_mode == EQUATOR360_AQ) { + vp9_360aq_frame_setup(cpi); } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { vp9_setup_in_frame_q_adj(cpi); } @@ -3642,6 +3915,25 @@ static void set_ext_overrides(VP9_COMP *cpi) { } } +YV12_BUFFER_CONFIG *vp9_svc_twostage_scale(VP9_COMMON *cm, + YV12_BUFFER_CONFIG *unscaled, + YV12_BUFFER_CONFIG *scaled, + YV12_BUFFER_CONFIG *scaled_temp) { + if (cm->mi_cols * MI_SIZE != unscaled->y_width || + cm->mi_rows * MI_SIZE != unscaled->y_height) { +#if CONFIG_VP9_HIGHBITDEPTH + scale_and_extend_frame(unscaled, scaled_temp, (int)cm->bit_depth); + scale_and_extend_frame(scaled_temp, scaled, (int)cm->bit_depth); +#else + vp9_scale_and_extend_frame(unscaled, scaled_temp); + vp9_scale_and_extend_frame(scaled_temp, scaled); +#endif // CONFIG_VP9_HIGHBITDEPTH + return scaled; + } else { + return unscaled; + } +} + YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, YV12_BUFFER_CONFIG *scaled, @@ -3649,13 
+3941,17 @@ YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm, if (cm->mi_cols * MI_SIZE != unscaled->y_width || cm->mi_rows * MI_SIZE != unscaled->y_height) { #if CONFIG_VP9_HIGHBITDEPTH - if (use_normative_scaler) + if (use_normative_scaler && + unscaled->y_width <= (scaled->y_width << 1) && + unscaled->y_height <= (scaled->y_height << 1)) scale_and_extend_frame(unscaled, scaled, (int)cm->bit_depth); else scale_and_extend_frame_nonnormative(unscaled, scaled, (int)cm->bit_depth); #else - if (use_normative_scaler) - scale_and_extend_frame(unscaled, scaled); + if (use_normative_scaler && + unscaled->y_width <= (scaled->y_width << 1) && + unscaled->y_height <= (scaled->y_height << 1)) + vp9_scale_and_extend_frame(unscaled, scaled); else scale_and_extend_frame_nonnormative(unscaled, scaled); #endif // CONFIG_VP9_HIGHBITDEPTH @@ -3797,14 +4093,23 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } // For 1 pass CBR, check if we are dropping this frame. - // Never drop on key frame. + // For spatial layers, for now only check for frame-dropping on first spatial + // layer, and if decision is to drop, we drop whole super-frame. if (oxcf->pass == 0 && oxcf->rc_mode == VPX_CBR && cm->frame_type != KEY_FRAME) { - if (vp9_rc_drop_frame(cpi)) { + if (vp9_rc_drop_frame(cpi) || + (is_one_pass_cbr_svc(cpi) && cpi->svc.rc_drop_superframe == 1)) { vp9_rc_postencode_update_drop_frame(cpi); ++cm->current_video_frame; cpi->ext_refresh_frame_flags_pending = 0; + cpi->svc.rc_drop_superframe = 1; + // TODO(marpan): Advancing the svc counters on dropped frames can break + // the referencing scheme for the fixed svc patterns defined in + // vp9_one_pass_cbr_svc_start_layer(). Look into fixing this issue, but + // for now, don't advance the svc frame counters on dropped frame. 
+ // if (cpi->use_svc) + // vp9_inc_frame_in_layer(cpi); return; } } @@ -4020,13 +4325,16 @@ static void check_initial_width(VP9_COMP *cpi, int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time) { - VP9_COMMON *cm = &cpi->common; + VP9_COMMON *const cm = &cpi->common; struct vpx_usec_timer timer; int res = 0; const int subsampling_x = sd->subsampling_x; const int subsampling_y = sd->subsampling_y; #if CONFIG_VP9_HIGHBITDEPTH - const int use_highbitdepth = sd->flags & YV12_FLAG_HIGHBITDEPTH; + const int use_highbitdepth = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0; +#endif + +#if CONFIG_VP9_HIGHBITDEPTH check_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y); #else check_initial_width(cpi, subsampling_x, subsampling_y); @@ -4171,6 +4479,124 @@ static void adjust_image_stat(double y, double u, double v, double all, } #endif // CONFIG_INTERNAL_STATS +static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) { + VP9_COMMON *const cm = &cpi->common; + Vp9LevelInfo *const level_info = &cpi->level_info; + Vp9LevelSpec *const level_spec = &level_info->level_spec; + Vp9LevelStats *const level_stats = &level_info->level_stats; + int i, idx; + uint64_t luma_samples, dur_end; + const uint32_t luma_pic_size = cm->width * cm->height; + double cpb_data_size; + + vpx_clear_system_state(); + + // update level_stats + level_stats->total_compressed_size += *size; + if (cm->show_frame) { + level_stats->total_uncompressed_size += + luma_pic_size + + 2 * (luma_pic_size >> (cm->subsampling_x + cm->subsampling_y)); + level_stats->time_encoded = + (cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) / + (double)TICKS_PER_SEC; + } + + if (arf_src_index > 0) { + if (!level_stats->seen_first_altref) { + level_stats->seen_first_altref = 1; + } else if (level_stats->frames_since_last_altref < + level_spec->min_altref_distance) { + level_spec->min_altref_distance = 
level_stats->frames_since_last_altref; + } + level_stats->frames_since_last_altref = 0; + } else { + ++level_stats->frames_since_last_altref; + } + + if (level_stats->frame_window_buffer.len < FRAME_WINDOW_SIZE - 1) { + idx = (level_stats->frame_window_buffer.start + + level_stats->frame_window_buffer.len++) % FRAME_WINDOW_SIZE; + } else { + idx = level_stats->frame_window_buffer.start; + level_stats->frame_window_buffer.start = (idx + 1) % FRAME_WINDOW_SIZE; + } + level_stats->frame_window_buffer.buf[idx].ts = cpi->last_time_stamp_seen; + level_stats->frame_window_buffer.buf[idx].size = (uint32_t)(*size); + level_stats->frame_window_buffer.buf[idx].luma_samples = luma_pic_size; + + if (cm->frame_type == KEY_FRAME) { + level_stats->ref_refresh_map = 0; + } else { + int count = 0; + level_stats->ref_refresh_map |= vp9_get_refresh_mask(cpi); + // Also need to consider the case where the encoder refers to a buffer + // that has been implicitly refreshed after encoding a keyframe. + if (!cm->intra_only) { + level_stats->ref_refresh_map |= (1 << cpi->lst_fb_idx); + level_stats->ref_refresh_map |= (1 << cpi->gld_fb_idx); + level_stats->ref_refresh_map |= (1 << cpi->alt_fb_idx); + } + for (i = 0; i < REF_FRAMES; ++i) { + count += (level_stats->ref_refresh_map >> i) & 1; + } + if (count > level_spec->max_ref_frame_buffers) { + level_spec->max_ref_frame_buffers = count; + } + } + + // update average_bitrate + level_spec->average_bitrate = + (double)level_stats->total_compressed_size / 125.0 / + level_stats->time_encoded; + + // update max_luma_sample_rate + luma_samples = 0; + for (i = 0; i < level_stats->frame_window_buffer.len; ++i) { + idx = (level_stats->frame_window_buffer.start + + level_stats->frame_window_buffer.len - 1 - i) % FRAME_WINDOW_SIZE; + if (i == 0) { + dur_end = level_stats->frame_window_buffer.buf[idx].ts; + } + if (dur_end - level_stats->frame_window_buffer.buf[idx].ts >= + TICKS_PER_SEC) { + break; + } + luma_samples += 
level_stats->frame_window_buffer.buf[idx].luma_samples; + } + if (luma_samples > level_spec->max_luma_sample_rate) { + level_spec->max_luma_sample_rate = luma_samples; + } + + // update max_cpb_size + cpb_data_size = 0; + for (i = 0; i < CPB_WINDOW_SIZE; ++i) { + if (i >= level_stats->frame_window_buffer.len) break; + idx = (level_stats->frame_window_buffer.start + + level_stats->frame_window_buffer.len - 1 - i) % FRAME_WINDOW_SIZE; + cpb_data_size += level_stats->frame_window_buffer.buf[idx].size; + } + cpb_data_size = cpb_data_size / 125.0; + if (cpb_data_size > level_spec->max_cpb_size) { + level_spec->max_cpb_size = cpb_data_size; + } + + // update max_luma_picture_size + if (luma_pic_size > level_spec->max_luma_picture_size) { + level_spec->max_luma_picture_size = luma_pic_size; + } + + // update compression_ratio + level_spec->compression_ratio = + (double)level_stats->total_uncompressed_size * cm->bit_depth / + level_stats->total_compressed_size / 8.0; + + // update max_col_tiles + if (level_spec->max_col_tiles < (1 << cm->log2_tile_cols)) { + level_spec->max_col_tiles = (1 << cm->log2_tile_cols); + } +} + int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, size_t *size, uint8_t *dest, int64_t *time_stamp, int64_t *time_end, int flush) { @@ -4228,6 +4654,20 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, arf_src_index = 0; if (arf_src_index) { + for (i = 0; i <= arf_src_index; ++i) { + struct lookahead_entry *e = vp9_lookahead_peek(cpi->lookahead, i); + // Avoid creating an alt-ref if there's a forced keyframe pending. 
+ if (e == NULL) { + break; + } else if (e->flags == VPX_EFLAG_FORCE_KF) { + arf_src_index = 0; + flush = 1; + break; + } + } + } + + if (arf_src_index) { assert(arf_src_index <= rc->frames_to_key); if ((source = vp9_lookahead_peek(cpi->lookahead, arf_src_index)) != NULL) { @@ -4247,7 +4687,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, cpi->svc.layer_context[cpi->svc.spatial_layer_id].has_alt_frame = 1; #endif - if (oxcf->arnr_max_frames > 0) { + if ((oxcf->arnr_max_frames > 0) && (oxcf->arnr_strength > 0)) { // Produce the filtered ARF frame. vp9_temporal_filter(cpi, arf_src_index); vpx_extend_frame_borders(&cpi->alt_ref_buffer); @@ -4427,6 +4867,9 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, if (cpi->b_calculate_psnr && oxcf->pass != 1 && cm->show_frame) generate_psnr_packet(cpi); + if (cpi->keep_level_stats && oxcf->pass != 1) + update_level_info(cpi, size, arf_src_index); + #if CONFIG_INTERNAL_STATS if (oxcf->pass != 1) { diff --git a/libvpx/vp9/encoder/vp9_encoder.h b/libvpx/vp9/encoder/vp9_encoder.h index 159c03aa8..128b62328 100644 --- a/libvpx/vp9/encoder/vp9_encoder.h +++ b/libvpx/vp9/encoder/vp9_encoder.h @@ -20,6 +20,7 @@ #include "vpx_dsp/ssim.h" #endif #include "vpx_dsp/variance.h" +#include "vpx_ports/system_state.h" #include "vpx_util/vpx_thread.h" #include "vp9/common/vp9_alloccommon.h" @@ -35,6 +36,7 @@ #include "vp9/encoder/vp9_lookahead.h" #include "vp9/encoder/vp9_mbgraph.h" #include "vp9/encoder/vp9_mcomp.h" +#include "vp9/encoder/vp9_noise_estimate.h" #include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rd.h" @@ -50,6 +52,9 @@ extern "C" { #endif +// vp9 uses 10,000,000 ticks/second as time stamp +#define TICKS_PER_SEC 10000000 + typedef struct { int nmvjointcost[MV_JOINTS]; int nmvcosts[2][MV_VALS]; @@ -111,6 +116,7 @@ typedef enum { VARIANCE_AQ = 1, COMPLEXITY_AQ = 2, CYCLIC_REFRESH_AQ = 3, + EQUATOR360_AQ = 4, AQ_MODE_COUNT // This 
should always be the last member of the enum } AQ_MODE; @@ -127,7 +133,7 @@ typedef struct VP9EncoderConfig { int height; // height of data passed to the compressor unsigned int input_bit_depth; // Input bit depth. double init_framerate; // set to passed in framerate - int64_t target_bandwidth; // bandwidth to be used in kilobits per second + int64_t target_bandwidth; // bandwidth to be used in bits per second int noise_sensitivity; // pre processing blur: recommendation 0 int sharpness; // sharpening output: recommendation 0: @@ -225,6 +231,8 @@ typedef struct VP9EncoderConfig { int max_threads; + int target_level; + vpx_fixed_buf_t two_pass_stats_in; struct vpx_codec_pkt_list *output_pkt_list; @@ -259,6 +267,8 @@ typedef struct RD_COUNTS { vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES]; int64_t comp_pred_diff[REFERENCE_MODES]; int64_t filter_diff[SWITCHABLE_FILTER_CONTEXTS]; + int m_search_count; + int ex_search_count; } RD_COUNTS; typedef struct ThreadData { @@ -291,6 +301,69 @@ typedef struct IMAGE_STAT { double worst; } ImageStat; +#define CPB_WINDOW_SIZE 4 +#define FRAME_WINDOW_SIZE 128 +#define SAMPLE_RATE_GRACE_P 0.015 +#define VP9_LEVELS 14 + +typedef enum { + LEVEL_UNKNOWN = 0, + LEVEL_1 = 10, + LEVEL_1_1 = 11, + LEVEL_2 = 20, + LEVEL_2_1 = 21, + LEVEL_3 = 30, + LEVEL_3_1 = 31, + LEVEL_4 = 40, + LEVEL_4_1 = 41, + LEVEL_5 = 50, + LEVEL_5_1 = 51, + LEVEL_5_2 = 52, + LEVEL_6 = 60, + LEVEL_6_1 = 61, + LEVEL_6_2 = 62, + LEVEL_MAX = 255 +} VP9_LEVEL; + +typedef struct { + VP9_LEVEL level; + uint64_t max_luma_sample_rate; + uint32_t max_luma_picture_size; + double average_bitrate; // in kilobits per second + double max_cpb_size; // in kilobits + double compression_ratio; + uint8_t max_col_tiles; + uint32_t min_altref_distance; + uint8_t max_ref_frame_buffers; +} Vp9LevelSpec; + +typedef struct { + int64_t ts; // timestamp + uint32_t luma_samples; + uint32_t size; // in bytes +} FrameRecord; + +typedef struct { + FrameRecord buf[FRAME_WINDOW_SIZE]; + uint8_t 
start; + uint8_t len; +} FrameWindowBuffer; + +typedef struct { + uint8_t seen_first_altref; + uint32_t frames_since_last_altref; + uint64_t total_compressed_size; + uint64_t total_uncompressed_size; + double time_encoded; // in seconds + FrameWindowBuffer frame_window_buffer; + int ref_refresh_map; +} Vp9LevelStats; + +typedef struct { + Vp9LevelStats level_stats; + Vp9LevelSpec level_spec; +} Vp9LevelInfo; + typedef struct VP9_COMP { QUANTS quants; ThreadData td; @@ -335,7 +408,7 @@ typedef struct VP9_COMP { YV12_BUFFER_CONFIG last_frame_uf; TOKENEXTRA *tile_tok[4][1 << 6]; - unsigned int tok_count[4][1 << 6]; + uint32_t tok_count[4][1 << 6]; // Ambient reconstruction err target for force key frames int64_t ambient_err; @@ -367,7 +440,7 @@ typedef struct VP9_COMP { SPEED_FEATURES sf; - unsigned int max_mv_magnitude; + uint32_t max_mv_magnitude; int mv_step_param; int allow_comp_inter_inter; @@ -379,10 +452,10 @@ typedef struct VP9_COMP { // clips, and 300 for < HD clips. int encode_breakout; - unsigned char *segmentation_map; + uint8_t *segmentation_map; // segment threashold for encode breakout - int segment_encode_breakout[MAX_SEGMENTS]; + int segment_encode_breakout[MAX_SEGMENTS]; CYCLIC_REFRESH *cyclic_refresh; ActiveMap active_map; @@ -404,11 +477,10 @@ typedef struct VP9_COMP { YV12_BUFFER_CONFIG alt_ref_buffer; - #if CONFIG_INTERNAL_STATS unsigned int mode_chosen_counts[MAX_MODES]; - int count; + int count; uint64_t total_sq_error; uint64_t total_samples; ImageStat psnr; @@ -469,7 +541,7 @@ typedef struct VP9_COMP { int mbmode_cost[INTRA_MODES]; unsigned int inter_mode_cost[INTER_MODE_CONTEXTS][INTER_MODES]; - int intra_uv_mode_cost[FRAME_TYPES][INTRA_MODES]; + int intra_uv_mode_cost[FRAME_TYPES][INTRA_MODES][INTRA_MODES]; int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES]; int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS]; int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES]; @@ -484,12 +556,22 @@ typedef struct VP9_COMP 
{ int resize_pending; int resize_state; + int external_resize; int resize_scale_num; int resize_scale_den; int resize_avg_qp; int resize_buffer_underflow; int resize_count; + int use_skin_detection; + + int target_level; + + NOISE_ESTIMATE noise_estimate; + + // Count on how many consecutive times a block uses small/zeromv for encoding. + uint8_t *consec_zero_mv; + // VAR_BASED_PARTITION thresholds // 0 - threshold_64x64; 1 - threshold_32x32; // 2 - threshold_16x16; 3 - vbp_threshold_8x8; @@ -503,6 +585,9 @@ typedef struct VP9_COMP { VPxWorker *workers; struct EncWorkerData *tile_thr_data; VP9LfSync lf_row_sync; + + int keep_level_stats; + Vp9LevelInfo level_info; } VP9_COMP; void vp9_initialize_enc(void); @@ -614,6 +699,11 @@ void vp9_update_reference_frames(VP9_COMP *cpi); void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv); +YV12_BUFFER_CONFIG *vp9_svc_twostage_scale(VP9_COMMON *cm, + YV12_BUFFER_CONFIG *unscaled, + YV12_BUFFER_CONFIG *scaled, + YV12_BUFFER_CONFIG *scaled_temp); + YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, YV12_BUFFER_CONFIG *scaled, @@ -653,6 +743,8 @@ static INLINE int *cond_cost_list(const struct VP9_COMP *cpi, int *cost_list) { return cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL; } +VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec); + void vp9_new_framerate(VP9_COMP *cpi, double framerate); #define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl)) diff --git a/libvpx/vp9/encoder/vp9_ethread.c b/libvpx/vp9/encoder/vp9_ethread.c index ad25712be..1d1926cae 100644 --- a/libvpx/vp9/encoder/vp9_ethread.c +++ b/libvpx/vp9/encoder/vp9_ethread.c @@ -30,6 +30,10 @@ static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) { for (n = 0; n < ENTROPY_TOKENS; n++) td->rd_counts.coef_counts[i][j][k][l][m][n] += td_t->rd_counts.coef_counts[i][j][k][l][m][n]; + + // Counts of all motion searches and exhuastive mesh searches. 
+ td->rd_counts.m_search_count += td_t->rd_counts.m_search_count; + td->rd_counts.ex_search_count += td_t->rd_counts.ex_search_count; } static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) { diff --git a/libvpx/vp9/encoder/vp9_firstpass.c b/libvpx/vp9/encoder/vp9_firstpass.c index 30738b52d..53a3ec7de 100644 --- a/libvpx/vp9/encoder/vp9_firstpass.c +++ b/libvpx/vp9/encoder/vp9_firstpass.c @@ -41,11 +41,8 @@ #define OUTPUT_FPF 0 #define ARF_STATS_OUTPUT 0 -#define GROUP_ADAPTIVE_MAXQ 1 - #define BOOST_BREAKOUT 12.5 #define BOOST_FACTOR 12.5 -#define ERR_DIVISOR 128.0 #define FACTOR_PT_LOW 0.70 #define FACTOR_PT_HIGH 0.90 #define FIRST_PASS_Q 10.0 @@ -65,7 +62,7 @@ #define NCOUNT_INTRA_THRESH 8192 #define NCOUNT_INTRA_FACTOR 3 -#define NCOUNT_FRAME_II_THRESH 5.0 + #define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x) - 0.000001 : (x) + 0.000001) @@ -115,7 +112,7 @@ static void output_stats(FIRSTPASS_STATS *stats, fprintf(fpfile, "%12.0lf %12.4lf %12.0lf %12.0lf %12.0lf %12.4lf %12.4lf" "%12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf" - "%12.4lf %12.4lf %12.0lf %12.0lf %12.0lf %12.4lf\n", + "%12.4lf %12.4lf %12.4lf %12.0lf %12.0lf %12.0lf %12.4lf\n", stats->frame, stats->weight, stats->intra_error, @@ -126,6 +123,7 @@ static void output_stats(FIRSTPASS_STATS *stats, stats->pcnt_second_ref, stats->pcnt_neutral, stats->intra_skip_pct, + stats->intra_smooth_pct, stats->inactive_zone_rows, stats->inactive_zone_cols, stats->MVr, @@ -155,82 +153,85 @@ static void output_fpmb_stats(uint8_t *this_frame_mb_stats, VP9_COMMON *cm, #endif static void zero_stats(FIRSTPASS_STATS *section) { - section->frame = 0.0; - section->weight = 0.0; - section->intra_error = 0.0; - section->coded_error = 0.0; - section->sr_coded_error = 0.0; - section->pcnt_inter = 0.0; - section->pcnt_motion = 0.0; - section->pcnt_second_ref = 0.0; - section->pcnt_neutral = 0.0; - section->intra_skip_pct = 0.0; + section->frame = 0.0; + section->weight = 0.0; + 
section->intra_error = 0.0; + section->coded_error = 0.0; + section->sr_coded_error = 0.0; + section->pcnt_inter = 0.0; + section->pcnt_motion = 0.0; + section->pcnt_second_ref = 0.0; + section->pcnt_neutral = 0.0; + section->intra_skip_pct = 0.0; + section->intra_smooth_pct = 0.0; section->inactive_zone_rows = 0.0; section->inactive_zone_cols = 0.0; - section->MVr = 0.0; - section->mvr_abs = 0.0; - section->MVc = 0.0; - section->mvc_abs = 0.0; - section->MVrv = 0.0; - section->MVcv = 0.0; - section->mv_in_out_count = 0.0; - section->new_mv_count = 0.0; - section->count = 0.0; - section->duration = 1.0; - section->spatial_layer_id = 0; + section->MVr = 0.0; + section->mvr_abs = 0.0; + section->MVc = 0.0; + section->mvc_abs = 0.0; + section->MVrv = 0.0; + section->MVcv = 0.0; + section->mv_in_out_count = 0.0; + section->new_mv_count = 0.0; + section->count = 0.0; + section->duration = 1.0; + section->spatial_layer_id = 0; } static void accumulate_stats(FIRSTPASS_STATS *section, const FIRSTPASS_STATS *frame) { - section->frame += frame->frame; - section->weight += frame->weight; - section->spatial_layer_id = frame->spatial_layer_id; - section->intra_error += frame->intra_error; - section->coded_error += frame->coded_error; - section->sr_coded_error += frame->sr_coded_error; - section->pcnt_inter += frame->pcnt_inter; - section->pcnt_motion += frame->pcnt_motion; - section->pcnt_second_ref += frame->pcnt_second_ref; - section->pcnt_neutral += frame->pcnt_neutral; - section->intra_skip_pct += frame->intra_skip_pct; + section->frame += frame->frame; + section->weight += frame->weight; + section->spatial_layer_id = frame->spatial_layer_id; + section->intra_error += frame->intra_error; + section->coded_error += frame->coded_error; + section->sr_coded_error += frame->sr_coded_error; + section->pcnt_inter += frame->pcnt_inter; + section->pcnt_motion += frame->pcnt_motion; + section->pcnt_second_ref += frame->pcnt_second_ref; + section->pcnt_neutral += frame->pcnt_neutral; + 
section->intra_skip_pct += frame->intra_skip_pct; + section->intra_smooth_pct += frame->intra_smooth_pct; section->inactive_zone_rows += frame->inactive_zone_rows; section->inactive_zone_cols += frame->inactive_zone_cols; - section->MVr += frame->MVr; - section->mvr_abs += frame->mvr_abs; - section->MVc += frame->MVc; - section->mvc_abs += frame->mvc_abs; - section->MVrv += frame->MVrv; - section->MVcv += frame->MVcv; - section->mv_in_out_count += frame->mv_in_out_count; - section->new_mv_count += frame->new_mv_count; - section->count += frame->count; - section->duration += frame->duration; + section->MVr += frame->MVr; + section->mvr_abs += frame->mvr_abs; + section->MVc += frame->MVc; + section->mvc_abs += frame->mvc_abs; + section->MVrv += frame->MVrv; + section->MVcv += frame->MVcv; + section->mv_in_out_count += frame->mv_in_out_count; + section->new_mv_count += frame->new_mv_count; + section->count += frame->count; + section->duration += frame->duration; } static void subtract_stats(FIRSTPASS_STATS *section, const FIRSTPASS_STATS *frame) { - section->frame -= frame->frame; - section->weight -= frame->weight; - section->intra_error -= frame->intra_error; - section->coded_error -= frame->coded_error; - section->sr_coded_error -= frame->sr_coded_error; - section->pcnt_inter -= frame->pcnt_inter; - section->pcnt_motion -= frame->pcnt_motion; - section->pcnt_second_ref -= frame->pcnt_second_ref; - section->pcnt_neutral -= frame->pcnt_neutral; - section->intra_skip_pct -= frame->intra_skip_pct; + section->frame -= frame->frame; + section->weight -= frame->weight; + section->intra_error -= frame->intra_error; + section->coded_error -= frame->coded_error; + section->sr_coded_error -= frame->sr_coded_error; + section->pcnt_inter -= frame->pcnt_inter; + section->pcnt_motion -= frame->pcnt_motion; + section->pcnt_second_ref -= frame->pcnt_second_ref; + section->pcnt_neutral -= frame->pcnt_neutral; + section->intra_skip_pct -= frame->intra_skip_pct; + 
section->intra_smooth_pct -= frame->intra_smooth_pct; section->inactive_zone_rows -= frame->inactive_zone_rows; section->inactive_zone_cols -= frame->inactive_zone_cols; - section->MVr -= frame->MVr; - section->mvr_abs -= frame->mvr_abs; - section->MVc -= frame->MVc; - section->mvc_abs -= frame->mvc_abs; - section->MVrv -= frame->MVrv; - section->MVcv -= frame->MVcv; - section->mv_in_out_count -= frame->mv_in_out_count; - section->new_mv_count -= frame->new_mv_count; - section->count -= frame->count; - section->duration -= frame->duration; + section->MVr -= frame->MVr; + section->mvr_abs -= frame->mvr_abs; + section->MVc -= frame->MVc; + section->mvc_abs -= frame->mvc_abs; + section->MVrv -= frame->MVrv; + section->MVcv -= frame->MVcv; + section->mv_in_out_count -= frame->mv_in_out_count; + section->new_mv_count -= frame->new_mv_count; + section->count -= frame->count; + section->duration -= frame->duration; } // Calculate an active area of the image that discounts formatting @@ -396,7 +397,7 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, MV tmp_mv = {0, 0}; MV ref_mv_full = {ref_mv->row >> 3, ref_mv->col >> 3}; int num00, tmp_err, n; - const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; + const BLOCK_SIZE bsize = xd->mi[0]->sb_type; vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize]; const int new_mv_mode_penalty = NEW_MV_MODE_PENALTY; @@ -490,7 +491,63 @@ static void set_first_pass_params(VP9_COMP *cpi) { cpi->rc.frames_to_key = INT_MAX; } +// This threshold is used to track blocks where to all intents and purposes +// the intra prediction error 0. Though the metric we test against +// is technically a sse we are mainly interested in blocks where all the pixels +// in the 8 bit domain have an error of <= 1 (where error = sse) so a +// linear scaling for 10 and 12 bit gives similar results. 
#define UL_INTRA_THRESH 50 +static int get_ul_intra_threshold(VP9_COMMON *cm) { + int ret_val = UL_INTRA_THRESH; +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) { + switch (cm->bit_depth) { + case VPX_BITS_8: + ret_val = UL_INTRA_THRESH; + break; + case VPX_BITS_10: + ret_val = UL_INTRA_THRESH >> 2; + break; + case VPX_BITS_12: + ret_val = UL_INTRA_THRESH >> 4; + break; + default: + assert(0 && "cm->bit_depth should be VPX_BITS_8, " + "VPX_BITS_10 or VPX_BITS_12"); + } + } +#else + (void) cm; +#endif // CONFIG_VP9_HIGHBITDEPTH + return ret_val; +} + +#define SMOOTH_INTRA_THRESH 4000 +static int get_smooth_intra_threshold(VP9_COMMON *cm) { + int ret_val = SMOOTH_INTRA_THRESH; +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) { + switch (cm->bit_depth) { + case VPX_BITS_8: + ret_val = SMOOTH_INTRA_THRESH; + break; + case VPX_BITS_10: + ret_val = SMOOTH_INTRA_THRESH >> 2; + break; + case VPX_BITS_12: + ret_val = SMOOTH_INTRA_THRESH >> 4; + break; + default: + assert(0 && "cm->bit_depth should be VPX_BITS_8, " + "VPX_BITS_10 or VPX_BITS_12"); + } + } +#else + (void) cm; +#endif // CONFIG_VP9_HIGHBITDEPTH + return ret_val; +} + #define INVALID_ROW -1 void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { int mb_row, mb_col; @@ -517,6 +574,7 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { const int intrapenalty = INTRA_MODE_PENALTY; double neutral_count; int intra_skip_count = 0; + int intra_smooth_count = 0; int image_data_start_row = INVALID_ROW; int new_mv_count = 0; int sum_in_vectors = 0; @@ -535,6 +593,7 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { double intra_factor; double brightness_factor; BufferPool *const pool = cm->buffer_pool; + MODE_INFO mi_above, mi_left; // First pass code requires valid last and new frame buffers. 
assert(new_yv12 != NULL); @@ -636,7 +695,6 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { MV best_ref_mv = {0, 0}; // Reset above block coeffs. - xd->up_available = (mb_row != 0); recon_yoffset = (mb_row * recon_y_stride * 16); recon_uvoffset = (mb_row * recon_uv_stride * uv_mb_height); @@ -662,20 +720,25 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { xd->plane[0].dst.buf = new_yv12->y_buffer + recon_yoffset; xd->plane[1].dst.buf = new_yv12->u_buffer + recon_uvoffset; xd->plane[2].dst.buf = new_yv12->v_buffer + recon_uvoffset; - xd->left_available = (mb_col != 0); - xd->mi[0]->mbmi.sb_type = bsize; - xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME; + xd->mi[0]->sb_type = bsize; + xd->mi[0]->ref_frame[0] = INTRA_FRAME; set_mi_row_col(xd, &tile, mb_row << 1, num_8x8_blocks_high_lookup[bsize], mb_col << 1, num_8x8_blocks_wide_lookup[bsize], cm->mi_rows, cm->mi_cols); + // Are edges available for intra prediction? + // Since the firstpass does not populate the mi_grid_visible, + // above_mi/left_mi must be overwritten with a nonzero value when edges + // are available. Required by vp9_predict_intra_block(). + xd->above_mi = (mb_row != 0) ? &mi_above : NULL; + xd->left_mi = (mb_col > tile.mi_col_start) ? &mi_left : NULL; // Do intra 16x16 prediction. x->skip_encode = 0; - xd->mi[0]->mbmi.mode = DC_PRED; - xd->mi[0]->mbmi.tx_size = use_dc_pred ? + xd->mi[0]->mode = DC_PRED; + xd->mi[0]->tx_size = use_dc_pred ? (bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4; - vp9_encode_intra_block_plane(x, bsize, 0); + vp9_encode_intra_block_plane(x, bsize, 0, 0); this_error = vpx_get_mb_ss(x->plane[0].src_diff); // Keep a record of blocks that have almost no intra error residual @@ -683,11 +746,14 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { // domain). 
In natural videos this is uncommon, but it is much more // common in animations, graphics and screen content, so may be used // as a signal to detect these types of content. - if (this_error < UL_INTRA_THRESH) { + if (this_error < get_ul_intra_threshold(cm)) { ++intra_skip_count; } else if ((mb_col > 0) && (image_data_start_row == INVALID_ROW)) { image_data_start_row = mb_row; } + if (this_error < get_smooth_intra_threshold(cm)) { + ++intra_smooth_count; + } #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { @@ -897,11 +963,11 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { mv.row *= 8; mv.col *= 8; this_error = motion_error; - xd->mi[0]->mbmi.mode = NEWMV; - xd->mi[0]->mbmi.mv[0].as_mv = mv; - xd->mi[0]->mbmi.tx_size = TX_4X4; - xd->mi[0]->mbmi.ref_frame[0] = LAST_FRAME; - xd->mi[0]->mbmi.ref_frame[1] = NONE; + xd->mi[0]->mode = NEWMV; + xd->mi[0]->mv[0].as_mv = mv; + xd->mi[0]->tx_size = TX_4X4; + xd->mi[0]->ref_frame[0] = LAST_FRAME; + xd->mi[0]->ref_frame[1] = NONE; vp9_build_inter_predictors_sby(xd, mb_row << 1, mb_col << 1, bsize); vp9_encode_sby_pass1(x, bsize); sum_mvr += mv.row; @@ -1053,8 +1119,10 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { fps.pcnt_second_ref = (double)second_ref_count / num_mbs; fps.pcnt_neutral = (double)neutral_count / num_mbs; fps.intra_skip_pct = (double)intra_skip_count / num_mbs; + fps.intra_smooth_pct = (double)intra_smooth_count / num_mbs; fps.inactive_zone_rows = (double)image_data_start_row; - fps.inactive_zone_cols = (double)0; // TODO(paulwilkins): fix + // Currently set to 0 as most issues relate to letter boxing. 
+ fps.inactive_zone_cols = (double)0; if (mvcount > 0) { fps.MVr = (double)sum_mvr / mvcount; @@ -1080,10 +1148,9 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { fps.pcnt_motion = 0.0; } - // TODO(paulwilkins): Handle the case when duration is set to 0, or - // something less than the full time between subsequent values of - // cpi->source_time_stamp. - fps.duration = (double)(source->ts_end - source->ts_start); + // Dont allow a value of 0 for duration. + // (Section duration is also defaulted to minimum of 1.0). + fps.duration = VPXMAX(1.0, (double)(source->ts_end - source->ts_start)); // Don't want to do output stats with a stack variable! twopass->this_frame_stats = fps; @@ -1171,18 +1238,15 @@ static double calc_correction_factor(double err_per_mb, return fclamp(pow(error_term, power_term), 0.05, 5.0); } -// Larger image formats are expected to be a little harder to code relatively -// given the same prediction error score. This in part at least relates to the -// increased size and hence coding cost of motion vectors. -#define EDIV_SIZE_FACTOR 800 - -static int get_twopass_worst_quality(const VP9_COMP *cpi, +#define ERR_DIVISOR 115.0 +static int get_twopass_worst_quality(VP9_COMP *cpi, const double section_err, double inactive_zone, - int section_target_bandwidth, - double group_weight_factor) { + int section_target_bandwidth) { const RATE_CONTROL *const rc = &cpi->rc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; + TWO_PASS *const twopass = &cpi->twopass; + // Clamp the target rate to VBR min / max limts. 
const int target_rate = vp9_rc_clamp_pframe_target_size(cpi, section_target_bandwidth); @@ -1197,29 +1261,36 @@ static int get_twopass_worst_quality(const VP9_COMP *cpi, const int active_mbs = VPXMAX(1, num_mbs - (int)(num_mbs * inactive_zone)); const double av_err_per_mb = section_err / active_mbs; const double speed_term = 1.0 + 0.04 * oxcf->speed; - const double ediv_size_correction = (double)num_mbs / EDIV_SIZE_FACTOR; + double last_group_rate_err; const int target_norm_bits_per_mb = ((uint64_t)target_rate << BPER_MB_NORMBITS) / active_mbs; - int q; int is_svc_upper_layer = 0; if (is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id > 0) is_svc_upper_layer = 1; + // based on recent history adjust expectations of bits per macroblock. + last_group_rate_err = (double)twopass->rolling_arf_group_actual_bits / + DOUBLE_DIVIDE_CHECK((double)twopass->rolling_arf_group_target_bits); + last_group_rate_err = + VPXMAX(0.25, VPXMIN(4.0, last_group_rate_err)); + twopass->bpm_factor *= (3.0 + last_group_rate_err) / 4.0; + twopass->bpm_factor = + VPXMAX(0.25, VPXMIN(4.0, twopass->bpm_factor)); // Try and pick a max Q that will be high enough to encode the // content at the given rate. for (q = rc->best_quality; q < rc->worst_quality; ++q) { const double factor = calc_correction_factor(av_err_per_mb, - ERR_DIVISOR - ediv_size_correction, + ERR_DIVISOR, is_svc_upper_layer ? SVC_FACTOR_PT_LOW : FACTOR_PT_LOW, FACTOR_PT_HIGH, q, cpi->common.bit_depth); const int bits_per_mb = vp9_rc_bits_per_mb(INTER_FRAME, q, - factor * speed_term * group_weight_factor, + factor * speed_term * cpi->twopass.bpm_factor, cpi->common.bit_depth); if (bits_per_mb <= target_norm_bits_per_mb) break; @@ -1270,6 +1341,7 @@ void vp9_init_second_pass(VP9_COMP *cpi) { const VP9EncoderConfig *const oxcf = &cpi->oxcf; const int is_two_pass_svc = (svc->number_spatial_layers > 1) || (svc->number_temporal_layers > 1); + RATE_CONTROL *const rc = &cpi->rc; TWO_PASS *const twopass = is_two_pass_svc ? 
&svc->layer_context[svc->spatial_layer_id].twopass : &cpi->twopass; double frame_rate; @@ -1326,26 +1398,33 @@ void vp9_init_second_pass(VP9_COMP *cpi) { } // Reset the vbr bits off target counters - cpi->rc.vbr_bits_off_target = 0; - cpi->rc.vbr_bits_off_target_fast = 0; - - cpi->rc.rate_error_estimate = 0; + rc->vbr_bits_off_target = 0; + rc->vbr_bits_off_target_fast = 0; + rc->rate_error_estimate = 0; // Static sequence monitor variables. twopass->kf_zeromotion_pct = 100; twopass->last_kfgroup_zeromotion_pct = 100; + // Initialize bits per macro_block estimate correction factor. + twopass->bpm_factor = 1.0; + // Initiallize actual and target bits counters for ARF groups so that + // at the start we have a neutral bpm adjustment. + twopass->rolling_arf_group_target_bits = 1; + twopass->rolling_arf_group_actual_bits = 1; + if (oxcf->resize_mode != RESIZE_NONE) { init_subsampling(cpi); } } #define SR_DIFF_PART 0.0015 -#define MOTION_AMP_PART 0.003 #define INTRA_PART 0.005 #define DEFAULT_DECAY_LIMIT 0.75 #define LOW_SR_DIFF_TRHESH 0.1 #define SR_DIFF_MAX 128.0 +#define LOW_CODED_ERR_PER_MB 10.0 +#define NCOUNT_FRAME_II_THRESH 6.0 static double get_sr_decay_rate(const VP9_COMP *cpi, const FIRSTPASS_STATS *frame) { @@ -1356,12 +1435,15 @@ static double get_sr_decay_rate(const VP9_COMP *cpi, double sr_decay = 1.0; double modified_pct_inter; double modified_pcnt_intra; - const double motion_amplitude_factor = - frame->pcnt_motion * ((frame->mvc_abs + frame->mvr_abs) / 2); + const double motion_amplitude_part = + frame->pcnt_motion * + ((frame->mvc_abs + frame->mvr_abs) / + (cpi->initial_height + cpi->initial_width)); modified_pct_inter = frame->pcnt_inter; - if ((frame->intra_error / DOUBLE_DIVIDE_CHECK(frame->coded_error)) < - (double)NCOUNT_FRAME_II_THRESH) { + if (((frame->coded_error / num_mbs) > LOW_CODED_ERR_PER_MB) && + ((frame->intra_error / DOUBLE_DIVIDE_CHECK(frame->coded_error)) < + (double)NCOUNT_FRAME_II_THRESH)) { modified_pct_inter = frame->pcnt_inter - 
frame->pcnt_neutral; } modified_pcnt_intra = 100 * (1.0 - modified_pct_inter); @@ -1370,7 +1452,7 @@ static double get_sr_decay_rate(const VP9_COMP *cpi, if ((sr_diff > LOW_SR_DIFF_TRHESH)) { sr_diff = VPXMIN(sr_diff, SR_DIFF_MAX); sr_decay = 1.0 - (SR_DIFF_PART * sr_diff) - - (MOTION_AMP_PART * motion_amplitude_factor) - + motion_amplitude_part - (INTRA_PART * modified_pcnt_intra); } return VPXMAX(sr_decay, VPXMIN(DEFAULT_DECAY_LIMIT, modified_pct_inter)); @@ -1722,15 +1804,13 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, gf_group->update_type[0] = OVERLAY_UPDATE; gf_group->rf_level[0] = INTER_NORMAL; gf_group->bit_allocation[0] = 0; - gf_group->arf_update_idx[0] = arf_buffer_indices[0]; - gf_group->arf_ref_idx[0] = arf_buffer_indices[0]; } else { gf_group->update_type[0] = GF_UPDATE; gf_group->rf_level[0] = GF_ARF_STD; gf_group->bit_allocation[0] = gf_arf_bits; - gf_group->arf_update_idx[0] = arf_buffer_indices[0]; - gf_group->arf_ref_idx[0] = arf_buffer_indices[0]; } + gf_group->arf_update_idx[0] = arf_buffer_indices[0]; + gf_group->arf_ref_idx[0] = arf_buffer_indices[0]; // Step over the golden frame / overlay frame if (EOF == input_stats(twopass, &frame_stats)) @@ -1857,9 +1937,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { double boost_score = 0.0; double old_boost_score = 0.0; double gf_group_err = 0.0; -#if GROUP_ADAPTIVE_MAXQ double gf_group_raw_error = 0.0; -#endif double gf_group_skip_pct = 0.0; double gf_group_inactive_zone_rows = 0.0; double gf_first_frame_err = 0.0; @@ -1909,9 +1987,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // the error score / cost of this frame has already been accounted for. 
if (arf_active_or_kf) { gf_group_err -= gf_first_frame_err; -#if GROUP_ADAPTIVE_MAXQ gf_group_raw_error -= this_frame->coded_error; -#endif gf_group_skip_pct -= this_frame->intra_skip_pct; gf_group_inactive_zone_rows -= this_frame->inactive_zone_rows; } @@ -1929,7 +2005,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { int int_lbq = (int)(vp9_convert_qindex_to_q(rc->last_boosted_qindex, cpi->common.bit_depth)); - active_min_gf_interval = rc->min_gf_interval + VPXMIN(2, int_max_q / 200); + active_min_gf_interval = + rc->min_gf_interval + arf_active_or_kf + VPXMIN(2, int_max_q / 200); if (active_min_gf_interval > rc->max_gf_interval) active_min_gf_interval = rc->max_gf_interval; @@ -1940,14 +2017,20 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // bits to spare and are better with a smaller interval and smaller boost. // At high Q when there are few bits to spare we are better with a longer // interval to spread the cost of the GF. - active_max_gf_interval = 12 + VPXMIN(4, (int_lbq / 6)); - if (active_max_gf_interval < active_min_gf_interval) - active_max_gf_interval = active_min_gf_interval; + active_max_gf_interval = + 12 + arf_active_or_kf + VPXMIN(4, (int_lbq / 6)); - if (active_max_gf_interval > rc->max_gf_interval) - active_max_gf_interval = rc->max_gf_interval; + // We have: active_min_gf_interval <= rc->max_gf_interval if (active_max_gf_interval < active_min_gf_interval) active_max_gf_interval = active_min_gf_interval; + else if (active_max_gf_interval > rc->max_gf_interval) + active_max_gf_interval = rc->max_gf_interval; + + // Would the active max drop us out just before the near the next kf? 
+ if ((active_max_gf_interval <= rc->frames_to_key) && + (active_max_gf_interval >= + (rc->frames_to_key - rc->min_gf_interval))) + active_max_gf_interval = rc->frames_to_key / 2; } } @@ -1958,9 +2041,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Accumulate error score of frames in this gf group. mod_frame_err = calculate_modified_err(cpi, twopass, oxcf, this_frame); gf_group_err += mod_frame_err; -#if GROUP_ADAPTIVE_MAXQ gf_group_raw_error += this_frame->coded_error; -#endif gf_group_skip_pct += this_frame->intra_skip_pct; gf_group_inactive_zone_rows += this_frame->inactive_zone_rows; @@ -2005,11 +2086,13 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Break out conditions. if ( // Break at active_max_gf_interval unless almost totally static. - (i >= (active_max_gf_interval + arf_active_or_kf) && - zero_motion_accumulator < 0.995) || + ((i >= active_max_gf_interval) && + (zero_motion_accumulator < 0.995)) || ( // Don't break out with a very short interval. - (i >= active_min_gf_interval + arf_active_or_kf) && + (i >= active_min_gf_interval) && + // If possible dont break very close to a kf + ((rc->frames_to_key - i) >= rc->min_gf_interval) && (!flash_detected) && ((mv_ratio_accumulator > mv_ratio_accumulator_thresh) || (abs_mv_in_out_accumulator > 3.0) || @@ -2023,8 +2106,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { old_boost_score = boost_score; } - twopass->gf_zeromotion_pct = (int)(zero_motion_accumulator * 1000.0); - // Was the group length constrained by the requirement for a new KF? rc->constrained_gf_group = (i >= rc->frames_to_key) ? 
1 : 0; @@ -2060,9 +2141,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { if (EOF == input_stats(twopass, this_frame)) break; gf_group_err += calculate_modified_err(cpi, twopass, oxcf, this_frame); -#if GROUP_ADAPTIVE_MAXQ gf_group_raw_error += this_frame->coded_error; -#endif gf_group_skip_pct += this_frame->intra_skip_pct; gf_group_inactive_zone_rows += this_frame->inactive_zone_rows; } @@ -2077,7 +2156,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Calculate the bits to be allocated to the gf/arf group as a whole gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err); -#if GROUP_ADAPTIVE_MAXQ // Calculate an estimate of the maxq needed for the group. // We are more agressive about correcting for sections // where there could be significant overshoot than for easier @@ -2092,26 +2170,13 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { const double group_av_inactive_zone = ((gf_group_inactive_zone_rows * 2) / (rc->baseline_gf_interval * (double)cm->mb_rows)); - - int tmp_q; - // rc factor is a weight factor that corrects for local rate control drift. 
- double rc_factor = 1.0; - if (rc->rate_error_estimate > 0) { - rc_factor = VPXMAX(RC_FACTOR_MIN, - (double)(100 - rc->rate_error_estimate) / 100.0); - } else { - rc_factor = VPXMIN(RC_FACTOR_MAX, - (double)(100 - rc->rate_error_estimate) / 100.0); - } - tmp_q = - get_twopass_worst_quality(cpi, group_av_err, - (group_av_skip_pct + group_av_inactive_zone), - vbr_group_bits_per_frame, - twopass->kfgroup_inter_fraction * rc_factor); + int tmp_q = + get_twopass_worst_quality(cpi, group_av_err, + (group_av_skip_pct + group_av_inactive_zone), + vbr_group_bits_per_frame); twopass->active_worst_quality = - VPXMAX(tmp_q, twopass->active_worst_quality >> 1); + (tmp_q + (twopass->active_worst_quality * 3)) >> 2; } -#endif // Calculate the extra bits to be used for boosted frame(s) gf_arf_bits = calculate_boost_bits(rc->baseline_gf_interval, @@ -2151,6 +2216,10 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Default to starting GF groups at normal frame size. cpi->rc.next_frame_size_selector = UNSCALED; } + + // Reset rolling actual and target bits counters for ARF groups. + twopass->rolling_arf_group_target_bits = 0; + twopass->rolling_arf_group_actual_bits = 0; } // Threshold for use of the lagging second reference frame. 
High second ref @@ -2265,6 +2334,8 @@ static int test_candidate_kf(TWO_PASS *twopass, return is_viable_kf; } +#define FRAMES_TO_CHECK_DECAY 8 + static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { int i, j; RATE_CONTROL *const rc = &cpi->rc; @@ -2283,7 +2354,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { double boost_score = 0.0; double kf_mod_err = 0.0; double kf_group_err = 0.0; - double recent_loop_decay[8] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; + double recent_loop_decay[FRAMES_TO_CHECK_DECAY]; vp9_zero(next_frame); @@ -2310,6 +2381,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { kf_mod_err = calculate_modified_err(cpi, twopass, oxcf, this_frame); + // Initialize the decay rates for the recent frames to check + for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j) + recent_loop_decay[j] = 1.0; + // Find the next keyframe. i = 0; while (twopass->stats_in < twopass->stats_in_end && @@ -2336,9 +2411,9 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // We want to know something about the recent past... rather than // as used elsewhere where we are concerned with decay in prediction // quality since the last GF or KF. - recent_loop_decay[i % 8] = loop_decay_rate; + recent_loop_decay[i % FRAMES_TO_CHECK_DECAY] = loop_decay_rate; decay_accumulator = 1.0; - for (j = 0; j < 8; ++j) + for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j) decay_accumulator *= recent_loop_decay[j]; // Special check for transition or high motion followed by a @@ -2482,16 +2557,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { kf_bits = calculate_boost_bits((rc->frames_to_key - 1), rc->kf_boost, twopass->kf_group_bits); - // Work out the fraction of the kf group bits reserved for the inter frames - // within the group after discounting the bits for the kf itself. 
- if (twopass->kf_group_bits) { - twopass->kfgroup_inter_fraction = - (double)(twopass->kf_group_bits - kf_bits) / - (double)twopass->kf_group_bits; - } else { - twopass->kfgroup_inter_fraction = 1.0; - } - twopass->kf_group_bits -= kf_bits; // Save the bits to spend on the key frame. @@ -2585,21 +2650,12 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { RATE_CONTROL *const rc = &cpi->rc; TWO_PASS *const twopass = &cpi->twopass; GF_GROUP *const gf_group = &twopass->gf_group; - int frames_left; FIRSTPASS_STATS this_frame; int target_rate; LAYER_CONTEXT *const lc = is_two_pass_svc(cpi) ? &cpi->svc.layer_context[cpi->svc.spatial_layer_id] : 0; - if (lc != NULL) { - frames_left = (int)(twopass->total_stats.count - - lc->current_video_frame_in_layer); - } else { - frames_left = (int)(twopass->total_stats.count - - cm->current_video_frame); - } - if (!twopass->stats_in) return; @@ -2641,6 +2697,9 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { twopass->active_worst_quality = cpi->oxcf.cq_level; } else if (cm->current_video_frame == 0 || (lc != NULL && lc->current_video_frame_in_layer == 0)) { + const int frames_left = (int)(twopass->total_stats.count - + ((lc != NULL) ? lc->current_video_frame_in_layer + : cm->current_video_frame)); // Special case code for first frame. 
const int section_target_bandwidth = (int)(twopass->bits_left / frames_left); @@ -2652,10 +2711,10 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { const double section_inactive_zone = (twopass->total_left_stats.inactive_zone_rows * 2) / ((double)cm->mb_rows * section_length); - const int tmp_q = - get_twopass_worst_quality(cpi, section_error, - section_intra_skip + section_inactive_zone, - section_target_bandwidth, DEFAULT_GRP_WEIGHT); + int tmp_q; + + tmp_q = get_twopass_worst_quality(cpi, section_error, + section_intra_skip + section_inactive_zone, section_target_bandwidth); twopass->active_worst_quality = tmp_q; twopass->baseline_active_worst_quality = tmp_q; @@ -2749,6 +2808,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { // applied when combining MB error values for the frame. twopass->mb_av_energy = log(((this_frame.intra_error * 256.0) / num_mbs) + 1.0); + twopass->mb_smooth_pct = this_frame.intra_smooth_pct; } // Update the total stats remaining structure. @@ -2761,6 +2821,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { void vp9_twopass_postencode_update(VP9_COMP *cpi) { TWO_PASS *const twopass = &cpi->twopass; RATE_CONTROL *const rc = &cpi->rc; + VP9_COMMON *const cm = &cpi->common; const int bits_used = rc->base_frame_target; // VBR correction is done through rc->vbr_bits_off_target. Based on the @@ -2771,6 +2832,10 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) { rc->vbr_bits_off_target += rc->base_frame_target - rc->projected_frame_size; twopass->bits_left = VPXMAX(twopass->bits_left - bits_used, 0); + // Target vs actual bits for this arf group. + twopass->rolling_arf_group_target_bits += rc->this_frame_target; + twopass->rolling_arf_group_actual_bits += rc->projected_frame_size; + // Calculate the pct rc error. if (rc->total_actual_bits) { rc->rate_error_estimate = @@ -2792,12 +2857,27 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) { // If the rate control is drifting consider adjustment to min or maxq. 
if ((cpi->oxcf.rc_mode != VPX_Q) && - (cpi->twopass.gf_zeromotion_pct < VLOW_MOTION_THRESHOLD) && !cpi->rc.is_src_frame_alt_ref) { const int maxq_adj_limit = rc->worst_quality - twopass->active_worst_quality; const int minq_adj_limit = (cpi->oxcf.rc_mode == VPX_CQ ? MINQ_ADJ_LIMIT_CQ : MINQ_ADJ_LIMIT); + int aq_extend_min = 0; + int aq_extend_max = 0; + + // Extend min or Max Q range to account for imbalance from the base + // value when using AQ. + if (cpi->oxcf.aq_mode != NO_AQ) { + if (cm->seg.aq_av_offset < 0) { + // The balance of the AQ map tends towarda lowering the average Q. + aq_extend_min = 0; + aq_extend_max = VPXMIN(maxq_adj_limit, -cm->seg.aq_av_offset); + } else { + // The balance of the AQ map tends towards raising the average Q. + aq_extend_min = VPXMIN(minq_adj_limit, cm->seg.aq_av_offset); + aq_extend_max = 0; + } + } // Undershoot. if (rc->rate_error_estimate > cpi->oxcf.under_shoot_pct) { @@ -2822,8 +2902,10 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) { --twopass->extend_maxq; } - twopass->extend_minq = clamp(twopass->extend_minq, 0, minq_adj_limit); - twopass->extend_maxq = clamp(twopass->extend_maxq, 0, maxq_adj_limit); + twopass->extend_minq = + clamp(twopass->extend_minq, aq_extend_min, minq_adj_limit); + twopass->extend_maxq = + clamp(twopass->extend_maxq, aq_extend_max, maxq_adj_limit); // If there is a big and undexpected undershoot then feed the extra // bits back in quickly. 
One situation where this may happen is if a diff --git a/libvpx/vp9/encoder/vp9_firstpass.h b/libvpx/vp9/encoder/vp9_firstpass.h index 5875a7b9b..76072884d 100644 --- a/libvpx/vp9/encoder/vp9_firstpass.h +++ b/libvpx/vp9/encoder/vp9_firstpass.h @@ -39,8 +39,6 @@ typedef struct { } FIRSTPASS_MB_STATS; #endif -#define VLOW_MOTION_THRESHOLD 950 - typedef struct { double frame; double weight; @@ -52,6 +50,7 @@ typedef struct { double pcnt_second_ref; double pcnt_neutral; double intra_skip_pct; + double intra_smooth_pct; // % of blocks that are smooth double inactive_zone_rows; // Image mask rows top and bottom. double inactive_zone_cols; // Image mask columns at left and right edges. double MVr; @@ -107,6 +106,7 @@ typedef struct { double modified_error_max; double modified_error_left; double mb_av_energy; + double mb_smooth_pct; #if CONFIG_FP_MB_STATS uint8_t *frame_mb_stats_buf; @@ -122,14 +122,13 @@ typedef struct { // Error score of frames still to be coded in kf group int64_t kf_group_error_left; - // The fraction for a kf groups total bits allocated to the inter frames - double kfgroup_inter_fraction; + double bpm_factor; + int rolling_arf_group_target_bits; + int rolling_arf_group_actual_bits; int sr_update_lag; - int kf_zeromotion_pct; int last_kfgroup_zeromotion_pct; - int gf_zeromotion_pct; int active_worst_quality; int baseline_active_worst_quality; int extend_minq; diff --git a/libvpx/vp9/encoder/vp9_lookahead.c b/libvpx/vp9/encoder/vp9_lookahead.c index 8787be8ee..787bcf47e 100644 --- a/libvpx/vp9/encoder/vp9_lookahead.c +++ b/libvpx/vp9/encoder/vp9_lookahead.c @@ -20,8 +20,8 @@ /* Return the buffer at the given absolute index and increment the index */ static struct lookahead_entry *pop(struct lookahead_ctx *ctx, - unsigned int *idx) { - unsigned int index = *idx; + int *idx) { + int index = *idx; struct lookahead_entry *buf = ctx->buf + index; assert(index < ctx->max_sz); @@ -35,7 +35,7 @@ static struct lookahead_entry *pop(struct lookahead_ctx *ctx, 
void vp9_lookahead_destroy(struct lookahead_ctx *ctx) { if (ctx) { if (ctx->buf) { - unsigned int i; + int i; for (i = 0; i < ctx->max_sz; i++) vpx_free_frame_buffer(&ctx->buf[i].img); @@ -89,7 +89,7 @@ struct lookahead_ctx *vp9_lookahead_init(unsigned int width, #define USE_PARTIAL_COPY 0 -int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, +int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, int64_t ts_start, int64_t ts_end, #if CONFIG_VP9_HIGHBITDEPTH int use_highbitdepth, @@ -207,7 +207,7 @@ struct lookahead_entry *vp9_lookahead_pop(struct lookahead_ctx *ctx, int drain) { struct lookahead_entry *buf = NULL; - if (ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) { + if (ctx && ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) { buf = pop(ctx, &ctx->read_idx); ctx->sz--; } @@ -221,9 +221,9 @@ struct lookahead_entry *vp9_lookahead_peek(struct lookahead_ctx *ctx, if (index >= 0) { // Forward peek - if (index < (int)ctx->sz) { + if (index < ctx->sz) { index += ctx->read_idx; - if (index >= (int)ctx->max_sz) + if (index >= ctx->max_sz) index -= ctx->max_sz; buf = ctx->buf + index; } diff --git a/libvpx/vp9/encoder/vp9_lookahead.h b/libvpx/vp9/encoder/vp9_lookahead.h index 13820380f..db0fd1cde 100644 --- a/libvpx/vp9/encoder/vp9_lookahead.h +++ b/libvpx/vp9/encoder/vp9_lookahead.h @@ -36,10 +36,10 @@ struct lookahead_entry { #define MAX_PRE_FRAMES 1 struct lookahead_ctx { - unsigned int max_sz; /* Absolute size of the queue */ - unsigned int sz; /* Number of buffers currently in the queue */ - unsigned int read_idx; /* Read index */ - unsigned int write_idx; /* Write index */ + int max_sz; /* Absolute size of the queue */ + int sz; /* Number of buffers currently in the queue */ + int read_idx; /* Read index */ + int write_idx; /* Write index */ struct lookahead_entry *buf; /* Buffer list */ }; diff --git a/libvpx/vp9/encoder/vp9_mbgraph.c b/libvpx/vp9/encoder/vp9_mbgraph.c index 
41b6d1954..14a0b162b 100644 --- a/libvpx/vp9/encoder/vp9_mbgraph.c +++ b/libvpx/vp9/encoder/vp9_mbgraph.c @@ -59,8 +59,8 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, // Try sub-pixel MC // if (bestsme > error_thresh && bestsme < INT_MAX) { - int distortion; - unsigned int sse; + uint32_t distortion; + uint32_t sse; cpi->find_fractional_mv_step( x, dst_mv, ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit, &v_fn_ptr, 0, mv_sf->subpel_iters_per_step, @@ -69,8 +69,8 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, &distortion, &sse, NULL, 0, 0); } - xd->mi[0]->mbmi.mode = NEWMV; - xd->mi[0]->mbmi.mv[0].as_mv = *dst_mv; + xd->mi[0]->mode = NEWMV; + xd->mi[0]->mv[0].as_mv = *dst_mv; vp9_build_inter_predictors_sby(xd, mb_row, mb_col, BLOCK_16X16); @@ -147,7 +147,7 @@ static int find_best_16x16_intra(VP9_COMP *cpi, PREDICTION_MODE *pbest_mode) { for (mode = DC_PRED; mode <= TM_PRED; mode++) { unsigned int err; - xd->mi[0]->mbmi.mode = mode; + xd->mi[0]->mode = mode; vp9_predict_intra_block(xd, 2, TX_16X16, mode, x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].dst.buf, xd->plane[0].dst.stride, @@ -243,20 +243,23 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi, int mb_y_offset = 0, arf_y_offset = 0, gld_y_offset = 0; MV gld_top_mv = {0, 0}; MODE_INFO mi_local; + MODE_INFO mi_above, mi_left; vp9_zero(mi_local); // Set up limit values for motion vectors to prevent them extending outside // the UMV borders. 
x->mv_row_min = -BORDER_MV_PIXELS_B16; x->mv_row_max = (cm->mb_rows - 1) * 8 + BORDER_MV_PIXELS_B16; - xd->up_available = 0; + // Signal to vp9_predict_intra_block() that above is not available + xd->above_mi = NULL; + xd->plane[0].dst.stride = buf->y_stride; xd->plane[0].pre[0].stride = buf->y_stride; xd->plane[1].dst.stride = buf->uv_stride; xd->mi[0] = &mi_local; - mi_local.mbmi.sb_type = BLOCK_16X16; - mi_local.mbmi.ref_frame[0] = LAST_FRAME; - mi_local.mbmi.ref_frame[1] = NONE; + mi_local.sb_type = BLOCK_16X16; + mi_local.ref_frame[0] = LAST_FRAME; + mi_local.ref_frame[1] = NONE; for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { MV gld_left_mv = gld_top_mv; @@ -268,7 +271,8 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi, // the UMV borders. x->mv_col_min = -BORDER_MV_PIXELS_B16; x->mv_col_max = (cm->mb_cols - 1) * 8 + BORDER_MV_PIXELS_B16; - xd->left_available = 0; + // Signal to vp9_predict_intra_block() that left is not available + xd->left_mi = NULL; for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { MBGRAPH_MB_STATS *mb_stats = &stats->mb_stats[offset + mb_col]; @@ -280,14 +284,19 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi, if (mb_col == 0) { gld_top_mv = gld_left_mv; } - xd->left_available = 1; + // Signal to vp9_predict_intra_block() that left is available + xd->left_mi = &mi_left; + mb_y_in_offset += 16; gld_y_in_offset += 16; arf_y_in_offset += 16; x->mv_col_min -= 16; x->mv_col_max -= 16; } - xd->up_available = 1; + + // Signal to vp9_predict_intra_block() that above is available + xd->above_mi = &mi_above; + mb_y_offset += buf->y_stride * 16; gld_y_offset += golden_ref->y_stride * 16; if (alt_ref) diff --git a/libvpx/vp9/encoder/vp9_mcomp.c b/libvpx/vp9/encoder/vp9_mcomp.c index be8f57f7d..2ebacc0b8 100644 --- a/libvpx/vp9/encoder/vp9_mcomp.c +++ b/libvpx/vp9/encoder/vp9_mcomp.c @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ +#include <assert.h> #include <limits.h> #include <math.h> #include <stdio.h> @@ -69,6 +70,8 @@ int vp9_init_search_range(int size) { static INLINE int mv_cost(const MV *mv, const int *joint_cost, int *const comp_cost[2]) { + assert(mv->row >= -MV_MAX && mv->row < MV_MAX); + assert(mv->col >= -MV_MAX && mv->col < MV_MAX); return joint_cost[vp9_get_mv_joint(mv)] + comp_cost[0][mv->row] + comp_cost[1][mv->col]; } @@ -80,52 +83,52 @@ int vp9_mv_bit_cost(const MV *mv, const MV *ref, return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7); } -static int mv_err_cost(const MV *mv, const MV *ref, - const int *mvjcost, int *mvcost[2], - int error_per_bit) { +#define PIXEL_TRANSFORM_ERROR_SCALE 4 +static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost, + int *mvcost[2], int error_per_bit) { if (mvcost) { - const MV diff = { mv->row - ref->row, - mv->col - ref->col }; - return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * - error_per_bit, 13); + const MV diff = {mv->row - ref->row, mv->col - ref->col}; + // This product sits at a 32-bit ceiling right now and any additional + // accuracy in either bit cost or error cost will cause it to overflow. 
+ return ROUND_POWER_OF_TWO( + (unsigned)mv_cost(&diff, mvjcost, mvcost) * error_per_bit, + RDDIV_BITS + VP9_PROB_COST_SHIFT - RD_EPB_SHIFT + + PIXEL_TRANSFORM_ERROR_SCALE); } return 0; } static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref, - int error_per_bit) { + int sad_per_bit) { const MV diff = { mv->row - ref->row, mv->col - ref->col }; - return ROUND_POWER_OF_TWO(mv_cost(&diff, x->nmvjointsadcost, - x->nmvsadcost) * error_per_bit, 8); + return ROUND_POWER_OF_TWO( + (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->nmvsadcost) * + sad_per_bit, + VP9_PROB_COST_SHIFT); } void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) { - int len, ss_count = 1; - - cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0; - cfg->ss[0].offset = 0; + int len; + int ss_count = 0; for (len = MAX_FIRST_STEP; len > 0; len /= 2) { // Generate offsets for 4 search sites per step. const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}}; int i; - for (i = 0; i < 4; ++i) { - search_site *const ss = &cfg->ss[ss_count++]; - ss->mv = ss_mvs[i]; - ss->offset = ss->mv.row * stride + ss->mv.col; + for (i = 0; i < 4; ++i, ++ss_count) { + cfg->ss_mv[ss_count] = ss_mvs[i]; + cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col; } } - cfg->ss_count = ss_count; cfg->searches_per_step = 4; + cfg->total_steps = ss_count / cfg->searches_per_step; } void vp9_init3smotion_compensation(search_site_config *cfg, int stride) { - int len, ss_count = 1; - - cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0; - cfg->ss[0].offset = 0; + int len; + int ss_count = 0; for (len = MAX_FIRST_STEP; len > 0; len /= 2) { // Generate offsets for 8 search sites per step. 
@@ -134,33 +137,23 @@ void vp9_init3smotion_compensation(search_site_config *cfg, int stride) { {-len, -len}, {-len, len}, {len, -len}, {len, len} }; int i; - for (i = 0; i < 8; ++i) { - search_site *const ss = &cfg->ss[ss_count++]; - ss->mv = ss_mvs[i]; - ss->offset = ss->mv.row * stride + ss->mv.col; + for (i = 0; i < 8; ++i, ++ss_count) { + cfg->ss_mv[ss_count] = ss_mvs[i]; + cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col; } } - cfg->ss_count = ss_count; cfg->searches_per_step = 8; + cfg->total_steps = ss_count / cfg->searches_per_step; } -/* - * To avoid the penalty for crossing cache-line read, preload the reference - * area in a small buffer, which is aligned to make sure there won't be crossing - * cache-line read while reading from this buffer. This reduced the cpu - * cycles spent on reading ref data in sub-pixel filter functions. - * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x - * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we - * could reduce the area. - */ - -/* estimated cost of a motion vector (r,c) */ -#define MVC(r, c) \ - (mvcost ? \ - ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \ - mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \ - error_per_bit + 4096) >> 13 : 0) +/* Estimated (square) error cost of a motion vector (r,c). The 14 scale comes + * from the same math as in mv_err_cost(). */ +#define MVC(r, c) \ + (mvcost ? 
\ + ((unsigned)(mvjcost[((r) != rr) * 2 + ((c) != rc)] + \ + mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \ + error_per_bit + 8192) >> 14 : 0) // convert motion vector component to offset for sv[a]f calc @@ -172,6 +165,33 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { return &buf[(r >> 3) * stride + (c >> 3)]; } +#if CONFIG_VP9_HIGHBITDEPTH +/* checks if (r, c) has better score than previous best */ +#define CHECK_BETTER(v, r, c) \ + if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ + int64_t tmpmse; \ + if (second_pred == NULL) { \ + thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), \ + sp(r), z, src_stride, &sse); \ + } else { \ + thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), \ + sp(r), z, src_stride, &sse, second_pred); \ + } \ + tmpmse = thismse; \ + tmpmse += MVC(r, c); \ + if (tmpmse >= INT_MAX) { \ + v = INT_MAX; \ + } else if ((v = (uint32_t)tmpmse) < besterr) { \ + besterr = v; \ + br = r; \ + bc = c; \ + *distortion = thismse; \ + *sse1 = sse; \ + } \ + } else { \ + v = INT_MAX; \ + } +#else /* checks if (r, c) has better score than previous best */ #define CHECK_BETTER(v, r, c) \ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ @@ -192,6 +212,7 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { v = INT_MAX; \ } +#endif #define FIRST_LEVEL_CHECKS \ { \ unsigned int left, right, up, down, diag; \ @@ -320,10 +341,10 @@ static unsigned int setup_center_error(const MACROBLOCKD *xd, const uint8_t *second_pred, int w, int h, int offset, int *mvjcost, int *mvcost[2], - unsigned int *sse1, - int *distortion) { - unsigned int besterr; + uint32_t *sse1, + uint32_t *distortion) { #if CONFIG_VP9_HIGHBITDEPTH + uint64_t besterr; if (second_pred != NULL) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]); @@ -339,9 +360,13 @@ static unsigned int setup_center_error(const MACROBLOCKD *xd, } else { 
besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1); } - *distortion = besterr; + *distortion = (uint32_t)besterr; besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); + if (besterr >= UINT32_MAX) + return UINT32_MAX; + return (uint32_t)besterr; #else + uint32_t besterr; (void) xd; if (second_pred != NULL) { DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); @@ -352,8 +377,8 @@ static unsigned int setup_center_error(const MACROBLOCKD *xd, } *distortion = besterr; besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); -#endif // CONFIG_VP9_HIGHBITDEPTH return besterr; +#endif // CONFIG_VP9_HIGHBITDEPTH } static INLINE int divide_and_round(const int n, const int d) { @@ -383,7 +408,7 @@ static void get_cost_surf_min(int *cost_list, int *ir, int *ic, (cost_list[4] - 2 * cost_list[0] + cost_list[2])); } -int vp9_find_best_sub_pixel_tree_pruned_evenmore( +uint32_t vp9_skip_sub_pixel_tree( const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, @@ -393,8 +418,53 @@ int vp9_find_best_sub_pixel_tree_pruned_evenmore( int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], - int *distortion, - unsigned int *sse1, + uint32_t *distortion, + uint32_t *sse1, + const uint8_t *second_pred, + int w, int h) { + SETUP_SUBPEL_SEARCH; + besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, + z, src_stride, y, y_stride, second_pred, + w, h, offset, mvjcost, mvcost, + sse1, distortion); + (void) halfiters; + (void) quarteriters; + (void) eighthiters; + (void) whichdir; + (void) allow_hp; + (void) forced_stop; + (void) hstep; + (void) rr; + (void) rc; + (void) minr; + (void) minc; + (void) maxr; + (void) maxc; + (void) tr; + (void) tc; + (void) sse; + (void) thismse; + (void) cost_list; + + if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || + (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) + return UINT32_MAX; + + return besterr; +} + +uint32_t 
vp9_find_best_sub_pixel_tree_pruned_evenmore( + const MACROBLOCK *x, + MV *bestmv, const MV *ref_mv, + int allow_hp, + int error_per_bit, + const vp9_variance_fn_ptr_t *vfp, + int forced_stop, + int iters_per_step, + int *cost_list, + int *mvjcost, int *mvcost[2], + uint32_t *distortion, + uint32_t *sse1, const uint8_t *second_pred, int w, int h) { SETUP_SUBPEL_SEARCH; @@ -445,7 +515,7 @@ int vp9_find_best_sub_pixel_tree_pruned_evenmore( tr = br; tc = bc; - if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { + if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) { hstep >>= 1; FIRST_LEVEL_CHECKS; if (eighthiters > 1) { @@ -463,7 +533,7 @@ int vp9_find_best_sub_pixel_tree_pruned_evenmore( return besterr; } -int vp9_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x, +uint32_t vp9_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit, @@ -472,8 +542,8 @@ int vp9_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], - int *distortion, - unsigned int *sse1, + uint32_t *distortion, + uint32_t *sse1, const uint8_t *second_pred, int w, int h) { SETUP_SUBPEL_SEARCH; @@ -513,7 +583,7 @@ int vp9_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x, } } - if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { + if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) { tr = br; tc = bc; hstep >>= 1; @@ -532,12 +602,12 @@ int vp9_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x, if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) - return INT_MAX; + return UINT32_MAX; return besterr; } -int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x, +uint32_t vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit, @@ -546,8 +616,8 @@ int vp9_find_best_sub_pixel_tree_pruned(const 
MACROBLOCK *x, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], - int *distortion, - unsigned int *sse1, + uint32_t *distortion, + uint32_t *sse1, const uint8_t *second_pred, int w, int h) { SETUP_SUBPEL_SEARCH; @@ -608,7 +678,7 @@ int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x, tc = bc; } - if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { + if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) { hstep >>= 1; FIRST_LEVEL_CHECKS; if (eighthiters > 1) { @@ -639,19 +709,19 @@ static const MV search_step_table[12] = { {0, -1}, {0, 1}, {-1, 0}, {1, 0} }; -int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x, - MV *bestmv, const MV *ref_mv, - int allow_hp, - int error_per_bit, - const vp9_variance_fn_ptr_t *vfp, - int forced_stop, - int iters_per_step, - int *cost_list, - int *mvjcost, int *mvcost[2], - int *distortion, - unsigned int *sse1, - const uint8_t *second_pred, - int w, int h) { +uint32_t vp9_find_best_sub_pixel_tree(const MACROBLOCK *x, + MV *bestmv, const MV *ref_mv, + int allow_hp, + int error_per_bit, + const vp9_variance_fn_ptr_t *vfp, + int forced_stop, + int iters_per_step, + int *cost_list, + int *mvjcost, int *mvcost[2], + uint32_t *distortion, + uint32_t *sse1, + const uint8_t *second_pred, + int w, int h) { const uint8_t *const z = x->plane[0].src.buf; const uint8_t *const src_address = z; const int src_stride = x->plane[0].src.stride; @@ -680,7 +750,7 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x, unsigned int cost_array[5]; int kr, kc; - if (!(allow_hp && vp9_use_mv_hp(ref_mv))) + if (!(allow_hp && use_mv_hp(ref_mv))) if (round == 3) round = 2; @@ -790,7 +860,6 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x, } #undef MVC -#undef PRE #undef CHECK_BETTER static INLINE int check_bounds(const MACROBLOCK *x, int row, int col, @@ -852,9 +921,9 @@ static INLINE void calc_int_cost_list(const MACROBLOCK *x, cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), 
in_what->stride, &sse) + - // mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb); - mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost, x->mvcost, - x->errorperbit); + mv_err_cost(&this_mv, &fcenter_mv, + x->nmvjointcost, x->mvcost, + x->errorperbit); } } else { for (i = 0; i < 4; i++) { @@ -866,9 +935,9 @@ static INLINE void calc_int_cost_list(const MACROBLOCK *x, cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride, &sse) + - // mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb); - mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost, x->mvcost, - x->errorperbit); + mv_err_cost(&this_mv, &fcenter_mv, + x->nmvjointcost, x->mvcost, + x->errorperbit); } } } @@ -1347,12 +1416,22 @@ int vp9_get_mvpred_var(const MACROBLOCK *x, const struct buf_2d *const what = &x->plane[0].src; const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV mv = {best_mv->row * 8, best_mv->col * 8}; - unsigned int unused; - + uint32_t unused; +#if CONFIG_VP9_HIGHBITDEPTH + uint64_t err= vfp->vf(what->buf, what->stride, + get_buf_from_mv(in_what, best_mv), + in_what->stride, &unused); + err += (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, + x->mvcost, x->errorperbit) : 0); + if (err >= INT_MAX) + return INT_MAX; + return (int)err; +#else return vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv), in_what->stride, &unused) + (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost, x->errorperbit) : 0); +#endif } int vp9_get_mvpred_av_var(const MACROBLOCK *x, @@ -1523,69 +1602,83 @@ static int fast_dia_search(const MACROBLOCK *x, #undef CHECK_BETTER -int vp9_full_range_search_c(const MACROBLOCK *x, - const search_site_config *cfg, - MV *ref_mv, MV *best_mv, - int search_param, int sad_per_bit, int *num00, - const vp9_variance_fn_ptr_t *fn_ptr, - const MV *center_mv) { +// Exhaustive motion search around a given centre position with a given +// step size.
+static int exhuastive_mesh_search(const MACROBLOCK *x, + MV *ref_mv, MV *best_mv, + int range, int step, int sad_per_bit, + const vp9_variance_fn_ptr_t *fn_ptr, + const MV *center_mv) { const MACROBLOCKD *const xd = &x->e_mbd; const struct buf_2d *const what = &x->plane[0].src; const struct buf_2d *const in_what = &xd->plane[0].pre[0]; - const int range = 64; - const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; + MV fcenter_mv = {center_mv->row, center_mv->col}; unsigned int best_sad = INT_MAX; int r, c, i; int start_col, end_col, start_row, end_row; + int col_step = (step > 1) ? step : 4; - // The cfg and search_param parameters are not used in this search variant - (void)cfg; - (void)search_param; + assert(step >= 1); - clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); - *best_mv = *ref_mv; - *num00 = 11; + clamp_mv(&fcenter_mv, x->mv_col_min, x->mv_col_max, + x->mv_row_min, x->mv_row_max); + *best_mv = fcenter_mv; best_sad = fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, ref_mv), in_what->stride) + - mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); - start_row = VPXMAX(-range, x->mv_row_min - ref_mv->row); - start_col = VPXMAX(-range, x->mv_col_min - ref_mv->col); - end_row = VPXMIN(range, x->mv_row_max - ref_mv->row); - end_col = VPXMIN(range, x->mv_col_max - ref_mv->col); - - for (r = start_row; r <= end_row; ++r) { - for (c = start_col; c <= end_col; c += 4) { - if (c + 3 <= end_col) { - unsigned int sads[4]; - const uint8_t *addrs[4]; - for (i = 0; i < 4; ++i) { - const MV mv = {ref_mv->row + r, ref_mv->col + c + i}; - addrs[i] = get_buf_from_mv(in_what, &mv); - } - - fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads); - - for (i = 0; i < 4; ++i) { - if (sads[i] < best_sad) { - const MV mv = {ref_mv->row + r, ref_mv->col + c + i}; - const unsigned int sad = sads[i] + - mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); - if (sad < best_sad) { - best_sad = sad; - *best_mv = mv; 
- } + get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) + + mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit); + start_row = VPXMAX(-range, x->mv_row_min - fcenter_mv.row); + start_col = VPXMAX(-range, x->mv_col_min - fcenter_mv.col); + end_row = VPXMIN(range, x->mv_row_max - fcenter_mv.row); + end_col = VPXMIN(range, x->mv_col_max - fcenter_mv.col); + + for (r = start_row; r <= end_row; r += step) { + for (c = start_col; c <= end_col; c += col_step) { + // Step > 1 means we are not checking every location in this pass. + if (step > 1) { + const MV mv = {fcenter_mv.row + r, fcenter_mv.col + c}; + unsigned int sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), in_what->stride); + if (sad < best_sad) { + sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; } } } else { - for (i = 0; i < end_col - c; ++i) { - const MV mv = {ref_mv->row + r, ref_mv->col + c + i}; - unsigned int sad = fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &mv), in_what->stride); - if (sad < best_sad) { - sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); + // 4 sads in a single call if we are checking every location + if (c + 3 <= end_col) { + unsigned int sads[4]; + const uint8_t *addrs[4]; + for (i = 0; i < 4; ++i) { + const MV mv = {fcenter_mv.row + r, fcenter_mv.col + c + i}; + addrs[i] = get_buf_from_mv(in_what, &mv); + } + fn_ptr->sdx4df(what->buf, what->stride, addrs, + in_what->stride, sads); + + for (i = 0; i < 4; ++i) { + if (sads[i] < best_sad) { + const MV mv = {fcenter_mv.row + r, fcenter_mv.col + c + i}; + const unsigned int sad = sads[i] + + mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; + } + } + } + } else { + for (i = 0; i < end_col - c; ++i) { + const MV mv = {fcenter_mv.row + r, fcenter_mv.col + c + i}; + unsigned int sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), 
in_what->stride); if (sad < best_sad) { - best_sad = sad; - *best_mv = mv; + sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; + } } } } @@ -1612,8 +1705,8 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, const uint8_t *best_address; unsigned int bestsad = INT_MAX; - int best_site = 0; - int last_site = 0; + int best_site = -1; + int last_site = -1; int ref_row; int ref_col; @@ -1623,8 +1716,10 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, // 0 = initial step (MAX_FIRST_STEP) pel // 1 = (MAX_FIRST_STEP/2) pel, // 2 = (MAX_FIRST_STEP/4) pel... - const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step]; - const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param; +// const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step]; + const MV *ss_mv = &cfg->ss_mv[search_param * cfg->searches_per_step]; + const intptr_t *ss_os = &cfg->ss_os[search_param * cfg->searches_per_step]; + const int tot_steps = cfg->total_steps - search_param; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); @@ -1642,17 +1737,17 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); - i = 1; + i = 0; for (step = 0; step < tot_steps; step++) { int all_in = 1, t; // All_in is true if every one of the points we are checking are within // the bounds of the image. 
- all_in &= ((best_mv->row + ss[i].mv.row) > x->mv_row_min); - all_in &= ((best_mv->row + ss[i + 1].mv.row) < x->mv_row_max); - all_in &= ((best_mv->col + ss[i + 2].mv.col) > x->mv_col_min); - all_in &= ((best_mv->col + ss[i + 3].mv.col) < x->mv_col_max); + all_in &= ((best_mv->row + ss_mv[i].row) > x->mv_row_min); + all_in &= ((best_mv->row + ss_mv[i + 1].row) < x->mv_row_max); + all_in &= ((best_mv->col + ss_mv[i + 2].col) > x->mv_col_min); + all_in &= ((best_mv->col + ss_mv[i + 3].col) < x->mv_col_max); // If all the pixels are within the bounds we don't check whether the // search point is valid in this loop, otherwise we check each point @@ -1664,15 +1759,15 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, unsigned char const *block_offset[4]; for (t = 0; t < 4; t++) - block_offset[t] = ss[i + t].offset + best_address; + block_offset[t] = ss_os[i + t] + best_address; fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array); for (t = 0; t < 4; t++, i++) { if (sad_array[t] < bestsad) { - const MV this_mv = {best_mv->row + ss[i].mv.row, - best_mv->col + ss[i].mv.col}; + const MV this_mv = {best_mv->row + ss_mv[i].row, + best_mv->col + ss_mv[i].col}; sad_array[t] += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); if (sad_array[t] < bestsad) { @@ -1685,11 +1780,11 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, } else { for (j = 0; j < cfg->searches_per_step; j++) { // Trap illegal vectors - const MV this_mv = {best_mv->row + ss[i].mv.row, - best_mv->col + ss[i].mv.col}; + const MV this_mv = {best_mv->row + ss_mv[i].row, + best_mv->col + ss_mv[i].col}; if (is_mv_in(x, &this_mv)) { - const uint8_t *const check_here = ss[i].offset + best_address; + const uint8_t *const check_here = ss_os[i] + best_address; unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); @@ -1705,25 +1800,25 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, } } if (best_site != last_site) { - best_mv->row += ss[best_site].mv.row; 
- best_mv->col += ss[best_site].mv.col; - best_address += ss[best_site].offset; + best_mv->row += ss_mv[best_site].row; + best_mv->col += ss_mv[best_site].col; + best_address += ss_os[best_site]; last_site = best_site; #if defined(NEW_DIAMOND_SEARCH) while (1) { - const MV this_mv = {best_mv->row + ss[best_site].mv.row, - best_mv->col + ss[best_site].mv.col}; + const MV this_mv = {best_mv->row + ss_mv[best_site].row, + best_mv->col + ss_mv[best_site].col}; if (is_mv_in(x, &this_mv)) { - const uint8_t *const check_here = ss[best_site].offset + best_address; + const uint8_t *const check_here = ss_os[best_site] + best_address; unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); if (thissad < bestsad) { thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; - best_mv->row += ss[best_site].mv.row; - best_mv->col += ss[best_site].mv.col; - best_address += ss[best_site].offset; + best_mv->row += ss_mv[best_site].row; + best_mv->col += ss_mv[best_site].col; + best_address += ss_os[best_site]; continue; } } @@ -1745,7 +1840,7 @@ static int vector_match(int16_t *ref, int16_t *src, int bwl) { int center, offset = 0; int bw = 4 << bwl; // redundant variable, to be changed in the experiments. 
for (d = 0; d <= bw; d += 16) { - this_sad = vp9_vector_var(&ref[d], src, bwl); + this_sad = vpx_vector_var(&ref[d], src, bwl); if (this_sad < best_sad) { best_sad = this_sad; offset = d; @@ -1758,7 +1853,7 @@ static int vector_match(int16_t *ref, int16_t *src, int bwl) { // check limit if (this_pos < 0 || this_pos > bw) continue; - this_sad = vp9_vector_var(&ref[this_pos], src, bwl); + this_sad = vpx_vector_var(&ref[this_pos], src, bwl); if (this_sad < best_sad) { best_sad = this_sad; center = this_pos; @@ -1771,7 +1866,7 @@ static int vector_match(int16_t *ref, int16_t *src, int bwl) { // check limit if (this_pos < 0 || this_pos > bw) continue; - this_sad = vp9_vector_var(&ref[this_pos], src, bwl); + this_sad = vpx_vector_var(&ref[this_pos], src, bwl); if (this_sad < best_sad) { best_sad = this_sad; center = this_pos; @@ -1784,7 +1879,7 @@ static int vector_match(int16_t *ref, int16_t *src, int bwl) { // check limit if (this_pos < 0 || this_pos > bw) continue; - this_sad = vp9_vector_var(&ref[this_pos], src, bwl); + this_sad = vpx_vector_var(&ref[this_pos], src, bwl); if (this_sad < best_sad) { best_sad = this_sad; center = this_pos; @@ -1797,7 +1892,7 @@ static int vector_match(int16_t *ref, int16_t *src, int bwl) { // check limit if (this_pos < 0 || this_pos > bw) continue; - this_sad = vp9_vector_var(&ref[this_pos], src, bwl); + this_sad = vpx_vector_var(&ref[this_pos], src, bwl); if (this_sad < best_sad) { best_sad = this_sad; center = this_pos; @@ -1815,7 +1910,7 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col) { MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + MODE_INFO *mi = xd->mi[0]; struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}}; DECLARE_ALIGNED(16, int16_t, hbuf[128]); DECLARE_ALIGNED(16, int16_t, vbuf[128]); @@ -1829,12 +1924,12 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, const int src_stride = 
x->plane[0].src.stride; const int ref_stride = xd->plane[0].pre[0].stride; uint8_t const *ref_buf, *src_buf; - MV *tmp_mv = &xd->mi[0]->mbmi.mv[0].as_mv; + MV *tmp_mv = &xd->mi[0]->mv[0].as_mv; unsigned int best_sad, tmp_sad, this_sad[4]; MV this_mv; const int norm_factor = 3 + (bw >> 5); const YV12_BUFFER_CONFIG *scaled_ref_frame = - vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]); + vp9_get_scaled_ref_frame(cpi, mi->ref_frame[0]); if (scaled_ref_frame) { int i; @@ -1866,25 +1961,25 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, // Set up prediction 1-D reference set ref_buf = xd->plane[0].pre[0].buf - (bw >> 1); for (idx = 0; idx < search_width; idx += 16) { - vp9_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh); + vpx_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh); ref_buf += 16; } ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride; for (idx = 0; idx < search_height; ++idx) { - vbuf[idx] = vp9_int_pro_col(ref_buf, bw) >> norm_factor; + vbuf[idx] = vpx_int_pro_col(ref_buf, bw) >> norm_factor; ref_buf += ref_stride; } // Set up src 1-D reference set for (idx = 0; idx < bw; idx += 16) { src_buf = x->plane[0].src.buf + idx; - vp9_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh); + vpx_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh); } src_buf = x->plane[0].src.buf; for (idx = 0; idx < bh; ++idx) { - src_vbuf[idx] = vp9_int_pro_col(src_buf, bw) >> norm_factor; + src_vbuf[idx] = vpx_int_pro_col(src_buf, bw) >> norm_factor; src_buf += src_stride; } @@ -2015,6 +2110,70 @@ static int full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, return bestsme; } +#define MIN_RANGE 7 +#define MAX_RANGE 256 +#define MIN_INTERVAL 1 +// Runs a limited range exhaustive mesh search using a pattern set +// according to the encode speed profile.
+static int full_pixel_exhaustive(VP9_COMP *cpi, MACROBLOCK *x, + MV *centre_mv_full, int sadpb, int *cost_list, + const vp9_variance_fn_ptr_t *fn_ptr, + const MV *ref_mv, MV *dst_mv) { + const SPEED_FEATURES *const sf = &cpi->sf; + MV temp_mv = {centre_mv_full->row, centre_mv_full->col}; + MV f_ref_mv = {ref_mv->row >> 3, ref_mv->col >> 3}; + int bestsme; + int i; + int interval = sf->mesh_patterns[0].interval; + int range = sf->mesh_patterns[0].range; + int baseline_interval_divisor; + + // Keep track of number of exhaustive calls (this frame in this thread). + ++(*x->ex_search_count_ptr); + + // Trap illegal values for interval and range for this function. + if ((range < MIN_RANGE) || (range > MAX_RANGE) || + (interval < MIN_INTERVAL) || (interval > range)) + return INT_MAX; + + baseline_interval_divisor = range / interval; + + // Check size of proposed first range against magnitude of the centre + // value used as a starting point. + range = VPXMAX(range, (5 * VPXMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4); + range = VPXMIN(range, MAX_RANGE); + interval = VPXMAX(interval, range / baseline_interval_divisor); + + // initial search + bestsme = exhuastive_mesh_search(x, &f_ref_mv, &temp_mv, range, + interval, sadpb, fn_ptr, &temp_mv); + + if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) { + // Progressive searches with range and step size decreasing each time + // till we reach a step size of 1. Then break out. + for (i = 1; i < MAX_MESH_STEP; ++i) { + // First pass with coarser step and longer range + bestsme = exhuastive_mesh_search(x, &f_ref_mv, &temp_mv, + sf->mesh_patterns[i].range, + sf->mesh_patterns[i].interval, + sadpb, fn_ptr, &temp_mv); + + if (sf->mesh_patterns[i].interval == 1) + break; + } + } + + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); + *dst_mv = temp_mv; + + // Return cost list. 
+ if (cost_list) { + calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list); + } + return bestsme; +} + int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, int sad_per_bit, int distance, const vp9_variance_fn_ptr_t *fn_ptr, @@ -2328,6 +2487,18 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, return best_sad; } +#define MIN_EX_SEARCH_LIMIT 128 +static int is_exhaustive_allowed(VP9_COMP *cpi, MACROBLOCK *x) { + const SPEED_FEATURES *const sf = &cpi->sf; + const int max_ex = VPXMAX(MIN_EX_SEARCH_LIMIT, + (*x->m_search_count_ptr * sf->max_exaustive_pct) / 100); + + return sf->allow_exhaustive_searches && + (sf->exhaustive_searches_thresh < INT_MAX) && + (*x->ex_search_count_ptr <= max_ex) && + !cpi->rc.is_src_frame_alt_ref; +} + int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, MV *mvp_full, int step_param, int error_per_bit, @@ -2346,6 +2517,9 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, cost_list[4] = INT_MAX; } + // Keep track of number of searches (this frame in this thread). + ++(*x->m_search_count_ptr); + switch (method) { case FAST_DIAMOND: var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0, @@ -2371,6 +2545,27 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit, MAX_MVSEARCH_STEPS - 1 - step_param, 1, cost_list, fn_ptr, ref_mv, tmp_mv); + + // Should we allow a follow on exhaustive search? + if (is_exhaustive_allowed(cpi, x)) { + int64_t exhuastive_thr = sf->exhaustive_searches_thresh; + exhuastive_thr >>= 8 - (b_width_log2_lookup[bsize] + + b_height_log2_lookup[bsize]); + + // Threshold variance for an exhaustive full search. 
+ if (var > exhuastive_thr) { + int var_ex; + MV tmp_mv_ex; + var_ex = full_pixel_exhaustive(cpi, x, tmp_mv, + error_per_bit, cost_list, fn_ptr, + ref_mv, &tmp_mv_ex); + + if (var_ex < var) { + var = var_ex; + *tmp_mv = tmp_mv_ex; + } + } + } break; default: assert(0 && "Invalid search method."); diff --git a/libvpx/vp9/encoder/vp9_mcomp.h b/libvpx/vp9/encoder/vp9_mcomp.h index 5efd5435b..86cd267f3 100644 --- a/libvpx/vp9/encoder/vp9_mcomp.h +++ b/libvpx/vp9/encoder/vp9_mcomp.h @@ -31,16 +31,12 @@ extern "C" { // for Block_16x16 #define BORDER_MV_PIXELS_B16 (16 + VP9_INTERP_EXTEND) -// motion search site -typedef struct search_site { - MV mv; - int offset; -} search_site; - typedef struct search_site_config { - search_site ss[8 * MAX_MVSEARCH_STEPS + 1]; - int ss_count; + // motion search sites + MV ss_mv[8 * MAX_MVSEARCH_STEPS]; // Motion vector + intptr_t ss_os[8 * MAX_MVSEARCH_STEPS]; // Offset int searches_per_step; + int total_steps; } search_site_config; void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride); @@ -78,7 +74,7 @@ unsigned int vp9_int_pro_motion_estimation(const struct VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col); -typedef int (fractional_mv_step_fp) ( +typedef uint32_t (fractional_mv_step_fp) ( const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, @@ -88,7 +84,7 @@ typedef int (fractional_mv_step_fp) ( int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], - int *distortion, unsigned int *sse1, + uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, int h); @@ -96,6 +92,7 @@ extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree; extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned; extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned_more; extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned_evenmore; +extern fractional_mv_step_fp vp9_skip_sub_pixel_tree; typedef int (*vp9_full_search_fn_t)(const MACROBLOCK *x, const MV *ref_mv, int 
sad_per_bit, diff --git a/libvpx/vp9/encoder/vp9_noise_estimate.c b/libvpx/vp9/encoder/vp9_noise_estimate.c new file mode 100644 index 000000000..4b43b3879 --- /dev/null +++ b/libvpx/vp9/encoder/vp9_noise_estimate.c @@ -0,0 +1,263 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <assert.h> +#include <limits.h> +#include <math.h> + +#include "./vpx_dsp_rtcd.h" +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_scale/yv12config.h" +#include "vpx/vpx_integer.h" +#include "vp9/common/vp9_reconinter.h" +#include "vp9/encoder/vp9_context_tree.h" +#include "vp9/encoder/vp9_noise_estimate.h" +#include "vp9/encoder/vp9_encoder.h" + +void vp9_noise_estimate_init(NOISE_ESTIMATE *const ne, + int width, + int height) { + ne->enabled = 0; + ne->level = kLowLow; + ne->value = 0; + ne->count = 0; + ne->thresh = 90; + ne->last_w = 0; + ne->last_h = 0; + if (width * height >= 1920 * 1080) { + ne->thresh = 200; + } else if (width * height >= 1280 * 720) { + ne->thresh = 130; + } + ne->num_frames_estimate = 20; +} + +static int enable_noise_estimation(VP9_COMP *const cpi) { + // Enable noise estimation if denoising is on. +#if CONFIG_VP9_TEMPORAL_DENOISING + if (cpi->oxcf.noise_sensitivity > 0) + return 1; +#endif + // Only allow noise estimate under certain encoding mode. + // Enabled for 1 pass CBR, speed >=5, and if resolution is same as original. + // Not enabled for SVC mode and screen_content_mode. + // Not enabled for low resolutions. 
+ if (cpi->oxcf.pass == 0 && + cpi->oxcf.rc_mode == VPX_CBR && + cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && + cpi->oxcf.speed >= 5 && + cpi->resize_state == ORIG && + cpi->resize_pending == 0 && + !cpi->use_svc && + cpi->oxcf.content != VP9E_CONTENT_SCREEN && + cpi->common.width >= 640 && + cpi->common.height >= 480) + return 1; + else + return 0; +} + +#if CONFIG_VP9_TEMPORAL_DENOISING +static void copy_frame(YV12_BUFFER_CONFIG * const dest, + const YV12_BUFFER_CONFIG * const src) { + int r; + const uint8_t *srcbuf = src->y_buffer; + uint8_t *destbuf = dest->y_buffer; + + assert(dest->y_width == src->y_width); + assert(dest->y_height == src->y_height); + + for (r = 0; r < dest->y_height; ++r) { + memcpy(destbuf, srcbuf, dest->y_width); + destbuf += dest->y_stride; + srcbuf += src->y_stride; + } +} +#endif // CONFIG_VP9_TEMPORAL_DENOISING + +NOISE_LEVEL vp9_noise_estimate_extract_level(NOISE_ESTIMATE *const ne) { + int noise_level = kLowLow; + if (ne->value > (ne->thresh << 1)) { + noise_level = kHigh; + } else { + if (ne->value > ne->thresh) + noise_level = kMedium; + else if (ne->value > ((9 * ne->thresh) >> 4)) + noise_level = kLow; + else + noise_level = kLowLow; + } + return noise_level; +} + +void vp9_update_noise_estimate(VP9_COMP *const cpi) { + const VP9_COMMON *const cm = &cpi->common; + NOISE_ESTIMATE *const ne = &cpi->noise_estimate; + // Estimate of noise level every frame_period frames. + int frame_period = 8; + int thresh_consec_zeromv = 6; + unsigned int thresh_sum_diff = 100; + unsigned int thresh_sum_spatial = (200 * 200) << 8; + unsigned int thresh_spatial_var = (32 * 32) << 8; + int min_blocks_estimate = cm->mi_rows * cm->mi_cols >> 7; + // Estimate is between current source and last source. 
+ YV12_BUFFER_CONFIG *last_source = cpi->Last_Source; +#if CONFIG_VP9_TEMPORAL_DENOISING + if (cpi->oxcf.noise_sensitivity > 0) + last_source = &cpi->denoiser.last_source; +#endif + ne->enabled = enable_noise_estimation(cpi); + if (!ne->enabled || + cm->current_video_frame % frame_period != 0 || + last_source == NULL || + ne->last_w != cm->width || + ne->last_h != cm->height) { +#if CONFIG_VP9_TEMPORAL_DENOISING + if (cpi->oxcf.noise_sensitivity > 0) + copy_frame(&cpi->denoiser.last_source, cpi->Source); +#endif + if (last_source != NULL) { + ne->last_w = cm->width; + ne->last_h = cm->height; + } + return; + } else if (cpi->rc.avg_frame_low_motion < 50) { + // Force noise estimation to 0 and denoiser off if content has high motion. + ne->level = kLowLow; +#if CONFIG_VP9_TEMPORAL_DENOISING + if (cpi->oxcf.noise_sensitivity > 0) + vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level); +#endif + return; + } else { + int num_samples = 0; + uint64_t avg_est = 0; + int bsize = BLOCK_16X16; + static const unsigned char const_source[16] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + // Loop over sub-sample of 16x16 blocks of frame, and for blocks that have + // been encoded as zero/small mv at least x consecutive frames, compute + // the variance to update estimate of noise in the source. 
+ const uint8_t *src_y = cpi->Source->y_buffer; + const int src_ystride = cpi->Source->y_stride; + const uint8_t *last_src_y = last_source->y_buffer; + const int last_src_ystride = last_source->y_stride; + const uint8_t *src_u = cpi->Source->u_buffer; + const uint8_t *src_v = cpi->Source->v_buffer; + const int src_uvstride = cpi->Source->uv_stride; + int mi_row, mi_col; + int num_low_motion = 0; + int frame_low_motion = 1; + for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) { + for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) { + int bl_index = mi_row * cm->mi_cols + mi_col; + if (cpi->consec_zero_mv[bl_index] > thresh_consec_zeromv) + num_low_motion++; + } + } + if (num_low_motion < ((3 * cm->mi_rows * cm->mi_cols) >> 3)) + frame_low_motion = 0; + for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) { + for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) { + // 16x16 blocks, 1/4 sample of frame. + if (mi_row % 4 == 0 && mi_col % 4 == 0 && + mi_row < cm->mi_rows - 1 && + mi_col < cm->mi_cols - 1) { + int bl_index = mi_row * cm->mi_cols + mi_col; + int bl_index1 = bl_index + 1; + int bl_index2 = bl_index + cm->mi_cols; + int bl_index3 = bl_index2 + 1; + // Only consider blocks that are likely steady background. i.e, have + // been encoded as zero/low motion x (= thresh_consec_zeromv) frames + // in a row. consec_zero_mv[] defined for 8x8 blocks, so consider all + // 4 sub-blocks for 16x16 block. Also, avoid skin blocks. 
+ int consec_zeromv = VPXMIN(cpi->consec_zero_mv[bl_index], + VPXMIN(cpi->consec_zero_mv[bl_index1], + VPXMIN(cpi->consec_zero_mv[bl_index2], + cpi->consec_zero_mv[bl_index3]))); + int is_skin = 0; + if (cpi->use_skin_detection) { + is_skin = vp9_compute_skin_block(src_y, + src_u, + src_v, + src_ystride, + src_uvstride, + bsize, + consec_zeromv, + 0); + } + if (frame_low_motion && + cpi->consec_zero_mv[bl_index] > thresh_consec_zeromv && + cpi->consec_zero_mv[bl_index1] > thresh_consec_zeromv && + cpi->consec_zero_mv[bl_index2] > thresh_consec_zeromv && + cpi->consec_zero_mv[bl_index3] > thresh_consec_zeromv && + !is_skin) { + // Compute variance. + unsigned int sse; + unsigned int variance = cpi->fn_ptr[bsize].vf(src_y, + src_ystride, + last_src_y, + last_src_ystride, + &sse); + // Only consider this block as valid for noise measurement if the + // average term (sse - variance = N * avg^{2}, N = 16X16) of the + // temporal residual is small (avoid effects from lighting change). + if ((sse - variance) < thresh_sum_diff) { + unsigned int sse2; + const unsigned int spatial_variance = + cpi->fn_ptr[bsize].vf(src_y, src_ystride, const_source, + 0, &sse2); + // Avoid blocks with high brightness and high spatial variance. + if ((sse2 - spatial_variance) < thresh_sum_spatial && + spatial_variance < thresh_spatial_var) { + avg_est += variance / ((spatial_variance >> 9) + 1); + num_samples++; + } + } + } + } + src_y += 8; + last_src_y += 8; + src_u += 4; + src_v += 4; + } + src_y += (src_ystride << 3) - (cm->mi_cols << 3); + last_src_y += (last_src_ystride << 3) - (cm->mi_cols << 3); + src_u += (src_uvstride << 2) - (cm->mi_cols << 2); + src_v += (src_uvstride << 2) - (cm->mi_cols << 2); + } + ne->last_w = cm->width; + ne->last_h = cm->height; + // Update noise estimate if we have at a minimum number of block samples, + // and avg_est > 0 (avg_est == 0 can happen if the application inputs + // duplicate frames). 
+ if (num_samples > min_blocks_estimate && avg_est > 0) { + // Normalize. + avg_est = avg_est / num_samples; + // Update noise estimate. + ne->value = (int)((15 * ne->value + avg_est) >> 4); + ne->count++; + if (ne->count == ne->num_frames_estimate) { + // Reset counter and check noise level condition. + ne->num_frames_estimate = 30; + ne->count = 0; + ne->level = vp9_noise_estimate_extract_level(ne); +#if CONFIG_VP9_TEMPORAL_DENOISING + if (cpi->oxcf.noise_sensitivity > 0) + vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level); +#endif + } + } + } +#if CONFIG_VP9_TEMPORAL_DENOISING + if (cpi->oxcf.noise_sensitivity > 0) + copy_frame(&cpi->denoiser.last_source, cpi->Source); +#endif +} diff --git a/libvpx/vp9/encoder/vp9_noise_estimate.h b/libvpx/vp9/encoder/vp9_noise_estimate.h new file mode 100644 index 000000000..826d125b5 --- /dev/null +++ b/libvpx/vp9/encoder/vp9_noise_estimate.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VP9_ENCODER_NOISE_ESTIMATE_H_ +#define VP9_ENCODER_NOISE_ESTIMATE_H_ + +#include "vp9/encoder/vp9_block.h" +#include "vp9/encoder/vp9_skin_detection.h" +#include "vpx_scale/yv12config.h" + +#if CONFIG_VP9_TEMPORAL_DENOISING +#include "vp9/encoder/vp9_denoiser.h" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum noise_level { + kLowLow, + kLow, + kMedium, + kHigh +} NOISE_LEVEL; + +typedef struct noise_estimate { + int enabled; + NOISE_LEVEL level; + int value; + int thresh; + int count; + int last_w; + int last_h; + int num_frames_estimate; +} NOISE_ESTIMATE; + +struct VP9_COMP; + +void vp9_noise_estimate_init(NOISE_ESTIMATE *const ne, + int width, + int height); + +NOISE_LEVEL vp9_noise_estimate_extract_level(NOISE_ESTIMATE *const ne); + +void vp9_update_noise_estimate(struct VP9_COMP *const cpi); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_ENCODER_NOISE_ESTIMATE_H_ diff --git a/libvpx/vp9/encoder/vp9_picklpf.c b/libvpx/vp9/encoder/vp9_picklpf.c index 5444bc89f..f6b1dfcd5 100644 --- a/libvpx/vp9/encoder/vp9_picklpf.c +++ b/libvpx/vp9/encoder/vp9_picklpf.c @@ -78,7 +78,8 @@ static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, // Start the search at the previous frame filter level unless it is now out of // range. - int filt_mid = clamp(lf->filter_level, min_filter_level, max_filter_level); + int filt_mid = + clamp(lf->last_filt_level, min_filter_level, max_filter_level); int filter_step = filt_mid < 16 ? 
4 : filt_mid / 4; // Sum squared error at each filter level int64_t ss_err[MAX_LOOP_FILTER + 1]; diff --git a/libvpx/vp9/encoder/vp9_pickmode.c b/libvpx/vp9/encoder/vp9_pickmode.c index fc4d9ae67..ba6a0c6e1 100644 --- a/libvpx/vp9/encoder/vp9_pickmode.c +++ b/libvpx/vp9/encoder/vp9_pickmode.c @@ -40,16 +40,25 @@ typedef struct { int in_use; } PRED_BUFFER; -static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCK *x, + +static const int pos_shift_16x16[4][4] = { + {9, 10, 13, 14}, + {11, 12, 15, 16}, + {17, 18, 21, 22}, + {19, 20, 23, 24} +}; + +static int mv_refs_rt(VP9_COMP *cpi, const VP9_COMMON *cm, + const MACROBLOCK *x, const MACROBLOCKD *xd, const TileInfo *const tile, MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, - int_mv *mv_ref_list, - int mi_row, int mi_col) { + int_mv *mv_ref_list, int_mv *base_mv, + int mi_row, int mi_col, int use_base_mv) { const int *ref_sign_bias = cm->ref_frame_sign_bias; int i, refmv_count = 0; - const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type]; + const POSITION *const mv_ref_search = mv_ref_blocks[mi->sb_type]; int different_ref_found = 0; int context_counter = 0; @@ -66,12 +75,11 @@ static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCK *x, if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; - const MB_MODE_INFO *const candidate = &candidate_mi->mbmi; // Keep counts for entropy encoding. 
- context_counter += mode_2_counter[candidate->mode]; + context_counter += mode_2_counter[candidate_mi->mode]; different_ref_found = 1; - if (candidate->ref_frame[0] == ref_frame) + if (candidate_mi->ref_frame[0] == ref_frame) ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, -1), refmv_count, mv_ref_list, Done); } @@ -85,12 +93,12 @@ static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCK *x, for (; i < MVREF_NEIGHBOURS && !refmv_count; ++i) { const POSITION *const mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row * - xd->mi_stride]->mbmi; + const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row * + xd->mi_stride]; different_ref_found = 1; - if (candidate->ref_frame[0] == ref_frame) - ADD_MV_REF_LIST(candidate->mv[0], refmv_count, mv_ref_list, Done); + if (candidate_mi->ref_frame[0] == ref_frame) + ADD_MV_REF_LIST(candidate_mi->mv[0], refmv_count, mv_ref_list, Done); } } @@ -101,15 +109,29 @@ static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCK *x, for (i = 0; i < MVREF_NEIGHBOURS; ++i) { const POSITION *mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row - * xd->mi_stride]->mbmi; + const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row + * xd->mi_stride]; // If the candidate is INTRA we don't want to consider its mv. - IF_DIFF_REF_FRAME_ADD_MV(candidate, ref_frame, ref_sign_bias, + IF_DIFF_REF_FRAME_ADD_MV(candidate_mi, ref_frame, ref_sign_bias, refmv_count, mv_ref_list, Done); } } } + if (use_base_mv && + !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame && + ref_frame == LAST_FRAME) { + // Get base layer mv. 
+ MV_REF *candidate = + &cm->prev_frame->mvs[(mi_col>>1) + (mi_row>>1) * (cm->mi_cols>>1)]; + if (candidate->mv[0].as_int != INVALID_MV) { + base_mv->as_mv.row = (candidate->mv[0].as_mv.row * 2); + base_mv->as_mv.col = (candidate->mv[0].as_mv.col * 2); + clamp_mv_ref(&base_mv->as_mv, xd); + } else { + base_mv->as_int = INVALID_MV; + } + } Done: @@ -125,16 +147,17 @@ static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCK *x, static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv, - int64_t best_rd_sofar) { + int64_t best_rd_sofar, int use_base_mv) { MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + MODE_INFO *mi = xd->mi[0]; struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}}; const int step_param = cpi->sf.mv.fullpel_search_step_param; const int sadpb = x->sadperbit16; MV mvp_full; - const int ref = mbmi->ref_frame[0]; + const int ref = mi->ref_frame[0]; const MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv; - int dis; + MV center_mv; + uint32_t dis; int rate_mode; const int tmp_col_min = x->mv_col_min; const int tmp_col_max = x->mv_col_max; @@ -164,9 +187,14 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x, mvp_full.col >>= 3; mvp_full.row >>= 3; + if (!use_base_mv) + center_mv = ref_mv; + else + center_mv = tmp_mv->as_mv; + vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb, cond_cost_list(cpi, cost_list), - &ref_mv, &tmp_mv->as_mv, INT_MAX, 0); + ¢er_mv, &tmp_mv->as_mv, INT_MAX, 0); x->mv_col_min = tmp_col_min; x->mv_col_max = tmp_col_max; @@ -224,7 +252,7 @@ static void block_variance(const uint8_t *src, int src_stride, &sse8x8[k], &sum8x8[k]); *sse += sse8x8[k]; *sum += sum8x8[k]; - var8x8[k] = sse8x8[k] - (((unsigned int)sum8x8[k] * sum8x8[k]) >> 6); + var8x8[k] = sse8x8[k] - (uint32_t)(((int64_t)sum8x8[k] * sum8x8[k]) >> 6); k++; } } @@ -245,7 +273,7 @@ static void calculate_variance(int bw, int bh, TX_SIZE tx_size, 
sse_i[(i + 1) * nw + j] + sse_i[(i + 1) * nw + j + 1]; sum_o[k] = sum_i[i * nw + j] + sum_i[i * nw + j + 1] + sum_i[(i + 1) * nw + j] + sum_i[(i + 1) * nw + j + 1]; - var_o[k] = sse_o[k] - (((unsigned int)sum_o[k] * sum_o[k]) >> + var_o[k] = sse_o[k] - (uint32_t)(((int64_t)sum_o[k] * sum_o[k]) >> (b_width_log2_lookup[unit_size] + b_height_log2_lookup[unit_size] + 6)); k++; @@ -300,7 +328,7 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize, tx_size = TX_8X8; if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && - cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id)) + cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id)) tx_size = TX_8X8; else if (tx_size > TX_16X16) tx_size = TX_16X16; @@ -310,7 +338,7 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize, } assert(tx_size >= TX_8X8); - xd->mi[0]->mbmi.tx_size = tx_size; + xd->mi[0]->tx_size = tx_size; // Evaluate if the partition block is a skippable block in Y plane. { @@ -379,7 +407,7 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize, for (i = 1; i <= 2; i++) { struct macroblock_plane *const p = &x->plane[i]; struct macroblockd_plane *const pd = &xd->plane[i]; - const TX_SIZE uv_tx_size = get_uv_tx_size(&xd->mi[0]->mbmi, pd); + const TX_SIZE uv_tx_size = get_uv_tx_size(xd->mi[0], pd); const BLOCK_SIZE unit_size = txsize_to_bsize[uv_tx_size]; const BLOCK_SIZE uv_bsize = get_plane_block_size(bsize, pd); const int uv_bw = b_width_log2_lookup[uv_bsize]; @@ -475,19 +503,19 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, if (cpi->common.tx_mode == TX_MODE_SELECT) { if (sse > (var << 2)) - xd->mi[0]->mbmi.tx_size = + xd->mi[0]->tx_size = VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); else - xd->mi[0]->mbmi.tx_size = TX_8X8; + xd->mi[0]->tx_size = TX_8X8; if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && - cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id)) - xd->mi[0]->mbmi.tx_size = TX_8X8; - 
else if (xd->mi[0]->mbmi.tx_size > TX_16X16) - xd->mi[0]->mbmi.tx_size = TX_16X16; + cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id)) + xd->mi[0]->tx_size = TX_8X8; + else if (xd->mi[0]->tx_size > TX_16X16) + xd->mi[0]->tx_size = TX_16X16; } else { - xd->mi[0]->mbmi.tx_size = + xd->mi[0]->tx_size = VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); } @@ -495,7 +523,7 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, // Evaluate if the partition block is a skippable block in Y plane. { const BLOCK_SIZE unit_size = - txsize_to_bsize[xd->mi[0]->mbmi.tx_size]; + txsize_to_bsize[xd->mi[0]->tx_size]; const unsigned int num_blk_log2 = (b_width_log2_lookup[bsize] - b_width_log2_lookup[unit_size]) + (b_height_log2_lookup[bsize] - b_height_log2_lookup[unit_size]); @@ -562,39 +590,46 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, } #if CONFIG_VP9_HIGHBITDEPTH -static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist, - int *skippable, int64_t *sse, int plane, - BLOCK_SIZE bsize, TX_SIZE tx_size) { +static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc, + int *skippable, int64_t *sse, BLOCK_SIZE bsize, + TX_SIZE tx_size) { MACROBLOCKD *xd = &x->e_mbd; unsigned int var_y, sse_y; - (void)plane; + (void)tx_size; - model_rd_for_sb_y(cpi, bsize, x, xd, rate, dist, &var_y, &sse_y); + model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc->rate, &this_rdc->dist, &var_y, + &sse_y); *sse = INT_MAX; *skippable = 0; return; } #else -static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist, - int *skippable, int64_t *sse, int plane, - BLOCK_SIZE bsize, TX_SIZE tx_size) { +static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc, + int *skippable, int64_t *sse, BLOCK_SIZE bsize, + TX_SIZE tx_size) { MACROBLOCKD *xd = &x->e_mbd; - const struct macroblockd_plane *pd = &xd->plane[plane]; - const struct macroblock_plane *const p = &x->plane[plane]; + const 
struct macroblockd_plane *pd = &xd->plane[0]; + struct macroblock_plane *const p = &x->plane[0]; const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; const int step = 1 << (tx_size << 1); const int block_step = (1 << tx_size); int block = 0, r, c; - int shift = tx_size == TX_32X32 ? 0 : 2; const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 : - xd->mb_to_right_edge >> (5 + pd->subsampling_x)); + xd->mb_to_right_edge >> 5); const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 : - xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); + xd->mb_to_bottom_edge >> 5); int eob_cost = 0; + const int bw = 4 * num_4x4_w; + const int bh = 4 * num_4x4_h; (void)cpi; - vp9_subtract_plane(x, bsize, plane); + + // The max tx_size passed in is TX_16X16. + assert(tx_size != TX_32X32); + + vpx_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride, + pd->dst.buf, pd->dst.stride); *skippable = 1; // Keep track of the row and column of the blocks we use so that we know // if we are in the unrestricted motion border. 
@@ -606,27 +641,20 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist, tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); uint16_t *const eob = &p->eobs[block]; - const int diff_stride = 4 * num_4x4_blocks_wide_lookup[bsize]; + const int diff_stride = bw; const int16_t *src_diff; src_diff = &p->src_diff[(r * diff_stride + c) << 2]; switch (tx_size) { - case TX_32X32: - vpx_fdct32x32_rd(src_diff, coeff, diff_stride); - vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, - p->round_fp, p->quant_fp, p->quant_shift, - qcoeff, dqcoeff, pd->dequant, eob, - scan_order->scan, scan_order->iscan); - break; case TX_16X16: - vp9_hadamard_16x16(src_diff, diff_stride, (int16_t *)coeff); + vpx_hadamard_16x16(src_diff, diff_stride, (int16_t *)coeff); vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; case TX_8X8: - vp9_hadamard_8x8(src_diff, diff_stride, (int16_t *)coeff); + vpx_hadamard_8x8(src_diff, diff_stride, (int16_t *)coeff); vp9_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, @@ -650,18 +678,17 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist, } } - if (*skippable && *sse < INT64_MAX) { - *rate = 0; - *dist = (*sse << 6) >> shift; - *sse = *dist; - return; + this_rdc->rate = 0; + if (*sse < INT64_MAX) { + *sse = (*sse << 6) >> 2; + if (*skippable) { + this_rdc->dist = *sse; + return; + } } block = 0; - *rate = 0; - *dist = 0; - if (*sse < INT64_MAX) - *sse = (*sse << 6) >> shift; + this_rdc->dist = 0; for (r = 0; r < max_blocks_high; r += block_step) { for (c = 0; c < num_4x4_w; c += block_step) { if (c < max_blocks_wide) { @@ -671,26 +698,26 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist, uint16_t *const eob = 
&p->eobs[block]; if (*eob == 1) - *rate += (int)abs(qcoeff[0]); + this_rdc->rate += (int)abs(qcoeff[0]); else if (*eob > 1) - *rate += (int)vp9_satd((const int16_t *)qcoeff, step << 4); + this_rdc->rate += vpx_satd((const int16_t *)qcoeff, step << 4); - *dist += vp9_block_error_fp(coeff, dqcoeff, step << 4) >> shift; + this_rdc->dist += + vp9_block_error_fp(coeff, dqcoeff, step << 4) >> 2; } block += step; } } - if (*skippable == 0) { - *rate <<= 10; - *rate += (eob_cost << 8); - } + // If skippable is set, rate gets clobbered later. + this_rdc->rate <<= (2 + VP9_PROB_COST_SHIFT); + this_rdc->rate += (eob_cost << VP9_PROB_COST_SHIFT); } #endif static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE plane_bsize, MACROBLOCK *x, MACROBLOCKD *xd, - int *out_rate_sum, int64_t *out_dist_sum, + RD_COST *this_rdc, unsigned int *var_y, unsigned int *sse_y, int start_plane, int stop_plane) { // Note our transform coeffs are 8 times an orthogonal transform. @@ -701,8 +728,8 @@ static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE plane_bsize, int64_t dist; int i; - *out_rate_sum = 0; - *out_dist_sum = 0; + this_rdc->rate = 0; + this_rdc->dist = 0; for (i = start_plane; i <= stop_plane; ++i) { struct macroblock_plane *const p = &x->plane[i]; @@ -733,8 +760,8 @@ static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE plane_bsize, dc_quant >> 3, &rate, &dist); #endif // CONFIG_VP9_HIGHBITDEPTH - *out_rate_sum += rate >> 1; - *out_dist_sum += dist << 3; + this_rdc->rate += rate >> 1; + this_rdc->dist += dist << 3; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { @@ -749,8 +776,8 @@ static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE plane_bsize, ac_quant >> 3, &rate, &dist); #endif // CONFIG_VP9_HIGHBITDEPTH - *out_rate_sum += rate; - *out_dist_sum += dist << 4; + this_rdc->rate += rate; + this_rdc->dist += dist << 4; } } @@ -779,14 +806,20 @@ static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x, struct buf_2d 
yv12_mb[][MAX_MB_PLANE], int *rate, int64_t *dist) { MACROBLOCKD *xd = &x->e_mbd; - + MODE_INFO *const mi = xd->mi[0]; const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]); unsigned int var = var_y, sse = sse_y; // Skipping threshold for ac. unsigned int thresh_ac; // Skipping threshold for dc. unsigned int thresh_dc; - if (x->encode_breakout > 0) { + int motion_low = 1; + if (mi->mv[0].as_mv.row > 64 || + mi->mv[0].as_mv.row < -64 || + mi->mv[0].as_mv.col > 64 || + mi->mv[0].as_mv.col < -64) + motion_low = 0; + if (x->encode_breakout > 0 && motion_low == 1) { // Set a maximum for threshold to avoid big PSNR loss in low bit rate // case. Use extreme low threshold for static frames to limit // skipping. @@ -826,6 +859,12 @@ static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x, if (var <= thresh_ac && (sse - var) <= thresh_dc) { unsigned int sse_u, sse_v; unsigned int var_u, var_v; + unsigned int thresh_ac_uv = thresh_ac; + unsigned int thresh_dc_uv = thresh_dc; + if (x->sb_is_skin) { + thresh_ac_uv = 0; + thresh_dc_uv = 0; + } // Skip UV prediction unless breakout is zero (lossless) to save // computation with low impact on the result @@ -841,14 +880,14 @@ static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x, xd->plane[1].dst.stride, &sse_u); // U skipping condition checking - if (((var_u << 2) <= thresh_ac) && (sse_u - var_u <= thresh_dc)) { + if (((var_u << 2) <= thresh_ac_uv) && (sse_u - var_u <= thresh_dc_uv)) { var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf, x->plane[2].src.stride, xd->plane[2].dst.buf, xd->plane[2].dst.stride, &sse_v); // V skipping condition checking - if (((var_v << 2) <= thresh_ac) && (sse_v - var_v <= thresh_dc)) { + if (((var_v << 2) <= thresh_ac_uv) && (sse_v - var_v <= thresh_dc_uv)) { x->skip = 1; // The cost of skip bit needs to be added. 
@@ -874,8 +913,8 @@ struct estimate_block_intra_args { VP9_COMP *cpi; MACROBLOCK *x; PREDICTION_MODE mode; - int rate; - int64_t dist; + int skippable; + RD_COST *rdc; }; static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, @@ -892,8 +931,7 @@ static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, const int src_stride = p->src.stride; const int dst_stride = pd->dst.stride; int i, j; - int rate; - int64_t dist; + RD_COST this_rdc; txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); @@ -909,23 +947,20 @@ static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, if (plane == 0) { int64_t this_sse = INT64_MAX; - int is_skippable; // TODO(jingning): This needs further refactoring. - block_yrd(cpi, x, &rate, &dist, &is_skippable, &this_sse, 0, - bsize_tx, VPXMIN(tx_size, TX_16X16)); - x->skip_txfm[0] = is_skippable; - // TODO(jingning): Skip is signalled per prediciton block not per tx block. - rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), is_skippable); + block_yrd(cpi, x, &this_rdc, &args->skippable, &this_sse, bsize_tx, + VPXMIN(tx_size, TX_16X16)); } else { - unsigned int var, sse; - model_rd_for_sb_uv(cpi, plane_bsize, x, xd, &rate, &dist, &var, &sse, - plane, plane); + unsigned int var = 0; + unsigned int sse = 0; + model_rd_for_sb_uv(cpi, plane_bsize, x, xd, &this_rdc, &var, &sse, plane, + plane); } p->src.buf = src_buf_base; pd->dst.buf = dst_buf_base; - args->rate += rate; - args->dist += dist; + args->rdc->rate += this_rdc.rate; + args->rdc->dist += this_rdc.dist; } static const THR_MODES mode_idx[MAX_REF_FRAMES - 1][4] = { @@ -975,17 +1010,17 @@ static INLINE void update_thresh_freq_fact(VP9_COMP *cpi, void vp9_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) { MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + MODE_INFO *const mi = xd->mi[0]; RD_COST this_rdc, best_rdc; 
PREDICTION_MODE this_mode; - struct estimate_block_intra_args args = { cpi, x, DC_PRED, 0, 0 }; + struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0 }; const TX_SIZE intra_tx_size = VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); MODE_INFO *const mic = xd->mi[0]; int *bmode_costs; - const MODE_INFO *above_mi = xd->mi[-xd->mi_stride]; - const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1] : NULL; + const MODE_INFO *above_mi = xd->above_mi; + const MODE_INFO *left_mi = xd->left_mi; const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0); const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0); bmode_costs = cpi->y_mode_costs[A][L]; @@ -994,29 +1029,35 @@ void vp9_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost, vp9_rd_cost_reset(&best_rdc); vp9_rd_cost_reset(&this_rdc); - mbmi->ref_frame[0] = INTRA_FRAME; - mbmi->mv[0].as_int = INVALID_MV; - mbmi->uv_mode = DC_PRED; + mi->ref_frame[0] = INTRA_FRAME; + mi->mv[0].as_int = INVALID_MV; + mi->uv_mode = DC_PRED; memset(x->skip_txfm, 0, sizeof(x->skip_txfm)); // Change the limit of this loop to add other intra prediction // mode tests. 
for (this_mode = DC_PRED; this_mode <= H_PRED; ++this_mode) { + this_rdc.dist = this_rdc.rate = 0; args.mode = this_mode; - args.rate = 0; - args.dist = 0; - mbmi->tx_size = intra_tx_size; + args.skippable = 1; + args.rdc = &this_rdc; + mi->tx_size = intra_tx_size; vp9_foreach_transformed_block_in_plane(xd, bsize, 0, estimate_block_intra, &args); - this_rdc.rate = args.rate; - this_rdc.dist = args.dist; + if (args.skippable) { + x->skip_txfm[0] = SKIP_TXFM_AC_DC; + this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 1); + } else { + x->skip_txfm[0] = SKIP_TXFM_NONE; + this_rdc.rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 0); + } this_rdc.rate += bmode_costs[this_mode]; this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist); if (this_rdc.rdcost < best_rdc.rdcost) { best_rdc = this_rdc; - mbmi->mode = this_mode; + mi->mode = this_mode; } } @@ -1068,17 +1109,228 @@ static const REF_MODE ref_mode_set_svc[RT_INTER_MODES] = { {GOLDEN_FRAME, NEWMV} }; -// TODO(jingning) placeholder for inter-frame non-RD mode decision. -// this needs various further optimizations. to be continued.. +static int set_intra_cost_penalty(const VP9_COMP *const cpi, BLOCK_SIZE bsize) { + const VP9_COMMON *const cm = &cpi->common; + // Reduce the intra cost penalty for small blocks (<=16x16). + int reduction_fac = + (bsize <= BLOCK_16X16) ? ((bsize <= BLOCK_8X8) ? 4 : 2) : 0; + if (cpi->noise_estimate.enabled && cpi->noise_estimate.level == kHigh) + // Don't reduce intra cost penalty if estimated noise level is high. 
+ reduction_fac = 0; + return vp9_get_intra_cost_penalty( + cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth) >> reduction_fac; +} + +static INLINE void find_predictors(VP9_COMP *cpi, MACROBLOCK *x, + MV_REFERENCE_FRAME ref_frame, + int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], + int const_motion[MAX_REF_FRAMES], + int *ref_frame_skip_mask, + const int flag_list[4], + TileDataEnc *tile_data, + int mi_row, int mi_col, + struct buf_2d yv12_mb[4][MAX_MB_PLANE], + BLOCK_SIZE bsize, + int force_skip_low_temp_var) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); + TileInfo *const tile_info = &tile_data->tile_info; + // TODO(jingning) placeholder for inter-frame non-RD mode decision. + x->pred_mv_sad[ref_frame] = INT_MAX; + frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; + frame_mv[ZEROMV][ref_frame].as_int = 0; + // this needs various further optimizations. to be continued.. + if ((cpi->ref_frame_flags & flag_list[ref_frame]) && (yv12 != NULL)) { + int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame]; + const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf; + vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, + sf, sf); + if (cm->use_prev_frame_mvs) { + vp9_find_mv_refs(cm, xd, xd->mi[0], ref_frame, + candidates, mi_row, mi_col, + x->mbmi_ext->mode_context); + } else { + const_motion[ref_frame] = + mv_refs_rt(cpi, cm, x, xd, tile_info, xd->mi[0], ref_frame, + candidates, &frame_mv[NEWMV][ref_frame], mi_row, mi_col, + (int)(cpi->svc.use_base_mv && cpi->svc.spatial_layer_id)); + } + vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates, + &frame_mv[NEARESTMV][ref_frame], + &frame_mv[NEARMV][ref_frame]); + // Early exit for golden frame if force_skip_low_temp_var is set. 
+ if (!vp9_is_scaled(sf) && bsize >= BLOCK_8X8 && + !(force_skip_low_temp_var && ref_frame == GOLDEN_FRAME)) { + vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, + ref_frame, bsize); + } + } else { + *ref_frame_skip_mask |= (1 << ref_frame); + } +} + +static void vp9_large_block_mv_bias(const NOISE_ESTIMATE *ne, RD_COST *this_rdc, + BLOCK_SIZE bsize, int mv_row, int mv_col, + int is_last_frame) { + // Bias against non-zero (above some threshold) motion for large blocks. + // This is temporary fix to avoid selection of large mv for big blocks. + if (mv_row > 64 || mv_row < -64 || mv_col > 64 || mv_col < -64) { + if (bsize == BLOCK_64X64) + this_rdc->rdcost = this_rdc->rdcost << 1; + else if (bsize >= BLOCK_32X32) + this_rdc->rdcost = 3 * this_rdc->rdcost >> 1; + } + // If noise estimation is enabled, and estimated level is above threshold, + // add a bias to LAST reference with small motion, for large blocks. + if (ne->enabled && ne->level >= kMedium && + bsize >= BLOCK_32X32 && is_last_frame && + mv_row < 8 && mv_row > -8 && mv_col < 8 && mv_col > -8) { + this_rdc->rdcost = 7 * this_rdc->rdcost >> 3; + } +} + +#if CONFIG_VP9_TEMPORAL_DENOISING +static void vp9_pickmode_ctx_den_update( + VP9_PICKMODE_CTX_DEN *ctx_den, + int64_t zero_last_cost_orig, + int ref_frame_cost[MAX_REF_FRAMES], + int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], + int reuse_inter_pred, + TX_SIZE best_tx_size, + PREDICTION_MODE best_mode, + MV_REFERENCE_FRAME best_ref_frame, + INTERP_FILTER best_pred_filter, + uint8_t best_mode_skip_txfm) { + ctx_den->zero_last_cost_orig = zero_last_cost_orig; + ctx_den->ref_frame_cost = ref_frame_cost; + ctx_den->frame_mv = frame_mv; + ctx_den->reuse_inter_pred = reuse_inter_pred; + ctx_den->best_tx_size = best_tx_size; + ctx_den->best_mode = best_mode; + ctx_den->best_ref_frame = best_ref_frame; + ctx_den->best_pred_filter = best_pred_filter; + ctx_den->best_mode_skip_txfm = best_mode_skip_txfm; +} + +static void recheck_zeromv_after_denoising( 
+ VP9_COMP *cpi, MODE_INFO *const mi, MACROBLOCK *x, MACROBLOCKD *const xd, + VP9_DENOISER_DECISION decision, VP9_PICKMODE_CTX_DEN *ctx_den, + struct buf_2d yv12_mb[4][MAX_MB_PLANE], RD_COST *best_rdc, BLOCK_SIZE bsize, + int mi_row, int mi_col) { + // If INTRA or GOLDEN reference was selected, re-evaluate ZEROMV on + // denoised result. Only do this under noise conditions, and if rdcost of + // ZEROMV onoriginal source is not significantly higher than rdcost of best + // mode. + if (cpi->noise_estimate.enabled && + cpi->noise_estimate.level > kLow && + ctx_den->zero_last_cost_orig < (best_rdc->rdcost << 3) && + ((ctx_den->best_ref_frame == INTRA_FRAME && decision >= FILTER_BLOCK) || + (ctx_den->best_ref_frame == GOLDEN_FRAME && + decision == FILTER_ZEROMV_BLOCK))) { + // Check if we should pick ZEROMV on denoised signal. + int rate = 0; + int64_t dist = 0; + uint32_t var_y = UINT_MAX; + uint32_t sse_y = UINT_MAX; + RD_COST this_rdc; + mi->mode = ZEROMV; + mi->ref_frame[0] = LAST_FRAME; + mi->ref_frame[1] = NONE; + mi->mv[0].as_int = 0; + mi->interp_filter = EIGHTTAP; + xd->plane[0].pre[0] = yv12_mb[LAST_FRAME][0]; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y); + this_rdc.rate = rate + ctx_den->ref_frame_cost[LAST_FRAME] + + cpi->inter_mode_cost[x->mbmi_ext->mode_context[LAST_FRAME]] + [INTER_OFFSET(ZEROMV)]; + this_rdc.dist = dist; + this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, rate, dist); + // Switch to ZEROMV if the rdcost for ZEROMV on denoised source + // is lower than best_ref mode (on original source). 
+ if (this_rdc.rdcost > best_rdc->rdcost) { + this_rdc = *best_rdc; + mi->mode = ctx_den->best_mode; + mi->ref_frame[0] = ctx_den->best_ref_frame; + mi->interp_filter = ctx_den->best_pred_filter; + if (ctx_den->best_ref_frame == INTRA_FRAME) + mi->mv[0].as_int = INVALID_MV; + else if (ctx_den->best_ref_frame == GOLDEN_FRAME) { + mi->mv[0].as_int = ctx_den->frame_mv[ctx_den->best_mode] + [ctx_den->best_ref_frame].as_int; + if (ctx_den->reuse_inter_pred) { + xd->plane[0].pre[0] = yv12_mb[GOLDEN_FRAME][0]; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + } + } + mi->tx_size = ctx_den->best_tx_size; + x->skip_txfm[0] = ctx_den->best_mode_skip_txfm; + } else { + ctx_den->best_ref_frame = LAST_FRAME; + *best_rdc = this_rdc; + } + } +} +#endif // CONFIG_VP9_TEMPORAL_DENOISING + +static INLINE int get_force_skip_low_temp_var(uint8_t *variance_low, + int mi_row, int mi_col, + BLOCK_SIZE bsize) { + const int i = (mi_row & 0x7) >> 1; + const int j = (mi_col & 0x7) >> 1; + int force_skip_low_temp_var = 0; + // Set force_skip_low_temp_var based on the block size and block offset. 
+ if (bsize == BLOCK_64X64) { + force_skip_low_temp_var = variance_low[0]; + } else if (bsize == BLOCK_64X32) { + if (!(mi_col & 0x7) && !(mi_row & 0x7)) { + force_skip_low_temp_var = variance_low[1]; + } else if (!(mi_col & 0x7) && (mi_row & 0x7)) { + force_skip_low_temp_var = variance_low[2]; + } + } else if (bsize == BLOCK_32X64) { + if (!(mi_col & 0x7) && !(mi_row & 0x7)) { + force_skip_low_temp_var = variance_low[3]; + } else if ((mi_col & 0x7) && !(mi_row & 0x7)) { + force_skip_low_temp_var = variance_low[4]; + } + } else if (bsize == BLOCK_32X32) { + if (!(mi_col & 0x7) && !(mi_row & 0x7)) { + force_skip_low_temp_var = variance_low[5]; + } else if ((mi_col & 0x7) && !(mi_row & 0x7)) { + force_skip_low_temp_var = variance_low[6]; + } else if (!(mi_col & 0x7) && (mi_row & 0x7)) { + force_skip_low_temp_var = variance_low[7]; + } else if ((mi_col & 0x7) && (mi_row & 0x7)) { + force_skip_low_temp_var = variance_low[8]; + } + } else if (bsize == BLOCK_16X16) { + force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]]; + } else if (bsize == BLOCK_32X16) { + // The col shift index for the second 16x16 block. + const int j2 = ((mi_col + 2) & 0x7) >> 1; + // Only if each 16x16 block inside has low temporal variance. + force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]] && + variance_low[pos_shift_16x16[i][j2]]; + } else if (bsize == BLOCK_16X32) { + // The row shift index for the second 16x16 block. 
+ const int i2 = ((mi_row + 2) & 0x7) >> 1; + force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]] && + variance_low[pos_shift_16x16[i2][j]]; + } + return force_skip_low_temp_var; +} + void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, int mi_row, int mi_col, RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) { VP9_COMMON *const cm = &cpi->common; SPEED_FEATURES *const sf = &cpi->sf; - TileInfo *const tile_info = &tile_data->tile_info; + const SVC *const svc = &cpi->svc; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + MODE_INFO *const mi = xd->mi[0]; struct macroblockd_plane *const pd = &xd->plane[0]; PREDICTION_MODE best_mode = ZEROMV; MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME; @@ -1094,14 +1346,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // var_y and sse_y are saved to be used in skipping checking unsigned int var_y = UINT_MAX; unsigned int sse_y = UINT_MAX; - // Reduce the intra cost penalty for small blocks (<=16x16). - const int reduction_fac = (bsize <= BLOCK_16X16) ? - ((bsize <= BLOCK_8X8) ? 
4 : 2) : 0; - const int intra_cost_penalty = vp9_get_intra_cost_penalty( - cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth) >> reduction_fac; - const int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv, + const int intra_cost_penalty = set_intra_cost_penalty(cpi, bsize); + int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv, intra_cost_penalty, 0); - const int *const rd_threshes = cpi->rd.threshes[mbmi->segment_id][bsize]; + const int *const rd_threshes = cpi->rd.threshes[mi->segment_id][bsize]; const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize]; INTERP_FILTER filter_ref; const int bsl = mi_width_log2_lookup[bsize]; @@ -1129,6 +1377,14 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int best_pred_sad = INT_MAX; int best_early_term = 0; int ref_frame_cost[MAX_REF_FRAMES]; + int svc_force_zero_mode[3] = {0}; + int perform_intra_pred = 1; + int use_golden_nonzeromv = 1; + int force_skip_low_temp_var = 0; +#if CONFIG_VP9_TEMPORAL_DENOISING + VP9_PICKMODE_CTX_DEN ctx_den; + int64_t zero_last_cost_orig = INT64_MAX; +#endif init_ref_frame_cost(cm, xd, ref_frame_cost); @@ -1154,24 +1410,42 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; x->skip = 0; - if (xd->up_available) - filter_ref = xd->mi[-xd->mi_stride]->mbmi.interp_filter; - else if (xd->left_available) - filter_ref = xd->mi[-1]->mbmi.interp_filter; + // Instead of using vp9_get_pred_context_switchable_interp(xd) to assign + // filter_ref, we use a less strict condition on assigning filter_ref. + // This is to reduce the probabily of entering the flow of not assigning + // filter_ref and then skip filter search. 
+ if (xd->above_mi && is_inter_block(xd->above_mi)) + filter_ref = xd->above_mi->interp_filter; + else if (xd->left_mi && is_inter_block(xd->left_mi)) + filter_ref = xd->left_mi->interp_filter; else filter_ref = cm->interp_filter; // initialize mode decisions vp9_rd_cost_reset(&best_rdc); vp9_rd_cost_reset(rd_cost); - mbmi->sb_type = bsize; - mbmi->ref_frame[0] = NONE; - mbmi->ref_frame[1] = NONE; - mbmi->tx_size = VPXMIN(max_txsize_lookup[bsize], - tx_mode_to_biggest_tx_size[cm->tx_mode]); + mi->sb_type = bsize; + mi->ref_frame[0] = NONE; + mi->ref_frame[1] = NONE; + mi->tx_size = VPXMIN(max_txsize_lookup[bsize], + tx_mode_to_biggest_tx_size[cm->tx_mode]); + + if (sf->short_circuit_flat_blocks) { +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + x->source_variance = vp9_high_get_sby_perpixel_variance( + cpi, &x->plane[0].src, bsize, xd->bd); + else +#endif // CONFIG_VP9_HIGHBITDEPTH + x->source_variance = + vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); + } #if CONFIG_VP9_TEMPORAL_DENOISING - vp9_denoiser_reset_frame_stats(ctx); + if (cpi->oxcf.noise_sensitivity > 0 && + cpi->denoiser.denoising_level > kDenLowLow) { + vp9_denoiser_reset_frame_stats(ctx); + } #endif if (cpi->rc.frames_since_golden == 0 && !cpi->use_svc) { @@ -1179,40 +1453,37 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } else { usable_ref_frame = GOLDEN_FRAME; } - for (ref_frame = LAST_FRAME; ref_frame <= usable_ref_frame; ++ref_frame) { - const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); - - x->pred_mv_sad[ref_frame] = INT_MAX; - frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; - frame_mv[ZEROMV][ref_frame].as_int = 0; - if ((cpi->ref_frame_flags & flag_list[ref_frame]) && (yv12 != NULL)) { - int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame]; - const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf; - - vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, - sf, sf); + // For svc 
mode, on spatial_layer_id > 0: if the reference has different scale + // constrain the inter mode to only test zero motion. + if (cpi->use_svc && + svc ->force_zero_mode_spatial_ref && + cpi->svc.spatial_layer_id > 0) { + if (cpi->ref_frame_flags & flag_list[LAST_FRAME]) { + struct scale_factors *const sf = &cm->frame_refs[LAST_FRAME - 1].sf; + if (vp9_is_scaled(sf)) + svc_force_zero_mode[LAST_FRAME - 1] = 1; + } + if (cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) { + struct scale_factors *const sf = &cm->frame_refs[GOLDEN_FRAME - 1].sf; + if (vp9_is_scaled(sf)) + svc_force_zero_mode[GOLDEN_FRAME - 1] = 1; + } + } - if (cm->use_prev_frame_mvs) - vp9_find_mv_refs(cm, xd, xd->mi[0], ref_frame, - candidates, mi_row, mi_col, NULL, NULL, - x->mbmi_ext->mode_context); - else - const_motion[ref_frame] = mv_refs_rt(cm, x, xd, tile_info, - xd->mi[0], - ref_frame, candidates, - mi_row, mi_col); + if (cpi->sf.short_circuit_low_temp_var) { + force_skip_low_temp_var = + get_force_skip_low_temp_var(&x->variance_low[0], mi_row, mi_col, bsize); + } - vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates, - &frame_mv[NEARESTMV][ref_frame], - &frame_mv[NEARMV][ref_frame]); + if (!((cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) && + !svc_force_zero_mode[GOLDEN_FRAME - 1] && !force_skip_low_temp_var)) + use_golden_nonzeromv = 0; - if (!vp9_is_scaled(sf) && bsize >= BLOCK_8X8) - vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, - ref_frame, bsize); - } else { - ref_frame_skip_mask |= (1 << ref_frame); - } + for (ref_frame = LAST_FRAME; ref_frame <= usable_ref_frame; ++ref_frame) { + find_predictors(cpi, x, ref_frame, frame_mv, const_motion, + &ref_frame_skip_mask, flag_list, tile_data, mi_row, mi_col, + yv12_mb, bsize, force_skip_low_temp_var); } for (idx = 0; idx < RT_INTER_MODES; ++idx) { @@ -1224,21 +1495,52 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int is_skippable; int this_early_term = 0; PREDICTION_MODE this_mode = 
ref_mode_set[idx].pred_mode; + if (cpi->use_svc) this_mode = ref_mode_set_svc[idx].pred_mode; + if (sf->short_circuit_flat_blocks && x->source_variance == 0 && + this_mode != NEARESTMV) { + continue; + } + if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode))) continue; ref_frame = ref_mode_set[idx].ref_frame; - if (cpi->use_svc) + if (cpi->use_svc) { ref_frame = ref_mode_set_svc[idx].ref_frame; + } + if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue; + if (const_motion[ref_frame] && this_mode == NEARMV) continue; - if (!(this_mode == ZEROMV && ref_frame == LAST_FRAME)) { + // Skip non-zeromv mode search for golden frame if force_skip_low_temp_var + // is set. If nearestmv for golden frame is 0, zeromv mode will be skipped + // later. + if (force_skip_low_temp_var && ref_frame == GOLDEN_FRAME && + frame_mv[this_mode][ref_frame].as_int != 0) { + continue; + } + + if (cpi->sf.short_circuit_low_temp_var == 2 && + force_skip_low_temp_var && ref_frame == LAST_FRAME && + this_mode == NEWMV) { + continue; + } + + if (cpi->use_svc) { + if (svc_force_zero_mode[ref_frame - 1] && + frame_mv[this_mode][ref_frame].as_int != 0) + continue; + } + + if (!force_skip_low_temp_var && + !(frame_mv[this_mode][ref_frame].as_int == 0 && + ref_frame == LAST_FRAME)) { i = (ref_frame == LAST_FRAME) ? 
GOLDEN_FRAME : LAST_FRAME; if ((cpi->ref_frame_flags & flag_list[i]) && sf->reference_masking) if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1)) @@ -1251,7 +1553,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = yv12_mb[ref_frame][i]; - mbmi->ref_frame[0] = ref_frame; + mi->ref_frame[0] = ref_frame; set_ref_ptrs(cm, xd, ref_frame, NONE); mode_index = mode_idx[ref_frame][INTER_OFFSET(this_mode)]; @@ -1262,9 +1564,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, continue; if (this_mode == NEWMV) { - if (ref_frame > LAST_FRAME && !cpi->use_svc) { + if (ref_frame > LAST_FRAME && + !cpi->use_svc && + cpi->oxcf.rc_mode == VPX_CBR) { int tmp_sad; - int dis, cost_list[5]; + uint32_t dis; + int cost_list[5]; if (bsize < BLOCK_16X16) continue; @@ -1276,7 +1581,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (tmp_sad + (num_pels_log2_lookup[bsize] << 4) > best_pred_sad) continue; - frame_mv[NEWMV][ref_frame].as_int = mbmi->mv[0].as_int; + frame_mv[NEWMV][ref_frame].as_int = mi->mv[0].as_int; rate_mv = vp9_mv_bit_cost(&frame_mv[NEWMV][ref_frame].as_mv, &x->mbmi_ext->ref_mvs[ref_frame][0].as_mv, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); @@ -1293,13 +1598,44 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref_frame], NULL, 0, 0); + } else if (svc->use_base_mv && svc->spatial_layer_id) { + if (frame_mv[NEWMV][ref_frame].as_int != INVALID_MV && + frame_mv[NEWMV][ref_frame].as_int != 0) { + const int pre_stride = xd->plane[0].pre[0].stride; + int base_mv_sad = INT_MAX; + const uint8_t * const pre_buf = xd->plane[0].pre[0].buf + + (frame_mv[NEWMV][ref_frame].as_mv.row >> 3) * pre_stride + + (frame_mv[NEWMV][ref_frame].as_mv.col >> 3); + base_mv_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, + x->plane[0].src.stride, + pre_buf, pre_stride); + + // TODO(wonkap): make the decision to use base 
layer mv on RD; + // not just SAD. + if (base_mv_sad < x->pred_mv_sad[ref_frame]) { + // Base layer mv is good. + if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col, + &frame_mv[NEWMV][ref_frame], &rate_mv, best_rdc.rdcost, 1)) { + continue; + } + } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col, + &frame_mv[NEWMV][ref_frame], &rate_mv, best_rdc.rdcost, 0)) { + continue; + } + } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col, + &frame_mv[NEWMV][ref_frame], &rate_mv, best_rdc.rdcost, 0)) { + continue; + } } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col, - &frame_mv[NEWMV][ref_frame], &rate_mv, best_rdc.rdcost)) { + &frame_mv[NEWMV][ref_frame], &rate_mv, best_rdc.rdcost, 0)) { continue; } } - if (this_mode == NEWMV && ref_frame == LAST_FRAME && + // If use_golden_nonzeromv is false, NEWMV mode is skipped for golden, no + // need to compute best_pred_sad which is only used to skip golden NEWMV. + if (use_golden_nonzeromv && this_mode == NEWMV && + ref_frame == LAST_FRAME && frame_mv[NEWMV][LAST_FRAME].as_int != INVALID_MV) { const int pre_stride = xd->plane[0].pre[0].stride; const uint8_t * const pre_buf = xd->plane[0].pre[0].buf + @@ -1311,28 +1647,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, x->pred_mv_sad[LAST_FRAME] = best_pred_sad; } - if (cpi->use_svc) { - if (this_mode == NEWMV && ref_frame == GOLDEN_FRAME && - frame_mv[NEWMV][GOLDEN_FRAME].as_int != INVALID_MV) { - const int pre_stride = xd->plane[0].pre[0].stride; - const uint8_t * const pre_buf = xd->plane[0].pre[0].buf + - (frame_mv[NEWMV][GOLDEN_FRAME].as_mv.row >> 3) * pre_stride + - (frame_mv[NEWMV][GOLDEN_FRAME].as_mv.col >> 3); - best_pred_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, - x->plane[0].src.stride, - pre_buf, pre_stride); - x->pred_mv_sad[GOLDEN_FRAME] = best_pred_sad; - } - } - - if (this_mode != NEARESTMV && frame_mv[this_mode][ref_frame].as_int == frame_mv[NEARESTMV][ref_frame].as_int) continue; - mbmi->mode = 
this_mode; - mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int; + mi->mode = this_mode; + mi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int; // Search for the best prediction filter type, when the resulting // motion vector is at sub-pixel accuracy level for luma component, i.e., @@ -1349,8 +1670,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if ((this_mode == NEWMV || filter_ref == SWITCHABLE) && pred_filter_search && (ref_frame == LAST_FRAME || - (ref_frame == GOLDEN_FRAME && cpi->use_svc)) - && (((mbmi->mv[0].as_mv.row | mbmi->mv[0].as_mv.col) & 0x07) != 0)) { + (ref_frame == GOLDEN_FRAME && + (cpi->use_svc || cpi->oxcf.rc_mode == VPX_VBR))) && + (((mi->mv[0].as_mv.row | mi->mv[0].as_mv.col) & 0x07) != 0)) { int pf_rate[3]; int64_t pf_dist[3]; unsigned int pf_var[3]; @@ -1362,13 +1684,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, for (filter = EIGHTTAP; filter <= EIGHTTAP_SMOOTH; ++filter) { int64_t cost; - mbmi->interp_filter = filter; + mi->interp_filter = filter; vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[filter], &pf_dist[filter], &pf_var[filter], &pf_sse[filter]); pf_rate[filter] += vp9_get_switchable_rate(cpi, xd); cost = RDCOST(x->rdmult, x->rddiv, pf_rate[filter], pf_dist[filter]); - pf_tx_size[filter] = mbmi->tx_size; + pf_tx_size[filter] = mi->tx_size; if (cost < best_cost) { best_filter = filter; best_cost = cost; @@ -1379,12 +1701,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, free_pred_buffer(this_mode_pred); this_mode_pred = current_pred; } - - if (filter < EIGHTTAP_SHARP) { - current_pred = &tmp[get_pred_buffer(tmp, 3)]; - pd->dst.buf = current_pred->data; - pd->dst.stride = bw; - } + current_pred = &tmp[get_pred_buffer(tmp, 3)]; + pd->dst.buf = current_pred->data; + pd->dst.stride = bw; } } } @@ -1392,8 +1711,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (reuse_inter_pred && this_mode_pred != current_pred) 
free_pred_buffer(current_pred); - mbmi->interp_filter = best_filter; - mbmi->tx_size = pf_tx_size[best_filter]; + mi->interp_filter = best_filter; + mi->tx_size = pf_tx_size[best_filter]; this_rdc.rate = pf_rate[best_filter]; this_rdc.dist = pf_dist[best_filter]; var_y = pf_var[best_filter]; @@ -1404,13 +1723,21 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, pd->dst.stride = this_mode_pred->stride; } } else { - mbmi->interp_filter = (filter_ref == SWITCHABLE) ? EIGHTTAP : filter_ref; + // TODO(jackychen): the low-bitdepth condition causes a segfault in + // high-bitdepth builds. + // https://bugs.chromium.org/p/webm/issues/detail?id=1250 +#if CONFIG_VP9_HIGHBITDEPTH + const int large_block = bsize > BLOCK_32X32; +#else + const int large_block = bsize >= BLOCK_32X32; +#endif + mi->interp_filter = (filter_ref == SWITCHABLE) ? EIGHTTAP : filter_ref; vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); // For large partition blocks, extra testing is done. - if (bsize > BLOCK_32X32 && - !cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id) && - cm->base_qindex) { + if (cpi->oxcf.rc_mode == VPX_CBR && large_block && + !cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id) && + cm->base_qindex) { model_rd_for_sb_y_large(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist, &var_y, &sse_y, mi_row, mi_col, &this_early_term); @@ -1422,8 +1749,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (!this_early_term) { this_sse = (int64_t)sse_y; - block_yrd(cpi, x, &this_rdc.rate, &this_rdc.dist, &is_skippable, - &this_sse, 0, bsize, VPXMIN(mbmi->tx_size, TX_16X16)); + block_yrd(cpi, x, &this_rdc, &is_skippable, &this_sse, bsize, + VPXMIN(mi->tx_size, TX_16X16)); x->skip_txfm[0] = is_skippable; if (is_skippable) { this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); @@ -1439,7 +1766,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } if (cm->interp_filter == SWITCHABLE) { - if ((mbmi->mv[0].as_mv.row | mbmi->mv[0].as_mv.col) & 
0x07) + if ((mi->mv[0].as_mv.row | mi->mv[0].as_mv.col) & 0x07) this_rdc.rate += vp9_get_switchable_rate(cpi, xd); } } else { @@ -1449,17 +1776,15 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } if (x->color_sensitivity[0] || x->color_sensitivity[1]) { - int uv_rate = 0; - int64_t uv_dist = 0; + RD_COST rdc_uv; const BLOCK_SIZE uv_bsize = get_plane_block_size(bsize, &xd->plane[1]); if (x->color_sensitivity[0]) vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 1); if (x->color_sensitivity[1]) vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 2); - model_rd_for_sb_uv(cpi, uv_bsize, x, xd, &uv_rate, &uv_dist, - &var_y, &sse_y, 1, 2); - this_rdc.rate += uv_rate; - this_rdc.dist += uv_dist; + model_rd_for_sb_uv(cpi, uv_bsize, x, xd, &rdc_uv, &var_y, &sse_y, 1, 2); + this_rdc.rate += rdc_uv.rate; + this_rdc.dist += rdc_uv.dist; } this_rdc.rate += rate_mv; @@ -1469,6 +1794,17 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, this_rdc.rate += ref_frame_cost[ref_frame]; this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist); + // Bias against non-zero motion + if (cpi->oxcf.rc_mode == VPX_CBR && + cpi->oxcf.speed >= 5 && + cpi->oxcf.content != VP9E_CONTENT_SCREEN && + !x->sb_is_skin) { + vp9_large_block_mv_bias(&cpi->noise_estimate, &this_rdc, bsize, + frame_mv[this_mode][ref_frame].as_mv.row, + frame_mv[this_mode][ref_frame].as_mv.col, + ref_frame == LAST_FRAME); + } + // Skipping checking: test to see if this block can be reconstructed by // prediction only. if (cpi->allow_encode_breakout) { @@ -1483,8 +1819,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0) - vp9_denoiser_update_frame_stats(mbmi, sse_y, this_mode, ctx); + if (cpi->oxcf.noise_sensitivity > 0 && + cpi->denoiser.denoising_level > kDenLowLow) { + vp9_denoiser_update_frame_stats(mi, sse_y, this_mode, ctx); + // Keep track of zero_last cost. 
+ if (ref_frame == LAST_FRAME && frame_mv[this_mode][ref_frame].as_int == 0) + zero_last_cost_orig = this_rdc.rdcost; + } #else (void)ctx; #endif @@ -1492,8 +1833,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (this_rdc.rdcost < best_rdc.rdcost || x->skip) { best_rdc = this_rdc; best_mode = this_mode; - best_pred_filter = mbmi->interp_filter; - best_tx_size = mbmi->tx_size; + best_pred_filter = mi->interp_filter; + best_tx_size = mi->tx_size; best_ref_frame = ref_frame; best_mode_skip_txfm = x->skip_txfm[0]; best_early_term = this_early_term; @@ -1518,20 +1859,33 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } } - mbmi->mode = best_mode; - mbmi->interp_filter = best_pred_filter; - mbmi->tx_size = best_tx_size; - mbmi->ref_frame[0] = best_ref_frame; - mbmi->mv[0].as_int = frame_mv[best_mode][best_ref_frame].as_int; - xd->mi[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int; + mi->mode = best_mode; + mi->interp_filter = best_pred_filter; + mi->tx_size = best_tx_size; + mi->ref_frame[0] = best_ref_frame; + mi->mv[0].as_int = frame_mv[best_mode][best_ref_frame].as_int; + xd->mi[0]->bmi[0].as_mv[0].as_int = mi->mv[0].as_int; x->skip_txfm[0] = best_mode_skip_txfm; + // For spatial enhancemanent layer: perform intra prediction only if base + // layer is chosen as the reference. Always perform intra prediction if + // LAST is the only reference or is_key_frame is set. + if (cpi->svc.spatial_layer_id) { + perform_intra_pred = + cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame || + !(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) || + (!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame + && svc_force_zero_mode[best_ref_frame - 1]); + inter_mode_thresh = (inter_mode_thresh << 1) + inter_mode_thresh; + } // Perform intra prediction search, if the best SAD is above a certain // threshold. 
- if (best_rdc.rdcost == INT64_MAX || - (!x->skip && best_rdc.rdcost > inter_mode_thresh && - bsize <= cpi->sf.max_intra_bsize)) { - struct estimate_block_intra_args args = { cpi, x, DC_PRED, 0, 0 }; + if ((!force_skip_low_temp_var || bsize < BLOCK_32X32) && + perform_intra_pred && + (best_rdc.rdcost == INT64_MAX || + (!x->skip && best_rdc.rdcost > inter_mode_thresh && + bsize <= cpi->sf.max_intra_bsize))) { + struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0 }; int i; TX_SIZE best_intra_tx_size = TX_SIZES; TX_SIZE intra_tx_size = @@ -1566,6 +1920,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, const PREDICTION_MODE this_mode = intra_mode_list[i]; THR_MODES mode_index = mode_idx[INTRA_FRAME][mode_offset(this_mode)]; int mode_rd_thresh = rd_threshes[mode_index]; + if (sf->short_circuit_flat_blocks && x->source_variance == 0 && + this_mode != DC_PRED) { + continue; + } if (!((1 << this_mode) & cpi->sf.intra_y_mode_bsize_mask[bsize])) continue; @@ -1574,14 +1932,24 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, rd_thresh_freq_fact[mode_index])) continue; - mbmi->mode = this_mode; - mbmi->ref_frame[0] = INTRA_FRAME; + mi->mode = this_mode; + mi->ref_frame[0] = INTRA_FRAME; + this_rdc.dist = this_rdc.rate = 0; args.mode = this_mode; - args.rate = 0; - args.dist = 0; - mbmi->tx_size = intra_tx_size; + args.skippable = 1; + args.rdc = &this_rdc; + mi->tx_size = intra_tx_size; vp9_foreach_transformed_block_in_plane(xd, bsize, 0, estimate_block_intra, &args); + // Check skip cost here since skippable is not set for for uv, this + // mirrors the behavior used by inter + if (args.skippable) { + x->skip_txfm[0] = SKIP_TXFM_AC_DC; + this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 1); + } else { + x->skip_txfm[0] = SKIP_TXFM_NONE; + this_rdc.rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 0); + } // Inter and intra RD will mismatch in scale for non-screen content. 
if (cpi->oxcf.content == VP9E_CONTENT_SCREEN) { if (x->color_sensitivity[0]) @@ -1591,8 +1959,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, vp9_foreach_transformed_block_in_plane(xd, bsize, 2, estimate_block_intra, &args); } - this_rdc.rate = args.rate; - this_rdc.dist = args.dist; this_rdc.rate += cpi->mbmode_cost[this_mode]; this_rdc.rate += ref_frame_cost[INTRA_FRAME]; this_rdc.rate += intra_cost_penalty; @@ -1602,29 +1968,33 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (this_rdc.rdcost < best_rdc.rdcost) { best_rdc = this_rdc; best_mode = this_mode; - best_intra_tx_size = mbmi->tx_size; + best_intra_tx_size = mi->tx_size; best_ref_frame = INTRA_FRAME; - mbmi->uv_mode = this_mode; - mbmi->mv[0].as_int = INVALID_MV; + mi->uv_mode = this_mode; + mi->mv[0].as_int = INVALID_MV; best_mode_skip_txfm = x->skip_txfm[0]; } } // Reset mb_mode_info to the best inter mode. if (best_ref_frame != INTRA_FRAME) { - mbmi->tx_size = best_tx_size; + mi->tx_size = best_tx_size; } else { - mbmi->tx_size = best_intra_tx_size; + mi->tx_size = best_intra_tx_size; } } pd->dst = orig_dst; - mbmi->mode = best_mode; - mbmi->ref_frame[0] = best_ref_frame; + mi->mode = best_mode; + mi->ref_frame[0] = best_ref_frame; x->skip_txfm[0] = best_mode_skip_txfm; + if (!is_inter_block(mi)) { + mi->interp_filter = SWITCHABLE_FILTERS; + } + if (reuse_inter_pred && best_pred != NULL) { - if (best_pred->data != orig_dst.buf && is_inter_mode(mbmi->mode)) { + if (best_pred->data != orig_dst.buf && is_inter_mode(mi->mode)) { #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) vpx_highbd_convolve_copy(best_pred->data, best_pred->stride, @@ -1642,8 +2012,25 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } } +#if CONFIG_VP9_TEMPORAL_DENOISING + if (cpi->oxcf.noise_sensitivity > 0 && + cpi->resize_pending == 0 && + cpi->denoiser.denoising_level > kDenLowLow && + cpi->denoiser.reset == 0) { + VP9_DENOISER_DECISION decision = COPY_BLOCK; + 
vp9_pickmode_ctx_den_update(&ctx_den, zero_last_cost_orig, ref_frame_cost, + frame_mv, reuse_inter_pred, best_tx_size, + best_mode, best_ref_frame, best_pred_filter, + best_mode_skip_txfm); + vp9_denoiser_denoise(cpi, x, mi_row, mi_col, bsize, ctx, &decision); + recheck_zeromv_after_denoising(cpi, mi, x, xd, decision, &ctx_den, yv12_mb, + &best_rdc, bsize, mi_row, mi_col); + best_ref_frame = ctx_den.best_ref_frame; + } +#endif + if (cpi->sf.adaptive_rd_thresh) { - THR_MODES best_mode_idx = mode_idx[best_ref_frame][mode_offset(mbmi->mode)]; + THR_MODES best_mode_idx = mode_idx[best_ref_frame][mode_offset(mi->mode)]; if (best_ref_frame == INTRA_FRAME) { // Only consider the modes that are included in the intra_mode_list. @@ -1677,12 +2064,12 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, VP9_COMMON *const cm = &cpi->common; SPEED_FEATURES *const sf = &cpi->sf; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + MODE_INFO *const mi = xd->mi[0]; MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; const struct segmentation *const seg = &cm->seg; MV_REFERENCE_FRAME ref_frame, second_ref_frame = NONE; MV_REFERENCE_FRAME best_ref_frame = NONE; - unsigned char segment_id = mbmi->segment_id; + unsigned char segment_id = mi->segment_id; struct buf_2d yv12_mb[4][MAX_MB_PLANE]; static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG }; @@ -1708,8 +2095,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf); vp9_find_mv_refs(cm, xd, xd->mi[0], ref_frame, - candidates, mi_row, mi_col, NULL, NULL, - mbmi_ext->mode_context); + candidates, mi_row, mi_col, mbmi_ext->mode_context); vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates, &dummy_mv[0], &dummy_mv[1]); @@ -1718,13 +2104,13 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } } - mbmi->sb_type = bsize; - mbmi->tx_size = TX_4X4; - mbmi->uv_mode 
= DC_PRED; - mbmi->ref_frame[0] = LAST_FRAME; - mbmi->ref_frame[1] = NONE; - mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP - : cm->interp_filter; + mi->sb_type = bsize; + mi->tx_size = TX_4X4; + mi->uv_mode = DC_PRED; + mi->ref_frame[0] = LAST_FRAME; + mi->ref_frame[1] = NONE; + mi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP + : cm->interp_filter; for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) { int64_t this_rd = 0; @@ -1733,6 +2119,13 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (ref_frame_skip_mask & (1 << ref_frame)) continue; +#if CONFIG_BETTER_HW_COMPATIBILITY + if ((bsize == BLOCK_8X4 || bsize == BLOCK_4X8) && + ref_frame > INTRA_FRAME && + vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf)) + continue; +#endif + // TODO(jingning, agrange): Scaling reference frame not supported for // sub8x8 blocks. Is this supported now? if (ref_frame > INTRA_FRAME && @@ -1745,7 +2138,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) continue; - mbmi->ref_frame[0] = ref_frame; + mi->ref_frame[0] = ref_frame; x->skip = 0; set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); @@ -1799,7 +2192,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, const int tmp_col_max = x->mv_col_max; const int tmp_row_min = x->mv_row_min; const int tmp_row_max = x->mv_row_max; - int dummy_dist; + uint32_t dummy_dist; if (i == 0) { mvp_full.row = b_mv[NEARESTMV].as_mv.row >> 3; @@ -1862,7 +2255,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, &xd->block_refs[0]->sf, 4 * num_4x4_blocks_wide, 4 * num_4x4_blocks_high, 0, - vp9_filter_kernels[mbmi->interp_filter], + vp9_filter_kernels[mi->interp_filter], MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * (i & 0x01), mi_row * MI_SIZE + 4 * (i >> 1), xd->bd); @@ -1874,7 +2267,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, &xd->block_refs[0]->sf, 4 * 
num_4x4_blocks_wide, 4 * num_4x4_blocks_high, 0, - vp9_filter_kernels[mbmi->interp_filter], + vp9_filter_kernels[mi->interp_filter], MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * (i & 0x01), mi_row * MI_SIZE + 4 * (i >> 1)); @@ -1916,8 +2309,8 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } } // reference frames - mbmi->tx_size = TX_4X4; - mbmi->ref_frame[0] = best_ref_frame; + mi->tx_size = TX_4X4; + mi->ref_frame[0] = best_ref_frame; for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { const int block = idy * 2 + idx; @@ -1928,7 +2321,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, xd->mi[0]->bmi[block + 2] = bsi[best_ref_frame][block]; } } - mbmi->mode = xd->mi[0]->bmi[3].as_mode; + mi->mode = xd->mi[0]->bmi[3].as_mode; ctx->mic = *(xd->mi[0]); ctx->mbmi_ext = *x->mbmi_ext; ctx->skip_txfm[0] = SKIP_TXFM_NONE; diff --git a/libvpx/vp9/encoder/vp9_quantize.c b/libvpx/vp9/encoder/vp9_quantize.c index cb3e21a56..d68b6845c 100644 --- a/libvpx/vp9/encoder/vp9_quantize.c +++ b/libvpx/vp9/encoder/vp9_quantize.c @@ -94,7 +94,7 @@ void vp9_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, const int coeff_sign = (coeff >> 31); const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; const int64_t tmp = abs_coeff + round_ptr[rc != 0]; - const uint32_t abs_qcoeff = (uint32_t)((tmp * quant_ptr[rc != 0]) >> 16); + const int abs_qcoeff = (int)((tmp * quant_ptr[rc != 0]) >> 16); qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0]; if (abs_qcoeff) @@ -219,12 +219,12 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block, static void invert_quant(int16_t *quant, int16_t *shift, int d) { unsigned t; - int l; + int l, m; t = d; for (l = 0; t > 1; l++) t >>= 1; - t = 1 + (1 << (16 + l)) / d; - *quant = (int16_t)(t - (1 << 16)); + m = 1 + (1 << (16 + l)) / d; + *quant = (int16_t)(m - (1 << 16)); *shift = 1 << (16 
- l); } @@ -308,7 +308,7 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) { const VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; QUANTS *const quants = &cpi->quants; - const int segment_id = xd->mi[0]->mbmi.segment_id; + const int segment_id = xd->mi[0]->segment_id; const int qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex); const int rdmult = vp9_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q); int i; @@ -342,8 +342,7 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) { x->skip_block = segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP); x->q_index = qindex; - x->errorperbit = rdmult >> 6; - x->errorperbit += (x->errorperbit == 0); + set_error_per_bit(x, rdmult); vp9_initialize_me_consts(cpi, x, x->q_index); } diff --git a/libvpx/vp9/encoder/vp9_ratectrl.c b/libvpx/vp9/encoder/vp9_ratectrl.c index d70068570..b45f8d0d9 100644 --- a/libvpx/vp9/encoder/vp9_ratectrl.c +++ b/libvpx/vp9/encoder/vp9_ratectrl.c @@ -133,7 +133,7 @@ static void init_minq_luts(int *kf_low_m, int *kf_high_m, kf_high_m[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.55, bit_depth); arfgf_low[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.30, bit_depth); arfgf_high[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.55, bit_depth); - inter[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.90, bit_depth); + inter[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.70, bit_depth); rtc[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.70, bit_depth); } } @@ -337,6 +337,10 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) { rc->total_actual_bits = 0; rc->total_target_bits = 0; rc->total_target_vs_actual = 0; + rc->avg_frame_low_motion = 0; + rc->high_source_sad = 0; + rc->count_last_scene_change = 0; + rc->avg_source_sad = 0; rc->frames_since_key = 8; // Sensible default for first frame. 
rc->this_key_frame_forced = 0; @@ -370,8 +374,9 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) { int vp9_rc_drop_frame(VP9_COMP *cpi) { const VP9EncoderConfig *oxcf = &cpi->oxcf; RATE_CONTROL *const rc = &cpi->rc; - - if (!oxcf->drop_frames_water_mark) { + if (!oxcf->drop_frames_water_mark || + (is_one_pass_cbr_svc(cpi) && + cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode)) { return 0; } else { if (rc->buffer_level < 0) { @@ -499,6 +504,12 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi) { else cpi->rc.rc_1_frame = 0; + // Turn off oscilation detection in the case of massive overshoot. + if (cpi->rc.rc_1_frame == -1 && cpi->rc.rc_2_frame == 1 && + correction_factor > 1000) { + cpi->rc.rc_2_frame = 0; + } + if (correction_factor > 102) { // We are not already at the worst allowable quality correction_factor = (int)(100 + ((correction_factor - 100) * @@ -614,15 +625,16 @@ static int calc_active_worst_quality_one_pass_vbr(const VP9_COMP *cpi) { if (cpi->common.frame_type == KEY_FRAME) { active_worst_quality = curr_frame == 0 ? rc->worst_quality - : rc->last_q[KEY_FRAME] * 2; + : rc->last_q[KEY_FRAME] << 1; } else { if (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { - active_worst_quality = curr_frame == 1 ? rc->last_q[KEY_FRAME] * 5 / 4 + active_worst_quality = curr_frame == 1 ? rc->last_q[KEY_FRAME] * 5 >> 2 : rc->last_q[INTER_FRAME]; } else { - active_worst_quality = curr_frame == 1 ? rc->last_q[KEY_FRAME] * 2 - : rc->last_q[INTER_FRAME] * 2; + active_worst_quality = curr_frame == 1 ? 
rc->last_q[KEY_FRAME] << 1 : + VPXMIN(rc->last_q[INTER_FRAME] << 1, + (rc->avg_frame_qindex[INTER_FRAME] * 3 >> 1)); } } return VPXMIN(active_worst_quality, rc->worst_quality); @@ -655,7 +667,7 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { VPXMIN(rc->avg_frame_qindex[INTER_FRAME], rc->avg_frame_qindex[KEY_FRAME]) : rc->avg_frame_qindex[INTER_FRAME]; - active_worst_quality = VPXMIN(rc->worst_quality, ambient_qp * 5 / 4); + active_worst_quality = VPXMIN(rc->worst_quality, ambient_qp * 5 >> 2); if (rc->buffer_level > rc->optimal_buffer_level) { // Adjust down. // Maximum limit for down adjustment, ~30%. @@ -804,8 +816,8 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, return q; } -static int get_active_cq_level(const RATE_CONTROL *rc, - const VP9EncoderConfig *const oxcf) { +static int get_active_cq_level_one_pass( + const RATE_CONTROL *rc, const VP9EncoderConfig *const oxcf) { static const double cq_adjust_threshold = 0.1; int active_cq_level = oxcf->cq_level; if (oxcf->rc_mode == VPX_CQ && @@ -818,13 +830,36 @@ static int get_active_cq_level(const RATE_CONTROL *rc, return active_cq_level; } +#define SMOOTH_PCT_MIN 0.1 +#define SMOOTH_PCT_DIV 0.05 +static int get_active_cq_level_two_pass( + const TWO_PASS *twopass, const RATE_CONTROL *rc, + const VP9EncoderConfig *const oxcf) { + static const double cq_adjust_threshold = 0.1; + int active_cq_level = oxcf->cq_level; + if (oxcf->rc_mode == VPX_CQ) { + if (twopass->mb_smooth_pct > SMOOTH_PCT_MIN) { + active_cq_level -= (int)((twopass->mb_smooth_pct - SMOOTH_PCT_MIN) / + SMOOTH_PCT_DIV); + active_cq_level = VPXMAX(active_cq_level, 0); + } + if (rc->total_target_bits > 0) { + const double x = (double)rc->total_actual_bits / rc->total_target_bits; + if (x < cq_adjust_threshold) { + active_cq_level = (int)(active_cq_level * x / cq_adjust_threshold); + } + } + } + return active_cq_level; +} + static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, int *bottom_index, 
int *top_index) { const VP9_COMMON *const cm = &cpi->common; const RATE_CONTROL *const rc = &cpi->rc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; - const int cq_level = get_active_cq_level(rc, oxcf); + const int cq_level = get_active_cq_level_one_pass(rc, oxcf); int active_best_quality; int active_worst_quality = calc_active_worst_quality_one_pass_vbr(cpi); int q; @@ -832,10 +867,16 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, ASSIGN_MINQ_TABLE(cm->bit_depth, inter_minq); if (frame_is_intra_only(cm)) { - // Handle the special case for key frames forced when we have reached - // the maximum key frame interval. Here force the Q to a range - // based on the ambient Q to reduce the risk of popping. - if (rc->this_key_frame_forced) { + if (oxcf->rc_mode == VPX_Q) { + int qindex = cq_level; + double q = vp9_convert_qindex_to_q(qindex, cm->bit_depth); + int delta_qindex = vp9_compute_qdelta(rc, q, q * 0.25, + cm->bit_depth); + active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality); + } else if (rc->this_key_frame_forced) { + // Handle the special case for key frames forced when we have reached + // the maximum key frame interval. Here force the Q to a range + // based on the ambient Q to reduce the risk of popping. int qindex = rc->last_boosted_qindex; double last_boosted_q = vp9_convert_qindex_to_q(qindex, cm->bit_depth); int delta_qindex = vp9_compute_qdelta(rc, last_boosted_q, @@ -868,9 +909,12 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, // Use the lower of active_worst_quality and recent // average Q as basis for GF/ARF best Q limit unless last frame was // a key frame. 
- if (rc->frames_since_key > 1 && - rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) { - q = rc->avg_frame_qindex[INTER_FRAME]; + if (rc->frames_since_key > 1) { + if (rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) { + q = rc->avg_frame_qindex[INTER_FRAME]; + } else { + q = active_worst_quality; + } } else { q = rc->avg_frame_qindex[KEY_FRAME]; } @@ -885,23 +929,37 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, active_best_quality = active_best_quality * 15 / 16; } else if (oxcf->rc_mode == VPX_Q) { - if (!cpi->refresh_alt_ref_frame) { - active_best_quality = cq_level; - } else { - active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth); - } + int qindex = cq_level; + double q = vp9_convert_qindex_to_q(qindex, cm->bit_depth); + int delta_qindex; + if (cpi->refresh_alt_ref_frame) + delta_qindex = vp9_compute_qdelta(rc, q, q * 0.40, cm->bit_depth); + else + delta_qindex = vp9_compute_qdelta(rc, q, q * 0.50, cm->bit_depth); + active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality); } else { active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth); } } else { if (oxcf->rc_mode == VPX_Q) { - active_best_quality = cq_level; + int qindex = cq_level; + double q = vp9_convert_qindex_to_q(qindex, cm->bit_depth); + double delta_rate[FIXED_GF_INTERVAL] = + {0.50, 1.0, 0.85, 1.0, 0.70, 1.0, 0.85, 1.0}; + int delta_qindex = + vp9_compute_qdelta(rc, q, + q * delta_rate[cm->current_video_frame % + FIXED_GF_INTERVAL], cm->bit_depth); + active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality); } else { - // Use the lower of active_worst_quality and recent/average Q. - if (cm->current_video_frame > 1) - active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]]; - else + // Use the min of the average Q and active_worst_quality as basis for + // active_best. 
+ if (cm->current_video_frame > 1) { + q = VPXMIN(rc->avg_frame_qindex[INTER_FRAME], active_worst_quality); + active_best_quality = inter_minq[q]; + } else { active_best_quality = inter_minq[rc->avg_frame_qindex[KEY_FRAME]]; + } // For the constrained quality mode we don't want // q to fall below the cq level. if ((oxcf->rc_mode == VPX_CQ) && @@ -993,7 +1051,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, const RATE_CONTROL *const rc = &cpi->rc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; const GF_GROUP *gf_group = &cpi->twopass.gf_group; - const int cq_level = get_active_cq_level(rc, oxcf); + const int cq_level = get_active_cq_level_two_pass(&cpi->twopass, rc, oxcf); int active_best_quality; int active_worst_quality = cpi->twopass.active_worst_quality; int q; @@ -1074,7 +1132,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, if (!cpi->refresh_alt_ref_frame) { active_best_quality = cq_level; } else { - active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth); + active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth); // Modify best quality for second level arfs. For mode VPX_Q this // becomes the baseline frame q. @@ -1101,8 +1159,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, // Extension to max or min Q if undershoot or overshoot is outside // the permitted range. - if ((cpi->oxcf.rc_mode != VPX_Q) && - (cpi->twopass.gf_zeromotion_pct < VLOW_MOTION_THRESHOLD)) { + if (cpi->oxcf.rc_mode != VPX_Q) { if (frame_is_intra_only(cm) || (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) { @@ -1256,8 +1313,12 @@ static void update_golden_frame_stats(VP9_COMP *cpi) { rc->frames_since_golden = 0; // If we are not using alt ref in the up and coming group clear the arf - // active flag. - if (!rc->source_alt_ref_pending) { + // active flag. In multi arf group case, if the index is not 0 then + // we are overlaying a mid group arf so should not reset the flag. 
+ if (cpi->oxcf.pass == 2) { + if (!rc->source_alt_ref_pending && (cpi->twopass.gf_group.index == 0)) + rc->source_alt_ref_active = 0; + } else if (!rc->source_alt_ref_pending) { rc->source_alt_ref_active = 0; } @@ -1274,6 +1335,26 @@ static void update_golden_frame_stats(VP9_COMP *cpi) { } } +static void compute_frame_low_motion(VP9_COMP *const cpi) { + VP9_COMMON *const cm = &cpi->common; + int mi_row, mi_col; + MODE_INFO **mi = cm->mi_grid_visible; + RATE_CONTROL *const rc = &cpi->rc; + const int rows = cm->mi_rows, cols = cm->mi_cols; + int cnt_zeromv = 0; + for (mi_row = 0; mi_row < rows; mi_row++) { + for (mi_col = 0; mi_col < cols; mi_col++) { + if (abs(mi[0]->mv[0].as_mv.row) < 16 && + abs(mi[0]->mv[0].as_mv.col) < 16) + cnt_zeromv++; + mi++; + } + mi += 8; + } + cnt_zeromv = 100 * cnt_zeromv / (rows * cols); + rc->avg_frame_low_motion = (3 * rc->avg_frame_low_motion + cnt_zeromv) >> 2; +} + void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { const VP9_COMMON *const cm = &cpi->common; const VP9EncoderConfig *const oxcf = &cpi->oxcf; @@ -1308,9 +1389,9 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { } } } else { - if (rc->is_src_frame_alt_ref || - !(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) || - (cpi->use_svc && oxcf->rc_mode == VPX_CBR)) { + if ((cpi->use_svc && oxcf->rc_mode == VPX_CBR) || + (!rc->is_src_frame_alt_ref && + !(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) { rc->last_q[INTER_FRAME] = qindex; rc->avg_frame_qindex[INTER_FRAME] = ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[INTER_FRAME] + qindex, 2); @@ -1383,6 +1464,11 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { rc->next_frame_size_selector != rc->frame_size_selector; rc->frame_size_selector = rc->next_frame_size_selector; } + + if (oxcf->pass == 0) { + if (cm->frame_type != KEY_FRAME) + compute_frame_low_motion(cpi); + } } void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) { @@ -1421,6 +1507,24 @@ 
static int calc_iframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) { return vp9_rc_clamp_iframe_target_size(cpi, target); } +static void adjust_gf_key_frame(VP9_COMP *cpi) { + RATE_CONTROL *const rc = &cpi->rc; + rc->constrained_gf_group = 0; + // Reset gf interval to make more equal spacing for up-coming key frame. + if ((rc->frames_to_key <= 7 * rc->baseline_gf_interval >> 2) && + (rc->frames_to_key > rc->baseline_gf_interval)) { + rc->baseline_gf_interval = rc->frames_to_key >> 1; + if (rc->baseline_gf_interval < 5) + rc->baseline_gf_interval = rc->frames_to_key; + rc->constrained_gf_group = 1; + } else { + // Reset since frames_till_gf_update_due must be <= frames_to_key. + if (rc->baseline_gf_interval > rc->frames_to_key) { + rc->baseline_gf_interval = rc->frames_to_key; + rc->constrained_gf_group = 1; + } + } +} void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; @@ -1441,24 +1545,41 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) { cm->frame_type = INTER_FRAME; } if (rc->frames_till_gf_update_due == 0) { - rc->baseline_gf_interval = (rc->min_gf_interval + rc->max_gf_interval) / 2; - rc->frames_till_gf_update_due = rc->baseline_gf_interval; - // NOTE: frames_till_gf_update_due must be <= frames_to_key. - if (rc->frames_till_gf_update_due > rc->frames_to_key) { - rc->frames_till_gf_update_due = rc->frames_to_key; - rc->constrained_gf_group = 1; + double rate_err = 1.0; + rc->gfu_boost = DEFAULT_GF_BOOST; + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cpi->oxcf.pass == 0) { + vp9_cyclic_refresh_set_golden_update(cpi); } else { - rc->constrained_gf_group = 0; + rc->baseline_gf_interval = + (rc->min_gf_interval + rc->max_gf_interval) / 2; } + if (rc->rolling_target_bits > 0) + rate_err = + (double)rc->rolling_actual_bits / (double)rc->rolling_target_bits; + // Increase gf interval at high Q and high overshoot. 
+ if (cm->current_video_frame > 30 && + rc->avg_frame_qindex[INTER_FRAME] > (7 * rc->worst_quality) >> 3 && + rate_err > 3.5) { + rc->baseline_gf_interval = + VPXMIN(15, (3 * rc->baseline_gf_interval) >> 1); + } else if (cm->current_video_frame > 30 && + rc->avg_frame_low_motion < 20) { + // Decrease boost and gf interval for high motion case. + rc->gfu_boost = DEFAULT_GF_BOOST >> 1; + rc->baseline_gf_interval = VPXMAX(5, rc->baseline_gf_interval >> 1); + } + adjust_gf_key_frame(cpi); + rc->frames_till_gf_update_due = rc->baseline_gf_interval; cpi->refresh_golden_frame = 1; rc->source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS; - rc->gfu_boost = DEFAULT_GF_BOOST; } if (cm->frame_type == KEY_FRAME) target = calc_iframe_target_size_one_pass_vbr(cpi); else target = calc_pframe_target_size_one_pass_vbr(cpi); vp9_rc_set_frame_target(cpi, target); + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cpi->oxcf.pass == 0) + vp9_cyclic_refresh_update_parameters(cpi); } static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { @@ -1539,41 +1660,31 @@ static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { return vp9_rc_clamp_iframe_target_size(cpi, target); } -// Reset information needed to set proper reference frames and buffer updates -// for temporal layering. This is called when a key frame is encoded. 
-static void reset_temporal_layer_to_zero(VP9_COMP *cpi) { - int sl; - LAYER_CONTEXT *lc = NULL; - cpi->svc.temporal_layer_id = 0; - - for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) { - lc = &cpi->svc.layer_context[sl * cpi->svc.number_temporal_layers]; - lc->current_video_frame_in_layer = 0; - lc->frames_from_key_frame = 0; - } -} - void vp9_rc_get_svc_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; int target = rc->avg_frame_bandwidth; - const int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id, + int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id, cpi->svc.temporal_layer_id, cpi->svc.number_temporal_layers); - + // Periodic key frames is based on the super-frame counter + // (svc.current_superframe), also only base spatial layer is key frame. if ((cm->current_video_frame == 0) || (cpi->frame_flags & FRAMEFLAGS_KEY) || - (cpi->oxcf.auto_key && (rc->frames_since_key % - cpi->oxcf.key_freq == 0))) { + (cpi->oxcf.auto_key && + (cpi->svc.current_superframe % cpi->oxcf.key_freq == 0) && + cpi->svc.spatial_layer_id == 0)) { cm->frame_type = KEY_FRAME; rc->source_alt_ref_active = 0; - if (is_two_pass_svc(cpi)) { cpi->svc.layer_context[layer].is_key_frame = 1; cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG); } else if (is_one_pass_cbr_svc(cpi)) { + if (cm->current_video_frame > 0) + vp9_svc_reset_key_frame(cpi); + layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id, + cpi->svc.temporal_layer_id, cpi->svc.number_temporal_layers); cpi->svc.layer_context[layer].is_key_frame = 1; - reset_temporal_layer_to_zero(cpi); cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG); // Assumption here is that LAST_FRAME is being updated for a keyframe. 
@@ -1715,29 +1826,36 @@ void vp9_rc_set_gf_interval_range(const VP9_COMP *const cpi, RATE_CONTROL *const rc) { const VP9EncoderConfig *const oxcf = &cpi->oxcf; - // Set Maximum gf/arf interval - rc->max_gf_interval = oxcf->max_gf_interval; - rc->min_gf_interval = oxcf->min_gf_interval; - if (rc->min_gf_interval == 0) - rc->min_gf_interval = vp9_rc_get_default_min_gf_interval( - oxcf->width, oxcf->height, cpi->framerate); - if (rc->max_gf_interval == 0) - rc->max_gf_interval = vp9_rc_get_default_max_gf_interval( - cpi->framerate, rc->min_gf_interval); + // Special case code for 1 pass fixed Q mode tests + if ((oxcf->pass == 0) && (oxcf->rc_mode == VPX_Q)) { + rc->max_gf_interval = FIXED_GF_INTERVAL; + rc->min_gf_interval = FIXED_GF_INTERVAL; + rc->static_scene_max_gf_interval = FIXED_GF_INTERVAL; + } else { + // Set Maximum gf/arf interval + rc->max_gf_interval = oxcf->max_gf_interval; + rc->min_gf_interval = oxcf->min_gf_interval; + if (rc->min_gf_interval == 0) + rc->min_gf_interval = vp9_rc_get_default_min_gf_interval( + oxcf->width, oxcf->height, cpi->framerate); + if (rc->max_gf_interval == 0) + rc->max_gf_interval = vp9_rc_get_default_max_gf_interval( + cpi->framerate, rc->min_gf_interval); + + // Extended interval for genuinely static scenes + rc->static_scene_max_gf_interval = MAX_LAG_BUFFERS * 2; + + if (is_altref_enabled(cpi)) { + if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1) + rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1; + } - // Extended interval for genuinely static scenes - rc->static_scene_max_gf_interval = MAX_LAG_BUFFERS * 2; + if (rc->max_gf_interval > rc->static_scene_max_gf_interval) + rc->max_gf_interval = rc->static_scene_max_gf_interval; - if (is_altref_enabled(cpi)) { - if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1) - rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1; + // Clamp min to max + rc->min_gf_interval = VPXMIN(rc->min_gf_interval, rc->max_gf_interval); } - - if 
(rc->max_gf_interval > rc->static_scene_max_gf_interval) - rc->max_gf_interval = rc->static_scene_max_gf_interval; - - // Clamp min to max - rc->min_gf_interval = VPXMIN(rc->min_gf_interval, rc->max_gf_interval); } void vp9_rc_update_framerate(VP9_COMP *cpi) { @@ -1774,27 +1892,28 @@ static void vbr_rate_correction(VP9_COMP *cpi, int *this_frame_target) { RATE_CONTROL *const rc = &cpi->rc; int64_t vbr_bits_off_target = rc->vbr_bits_off_target; int max_delta; - double position_factor = 1.0; - - // How far through the clip are we. - // This number is used to damp the per frame rate correction. - // Range 0 - 1.0 - if (cpi->twopass.total_stats.count) { - position_factor = sqrt((double)cpi->common.current_video_frame / - cpi->twopass.total_stats.count); - } - max_delta = (int)(position_factor * - ((*this_frame_target * VBR_PCT_ADJUSTMENT_LIMIT) / 100)); - - // vbr_bits_off_target > 0 means we have extra bits to spend - if (vbr_bits_off_target > 0) { - *this_frame_target += - (vbr_bits_off_target > max_delta) ? max_delta - : (int)vbr_bits_off_target; - } else { - *this_frame_target -= - (vbr_bits_off_target < -max_delta) ? max_delta - : (int)-vbr_bits_off_target; + int frame_window = VPXMIN(16, + ((int)cpi->twopass.total_stats.count - cpi->common.current_video_frame)); + + // Calcluate the adjustment to rate for this frame. + if (frame_window > 0) { + max_delta = (vbr_bits_off_target > 0) + ? (int)(vbr_bits_off_target / frame_window) + : (int)(-vbr_bits_off_target / frame_window); + + max_delta = VPXMIN(max_delta, + ((*this_frame_target * VBR_PCT_ADJUSTMENT_LIMIT) / 100)); + + // vbr_bits_off_target > 0 means we have extra bits to spend + if (vbr_bits_off_target > 0) { + *this_frame_target += + (vbr_bits_off_target > max_delta) ? max_delta + : (int)vbr_bits_off_target; + } else { + *this_frame_target -= + (vbr_bits_off_target < -max_delta) ? max_delta + : (int)-vbr_bits_off_target; + } } // Fast redistribution of bits arising from massive local undershoot. 
@@ -1835,6 +1954,9 @@ int vp9_resize_one_pass_cbr(VP9_COMP *cpi) { RESIZE_ACTION resize_action = NO_RESIZE; int avg_qp_thr1 = 70; int avg_qp_thr2 = 50; + int min_width = 180; + int min_height = 180; + int down_size_on = 1; cpi->resize_scale_num = 1; cpi->resize_scale_den = 1; // Don't resize on key frame; reset the counters on key frame. @@ -1843,6 +1965,21 @@ int vp9_resize_one_pass_cbr(VP9_COMP *cpi) { cpi->resize_count = 0; return 0; } + // Check current frame reslution to avoid generating frames smaller than + // the minimum resolution. + if (ONEHALFONLY_RESIZE) { + if ((cm->width >> 1) < min_width || (cm->height >> 1) < min_height) + down_size_on = 0; + } else { + if (cpi->resize_state == ORIG && + (cm->width * 3 / 4 < min_width || + cm->height * 3 / 4 < min_height)) + return 0; + else if (cpi->resize_state == THREE_QUARTER && + ((cpi->oxcf.width >> 1) < min_width || + (cpi->oxcf.height >> 1) < min_height)) + down_size_on = 0; + } #if CONFIG_VP9_TEMPORAL_DENOISING // If denoiser is on, apply a smaller qp threshold. @@ -1854,7 +1991,7 @@ int vp9_resize_one_pass_cbr(VP9_COMP *cpi) { // Resize based on average buffer underflow and QP over some window. // Ignore samples close to key frame, since QP is usually high after key. - if (cpi->rc.frames_since_key > 1 * cpi->framerate) { + if (cpi->rc.frames_since_key > 2 * cpi->framerate) { const int window = (int)(4 * cpi->framerate); cpi->resize_avg_qp += cm->base_qindex; if (cpi->rc.buffer_level < (int)(30 * rc->optimal_buffer_level / 100)) @@ -1869,7 +2006,7 @@ int vp9_resize_one_pass_cbr(VP9_COMP *cpi) { // down state, i.e. 1/2 or 3/4 of original resolution. // Currently, use a flag to turn 3/4 resizing feature on/off. 
if (cpi->resize_buffer_underflow > (cpi->resize_count >> 2)) { - if (cpi->resize_state == THREE_QUARTER) { + if (cpi->resize_state == THREE_QUARTER && down_size_on) { resize_action = DOWN_ONEHALF; cpi->resize_state = ONE_HALF; } else if (cpi->resize_state == ORIG) { @@ -1955,13 +2092,17 @@ void vp9_avg_source_sad(VP9_COMP *cpi) { VP9_COMMON * const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; rc->high_source_sad = 0; - if (cpi->Last_Source != NULL) { + if (cpi->Last_Source != NULL && + cpi->Last_Source->y_width == cpi->Source->y_width && + cpi->Last_Source->y_height == cpi->Source->y_height) { const uint8_t *src_y = cpi->Source->y_buffer; const int src_ystride = cpi->Source->y_stride; const uint8_t *last_src_y = cpi->Last_Source->y_buffer; const int last_src_ystride = cpi->Last_Source->y_stride; int sbi_row, sbi_col; const BLOCK_SIZE bsize = BLOCK_64X64; + uint32_t min_thresh = 4000; + float thresh = 8.0f; // Loop over sub-sample of frame, and compute average sad over 64x64 blocks. uint64_t avg_sad = 0; int num_samples = 0; @@ -1992,12 +2133,37 @@ void vp9_avg_source_sad(VP9_COMP *cpi) { // between current and the previous frame value(s). Use a minimum threshold // for cases where there is small change from content that is completely // static. - if (avg_sad > VPXMAX(4000, (rc->avg_source_sad << 3)) && + if (cpi->oxcf.rc_mode == VPX_VBR) { + min_thresh = 60000; + thresh = 2.1f; + } + if (avg_sad > + VPXMAX(min_thresh, (unsigned int)(rc->avg_source_sad * thresh)) && rc->frames_since_key > 1) rc->high_source_sad = 1; else rc->high_source_sad = 0; - rc->avg_source_sad = (rc->avg_source_sad + avg_sad) >> 1; + if (avg_sad > 0 || cpi->oxcf.rc_mode == VPX_CBR) + rc->avg_source_sad = (3 * rc->avg_source_sad + avg_sad) >> 2; + // For VBR, under scene change/high content change, force golden refresh. 
+ if (cpi->oxcf.rc_mode == VPX_VBR && + rc->high_source_sad && + rc->frames_to_key > 3 && + rc->count_last_scene_change > 4 && + cpi->ext_refresh_frame_flags_pending == 0) { + int target; + cpi->refresh_golden_frame = 1; + rc->gfu_boost = DEFAULT_GF_BOOST >> 1; + rc->baseline_gf_interval = VPXMIN(20, + VPXMAX(10, rc->baseline_gf_interval)); + adjust_gf_key_frame(cpi); + rc->frames_till_gf_update_due = rc->baseline_gf_interval; + target = calc_pframe_target_size_one_pass_vbr(cpi); + vp9_rc_set_frame_target(cpi, target); + rc->count_last_scene_change = 0; + } else { + rc->count_last_scene_change++; + } } } diff --git a/libvpx/vp9/encoder/vp9_ratectrl.h b/libvpx/vp9/encoder/vp9_ratectrl.h index 136fd3e78..7024bcfa9 100644 --- a/libvpx/vp9/encoder/vp9_ratectrl.h +++ b/libvpx/vp9/encoder/vp9_ratectrl.h @@ -26,6 +26,7 @@ extern "C" { #define MIN_GF_INTERVAL 4 #define MAX_GF_INTERVAL 16 +#define FIXED_GF_INTERVAL 8 // Used in some testing modes only #define ONEHALFONLY_RESIZE 0 typedef enum { @@ -160,6 +161,8 @@ typedef struct { uint64_t avg_source_sad; int high_source_sad; + int count_last_scene_change; + int avg_frame_low_motion; } RATE_CONTROL; struct VP9_COMP; diff --git a/libvpx/vp9/encoder/vp9_rd.c b/libvpx/vp9/encoder/vp9_rd.c index b085c7a0c..91b291187 100644 --- a/libvpx/vp9/encoder/vp9_rd.c +++ b/libvpx/vp9/encoder/vp9_rd.c @@ -41,7 +41,6 @@ #include "vp9/encoder/vp9_tokenize.h" #define RD_THRESH_POW 1.25 -#define RD_MULT_EPB_RATIO 64 // Factor to weigh the rate for switchable interp filters. 
#define SWITCHABLE_INTERP_RATE_FACTOR 1 @@ -76,10 +75,12 @@ static void fill_mode_costs(VP9_COMP *cpi) { vp9_intra_mode_tree); vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree); - vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME], - vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree); - vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME], - fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree); + for (i = 0; i < INTRA_MODES; ++i) { + vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME][i], + vp9_kf_uv_mode_prob[i], vp9_intra_mode_tree); + vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME][i], + fc->uv_mode_prob[i], vp9_intra_mode_tree); + } for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) vp9_cost_tokens(cpi->switchable_interp_costs[i], @@ -277,8 +278,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) { rd->RDDIV = RDDIV_BITS; // In bits (to multiply D by 128). rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q); - x->errorperbit = rd->RDMULT / RD_MULT_EPB_RATIO; - x->errorperbit += (x->errorperbit == 0); + set_error_per_bit(x, rd->RDMULT); x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL && cm->frame_type != KEY_FRAME) ? 0 : 1; @@ -286,29 +286,37 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) { set_block_thresholds(cm, rd); set_partition_probs(cm, xd); - if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) - fill_token_costs(x->token_costs, cm->fc->coef_probs); - - if (cpi->sf.partition_search_type != VAR_BASED_PARTITION || - cm->frame_type == KEY_FRAME) { - for (i = 0; i < PARTITION_CONTEXTS; ++i) - vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(xd, i), - vp9_partition_tree); - } + if (cpi->oxcf.pass == 1) { + if (!frame_is_intra_only(cm)) + vp9_build_nmv_cost_table( + x->nmvjointcost, + cm->allow_high_precision_mv ? 
x->nmvcost_hp : x->nmvcost, + &cm->fc->nmvc, cm->allow_high_precision_mv); + } else { + if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) + fill_token_costs(x->token_costs, cm->fc->coef_probs); + + if (cpi->sf.partition_search_type != VAR_BASED_PARTITION || + cm->frame_type == KEY_FRAME) { + for (i = 0; i < PARTITION_CONTEXTS; ++i) + vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(xd, i), + vp9_partition_tree); + } - if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 || - cm->frame_type == KEY_FRAME) { - fill_mode_costs(cpi); + if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 || + cm->frame_type == KEY_FRAME) { + fill_mode_costs(cpi); - if (!frame_is_intra_only(cm)) { - vp9_build_nmv_cost_table(x->nmvjointcost, - cm->allow_high_precision_mv ? x->nmvcost_hp - : x->nmvcost, - &cm->fc->nmvc, cm->allow_high_precision_mv); + if (!frame_is_intra_only(cm)) { + vp9_build_nmv_cost_table( + x->nmvjointcost, + cm->allow_high_precision_mv ? 
x->nmvcost_hp : x->nmvcost, + &cm->fc->nmvc, cm->allow_high_precision_mv); - for (i = 0; i < INTER_MODE_CONTEXTS; ++i) - vp9_cost_tokens((int *)cpi->inter_mode_cost[i], - cm->fc->inter_mode_probs[i], vp9_inter_mode_tree); + for (i = 0; i < INTER_MODE_CONTEXTS; ++i) + vp9_cost_tokens((int *)cpi->inter_mode_cost[i], + cm->fc->inter_mode_probs[i], vp9_inter_mode_tree); + } } } } @@ -341,6 +349,7 @@ static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) { 38, 28, 21, 16, 12, 10, 8, 6, 5, 3, 2, 1, 1, 1, 0, 0, }; + // Normalized distortion: // This table models the normalized distortion for a Laplacian source // with given variance when quantized with a uniform quantizer @@ -407,7 +416,7 @@ void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2, (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var; const int xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10); model_rd_norm(xsq_q10, &r_q10, &d_q10); - *rate = ((r_q10 << n_log2) + 2) >> 2; + *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - VP9_PROB_COST_SHIFT); *dist = (var * (int64_t)d_q10 + 512) >> 10; } } @@ -555,10 +564,10 @@ YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi, } int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) { - const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + const MODE_INFO *const mi = xd->mi[0]; const int ctx = vp9_get_pred_context_switchable_interp(xd); return SWITCHABLE_INTERP_RATE_FACTOR * - cpi->switchable_interp_costs[ctx][mbmi->interp_filter]; + cpi->switchable_interp_costs[ctx][mi->interp_filter]; } void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) { diff --git a/libvpx/vp9/encoder/vp9_rd.h b/libvpx/vp9/encoder/vp9_rd.h index 28385c981..9b8e2732c 100644 --- a/libvpx/vp9/encoder/vp9_rd.h +++ b/libvpx/vp9/encoder/vp9_rd.h @@ -17,15 +17,17 @@ #include "vp9/encoder/vp9_block.h" #include "vp9/encoder/vp9_context_tree.h" +#include "vp9/encoder/vp9_cost.h" #ifdef __cplusplus extern "C" { #endif #define RDDIV_BITS 7 
+#define RD_EPB_SHIFT 6 #define RDCOST(RM, DM, R, D) \ - (((128 + ((int64_t)R) * (RM)) >> 8) + (D << DM)) + (ROUND_POWER_OF_TWO(((int64_t)R) * (RM), VP9_PROB_COST_SHIFT) + (D << DM)) #define QIDX_SKIP_THRESH 115 #define MV_COST_WEIGHT 108 @@ -167,6 +169,11 @@ static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh, return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX; } +static INLINE void set_error_per_bit(MACROBLOCK *x, int rdmult) { + x->errorperbit = rdmult >> RD_EPB_SHIFT; + x->errorperbit += (x->errorperbit == 0); +} + void vp9_mv_pred(struct VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer, int ref_y_stride, int ref_frame, BLOCK_SIZE block_size); @@ -181,6 +188,15 @@ void vp9_setup_pred_block(const MACROBLOCKD *xd, int vp9_get_intra_cost_penalty(int qindex, int qdelta, vpx_bit_depth_t bit_depth); +unsigned int vp9_get_sby_perpixel_variance(struct VP9_COMP *cpi, + const struct buf_2d *ref, + BLOCK_SIZE bs); +#if CONFIG_VP9_HIGHBITDEPTH +unsigned int vp9_high_get_sby_perpixel_variance(struct VP9_COMP *cpi, + const struct buf_2d *ref, + BLOCK_SIZE bs, int bd); +#endif + #ifdef __cplusplus } // extern "C" #endif diff --git a/libvpx/vp9/encoder/vp9_rdopt.c b/libvpx/vp9/encoder/vp9_rdopt.c index 4f3a06e99..e65e05112 100644 --- a/libvpx/vp9/encoder/vp9_rdopt.c +++ b/libvpx/vp9/encoder/vp9_rdopt.c @@ -165,7 +165,7 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, int i; int64_t rate_sum = 0; int64_t dist_sum = 0; - const int ref = xd->mi[0]->mbmi.ref_frame[0]; + const int ref = xd->mi[0]->ref_frame[0]; unsigned int sse; unsigned int var = 0; unsigned int sum_sse = 0; @@ -248,7 +248,7 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, int quantizer = (pd->dequant[1] >> dequant_shift); if (quantizer < 120) - rate = (square_error * (280 - quantizer)) >> 8; + rate = (square_error * (280 - quantizer)) >> (16 - VP9_PROB_COST_SHIFT); else rate = 0; dist = (square_error * quantizer) >> 8; @@ -361,73 +361,96 @@ 
static int cost_coeffs(MACROBLOCK *x, const int16_t *scan, const int16_t *nb, int use_fast_coef_costing) { MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + MODE_INFO *mi = xd->mi[0]; const struct macroblock_plane *p = &x->plane[plane]; const PLANE_TYPE type = get_plane_type(plane); const int16_t *band_count = &band_counts[tx_size][1]; const int eob = p->eobs[block]; const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = - x->token_costs[tx_size][type][is_inter_block(mbmi)]; + x->token_costs[tx_size][type][is_inter_block(mi)]; uint8_t token_cache[32 * 32]; int pt = combine_entropy_contexts(*A, *L); int c, cost; #if CONFIG_VP9_HIGHBITDEPTH - const int16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd); + const int *cat6_high_cost = vp9_get_high_cost_table(xd->bd); #else - const int16_t *cat6_high_cost = vp9_get_high_cost_table(8); + const int *cat6_high_cost = vp9_get_high_cost_table(8); #endif // Check for consistency of tx_size with mode info - assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size : - get_uv_tx_size(mbmi, &xd->plane[plane]) == tx_size); + assert(type == PLANE_TYPE_Y ? 
mi->tx_size == tx_size : + get_uv_tx_size(mi, &xd->plane[plane]) == tx_size); if (eob == 0) { // single eob token cost = token_costs[0][0][pt][EOB_TOKEN]; c = 0; } else { - int band_left = *band_count++; - - // dc token - int v = qcoeff[0]; - int16_t prev_t; - EXTRABIT e; - vp9_get_token_extra(v, &prev_t, &e); - cost = (*token_costs)[0][pt][prev_t] + - vp9_get_cost(prev_t, e, cat6_high_cost); - - token_cache[0] = vp9_pt_energy_class[prev_t]; - ++token_costs; - - // ac tokens - for (c = 1; c < eob; c++) { - const int rc = scan[c]; - int16_t t; - - v = qcoeff[rc]; - vp9_get_token_extra(v, &t, &e); - if (use_fast_coef_costing) { - cost += (*token_costs)[!prev_t][!prev_t][t] + - vp9_get_cost(t, e, cat6_high_cost); - } else { - pt = get_coef_context(nb, token_cache, c); - cost += (*token_costs)[!prev_t][pt][t] + - vp9_get_cost(t, e, cat6_high_cost); - token_cache[rc] = vp9_pt_energy_class[t]; - } - prev_t = t; - if (!--band_left) { - band_left = *band_count++; - ++token_costs; + if (use_fast_coef_costing) { + int band_left = *band_count++; + + // dc token + int v = qcoeff[0]; + int16_t prev_t; + cost = vp9_get_token_cost(v, &prev_t, cat6_high_cost); + cost += (*token_costs)[0][pt][prev_t]; + + token_cache[0] = vp9_pt_energy_class[prev_t]; + ++token_costs; + + // ac tokens + for (c = 1; c < eob; c++) { + const int rc = scan[c]; + int16_t t; + + v = qcoeff[rc]; + cost += vp9_get_token_cost(v, &t, cat6_high_cost); + cost += (*token_costs)[!prev_t][!prev_t][t]; + prev_t = t; + if (!--band_left) { + band_left = *band_count++; + ++token_costs; + } } - } - // eob token - if (band_left) { - if (use_fast_coef_costing) { + // eob token + if (band_left) cost += (*token_costs)[0][!prev_t][EOB_TOKEN]; - } else { + + } else { // !use_fast_coef_costing + int band_left = *band_count++; + + // dc token + int v = qcoeff[0]; + int16_t tok; + unsigned int (*tok_cost_ptr)[COEFF_CONTEXTS][ENTROPY_TOKENS]; + cost = vp9_get_token_cost(v, &tok, cat6_high_cost); + cost += 
(*token_costs)[0][pt][tok]; + + token_cache[0] = vp9_pt_energy_class[tok]; + ++token_costs; + + tok_cost_ptr = &((*token_costs)[!tok]); + + // ac tokens + for (c = 1; c < eob; c++) { + const int rc = scan[c]; + + v = qcoeff[rc]; + cost += vp9_get_token_cost(v, &tok, cat6_high_cost); + pt = get_coef_context(nb, token_cache, c); + cost += (*tok_cost_ptr)[pt][tok]; + token_cache[rc] = vp9_pt_energy_class[tok]; + if (!--band_left) { + band_left = *band_count++; + ++token_costs; + } + tok_cost_ptr = &((*token_costs)[!tok]); + } + + // eob token + if (band_left) { pt = get_coef_context(nb, token_cache, c); cost += (*token_costs)[0][pt][EOB_TOKEN]; } @@ -461,7 +484,7 @@ static void dist_block(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size, #endif // CONFIG_VP9_HIGHBITDEPTH *out_sse = this_sse >> shift; - if (x->skip_encode && !is_inter_block(&xd->mi[0]->mbmi)) { + if (x->skip_encode && !is_inter_block(xd->mi[0])) { // TODO(jingning): tune the model to better capture the distortion. int64_t p = (pd->dequant[1] * pd->dequant[1] * (1 << ss_txfrm_size)) >> @@ -491,7 +514,7 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, struct rdcost_block_args *args = arg; MACROBLOCK *const x = args->x; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + MODE_INFO *const mi = xd->mi[0]; int64_t rd1, rd2, rd; int rate; int64_t dist; @@ -500,8 +523,8 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, if (args->exit_early) return; - if (!is_inter_block(mbmi)) { - struct encode_b_args arg = {x, NULL, &mbmi->skip}; + if (!is_inter_block(mi)) { + struct encode_b_args arg = {x, NULL, &mi->skip}; vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &arg); dist_block(x, plane, block, tx_size, &dist, &sse); } else if (max_txsize_lookup[plane_bsize] == tx_size) { @@ -588,7 +611,7 @@ static void txfm_rd_in_plane(MACROBLOCK *x, args.skippable = 1; if (plane == 0) - xd->mi[0]->mbmi.tx_size = tx_size; + 
xd->mi[0]->tx_size = tx_size; vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left); @@ -618,13 +641,13 @@ static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x, VP9_COMMON *const cm = &cpi->common; const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode]; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + MODE_INFO *const mi = xd->mi[0]; - mbmi->tx_size = VPXMIN(max_tx_size, largest_tx_size); + mi->tx_size = VPXMIN(max_tx_size, largest_tx_size); txfm_rd_in_plane(x, rate, distortion, skip, sse, ref_best_rd, 0, bs, - mbmi->tx_size, cpi->sf.use_fast_coef_costing); + mi->tx_size, cpi->sf.use_fast_coef_costing); } static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, @@ -637,7 +660,7 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, const TX_SIZE max_tx_size = max_txsize_lookup[bs]; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + MODE_INFO *const mi = xd->mi[0]; vpx_prob skip_prob = vp9_get_skip_prob(cm, xd); int r[TX_SIZES][2], s[TX_SIZES]; int64_t d[TX_SIZES], sse[TX_SIZES]; @@ -684,7 +707,7 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, if (d[n] == INT64_MAX || r[n][0] == INT_MAX) { rd[n][0] = rd[n][1] = INT64_MAX; } else if (s[n]) { - if (is_inter_block(mbmi)) { + if (is_inter_block(mi)) { rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]); r[n][1] -= r_tx_size; } else { @@ -696,7 +719,7 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]); } - if (is_inter_block(mbmi) && !xd->lossless && !s[n] && sse[n] != INT64_MAX) { + if (is_inter_block(mi) && !xd->lossless && !s[n] && sse[n] != INT64_MAX) { rd[n][0] = VPXMIN(rd[n][0], RDCOST(x->rdmult, x->rddiv, s1, sse[n])); rd[n][1] = VPXMIN(rd[n][1], RDCOST(x->rdmult, x->rddiv, s1, sse[n])); } @@ -713,12 +736,12 @@ static void 
choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, best_rd = rd[n][1]; } } - mbmi->tx_size = best_tx; + mi->tx_size = best_tx; - *distortion = d[mbmi->tx_size]; - *rate = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT]; - *skip = s[mbmi->tx_size]; - *psse = sse[mbmi->tx_size]; + *distortion = d[mi->tx_size]; + *rate = r[mi->tx_size][cm->tx_mode == TX_MODE_SELECT]; + *skip = s[mi->tx_size]; + *psse = sse[mi->tx_size]; } static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, @@ -729,7 +752,7 @@ static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t sse; int64_t *ret_sse = psse ? psse : &sse; - assert(bs == xd->mi[0]->mbmi.sb_type); + assert(bs == xd->mi[0]->sb_type); if (cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless) { choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd, @@ -787,10 +810,10 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, #if CONFIG_VP9_HIGHBITDEPTH uint16_t best_dst16[8 * 8]; #endif + memcpy(ta, a, num_4x4_blocks_wide * sizeof(a[0])); + memcpy(tl, l, num_4x4_blocks_high * sizeof(l[0])); - memcpy(ta, a, sizeof(ta)); - memcpy(tl, l, sizeof(tl)); - xd->mi[0]->mbmi.tx_size = TX_4X4; + xd->mi[0]->tx_size = TX_4X4; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { @@ -810,8 +833,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, continue; } - memcpy(tempa, ta, sizeof(ta)); - memcpy(templ, tl, sizeof(tl)); + memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0])); + memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0])); for (idy = 0; idy < num_4x4_blocks_high; ++idy) { for (idx = 0; idx < num_4x4_blocks_wide; ++idx) { @@ -874,8 +897,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, *bestdistortion = distortion; best_rd = this_rd; *best_mode = mode; - memcpy(a, tempa, sizeof(tempa)); - memcpy(l, templ, sizeof(templ)); + memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0])); + memcpy(l, 
templ, num_4x4_blocks_high * sizeof(templ[0])); for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) { memcpy(best_dst16 + idy * 8, CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride), @@ -914,8 +937,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, continue; } - memcpy(tempa, ta, sizeof(ta)); - memcpy(templ, tl, sizeof(tl)); + memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0])); + memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0])); for (idy = 0; idy < num_4x4_blocks_high; ++idy) { for (idx = 0; idx < num_4x4_blocks_wide; ++idx) { @@ -976,8 +999,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, *bestdistortion = distortion; best_rd = this_rd; *best_mode = mode; - memcpy(a, tempa, sizeof(tempa)); - memcpy(l, templ, sizeof(templ)); + memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0])); + memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0])); for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) memcpy(best_dst + idy * 8, dst_init + idy * dst_stride, num_4x4_blocks_wide * 4); @@ -1005,7 +1028,7 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb, MODE_INFO *const mic = xd->mi[0]; const MODE_INFO *above_mi = xd->above_mi; const MODE_INFO *left_mi = xd->left_mi; - const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; + const BLOCK_SIZE bsize = xd->mi[0]->sb_type; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; int idx, idy; @@ -1013,12 +1036,8 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb, int64_t total_distortion = 0; int tot_rate_y = 0; int64_t total_rd = 0; - ENTROPY_CONTEXT t_above[4], t_left[4]; const int *bmode_costs = cpi->mbmode_cost; - memcpy(t_above, xd->plane[0].above_context, sizeof(t_above)); - memcpy(t_left, xd->plane[0].left_context, sizeof(t_left)); - // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block. 
for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { @@ -1034,8 +1053,11 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb, } this_rd = rd_pick_intra4x4block(cpi, mb, idy, idx, &best_mode, - bmode_costs, t_above + idx, t_left + idy, + bmode_costs, + xd->plane[0].above_context + idx, + xd->plane[0].left_context + idy, &r, &ry, &d, bsize, best_rd - total_rd); + if (this_rd >= best_rd - total_rd) return INT64_MAX; @@ -1058,7 +1080,7 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb, *rate = cost; *rate_y = tot_rate_y; *distortion = total_distortion; - mic->mbmi.mode = mic->bmi[3].as_mode; + mic->mode = mic->bmi[3].as_mode; return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion); } @@ -1095,7 +1117,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, break; } - mic->mbmi.mode = mode; + mic->mode = mode; super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL, bsize, best_rd); @@ -1109,7 +1131,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, if (this_rd < best_rd) { mode_selected = mode; best_rd = this_rd; - best_tx = mic->mbmi.tx_size; + best_tx = mic->tx_size; *rate = this_rate; *rate_tokenonly = this_rate_tokenonly; *distortion = this_distortion; @@ -1117,8 +1139,8 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, } } - mic->mbmi.mode = mode_selected; - mic->mbmi.tx_size = best_tx; + mic->mode = mode_selected; + mic->tx_size = best_tx; return best_rd; } @@ -1130,8 +1152,8 @@ static int super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x, int64_t *sse, BLOCK_SIZE bsize, int64_t ref_best_rd) { MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; - const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]); + MODE_INFO *const mi = xd->mi[0]; + const TX_SIZE uv_tx_size = get_uv_tx_size(mi, &xd->plane[1]); int plane; int pnrate = 0, pnskip = 1; 
int64_t pndist = 0, pnsse = 0; @@ -1140,7 +1162,7 @@ static int super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x, if (ref_best_rd < 0) is_cost_valid = 0; - if (is_inter_block(mbmi) && is_cost_valid) { + if (is_inter_block(mi) && is_cost_valid) { int plane; for (plane = 1; plane < MAX_MB_PLANE; ++plane) vp9_subtract_plane(x, bsize, plane); @@ -1192,14 +1214,20 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, for (mode = DC_PRED; mode <= TM_PRED; ++mode) { if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode))) continue; +#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH + if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && + (xd->above_mi == NULL || xd->left_mi == NULL) && need_top_left[mode]) + continue; +#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH - xd->mi[0]->mbmi.uv_mode = mode; + xd->mi[0]->uv_mode = mode; if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, &this_sse, bsize, best_rd)) continue; this_rate = this_rate_tokenonly + - cpi->intra_uv_mode_cost[cpi->common.frame_type][mode]; + cpi->intra_uv_mode_cost[cpi->common.frame_type] + [xd->mi[0]->mode][mode]; this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); if (this_rd < best_rd) { @@ -1214,7 +1242,7 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, } } - xd->mi[0]->mbmi.uv_mode = mode_selected; + xd->mi[0]->uv_mode = mode_selected; return best_rd; } @@ -1225,11 +1253,13 @@ static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x, const VP9_COMMON *cm = &cpi->common; int64_t unused; - x->e_mbd.mi[0]->mbmi.uv_mode = DC_PRED; + x->e_mbd.mi[0]->uv_mode = DC_PRED; memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm)); super_block_uvrd(cpi, x, rate_tokenonly, distortion, skippable, &unused, bsize, INT64_MAX); - *rate = *rate_tokenonly + cpi->intra_uv_mode_cost[cm->frame_type][DC_PRED]; + *rate = *rate_tokenonly + + cpi->intra_uv_mode_cost[cm->frame_type] + 
[x->e_mbd.mi[0]->mode][DC_PRED]; return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); } @@ -1251,7 +1281,7 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, MACROBLOCK *const x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size); } - *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode; + *mode_uv = x->e_mbd.mi[0]->uv_mode; } static int cost_mv_ref(const VP9_COMP *cpi, PREDICTION_MODE mode, @@ -1267,31 +1297,30 @@ static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, int_mv seg_mvs[MAX_REF_FRAMES], int_mv *best_ref_mv[2], const int *mvjcost, int *mvcost[2]) { - MODE_INFO *const mic = xd->mi[0]; - const MB_MODE_INFO *const mbmi = &mic->mbmi; + MODE_INFO *const mi = xd->mi[0]; const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; int thismvcost = 0; int idx, idy; - const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type]; - const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type]; - const int is_compound = has_second_ref(mbmi); + const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mi->sb_type]; + const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mi->sb_type]; + const int is_compound = has_second_ref(mi); switch (mode) { case NEWMV: - this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int; + this_mv[0].as_int = seg_mvs[mi->ref_frame[0]].as_int; thismvcost += vp9_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv, mvjcost, mvcost, MV_COST_WEIGHT_SUB); if (is_compound) { - this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int; + this_mv[1].as_int = seg_mvs[mi->ref_frame[1]].as_int; thismvcost += vp9_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv, mvjcost, mvcost, MV_COST_WEIGHT_SUB); } break; case NEARMV: case NEARESTMV: - this_mv[0].as_int = frame_mv[mode][mbmi->ref_frame[0]].as_int; + this_mv[0].as_int = frame_mv[mode][mi->ref_frame[0]].as_int; if (is_compound) - this_mv[1].as_int = frame_mv[mode][mbmi->ref_frame[1]].as_int; + this_mv[1].as_int = 
frame_mv[mode][mi->ref_frame[1]].as_int; break; case ZEROMV: this_mv[0].as_int = 0; @@ -1302,17 +1331,17 @@ static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, break; } - mic->bmi[i].as_mv[0].as_int = this_mv[0].as_int; + mi->bmi[i].as_mv[0].as_int = this_mv[0].as_int; if (is_compound) - mic->bmi[i].as_mv[1].as_int = this_mv[1].as_int; + mi->bmi[i].as_mv[1].as_int = this_mv[1].as_int; - mic->bmi[i].as_mode = mode; + mi->bmi[i].as_mode = mode; for (idy = 0; idy < num_4x4_blocks_high; ++idy) for (idx = 0; idx < num_4x4_blocks_wide; ++idx) - memmove(&mic->bmi[i + idy * 2 + idx], &mic->bmi[i], sizeof(mic->bmi[i])); + memmove(&mi->bmi[i + idy * 2 + idx], &mi->bmi[i], sizeof(mi->bmi[i])); - return cost_mv_ref(cpi, mode, mbmi_ext->mode_context[mbmi->ref_frame[0]]) + + return cost_mv_ref(cpi, mode, mbmi_ext->mode_context[mi->ref_frame[0]]) + thismvcost; } @@ -1330,7 +1359,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, struct macroblockd_plane *const pd = &xd->plane[0]; struct macroblock_plane *const p = &x->plane[0]; MODE_INFO *const mi = xd->mi[0]; - const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd); + const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->sb_type, pd); const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize]; int idx, idy; @@ -1342,15 +1371,29 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, int64_t thisdistortion = 0, thissse = 0; int thisrate = 0, ref; const scan_order *so = &vp9_default_scan_orders[TX_4X4]; - const int is_compound = has_second_ref(&mi->mbmi); - const InterpKernel *kernel = vp9_filter_kernels[mi->mbmi.interp_filter]; + const int is_compound = has_second_ref(mi); + const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter]; for (ref = 0; ref < 1 + is_compound; ++ref) { - const uint8_t *pre = &pd->pre[ref].buf[vp9_raster_block_offset(BLOCK_8X8, i, - pd->pre[ref].stride)]; + const int bw = 
b_width_log2_lookup[BLOCK_8X8]; + const int h = 4 * (i >> bw); + const int w = 4 * (i & ((1 << bw) - 1)); + const struct scale_factors *sf = &xd->block_refs[ref]->sf; + int y_stride = pd->pre[ref].stride; + uint8_t *pre = pd->pre[ref].buf + (h * pd->pre[ref].stride + w); + + if (vp9_is_scaled(sf)) { + const int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)); + const int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)); + + y_stride = xd->block_refs[ref]->buf->y_stride; + pre = xd->block_refs[ref]->buf->y_buffer; + pre += scaled_buffer_offset(x_start + w, y_start + h, + y_stride, sf); + } #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - vp9_highbd_build_inter_predictor(pre, pd->pre[ref].stride, + vp9_highbd_build_inter_predictor(pre, y_stride, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv, &xd->block_refs[ref]->sf, width, height, @@ -1358,7 +1401,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2), xd->bd); } else { - vp9_build_inter_predictor(pre, pd->pre[ref].stride, + vp9_build_inter_predictor(pre, y_stride, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv, &xd->block_refs[ref]->sf, width, height, ref, @@ -1367,7 +1410,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, mi_row * MI_SIZE + 4 * (i / 2)); } #else - vp9_build_inter_predictor(pre, pd->pre[ref].stride, + vp9_build_inter_predictor(pre, y_stride, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv, &xd->block_refs[ref]->sf, width, height, ref, @@ -1467,7 +1510,7 @@ static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) { } static INLINE void mi_buf_shift(MACROBLOCK *x, int i) { - MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0]->mbmi; + MODE_INFO *const mi = x->e_mbd.mi[0]; struct macroblock_plane *const p = &x->plane[0]; struct macroblockd_plane *const pd = &x->e_mbd.plane[0]; @@ -1476,17 +1519,17 @@ static INLINE void mi_buf_shift(MACROBLOCK *x, int i) { 
assert(((intptr_t)pd->pre[0].buf & 0x7) == 0); pd->pre[0].buf = &pd->pre[0].buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->pre[0].stride)]; - if (has_second_ref(mbmi)) + if (has_second_ref(mi)) pd->pre[1].buf = &pd->pre[1].buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->pre[1].stride)]; } static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src, struct buf_2d orig_pre[2]) { - MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi; + MODE_INFO *mi = x->e_mbd.mi[0]; x->plane[0].src = orig_src; x->e_mbd.plane[0].pre[0] = orig_pre[0]; - if (has_second_ref(mbmi)) + if (has_second_ref(mi)) x->e_mbd.plane[0].pre[1] = orig_pre[1]; } @@ -1541,20 +1584,20 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, const int pw = 4 * num_4x4_blocks_wide_lookup[bsize]; const int ph = 4 * num_4x4_blocks_high_lookup[bsize]; MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; - const int refs[2] = {mbmi->ref_frame[0], - mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]}; + MODE_INFO *mi = xd->mi[0]; + const int refs[2] = {mi->ref_frame[0], + mi->ref_frame[1] < 0 ? 0 : mi->ref_frame[1]}; int_mv ref_mv[2]; int ite, ref; - const InterpKernel *kernel = vp9_filter_kernels[mbmi->interp_filter]; + const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter]; struct scale_factors sf; // Do joint motion search in compound mode to get more accurate mv. struct buf_2d backup_yv12[2][MAX_MB_PLANE]; - int last_besterr[2] = {INT_MAX, INT_MAX}; + uint32_t last_besterr[2] = {UINT32_MAX, UINT32_MAX}; const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = { - vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]), - vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[1]) + vp9_get_scaled_ref_frame(cpi, mi->ref_frame[0]), + vp9_get_scaled_ref_frame(cpi, mi->ref_frame[1]) }; // Prediction buffer from second frame. @@ -1597,7 +1640,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, // and break out of the search loop if it couldn't find a better mv. 
for (ite = 0; ite < 4; ite++) { struct buf_2d ref_yv12[2]; - int bestsme = INT_MAX; + uint32_t bestsme = UINT32_MAX; int sadpb = x->sadperbit16; MV tmp_mv; int search_range = 3; @@ -1662,7 +1705,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, search_range, &cpi->fn_ptr[bsize], &ref_mv[id].as_mv, second_pred); - if (bestsme < INT_MAX) + if (bestsme < UINT32_MAX) bestsme = vp9_get_mvpred_av_var(x, &tmp_mv, &ref_mv[id].as_mv, second_pred, &cpi->fn_ptr[bsize], 1); @@ -1671,9 +1714,9 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, x->mv_row_min = tmp_row_min; x->mv_row_max = tmp_row_max; - if (bestsme < INT_MAX) { - int dis; /* TODO: use dis in distortion calculation later. */ - unsigned int sse; + if (bestsme < UINT32_MAX) { + uint32_t dis; /* TODO: use dis in distortion calculation later. */ + uint32_t sse; bestsme = cpi->find_fractional_mv_step( x, &tmp_mv, &ref_mv[id].as_mv, @@ -1730,7 +1773,6 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi = bsi_buf + filter_idx; MACROBLOCKD *xd = &x->e_mbd; MODE_INFO *mi = xd->mi[0]; - MB_MODE_INFO *mbmi = &mi->mbmi; int mode_idx; int k, br = 0, idx, idy; int64_t bd = 0, block_sse = 0; @@ -1742,13 +1784,14 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, int64_t this_segment_rd = 0; int label_mv_thresh; int segmentyrate = 0; - const BLOCK_SIZE bsize = mbmi->sb_type; + const BLOCK_SIZE bsize = mi->sb_type; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; ENTROPY_CONTEXT t_above[2], t_left[2]; int subpelmv = 1, have_ref = 0; - const int has_second_rf = has_second_ref(mbmi); - const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize]; + SPEED_FEATURES *const sf = &cpi->sf; + const int has_second_rf = has_second_ref(mi); + const int inter_mode_mask = sf->inter_mode_mask[bsize]; MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; vp9_zero(*bsi); @@ 
-1784,7 +1827,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, int ref; for (ref = 0; ref < 1 + has_second_rf; ++ref) { - const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref]; + const MV_REFERENCE_FRAME frame = mi->ref_frame[ref]; frame_mv[ZEROMV][frame].as_int = 0; vp9_append_sub8x8_mvs_for_idx(cm, xd, i, ref, mi_row, mi_col, &frame_mv[NEARESTMV][frame], @@ -1803,7 +1846,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, continue; if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv, - this_mode, mbmi->ref_frame)) + this_mode, mi->ref_frame)) continue; memcpy(orig_pre, pd->pre, sizeof(orig_pre)); @@ -1814,10 +1857,10 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, // motion search for newmv (single predictor case only) if (!has_second_rf && this_mode == NEWMV && - seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) { + seg_mvs[i][mi->ref_frame[0]].as_int == INVALID_MV) { MV *const new_mv = &mode_mv[NEWMV][0].as_mv; int step_param = 0; - int thissme, bestsme = INT_MAX; + uint32_t bestsme = UINT32_MAX; int sadpb = x->sadperbit4; MV mvp_full; int max_mv; @@ -1837,12 +1880,12 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, } } if (i == 0) - max_mv = x->max_mv_context[mbmi->ref_frame[0]]; + max_mv = x->max_mv_context[mi->ref_frame[0]]; else max_mv = VPXMAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3; - if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) { + if (sf->mv.auto_mv_step_size && cm->show_frame) { // Take wtd average of the step_params based on the last frame's // max mv magnitude and the best ref mvs of the current block for // the given reference. 
@@ -1855,9 +1898,9 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, mvp_full.row = bsi->mvp.as_mv.row >> 3; mvp_full.col = bsi->mvp.as_mv.col >> 3; - if (cpi->sf.adaptive_motion_search) { - mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].row >> 3; - mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].col >> 3; + if (sf->adaptive_motion_search) { + mvp_full.row = x->pred_mv[mi->ref_frame[0]].row >> 3; + mvp_full.col = x->pred_mv[mi->ref_frame[0]].col >> 3; step_param = VPXMAX(step_param, 8); } @@ -1868,77 +1911,56 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, bestsme = vp9_full_pixel_search( cpi, x, bsize, &mvp_full, step_param, sadpb, - cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL, + sf->mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL, &bsi->ref_mv[0]->as_mv, new_mv, INT_MAX, 1); - // Should we do a full search (best quality only) - if (cpi->oxcf.mode == BEST) { - int_mv *const best_mv = &mi->bmi[i].as_mv[0]; - /* Check if mvp_full is within the range. 
*/ - clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, - x->mv_row_min, x->mv_row_max); - thissme = cpi->full_search_sad(x, &mvp_full, - sadpb, 16, &cpi->fn_ptr[bsize], - &bsi->ref_mv[0]->as_mv, - &best_mv->as_mv); - cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX; - if (thissme < bestsme) { - bestsme = thissme; - *new_mv = best_mv->as_mv; - } else { - // The full search result is actually worse so re-instate the - // previous best vector - best_mv->as_mv = *new_mv; - } - } - - if (bestsme < INT_MAX) { - int distortion; + if (bestsme < UINT32_MAX) { + uint32_t distortion; cpi->find_fractional_mv_step( x, new_mv, &bsi->ref_mv[0]->as_mv, cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], - cpi->sf.mv.subpel_force_stop, - cpi->sf.mv.subpel_iters_per_step, + sf->mv.subpel_force_stop, + sf->mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost, &distortion, - &x->pred_sse[mbmi->ref_frame[0]], + &x->pred_sse[mi->ref_frame[0]], NULL, 0, 0); // save motion search result for use in compound prediction - seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv; + seg_mvs[i][mi->ref_frame[0]].as_mv = *new_mv; } - if (cpi->sf.adaptive_motion_search) - x->pred_mv[mbmi->ref_frame[0]] = *new_mv; + if (sf->adaptive_motion_search) + x->pred_mv[mi->ref_frame[0]] = *new_mv; // restore src pointers mi_buf_restore(x, orig_src, orig_pre); } if (has_second_rf) { - if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV || - seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) + if (seg_mvs[i][mi->ref_frame[1]].as_int == INVALID_MV || + seg_mvs[i][mi->ref_frame[0]].as_int == INVALID_MV) continue; } if (has_second_rf && this_mode == NEWMV && - mbmi->interp_filter == EIGHTTAP) { + mi->interp_filter == EIGHTTAP) { // adjust src pointers mi_buf_shift(x, i); - if (cpi->sf.comp_inter_joint_search_thresh <= bsize) { + if (sf->comp_inter_joint_search_thresh <= bsize) { int rate_mv; joint_motion_search(cpi, x, bsize, frame_mv[this_mode], 
mi_row, mi_col, seg_mvs[i], &rate_mv); - seg_mvs[i][mbmi->ref_frame[0]].as_int = - frame_mv[this_mode][mbmi->ref_frame[0]].as_int; - seg_mvs[i][mbmi->ref_frame[1]].as_int = - frame_mv[this_mode][mbmi->ref_frame[1]].as_int; + seg_mvs[i][mi->ref_frame[0]].as_int = + frame_mv[this_mode][mi->ref_frame[0]].as_int; + seg_mvs[i][mi->ref_frame[1]].as_int = + frame_mv[this_mode][mi->ref_frame[1]].as_int; } // restore src pointers mi_buf_restore(x, orig_src, orig_pre); @@ -2080,7 +2102,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < 4; i++) { mode_idx = INTER_OFFSET(bsi->modes[i]); mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int; - if (has_second_ref(mbmi)) + if (has_second_ref(mi)) mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int; x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs; mi->bmi[i].as_mode = bsi->modes[i]; @@ -2094,7 +2116,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, *returnyrate = bsi->segment_yrate; *skippable = vp9_is_skippable_in_plane(x, BLOCK_8X8, 0); *psse = bsi->sse; - mbmi->mode = bsi->modes[3]; + mi->mode = bsi->modes[3]; return bsi->segment_rd; } @@ -2205,7 +2227,7 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, // Gets an initial list of candidate vectors from neighbours and orders them vp9_find_mv_refs(cm, xd, mi, ref_frame, candidates, mi_row, mi_col, - NULL, NULL, mbmi_ext->mode_context); + mbmi_ext->mode_context); // Candidate refinement carried out at encoder and decoder vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates, @@ -2226,13 +2248,13 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int_mv *tmp_mv, int *rate_mv) { MACROBLOCKD *xd = &x->e_mbd; const VP9_COMMON *cm = &cpi->common; - MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + MODE_INFO *mi = xd->mi[0]; struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}}; int bestsme = INT_MAX; int step_param; int sadpb = x->sadperbit16; MV mvp_full; - 
int ref = mbmi->ref_frame[0]; + int ref = mi->ref_frame[0]; MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv; int tmp_col_min = x->mv_col_min; @@ -2324,7 +2346,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, x->mv_row_max = tmp_row_max; if (bestsme < INT_MAX) { - int dis; /* TODO: use dis in distortion calculation later. */ + uint32_t dis; /* TODO: use dis in distortion calculation later. */ cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv, cm->allow_high_precision_mv, x->errorperbit, @@ -2398,14 +2420,14 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int64_t filter_cache[]) { VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + MODE_INFO *mi = xd->mi[0]; MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; - const int is_comp_pred = has_second_ref(mbmi); - const int this_mode = mbmi->mode; + const int is_comp_pred = has_second_ref(mi); + const int this_mode = mi->mode; int_mv *frame_mv = mode_mv[this_mode]; int i; - int refs[2] = { mbmi->ref_frame[0], - (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) }; + int refs[2] = { mi->ref_frame[0], + (mi->ref_frame[1] < 0 ? 
0 : mi->ref_frame[1]) }; int_mv cur_mv[2]; #if CONFIG_VP9_HIGHBITDEPTH DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * 64 * 64]); @@ -2443,10 +2465,10 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (pred_filter_search) { INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE; - if (xd->up_available) - af = xd->mi[-xd->mi_stride]->mbmi.interp_filter; - if (xd->left_available) - lf = xd->mi[-1]->mbmi.interp_filter; + if (xd->above_mi) + af = xd->above_mi->interp_filter; + if (xd->left_mi) + lf = xd->left_mi->interp_filter; if ((this_mode != NEWMV) || (af == lf)) best_filter = af; @@ -2514,7 +2536,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (mv_check_bounds(x, &cur_mv[i].as_mv)) return INT64_MAX; - mbmi->mv[i].as_int = cur_mv[i].as_int; + mi->mv[i].as_int = cur_mv[i].as_int; } // do first prediction into the destination buffer. Do the next @@ -2544,14 +2566,14 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } if (RDCOST(x->rdmult, x->rddiv, *rate2, 0) > ref_best_rd && - mbmi->mode != NEARESTMV) + mi->mode != NEARESTMV) return INT64_MAX; pred_exists = 0; // Are all MVs integer pel for Y and UV - intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv); + intpel_mv = !mv_has_subpel(&mi->mv[0].as_mv); if (is_comp_pred) - intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv); + intpel_mv &= !mv_has_subpel(&mi->mv[1].as_mv); // Search for best switchable filter by checking the variance of // pred error irrespective of whether the filter will be used @@ -2572,7 +2594,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int tmp_skip_sb = 0; int64_t tmp_skip_sse = INT64_MAX; - mbmi->interp_filter = i; + mi->interp_filter = i; rs = vp9_get_switchable_rate(cpi, xd); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); @@ -2597,7 +2619,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if ((cm->interp_filter == SWITCHABLE && (!i || best_needs_copy)) || (cm->interp_filter != SWITCHABLE && - 
(cm->interp_filter == mbmi->interp_filter || + (cm->interp_filter == mi->interp_filter || (i == 0 && intpel_mv)))) { restore_dst_buf(xd, orig_dst, orig_dst_stride); } else { @@ -2634,14 +2656,14 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (newbest) { best_rd = rd; - best_filter = mbmi->interp_filter; + best_filter = mi->interp_filter; if (cm->interp_filter == SWITCHABLE && i && !intpel_mv) best_needs_copy = !best_needs_copy; } if ((cm->interp_filter == SWITCHABLE && newbest) || (cm->interp_filter != SWITCHABLE && - cm->interp_filter == mbmi->interp_filter)) { + cm->interp_filter == mi->interp_filter)) { pred_exists = 1; tmp_rd = best_rd; @@ -2655,7 +2677,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } } // Set the appropriate filter - mbmi->interp_filter = cm->interp_filter != SWITCHABLE ? + mi->interp_filter = cm->interp_filter != SWITCHABLE ? cm->interp_filter : best_filter; rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(cpi, xd) : 0; @@ -2683,7 +2705,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } if (!is_comp_pred) - single_filter[this_mode][refs[0]] = mbmi->interp_filter; + single_filter[this_mode][refs[0]] = mi->interp_filter; if (cpi->sf.adaptive_mode_search) if (is_comp_pred) @@ -2770,8 +2792,8 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, TX_SIZE max_uv_tx_size; x->skip_encode = 0; ctx->skip = 0; - xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME; - xd->mi[0]->mbmi.ref_frame[1] = NONE; + xd->mi[0]->ref_frame[0] = INTRA_FRAME; + xd->mi[0]->ref_frame[1] = NONE; if (bsize >= BLOCK_8X8) { if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, @@ -2788,7 +2810,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, return; } } - max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize, + max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->tx_size, bsize, pd[1].subsampling_x, pd[1].subsampling_y); rd_pick_intra_sbuv_mode(cpi, x, ctx, 
&rate_uv, &rate_uv_tokenonly, @@ -2848,9 +2870,9 @@ static void rd_variance_adjustment(VP9_COMP *cpi, ? (source_variance - recon_variance) : (recon_variance - source_variance); - var_error = (200 * source_variance * recon_variance) / - ((source_variance * source_variance) + - (recon_variance * recon_variance)); + var_error = ((int64_t)200 * source_variance * recon_variance) / + (((int64_t)source_variance * source_variance) + + ((int64_t)recon_variance * recon_variance)); var_error = 100 - var_error; } @@ -2952,12 +2974,12 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, RD_OPT *const rd_opt = &cpi->rd; SPEED_FEATURES *const sf = &cpi->sf; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + MODE_INFO *const mi = xd->mi[0]; MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; const struct segmentation *const seg = &cm->seg; PREDICTION_MODE this_mode; MV_REFERENCE_FRAME ref_frame, second_ref_frame; - unsigned char segment_id = mbmi->segment_id; + unsigned char segment_id = mi->segment_id; int comp_pred, i, k; int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; struct buf_2d yv12_mb[4][MAX_MB_PLANE]; @@ -2971,7 +2993,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, int64_t best_pred_rd[REFERENCE_MODES]; int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS]; int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; - MB_MODE_INFO best_mbmode; + MODE_INFO best_mbmode; int best_mode_skippable = 0; int midx, best_mode_index = -1; unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; @@ -3038,7 +3060,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { if (!(cpi->ref_frame_flags & flag_list[ref_frame])) { // Skip checking missing references in both single and compound reference - // modes. Note that a mode will be skipped iff both reference frames + // modes. Note that a mode will be skipped if both reference frames // are masked out. 
ref_frame_skip_mask[0] |= (1 << ref_frame); ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; @@ -3189,7 +3211,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, const int bsl = mi_width_log2_lookup[bsize]; int cb_partition_search_ctrl = (((mi_row + mi_col) >> bsl) + get_chessboard_index(cm->current_video_frame)) & 0x1; - MB_MODE_INFO *ref_mbmi; + MODE_INFO *ref_mi; int const_motion = 1; int skip_ref_frame = !cb_partition_search_ctrl; MV_REFERENCE_FRAME rf = NONE; @@ -3197,26 +3219,26 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, ref_mv.as_int = INVALID_MV; if ((mi_row - 1) >= tile_info->mi_row_start) { - ref_mv = xd->mi[-xd->mi_stride]->mbmi.mv[0]; - rf = xd->mi[-xd->mi_stride]->mbmi.ref_frame[0]; + ref_mv = xd->mi[-xd->mi_stride]->mv[0]; + rf = xd->mi[-xd->mi_stride]->ref_frame[0]; for (i = 0; i < mi_width; ++i) { - ref_mbmi = &xd->mi[-xd->mi_stride + i]->mbmi; - const_motion &= (ref_mv.as_int == ref_mbmi->mv[0].as_int) && - (ref_frame == ref_mbmi->ref_frame[0]); - skip_ref_frame &= (rf == ref_mbmi->ref_frame[0]); + ref_mi = xd->mi[-xd->mi_stride + i]; + const_motion &= (ref_mv.as_int == ref_mi->mv[0].as_int) && + (ref_frame == ref_mi->ref_frame[0]); + skip_ref_frame &= (rf == ref_mi->ref_frame[0]); } } if ((mi_col - 1) >= tile_info->mi_col_start) { if (ref_mv.as_int == INVALID_MV) - ref_mv = xd->mi[-1]->mbmi.mv[0]; + ref_mv = xd->mi[-1]->mv[0]; if (rf == NONE) - rf = xd->mi[-1]->mbmi.ref_frame[0]; + rf = xd->mi[-1]->ref_frame[0]; for (i = 0; i < mi_height; ++i) { - ref_mbmi = &xd->mi[i * xd->mi_stride - 1]->mbmi; - const_motion &= (ref_mv.as_int == ref_mbmi->mv[0].as_int) && - (ref_frame == ref_mbmi->ref_frame[0]); - skip_ref_frame &= (rf == ref_mbmi->ref_frame[0]); + ref_mi = xd->mi[i * xd->mi_stride - 1]; + const_motion &= (ref_mv.as_int == ref_mi->mv[0].as_int) && + (ref_frame == ref_mi->ref_frame[0]); + skip_ref_frame &= (rf == ref_mi->ref_frame[0]); } } @@ -3287,15 +3309,15 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, continue; } - mbmi->mode = 
this_mode; - mbmi->uv_mode = DC_PRED; - mbmi->ref_frame[0] = ref_frame; - mbmi->ref_frame[1] = second_ref_frame; + mi->mode = this_mode; + mi->uv_mode = DC_PRED; + mi->ref_frame[0] = ref_frame; + mi->ref_frame[1] = second_ref_frame; // Evaluate all sub-pel filters irrespective of whether we can use // them for this frame. - mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP + mi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP : cm->interp_filter; - mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0; + mi->mv[0].as_int = mi->mv[1].as_int = 0; x->skip = 0; set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); @@ -3316,7 +3338,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, if (rate_y == INT_MAX) continue; - uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd->subsampling_x, + uv_tx = get_uv_tx_size_impl(mi->tx_size, bsize, pd->subsampling_x, pd->subsampling_y); if (rate_uv_intra[uv_tx] == INT_MAX) { choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, @@ -3327,9 +3349,9 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, rate_uv = rate_uv_tokenonly[uv_tx]; distortion_uv = dist_uv[uv_tx]; skippable = skippable && skip_uv[uv_tx]; - mbmi->uv_mode = mode_uv[uv_tx]; + mi->uv_mode = mode_uv[uv_tx]; - rate2 = rate_y + cpi->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx]; + rate2 = rate_y + cpi->mbmode_cost[mi->mode] + rate_uv_intra[uv_tx]; if (this_mode != DC_PRED && this_mode != TM_PRED) rate2 += intra_cost_penalty; distortion2 = distortion_y + distortion_uv; @@ -3360,28 +3382,34 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, } if (!disable_skip) { + const vpx_prob skip_prob = vp9_get_skip_prob(cm, xd); + const int skip_cost0 = vp9_cost_bit(skip_prob, 0); + const int skip_cost1 = vp9_cost_bit(skip_prob, 1); + if (skippable) { // Back out the coefficient coding costs rate2 -= (rate_y + rate_uv); // Cost the skip mb case - rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); + rate2 += skip_cost1; } else if (ref_frame != INTRA_FRAME && !xd->lossless) { - if 
(RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) < - RDCOST(x->rdmult, x->rddiv, 0, total_sse)) { + if (RDCOST(x->rdmult, x->rddiv, + rate_y + rate_uv + skip_cost0, distortion2) < + RDCOST(x->rdmult, x->rddiv, skip_cost1, total_sse)) { // Add in the cost of the no skip flag. - rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); + rate2 += skip_cost0; } else { // FIXME(rbultje) make this work for splitmv also - rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); - distortion2 = total_sse; assert(total_sse >= 0); + + rate2 += skip_cost1; + distortion2 = total_sse; rate2 -= (rate_y + rate_uv); this_skip2 = 1; } } else { // Add in the cost of the no skip flag. - rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); + rate2 += skip_cost0; } // Calculate the final RD estimate for this mode. @@ -3397,7 +3425,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, // Keep record of best intra rd if (this_rd < best_intra_rd) { best_intra_rd = this_rd; - best_intra_mode = mbmi->mode; + best_intra_mode = mi->mode; } } @@ -3417,7 +3445,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, if (ref_frame == INTRA_FRAME) { /* required for left and above block mv */ - mbmi->mv[0].as_int = 0; + mi->mv[0].as_int = 0; max_plane = 1; } else { best_pred_sse = x->pred_sse[ref_frame]; @@ -3427,13 +3455,13 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, rd_cost->dist = distortion2; rd_cost->rdcost = this_rd; best_rd = this_rd; - best_mbmode = *mbmi; + best_mbmode = *mi; best_skip2 = this_skip2; best_mode_skippable = skippable; if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane); - memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size], + memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mi->tx_size], sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk); // TODO(debargha): enhance this test with a better distortion prediction @@ -3549,8 +3577,8 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, // Do Intra UV best rd mode selection if best mode choice above was intra. 
if (best_mbmode.ref_frame[0] == INTRA_FRAME) { TX_SIZE uv_tx_size; - *mbmi = best_mbmode; - uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]); + *mi = best_mbmode; + uv_tx_size = get_uv_tx_size(mi, &xd->plane[1]); rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size], &rate_uv_tokenonly[uv_tx_size], &dist_uv[uv_tx_size], @@ -3569,7 +3597,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, sf->adaptive_rd_thresh, bsize, best_mode_index); // macroblock modes - *mbmi = best_mbmode; + *mi = best_mbmode; x->skip |= best_skip2; for (i = 0; i < REFERENCE_MODES; ++i) { @@ -3599,7 +3627,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, if (!x->skip && !x->select_tx_size) { int has_high_freq_coeff = 0; int plane; - int max_plane = is_inter_block(&xd->mi[0]->mbmi) + int max_plane = is_inter_block(xd->mi[0]) ? MAX_MB_PLANE : 1; for (plane = 0; plane < max_plane; ++plane) { x->plane[plane].eobs = ctx->eobs_pbuf[plane][1]; @@ -3629,8 +3657,8 @@ void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, int64_t best_rd_so_far) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; - unsigned char segment_id = mbmi->segment_id; + MODE_INFO *const mi = xd->mi[0]; + unsigned char segment_id = mi->segment_id; const int comp_pred = 0; int i; int64_t best_pred_diff[REFERENCE_MODES]; @@ -3656,11 +3684,11 @@ void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)); - mbmi->mode = ZEROMV; - mbmi->uv_mode = DC_PRED; - mbmi->ref_frame[0] = LAST_FRAME; - mbmi->ref_frame[1] = NONE; - mbmi->mv[0].as_int = 0; + mi->mode = ZEROMV; + mi->uv_mode = DC_PRED; + mi->ref_frame[0] = LAST_FRAME; + mi->ref_frame[1] = NONE; + mi->mv[0].as_int = 0; x->skip = 1; if (cm->interp_filter != BILINEAR) { @@ -3670,21 +3698,21 @@ void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, int rs; int best_rs = INT_MAX; for (i = 0; i < SWITCHABLE_FILTERS; ++i) { - mbmi->interp_filter = i; + 
mi->interp_filter = i; rs = vp9_get_switchable_rate(cpi, xd); if (rs < best_rs) { best_rs = rs; - best_filter = mbmi->interp_filter; + best_filter = mi->interp_filter; } } } } // Set the appropriate filter if (cm->interp_filter == SWITCHABLE) { - mbmi->interp_filter = best_filter; + mi->interp_filter = best_filter; rate2 += vp9_get_switchable_rate(cpi, xd); } else { - mbmi->interp_filter = cm->interp_filter; + mi->interp_filter = cm->interp_filter; } if (cm->reference_mode == REFERENCE_MODE_SELECT) @@ -3706,7 +3734,7 @@ void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, } assert((cm->interp_filter == SWITCHABLE) || - (cm->interp_filter == mbmi->interp_filter)); + (cm->interp_filter == mi->interp_filter)); vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact, cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV); @@ -3732,10 +3760,10 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, RD_OPT *const rd_opt = &cpi->rd; SPEED_FEATURES *const sf = &cpi->sf; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + MODE_INFO *const mi = xd->mi[0]; const struct segmentation *const seg = &cm->seg; MV_REFERENCE_FRAME ref_frame, second_ref_frame; - unsigned char segment_id = mbmi->segment_id; + unsigned char segment_id = mi->segment_id; int comp_pred, i; int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; struct buf_2d yv12_mb[4][MAX_MB_PLANE]; @@ -3747,7 +3775,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, int64_t best_pred_rd[REFERENCE_MODES]; int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS]; int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; - MB_MODE_INFO best_mbmode; + MODE_INFO best_mbmode; int ref_index, best_ref_index = 0; unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; vpx_prob comp_mode_p; @@ -3821,6 +3849,16 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, ref_frame = vp9_ref_order[ref_index].ref_frame[0]; second_ref_frame = vp9_ref_order[ref_index].ref_frame[1]; +#if 
CONFIG_BETTER_HW_COMPATIBILITY + // forbid 8X4 and 4X8 partitions if any reference frame is scaled. + if (bsize == BLOCK_8X4 || bsize == BLOCK_4X8) { + int ref_scaled = vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf); + if (second_ref_frame > INTRA_FRAME) + ref_scaled += vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf); + if (ref_scaled) + continue; + } +#endif // Look at the reference frame of the best mode so far and set the // skip mask to look at a subset of the remaining modes. if (ref_index > 2 && sf->mode_skip_start < MAX_MODES) { @@ -3896,14 +3934,14 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, continue; } - mbmi->tx_size = TX_4X4; - mbmi->uv_mode = DC_PRED; - mbmi->ref_frame[0] = ref_frame; - mbmi->ref_frame[1] = second_ref_frame; + mi->tx_size = TX_4X4; + mi->uv_mode = DC_PRED; + mi->ref_frame[0] = ref_frame; + mi->ref_frame[1] = second_ref_frame; // Evaluate all sub-pel filters irrespective of whether we can use // them for this frame. - mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP - : cm->interp_filter; + mi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP + : cm->interp_filter; x->skip = 0; set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); @@ -3934,7 +3972,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, rate_uv = rate_uv_tokenonly; distortion2 += dist_uv; distortion_uv = dist_uv; - mbmi->uv_mode = mode_uv; + mi->uv_mode = mode_uv; } else { int rate; int64_t distortion; @@ -3947,7 +3985,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, int_mv *second_ref = comp_pred ? &x->mbmi_ext->ref_mvs[second_ref_frame][0] : NULL; b_mode_info tmp_best_bmodes[16]; - MB_MODE_INFO tmp_best_mbmode; + MODE_INFO tmp_best_mbmode; BEST_SEG_INFO bsi[SWITCHABLE_FILTERS]; int pred_exists = 0; int uv_skippable; @@ -3956,8 +3994,8 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, int ref; for (ref = 0; ref < 2; ++ref) { - scaled_ref_frame[ref] = mbmi->ref_frame[ref] > INTRA_FRAME ? 
- vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[ref]) : NULL; + scaled_ref_frame[ref] = mi->ref_frame[ref] > INTRA_FRAME ? + vp9_get_scaled_ref_frame(cpi, mi->ref_frame[ref]) : NULL; if (scaled_ref_frame[ref]) { int i; @@ -3996,7 +4034,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, int newbest, rs; int64_t rs_rd; MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext; - mbmi->interp_filter = switchable_filter_index; + mi->interp_filter = switchable_filter_index; tmp_rd = rd_pick_best_sub8x8_mode(cpi, x, &mbmi_ext->ref_mvs[ref_frame][0], second_ref, best_yrd, &rate, @@ -4020,11 +4058,11 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, newbest = (tmp_rd < tmp_best_rd); if (newbest) { - tmp_best_filter = mbmi->interp_filter; + tmp_best_filter = mi->interp_filter; tmp_best_rd = tmp_rd; } if ((newbest && cm->interp_filter == SWITCHABLE) || - (mbmi->interp_filter == cm->interp_filter && + (mi->interp_filter == cm->interp_filter && cm->interp_filter != SWITCHABLE)) { tmp_best_rdu = tmp_rd; tmp_best_rate = rate; @@ -4032,7 +4070,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, tmp_best_distortion = distortion; tmp_best_sse = total_sse; tmp_best_skippable = skippable; - tmp_best_mbmode = *mbmi; + tmp_best_mbmode = *mi; for (i = 0; i < 4; i++) { tmp_best_bmodes[i] = xd->mi[0]->bmi[i]; x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i]; @@ -4044,7 +4082,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, if (tmp_best_rdu / 2 > best_rd) { // skip searching the other filters if the first is // already substantially larger than the best so far - tmp_best_filter = mbmi->interp_filter; + tmp_best_filter = mi->interp_filter; tmp_best_rdu = INT64_MAX; break; } @@ -4057,8 +4095,8 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, if (tmp_best_rdu == INT64_MAX && pred_exists) continue; - mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ? - tmp_best_filter : cm->interp_filter); + mi->interp_filter = (cm->interp_filter == SWITCHABLE ? 
+ tmp_best_filter : cm->interp_filter); if (!pred_exists) { // Handles the special case when a filter that is not in the // switchable list (bilinear, 6-tap) is indicated at the frame level @@ -4076,7 +4114,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, rate_y = tmp_best_ratey; distortion = tmp_best_distortion; skippable = tmp_best_skippable; - *mbmi = tmp_best_mbmode; + *mi = tmp_best_mbmode; for (i = 0; i < 4; i++) xd->mi[0]->bmi[i] = tmp_best_bmodes[i]; } @@ -4143,17 +4181,21 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, } if (!disable_skip) { + const vpx_prob skip_prob = vp9_get_skip_prob(cm, xd); + const int skip_cost0 = vp9_cost_bit(skip_prob, 0); + const int skip_cost1 = vp9_cost_bit(skip_prob, 1); + // Skip is never coded at the segment level for sub8x8 blocks and instead // always coded in the bitstream at the mode info level. - if (ref_frame != INTRA_FRAME && !xd->lossless) { - if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) < - RDCOST(x->rdmult, x->rddiv, 0, total_sse)) { + if (RDCOST(x->rdmult, x->rddiv, + rate_y + rate_uv + skip_cost0, distortion2) < + RDCOST(x->rdmult, x->rddiv, skip_cost1, total_sse)) { // Add in the cost of the no skip flag. - rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); + rate2 += skip_cost0; } else { // FIXME(rbultje) make this work for splitmv also - rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); + rate2 += skip_cost1; distortion2 = total_sse; assert(total_sse >= 0); rate2 -= (rate_y + rate_uv); @@ -4163,7 +4205,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, } } else { // Add in the cost of the no skip flag. - rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); + rate2 += skip_cost0; } // Calculate the final RD estimate for this mode. 
@@ -4186,7 +4228,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, if (ref_frame == INTRA_FRAME) { /* required for left and above block mv */ - mbmi->mv[0].as_int = 0; + mi->mv[0].as_int = 0; max_plane = 1; } @@ -4196,7 +4238,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, best_rd = this_rd; best_yrd = best_rd - RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv); - best_mbmode = *mbmi; + best_mbmode = *mi; best_skip2 = this_skip2; if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane); @@ -4294,7 +4336,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, if (sf->use_uv_intra_rd_estimate) { // Do Intra UV best rd mode selection if best mode choice above was intra. if (best_mbmode.ref_frame[0] == INTRA_FRAME) { - *mbmi = best_mbmode; + *mi = best_mbmode; rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra, &rate_uv_tokenonly, &dist_uv, @@ -4318,7 +4360,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, sf->adaptive_rd_thresh, bsize, best_ref_index); // macroblock modes - *mbmi = best_mbmode; + *mi = best_mbmode; x->skip |= best_skip2; if (!is_inter_block(&best_mbmode)) { for (i = 0; i < 4; i++) @@ -4327,8 +4369,8 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, for (i = 0; i < 4; ++i) memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info)); - mbmi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int; - mbmi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int; + mi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int; + mi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int; } for (i = 0; i < REFERENCE_MODES; ++i) { diff --git a/libvpx/vp9/encoder/vp9_rdopt.h b/libvpx/vp9/encoder/vp9_rdopt.h index 00ee55c67..253e4a02d 100644 --- a/libvpx/vp9/encoder/vp9_rdopt.h +++ b/libvpx/vp9/encoder/vp9_rdopt.h @@ -29,15 +29,6 @@ void vp9_rd_pick_intra_mode_sb(struct VP9_COMP *cpi, struct macroblock *x, struct RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd); -unsigned int vp9_get_sby_perpixel_variance(VP9_COMP *cpi, - 
const struct buf_2d *ref, - BLOCK_SIZE bs); -#if CONFIG_VP9_HIGHBITDEPTH -unsigned int vp9_high_get_sby_perpixel_variance(VP9_COMP *cpi, - const struct buf_2d *ref, - BLOCK_SIZE bs, int bd); -#endif - void vp9_rd_pick_inter_mode_sb(struct VP9_COMP *cpi, struct TileDataEnc *tile_data, struct macroblock *x, diff --git a/libvpx/vp9/encoder/vp9_resize.c b/libvpx/vp9/encoder/vp9_resize.c index 59c747852..307a1123a 100644 --- a/libvpx/vp9/encoder/vp9_resize.c +++ b/libvpx/vp9/encoder/vp9_resize.c @@ -15,6 +15,7 @@ #include <stdlib.h> #include <string.h> +#include "./vpx_config.h" #if CONFIG_VP9_HIGHBITDEPTH #include "vpx_dsp/vpx_dsp_common.h" #endif // CONFIG_VP9_HIGHBITDEPTH @@ -445,7 +446,7 @@ static void resize_multistep(const uint8_t *const input, int length, uint8_t *output, int olength, - uint8_t *buf) { + uint8_t *otmp) { int steps; if (length == olength) { memcpy(output, input, sizeof(output[0]) * length); @@ -456,15 +457,10 @@ static void resize_multistep(const uint8_t *const input, if (steps > 0) { int s; uint8_t *out = NULL; - uint8_t *tmpbuf = NULL; - uint8_t *otmp, *otmp2; + uint8_t *otmp2; int filteredlength = length; - if (!tmpbuf) { - tmpbuf = (uint8_t *)malloc(sizeof(uint8_t) * length); - otmp = tmpbuf; - } else { - otmp = buf; - } + + assert(otmp != NULL); otmp2 = otmp + get_down2_length(length, 1); for (s = 0; s < steps; ++s) { const int proj_filteredlength = get_down2_length(filteredlength, 1); @@ -482,8 +478,6 @@ static void resize_multistep(const uint8_t *const input, if (filteredlength != olength) { interpolate(out, filteredlength, output, olength); } - if (tmpbuf) - free(tmpbuf); } else { interpolate(input, length, output, olength); } @@ -519,22 +513,29 @@ void vp9_resize_plane(const uint8_t *const input, uint8_t *intbuf = (uint8_t *)malloc(sizeof(uint8_t) * width2 * height); uint8_t *tmpbuf = (uint8_t *)malloc(sizeof(uint8_t) * (width < height ? 
height : width)); - uint8_t *arrbuf = (uint8_t *)malloc(sizeof(uint8_t) * (height + height2)); + uint8_t *arrbuf = (uint8_t *)malloc(sizeof(uint8_t) * height); + uint8_t *arrbuf2 = (uint8_t *)malloc(sizeof(uint8_t) * height2); + if (intbuf == NULL || tmpbuf == NULL || + arrbuf == NULL || arrbuf2 == NULL) + goto Error; assert(width > 0); assert(height > 0); assert(width2 > 0); assert(height2 > 0); for (i = 0; i < height; ++i) resize_multistep(input + in_stride * i, width, - intbuf + width2 * i, width2, tmpbuf); + intbuf + width2 * i, width2, tmpbuf); for (i = 0; i < width2; ++i) { fill_col_to_arr(intbuf + i, width2, height, arrbuf); - resize_multistep(arrbuf, height, arrbuf + height, height2, tmpbuf); - fill_arr_to_col(output + i, out_stride, height2, arrbuf + height); + resize_multistep(arrbuf, height, arrbuf2, height2, tmpbuf); + fill_arr_to_col(output + i, out_stride, height2, arrbuf2); } + + Error: free(intbuf); free(tmpbuf); free(arrbuf); + free(arrbuf2); } #if CONFIG_VP9_HIGHBITDEPTH @@ -737,7 +738,7 @@ static void highbd_resize_multistep(const uint16_t *const input, int length, uint16_t *output, int olength, - uint16_t *buf, + uint16_t *otmp, int bd) { int steps; if (length == olength) { @@ -749,15 +750,10 @@ static void highbd_resize_multistep(const uint16_t *const input, if (steps > 0) { int s; uint16_t *out = NULL; - uint16_t *tmpbuf = NULL; - uint16_t *otmp, *otmp2; + uint16_t *otmp2; int filteredlength = length; - if (!tmpbuf) { - tmpbuf = (uint16_t *)malloc(sizeof(uint16_t) * length); - otmp = tmpbuf; - } else { - otmp = buf; - } + + assert(otmp != NULL); otmp2 = otmp + get_down2_length(length, 1); for (s = 0; s < steps; ++s) { const int proj_filteredlength = get_down2_length(filteredlength, 1); @@ -775,8 +771,6 @@ static void highbd_resize_multistep(const uint16_t *const input, if (filteredlength != olength) { highbd_interpolate(out, filteredlength, output, olength, bd); } - if (tmpbuf) - free(tmpbuf); } else { highbd_interpolate(input, length, output, 
olength, bd); } @@ -815,21 +809,28 @@ void vp9_highbd_resize_plane(const uint8_t *const input, uint16_t *intbuf = (uint16_t *)malloc(sizeof(uint16_t) * width2 * height); uint16_t *tmpbuf = (uint16_t *)malloc(sizeof(uint16_t) * (width < height ? height : width)); - uint16_t *arrbuf = (uint16_t *)malloc(sizeof(uint16_t) * (height + height2)); + uint16_t *arrbuf = (uint16_t *)malloc(sizeof(uint16_t) * height); + uint16_t *arrbuf2 = (uint16_t *)malloc(sizeof(uint16_t) * height2); + if (intbuf == NULL || tmpbuf == NULL || + arrbuf == NULL || arrbuf2 == NULL) + goto Error; for (i = 0; i < height; ++i) { highbd_resize_multistep(CONVERT_TO_SHORTPTR(input + in_stride * i), width, intbuf + width2 * i, width2, tmpbuf, bd); } for (i = 0; i < width2; ++i) { highbd_fill_col_to_arr(intbuf + i, width2, height, arrbuf); - highbd_resize_multistep(arrbuf, height, arrbuf + height, height2, tmpbuf, + highbd_resize_multistep(arrbuf, height, arrbuf2, height2, tmpbuf, bd); highbd_fill_arr_to_col(CONVERT_TO_SHORTPTR(output + i), out_stride, height2, - arrbuf + height); + arrbuf2); } + + Error: free(intbuf); free(tmpbuf); free(arrbuf); + free(arrbuf2); } #endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/libvpx/vp9/encoder/vp9_segmentation.c b/libvpx/vp9/encoder/vp9_segmentation.c index c5c50a244..5a0a23d48 100644 --- a/libvpx/vp9/encoder/vp9_segmentation.c +++ b/libvpx/vp9/encoder/vp9_segmentation.c @@ -118,7 +118,7 @@ static void count_segs(const VP9_COMMON *cm, MACROBLOCKD *xd, return; xd->mi = mi; - segment_id = xd->mi[0]->mbmi.segment_id; + segment_id = xd->mi[0]->segment_id; set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); @@ -127,7 +127,7 @@ static void count_segs(const VP9_COMMON *cm, MACROBLOCKD *xd, // Temporal prediction not allowed on key frames if (cm->frame_type != KEY_FRAME) { - const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; + const BLOCK_SIZE bsize = xd->mi[0]->sb_type; // Test to see if the segment id matches the predicted value. 
const int pred_segment_id = get_segment_id(cm, cm->last_frame_seg_map, bsize, mi_row, mi_col); @@ -136,7 +136,7 @@ static void count_segs(const VP9_COMMON *cm, MACROBLOCKD *xd, // Store the prediction status for this mb and update counts // as appropriate - xd->mi[0]->mbmi.seg_id_predicted = pred_flag; + xd->mi[0]->seg_id_predicted = pred_flag; temporal_predictor_count[pred_context][pred_flag]++; // Update the "unpredicted" segment count @@ -159,8 +159,8 @@ static void count_segs_sb(const VP9_COMMON *cm, MACROBLOCKD *xd, if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - bw = num_8x8_blocks_wide_lookup[mi[0]->mbmi.sb_type]; - bh = num_8x8_blocks_high_lookup[mi[0]->mbmi.sb_type]; + bw = num_8x8_blocks_wide_lookup[mi[0]->sb_type]; + bh = num_8x8_blocks_high_lookup[mi[0]->sb_type]; if (bw == bs && bh == bs) { count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count, diff --git a/libvpx/vp9/encoder/vp9_skin_detection.c b/libvpx/vp9/encoder/vp9_skin_detection.c index c2763b7da..23a5fc775 100644 --- a/libvpx/vp9/encoder/vp9_skin_detection.c +++ b/libvpx/vp9/encoder/vp9_skin_detection.c @@ -15,22 +15,29 @@ #include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_skin_detection.h" +#define MODEL_MODE 1 + // Fixed-point skin color model parameters. -static const int skin_mean[2] = {7463, 9614}; // q6 +static const int skin_mean[5][2] = { + {7463, 9614}, {6400, 10240}, {7040, 10240}, {8320, 9280}, {6800, 9614}}; static const int skin_inv_cov[4] = {4107, 1663, 1663, 2157}; // q16 -static const int skin_threshold = 1570636; // q18 +static const int skin_threshold[6] = {1570636, 1400000, 800000, 800000, 800000, + 800000}; // q18 // Thresholds on luminance. -static const int y_low = 20; +static const int y_low = 40; static const int y_high = 220; // Evaluates the Mahalanobis distance measure for the input CbCr values. 
-static int evaluate_skin_color_difference(int cb, int cr) { +static int evaluate_skin_color_difference(int cb, int cr, int idx) { const int cb_q6 = cb << 6; const int cr_q6 = cr << 6; - const int cb_diff_q12 = (cb_q6 - skin_mean[0]) * (cb_q6 - skin_mean[0]); - const int cbcr_diff_q12 = (cb_q6 - skin_mean[0]) * (cr_q6 - skin_mean[1]); - const int cr_diff_q12 = (cr_q6 - skin_mean[1]) * (cr_q6 - skin_mean[1]); + const int cb_diff_q12 = + (cb_q6 - skin_mean[idx][0]) * (cb_q6 - skin_mean[idx][0]); + const int cbcr_diff_q12 = + (cb_q6 - skin_mean[idx][0]) * (cr_q6 - skin_mean[idx][1]); + const int cr_diff_q12 = + (cr_q6 - skin_mean[idx][1]) * (cr_q6 - skin_mean[idx][1]); const int cb_diff_q2 = (cb_diff_q12 + (1 << 9)) >> 10; const int cbcr_diff_q2 = (cbcr_diff_q12 + (1 << 9)) >> 10; const int cr_diff_q2 = (cr_diff_q12 + (1 << 9)) >> 10; @@ -41,13 +48,65 @@ static int evaluate_skin_color_difference(int cb, int cr) { return skin_diff; } -int vp9_skin_pixel(const uint8_t y, const uint8_t cb, const uint8_t cr) { - if (y < y_low || y > y_high) +int vp9_skin_pixel(const uint8_t y, const uint8_t cb, const uint8_t cr, + int motion) { + if (y < y_low || y > y_high) { return 0; - else - return (evaluate_skin_color_difference(cb, cr) < skin_threshold); + } else { + if (MODEL_MODE == 0) { + return (evaluate_skin_color_difference(cb, cr, 0) < skin_threshold[0]); + } else { + int i = 0; + // Exit on grey. + if (cb == 128 && cr == 128) + return 0; + // Exit on very strong cb. + if (cb > 150 && cr < 110) + return 0; + for (; i < 5; i++) { + int skin_color_diff = evaluate_skin_color_difference(cb, cr, i); + if (skin_color_diff < skin_threshold[i + 1]) { + if (y < 60 && skin_color_diff > 3 * (skin_threshold[i + 1] >> 2)) + return 0; + else if (motion == 0 && + skin_color_diff > (skin_threshold[i + 1] >> 1)) + return 0; + else + return 1; + } + // Exit if difference is much large than the threshold. 
+ if (skin_color_diff > (skin_threshold[i + 1] << 3)) { + return 0; + } + } + return 0; + } + } } +int vp9_compute_skin_block(const uint8_t *y, const uint8_t *u, const uint8_t *v, + int stride, int strideuv, int bsize, + int consec_zeromv, int curr_motion_magn) { + // No skin if block has been zero/small motion for long consecutive time. + if (consec_zeromv > 60 && curr_motion_magn == 0) { + return 0; + } else { + int motion = 1; + // Take center pixel in block to determine is_skin. + const int y_width_shift = (4 << b_width_log2_lookup[bsize]) >> 1; + const int y_height_shift = (4 << b_height_log2_lookup[bsize]) >> 1; + const int uv_width_shift = y_width_shift >> 1; + const int uv_height_shift = y_height_shift >> 1; + const uint8_t ysource = y[y_height_shift * stride + y_width_shift]; + const uint8_t usource = u[uv_height_shift * strideuv + uv_width_shift]; + const uint8_t vsource = v[uv_height_shift * strideuv + uv_width_shift]; + if (consec_zeromv > 25 && curr_motion_magn == 0) + motion = 0; + return vp9_skin_pixel(ysource, usource, vsource, motion); + } +} + + #ifdef OUTPUT_YUV_SKINMAP // For viewing skin map on input source. void vp9_compute_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file) { @@ -67,7 +126,7 @@ void vp9_compute_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file) { int shuv = shy - 1; int fac = y_bsize / 8; // Use center pixel or average of center 2x2 pixels. - int mode_filter = 1; + int mode_filter = 0; YV12_BUFFER_CONFIG skinmap; memset(&skinmap, 0, sizeof(YV12_BUFFER_CONFIG)); if (vpx_alloc_frame_buffer(&skinmap, cm->width, cm->height, @@ -84,27 +143,46 @@ void vp9_compute_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file) { for (mi_row = 0; mi_row < cm->mi_rows - 1; mi_row += fac) { num_bl = 0; for (mi_col = 0; mi_col < cm->mi_cols - 1; mi_col += fac) { - // Select pixel for each block for skin detection. - // Use center pixel, or 2x2 average at center. 
- uint8_t ysource = src_y[ypos * src_ystride + ypos]; - uint8_t usource = src_u[uvpos * src_uvstride + uvpos]; - uint8_t vsource = src_v[uvpos * src_uvstride + uvpos]; - uint8_t ysource2 = src_y[(ypos + 1) * src_ystride + ypos]; - uint8_t usource2 = src_u[(uvpos + 1) * src_uvstride + uvpos]; - uint8_t vsource2 = src_v[(uvpos + 1) * src_uvstride + uvpos]; - uint8_t ysource3 = src_y[ypos * src_ystride + (ypos + 1)]; - uint8_t usource3 = src_u[uvpos * src_uvstride + (uvpos + 1)]; - uint8_t vsource3 = src_v[uvpos * src_uvstride + (uvpos + 1)]; - uint8_t ysource4 = src_y[(ypos + 1) * src_ystride + (ypos + 1)]; - uint8_t usource4 = src_u[(uvpos + 1) * src_uvstride + (uvpos + 1)]; - uint8_t vsource4 = src_v[(uvpos + 1) * src_uvstride + (uvpos + 1)]; int is_skin = 0; if (mode_filter == 1) { + // Use 2x2 average at center. + uint8_t ysource = src_y[ypos * src_ystride + ypos]; + uint8_t usource = src_u[uvpos * src_uvstride + uvpos]; + uint8_t vsource = src_v[uvpos * src_uvstride + uvpos]; + uint8_t ysource2 = src_y[(ypos + 1) * src_ystride + ypos]; + uint8_t usource2 = src_u[(uvpos + 1) * src_uvstride + uvpos]; + uint8_t vsource2 = src_v[(uvpos + 1) * src_uvstride + uvpos]; + uint8_t ysource3 = src_y[ypos * src_ystride + (ypos + 1)]; + uint8_t usource3 = src_u[uvpos * src_uvstride + (uvpos + 1)]; + uint8_t vsource3 = src_v[uvpos * src_uvstride + (uvpos + 1)]; + uint8_t ysource4 = src_y[(ypos + 1) * src_ystride + (ypos + 1)]; + uint8_t usource4 = src_u[(uvpos + 1) * src_uvstride + (uvpos + 1)]; + uint8_t vsource4 = src_v[(uvpos + 1) * src_uvstride + (uvpos + 1)]; ysource = (ysource + ysource2 + ysource3 + ysource4) >> 2; usource = (usource + usource2 + usource3 + usource4) >> 2; vsource = (vsource + vsource2 + vsource3 + vsource4) >> 2; + is_skin = vp9_skin_pixel(ysource, usource, vsource, 1); + } else { + int block_size = BLOCK_8X8; + int consec_zeromv = 0; + int bl_index = mi_row * cm->mi_cols + mi_col; + int bl_index1 = bl_index + 1; + int bl_index2 = bl_index + 
cm->mi_cols; + int bl_index3 = bl_index2 + 1; + if (y_bsize == 8) + consec_zeromv = cpi->consec_zero_mv[bl_index]; + else + consec_zeromv = VPXMIN(cpi->consec_zero_mv[bl_index], + VPXMIN(cpi->consec_zero_mv[bl_index1], + VPXMIN(cpi->consec_zero_mv[bl_index2], + cpi->consec_zero_mv[bl_index3]))); + if (y_bsize == 16) + block_size = BLOCK_16X16; + is_skin = vp9_compute_skin_block(src_y, src_u, src_v, src_ystride, + src_uvstride, block_size, + consec_zeromv, + 0); } - is_skin = vp9_skin_pixel(ysource, usource, vsource); for (i = 0; i < y_bsize; i++) { for (j = 0; j < y_bsize; j++) { if (is_skin) diff --git a/libvpx/vp9/encoder/vp9_skin_detection.h b/libvpx/vp9/encoder/vp9_skin_detection.h index 0a87ef9f4..c77382dbd 100644 --- a/libvpx/vp9/encoder/vp9_skin_detection.h +++ b/libvpx/vp9/encoder/vp9_skin_detection.h @@ -21,7 +21,12 @@ struct VP9_COMP; // #define OUTPUT_YUV_SKINMAP -int vp9_skin_pixel(const uint8_t y, const uint8_t cb, const uint8_t cr); +int vp9_skin_pixel(const uint8_t y, const uint8_t cb, const uint8_t cr, + int motion); + +int vp9_compute_skin_block(const uint8_t *y, const uint8_t *u, const uint8_t *v, + int stride, int strideuv, int bsize, + int consec_zeromv, int curr_motion_magn); #ifdef OUTPUT_YUV_SKINMAP // For viewing skin map on input source. 
diff --git a/libvpx/vp9/encoder/vp9_speed_features.c b/libvpx/vp9/encoder/vp9_speed_features.c index a53962984..e7f04a244 100644 --- a/libvpx/vp9/encoder/vp9_speed_features.c +++ b/libvpx/vp9/encoder/vp9_speed_features.c @@ -15,6 +15,22 @@ #include "vp9/encoder/vp9_rdopt.h" #include "vpx_dsp/vpx_dsp_common.h" +// Mesh search patters for various speed settings +static MESH_PATTERN best_quality_mesh_pattern[MAX_MESH_STEP] = + {{64, 4}, {28, 2}, {15, 1}, {7, 1}}; + +#define MAX_MESH_SPEED 5 // Max speed setting for mesh motion method +static MESH_PATTERN good_quality_mesh_patterns[MAX_MESH_SPEED + 1] + [MAX_MESH_STEP] = + {{{64, 8}, {28, 4}, {15, 1}, {7, 1}}, + {{64, 8}, {28, 4}, {15, 1}, {7, 1}}, + {{64, 8}, {14, 2}, {7, 1}, {7, 1}}, + {{64, 16}, {24, 8}, {12, 4}, {7, 1}}, + {{64, 16}, {24, 8}, {12, 4}, {7, 1}}, + {{64, 16}, {24, 8}, {12, 4}, {7, 1}}, + }; +static unsigned char good_quality_max_mesh_pct[MAX_MESH_SPEED + 1] = + {50, 25, 15, 5, 1, 1}; // Intra only frames, golden frames (except alt ref overlays) and // alt ref frames tend to be coded at a higher than ambient quality @@ -259,6 +275,8 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, sf->static_segmentation = 0; sf->adaptive_rd_thresh = 1; sf->use_fast_coef_costing = 1; + sf->allow_exhaustive_searches = 0; + sf->exhaustive_searches_thresh = INT_MAX; if (speed >= 1) { sf->use_square_partition_only = !frame_is_intra_only(cm); @@ -285,12 +303,26 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, FLAG_SKIP_INTRA_LOWVAR; sf->adaptive_pred_interp_filter = 2; - // Disable reference masking if using spatial scaling since - // pred_mv_sad will not be set (since vp9_mv_pred will not - // be called). - // TODO(marpan/agrange): Fix this condition. - sf->reference_masking = (cpi->oxcf.resize_mode != RESIZE_DYNAMIC && - cpi->svc.number_spatial_layers == 1) ? 1 : 0; + // Reference masking only enabled for 1 spatial layer, and if none of the + // references have been scaled. 
The latter condition needs to be checked + // for external or internal dynamic resize. + sf->reference_masking = (cpi->svc.number_spatial_layers == 1); + if (sf->reference_masking == 1 && + (cpi->external_resize == 1 || + cpi->oxcf.resize_mode == RESIZE_DYNAMIC)) { + MV_REFERENCE_FRAME ref_frame; + static const int flag_list[4] = + {0, VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG}; + for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { + const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); + if (yv12 != NULL && (cpi->ref_frame_flags & flag_list[ref_frame])) { + const struct scale_factors *const scale_fac = + &cm->frame_refs[ref_frame - 1].sf; + if (vp9_is_scaled(scale_fac)) + sf->reference_masking = 0; + } + } + } sf->disable_filter_search_var_thresh = 50; sf->comp_inter_joint_search_thresh = BLOCK_SIZES; @@ -368,6 +400,8 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH; sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8; sf->simple_model_rd_from_var = 1; + if (cpi->oxcf.rc_mode == VPX_VBR) + sf->mv.search_method = NSTEP; if (!is_keyframe) { int i; @@ -376,13 +410,16 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, sf->intra_y_mode_bsize_mask[i] = INTRA_DC_TM_H_V; } else { for (i = 0; i < BLOCK_SIZES; ++i) - if (i >= BLOCK_16X16) + if (i > BLOCK_16X16) sf->intra_y_mode_bsize_mask[i] = INTRA_DC; else // Use H and V intra mode for block sizes <= 16X16. 
sf->intra_y_mode_bsize_mask[i] = INTRA_DC_H_V; } } + if (content == VP9E_CONTENT_SCREEN) { + sf->short_circuit_flat_blocks = 1; + } } if (speed >= 6) { @@ -392,6 +429,11 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, sf->mv.search_method = NSTEP; sf->mv.reduce_first_step_size = 1; sf->skip_encode_sb = 0; + if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR && + content != VP9E_CONTENT_SCREEN) { + // Enable short circuit for low temporal variance. + sf->short_circuit_low_temp_var = 1; + } } if (speed >= 7) { @@ -406,8 +448,19 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, } if (speed >= 8) { sf->adaptive_rd_thresh = 4; - sf->mv.subpel_force_stop = 2; + sf->mv.subpel_force_stop = (content == VP9E_CONTENT_SCREEN) ? 3 : 2; sf->lpf_pick = LPF_PICK_MINIMAL_LPF; + // Only keep INTRA_DC mode for speed 8. + if (!is_keyframe) { + int i = 0; + for (i = 0; i < BLOCK_SIZES; ++i) + sf->intra_y_mode_bsize_mask[i] = INTRA_DC; + } + if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR && + content != VP9E_CONTENT_SCREEN) { + // More aggressive short circuit for speed 8. + sf->short_circuit_low_temp_var = 2; + } } } @@ -460,7 +513,6 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { sf->mv.auto_mv_step_size = 0; sf->mv.fullpel_search_step_param = 6; sf->comp_inter_joint_search_thresh = BLOCK_4X4; - sf->adaptive_rd_thresh = 0; sf->tx_size_search_method = USE_FULL_RD; sf->use_lp32x32fdct = 0; sf->adaptive_motion_search = 0; @@ -516,10 +568,15 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { // Recode loop tolerance %. sf->recode_tolerance = 25; sf->default_interp_filter = SWITCHABLE; - sf->tx_size_search_breakout = 0; - sf->partition_search_breakout_dist_thr = 0; - sf->partition_search_breakout_rate_thr = 0; sf->simple_model_rd_from_var = 0; + sf->short_circuit_flat_blocks = 0; + sf->short_circuit_low_temp_var = 0; + + // Some speed-up features even for best quality as minimal impact on quality. 
+ sf->adaptive_rd_thresh = 1; + sf->tx_size_search_breakout = 1; + sf->partition_search_breakout_dist_thr = (1 << 19); + sf->partition_search_breakout_rate_thr = 80; if (oxcf->mode == REALTIME) set_rt_speed_feature(cpi, sf, oxcf->speed, oxcf->content); @@ -527,8 +584,36 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { set_good_speed_feature(cpi, cm, sf, oxcf->speed); cpi->full_search_sad = vp9_full_search_sad; - cpi->diamond_search_sad = oxcf->mode == BEST ? vp9_full_range_search - : vp9_diamond_search_sad; + cpi->diamond_search_sad = vp9_diamond_search_sad; + + sf->allow_exhaustive_searches = 1; + if (oxcf->mode == BEST) { + if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) + sf->exhaustive_searches_thresh = (1 << 20); + else + sf->exhaustive_searches_thresh = (1 << 21); + sf->max_exaustive_pct = 100; + for (i = 0; i < MAX_MESH_STEP; ++i) { + sf->mesh_patterns[i].range = best_quality_mesh_pattern[i].range; + sf->mesh_patterns[i].interval = best_quality_mesh_pattern[i].interval; + } + } else { + int speed = (oxcf->speed > MAX_MESH_SPEED) ? MAX_MESH_SPEED : oxcf->speed; + if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) + sf->exhaustive_searches_thresh = (1 << 22); + else + sf->exhaustive_searches_thresh = (1 << 23); + sf->max_exaustive_pct = good_quality_max_mesh_pct[speed]; + if (speed > 0) + sf->exhaustive_searches_thresh = sf->exhaustive_searches_thresh << 1; + + for (i = 0; i < MAX_MESH_STEP; ++i) { + sf->mesh_patterns[i].range = + good_quality_mesh_patterns[speed][i].range; + sf->mesh_patterns[i].interval = + good_quality_mesh_patterns[speed][i].interval; + } + } // Slow quant, dct and trellis not worthwhile for first pass // so make sure they are always turned off. 
@@ -541,7 +626,10 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { sf->optimize_coefficients = 0; } - if (sf->mv.subpel_search_method == SUBPEL_TREE) { + if (sf->mv.subpel_force_stop == 3) { + // Whole pel only + cpi->find_fractional_mv_step = vp9_skip_sub_pixel_tree; + } else if (sf->mv.subpel_search_method == SUBPEL_TREE) { cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree; } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED) { cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree_pruned; diff --git a/libvpx/vp9/encoder/vp9_speed_features.h b/libvpx/vp9/encoder/vp9_speed_features.h index 575e98cf5..e88a7dfff 100644 --- a/libvpx/vp9/encoder/vp9_speed_features.h +++ b/libvpx/vp9/encoder/vp9_speed_features.h @@ -188,13 +188,24 @@ typedef struct MV_SPEED_FEATURES { // Maximum number of steps in logarithmic subpel search before giving up. int subpel_iters_per_step; - // Control when to stop subpel search + // Control when to stop subpel search: + // 0: Full subpel search. + // 1: Stop at quarter pixel. + // 2: Stop at half pixel. + // 3: Stop at full pixel. int subpel_force_stop; // This variable sets the step_param used in full pel motion search. int fullpel_search_step_param; } MV_SPEED_FEATURES; +#define MAX_MESH_STEP 4 + +typedef struct MESH_PATTERN { + int range; + int interval; +} MESH_PATTERN; + typedef struct SPEED_FEATURES { MV_SPEED_FEATURES mv; @@ -299,6 +310,18 @@ typedef struct SPEED_FEATURES { // point for this motion search and limits the search range around it. int adaptive_motion_search; + // Flag for allowing some use of exhaustive searches; + int allow_exhaustive_searches; + + // Threshold for allowing exhaistive motion search. + int exhaustive_searches_thresh; + + // Maximum number of exhaustive searches for a frame. + int max_exaustive_pct; + + // Pattern to be used for any exhaustive mesh searches. 
+ MESH_PATTERN mesh_patterns[MAX_MESH_STEP]; + int schedule_mode_search; // Allows sub 8x8 modes to use the prediction filter that was determined @@ -419,6 +442,18 @@ typedef struct SPEED_FEATURES { // Fast approximation of vp9_model_rd_from_var_lapndz int simple_model_rd_from_var; + + // Skip a number of expensive mode evaluations for blocks with zero source + // variance. + int short_circuit_flat_blocks; + + // Skip a number of expensive mode evaluations for blocks with very low + // temporal variance. + // 1: Skip golden non-zeromv and ALL INTRA for bsize >= 32x32. + // 2: Skip golden non-zeromv and newmv-last for bsize >= 16x16, skip ALL + // INTRA for bsize >= 32x32 and vert/horz INTRA for bsize 16x16, 16x32 and + // 32x16. + int short_circuit_low_temp_var; } SPEED_FEATURES; struct VP9_COMP; diff --git a/libvpx/vp9/encoder/vp9_subexp.c b/libvpx/vp9/encoder/vp9_subexp.c index 799f179d9..29db01542 100644 --- a/libvpx/vp9/encoder/vp9_subexp.c +++ b/libvpx/vp9/encoder/vp9_subexp.c @@ -14,9 +14,7 @@ #include "vp9/encoder/vp9_cost.h" #include "vp9/encoder/vp9_subexp.h" -#define vp9_cost_upd256 ((int)(vp9_cost_one(upd) - vp9_cost_zero(upd))) - -static const int update_bits[255] = { +static const uint8_t update_bits[255] = { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, @@ -34,6 +32,7 @@ static const int update_bits[255] = { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0, }; +#define MIN_DELP_BITS 5 static int recenter_nonneg(int v, int m) { if (v > (m << 1)) @@ -46,7 +45,7 @@ static int recenter_nonneg(int v, int m) { static int remap_prob(int v, int m) { int i; - static const int map_table[MAX_PROB - 1] = { + static const uint8_t map_table[MAX_PROB - 1] = { // generated by: // map_table[j] = split_index(j, MAX_PROB - 1, MODULUS_PARAM); 20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33, @@ 
-80,7 +79,7 @@ static int remap_prob(int v, int m) { static int prob_diff_update_cost(vpx_prob newp, vpx_prob oldp) { int delp = remap_prob(newp, oldp); - return update_bits[delp] * 256; + return update_bits[delp] << VP9_PROB_COST_SHIFT; } static void encode_uniform(vpx_writer *w, int v) { @@ -123,14 +122,17 @@ int vp9_prob_diff_update_savings_search(const unsigned int *ct, int bestsavings = 0; vpx_prob newp, bestnewp = oldp; const int step = *bestp > oldp ? -1 : 1; + const int upd_cost = vp9_cost_one(upd) - vp9_cost_zero(upd); - for (newp = *bestp; newp != oldp; newp += step) { - const int new_b = cost_branch256(ct, newp); - const int update_b = prob_diff_update_cost(newp, oldp) + vp9_cost_upd256; - const int savings = old_b - new_b - update_b; - if (savings > bestsavings) { - bestsavings = savings; - bestnewp = newp; + if (old_b > upd_cost + (MIN_DELP_BITS << VP9_PROB_COST_SHIFT)) { + for (newp = *bestp; newp != oldp; newp += step) { + const int new_b = cost_branch256(ct, newp); + const int update_b = prob_diff_update_cost(newp, oldp) + upd_cost; + const int savings = old_b - new_b - update_b; + if (savings > bestsavings) { + bestsavings = savings; + bestnewp = newp; + } } } *bestp = bestnewp; @@ -138,52 +140,35 @@ int vp9_prob_diff_update_savings_search(const unsigned int *ct, } int vp9_prob_diff_update_savings_search_model(const unsigned int *ct, - const vpx_prob *oldp, + const vpx_prob oldp, vpx_prob *bestp, vpx_prob upd, int stepsize) { - int i, old_b, new_b, update_b, savings, bestsavings, step; + int i, old_b, new_b, update_b, savings, bestsavings; int newp; - vpx_prob bestnewp, newplist[ENTROPY_NODES], oldplist[ENTROPY_NODES]; - vp9_model_to_full_probs(oldp, oldplist); - memcpy(newplist, oldp, sizeof(vpx_prob) * UNCONSTRAINED_NODES); - for (i = UNCONSTRAINED_NODES, old_b = 0; i < ENTROPY_NODES; ++i) - old_b += cost_branch256(ct + 2 * i, oldplist[i]); - old_b += cost_branch256(ct + 2 * PIVOT_NODE, oldplist[PIVOT_NODE]); + const int step_sign = *bestp > oldp 
? -1 : 1; + const int step = stepsize * step_sign; + const int upd_cost = vp9_cost_one(upd) - vp9_cost_zero(upd); + const vpx_prob *newplist, *oldplist; + vpx_prob bestnewp; + oldplist = vp9_pareto8_full[oldp - 1]; + old_b = cost_branch256(ct + 2 * PIVOT_NODE, oldp); + for (i = UNCONSTRAINED_NODES; i < ENTROPY_NODES; ++i) + old_b += cost_branch256(ct + 2 * i, oldplist[i - UNCONSTRAINED_NODES]); bestsavings = 0; - bestnewp = oldp[PIVOT_NODE]; - - if (*bestp > oldp[PIVOT_NODE]) { - step = -stepsize; - for (newp = *bestp; newp > oldp[PIVOT_NODE]; newp += step) { - if (newp < 1 || newp > 255) - continue; - newplist[PIVOT_NODE] = newp; - vp9_model_to_full_probs(newplist, newplist); - for (i = UNCONSTRAINED_NODES, new_b = 0; i < ENTROPY_NODES; ++i) - new_b += cost_branch256(ct + 2 * i, newplist[i]); - new_b += cost_branch256(ct + 2 * PIVOT_NODE, newplist[PIVOT_NODE]); - update_b = prob_diff_update_cost(newp, oldp[PIVOT_NODE]) + - vp9_cost_upd256; - savings = old_b - new_b - update_b; - if (savings > bestsavings) { - bestsavings = savings; - bestnewp = newp; - } - } - } else { - step = stepsize; - for (newp = *bestp; newp < oldp[PIVOT_NODE]; newp += step) { - if (newp < 1 || newp > 255) - continue; - newplist[PIVOT_NODE] = newp; - vp9_model_to_full_probs(newplist, newplist); - for (i = UNCONSTRAINED_NODES, new_b = 0; i < ENTROPY_NODES; ++i) - new_b += cost_branch256(ct + 2 * i, newplist[i]); - new_b += cost_branch256(ct + 2 * PIVOT_NODE, newplist[PIVOT_NODE]); - update_b = prob_diff_update_cost(newp, oldp[PIVOT_NODE]) + - vp9_cost_upd256; + bestnewp = oldp; + + assert(stepsize > 0); + + if (old_b > upd_cost + (MIN_DELP_BITS << VP9_PROB_COST_SHIFT)) { + for (newp = *bestp; (newp - oldp) * step_sign < 0; newp += step) { + if (newp < 1 || newp > 255) continue; + newplist = vp9_pareto8_full[newp - 1]; + new_b = cost_branch256(ct + 2 * PIVOT_NODE, newp); + for (i = UNCONSTRAINED_NODES; i < ENTROPY_NODES; ++i) + new_b += cost_branch256(ct + 2 * i, newplist[i - 
UNCONSTRAINED_NODES]); + update_b = prob_diff_update_cost(newp, oldp) + upd_cost; savings = old_b - new_b - update_b; if (savings > bestsavings) { bestsavings = savings; diff --git a/libvpx/vp9/encoder/vp9_subexp.h b/libvpx/vp9/encoder/vp9_subexp.h index b96823232..efe62c0e7 100644 --- a/libvpx/vp9/encoder/vp9_subexp.h +++ b/libvpx/vp9/encoder/vp9_subexp.h @@ -32,7 +32,7 @@ int vp9_prob_diff_update_savings_search(const unsigned int *ct, int vp9_prob_diff_update_savings_search_model(const unsigned int *ct, - const vpx_prob *oldp, + const vpx_prob oldp, vpx_prob *bestp, vpx_prob upd, int stepsize); diff --git a/libvpx/vp9/encoder/vp9_svc_layercontext.c b/libvpx/vp9/encoder/vp9_svc_layercontext.c index 8a6818c86..1814a32c9 100644 --- a/libvpx/vp9/encoder/vp9_svc_layercontext.c +++ b/libvpx/vp9/encoder/vp9_svc_layercontext.c @@ -16,7 +16,6 @@ #include "vp9/encoder/vp9_extend.h" #include "vpx_dsp/vpx_dsp_common.h" -#define SMALL_FRAME_FB_IDX 7 #define SMALL_FRAME_WIDTH 32 #define SMALL_FRAME_HEIGHT 16 @@ -25,12 +24,44 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { const VP9EncoderConfig *const oxcf = &cpi->oxcf; int mi_rows = cpi->common.mi_rows; int mi_cols = cpi->common.mi_cols; - int sl, tl; + int sl, tl, i; int alt_ref_idx = svc->number_spatial_layers; svc->spatial_layer_id = 0; svc->temporal_layer_id = 0; svc->first_spatial_layer_to_encode = 0; + svc->rc_drop_superframe = 0; + svc->force_zero_mode_spatial_ref = 0; + svc->use_base_mv = 0; + svc->current_superframe = 0; + for (i = 0; i < REF_FRAMES; ++i) + svc->ref_frame_index[i] = -1; + for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { + cpi->svc.ext_frame_flags[sl] = 0; + cpi->svc.ext_lst_fb_idx[sl] = 0; + cpi->svc.ext_gld_fb_idx[sl] = 1; + cpi->svc.ext_alt_fb_idx[sl] = 2; + } + + // For 1 pass cbr: allocate scaled_frame that may be used as an intermediate + // buffer for a 2 stage down-sampling: two stages of 1:2 down-sampling for a + // target of 1/4x1/4. 
+ if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR) { + if (vpx_realloc_frame_buffer(&cpi->svc.scaled_temp, + cpi->common.width >> 1, + cpi->common.height >> 1, + cpi->common.subsampling_x, + cpi->common.subsampling_y, +#if CONFIG_VP9_HIGHBITDEPTH + cpi->common.use_highbitdepth, +#endif + VP9_ENC_BORDER_IN_PIXELS, + cpi->common.byte_alignment, + NULL, NULL, NULL)) + vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, + "Failed to allocate scaled_frame for svc "); + } + if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) { if (vpx_realloc_frame_buffer(&cpi->svc.empty_frame.img, @@ -107,15 +138,20 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { tl == 0) { size_t last_coded_q_map_size; size_t consec_zero_mv_size; + VP9_COMMON *const cm = &cpi->common; lc->sb_index = 0; - lc->map = vpx_malloc(mi_rows * mi_cols * sizeof(signed char)); + CHECK_MEM_ERROR(cm, lc->map, + vpx_malloc(mi_rows * mi_cols * sizeof(*lc->map))); memset(lc->map, 0, mi_rows * mi_cols); - last_coded_q_map_size = mi_rows * mi_cols * sizeof(uint8_t); - lc->last_coded_q_map = vpx_malloc(last_coded_q_map_size); + last_coded_q_map_size = mi_rows * mi_cols * + sizeof(*lc->last_coded_q_map); + CHECK_MEM_ERROR(cm, lc->last_coded_q_map, + vpx_malloc(last_coded_q_map_size)); assert(MAXQ <= 255); memset(lc->last_coded_q_map, MAXQ, last_coded_q_map_size); - consec_zero_mv_size = mi_rows * mi_cols * sizeof(uint8_t); - lc->consec_zero_mv = vpx_malloc(consec_zero_mv_size); + consec_zero_mv_size = mi_rows * mi_cols * sizeof(*lc->consec_zero_mv); + CHECK_MEM_ERROR(cm, lc->consec_zero_mv, + vpx_malloc(consec_zero_mv_size)); memset(lc->consec_zero_mv, 0, consec_zero_mv_size); } } @@ -277,7 +313,8 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) { cpi->alt_ref_source = lc->alt_ref_source; // Reset the frames_since_key and frames_to_key counters to their values // before the layer restore. Keep these defined for the stream (not layer). 
- if (cpi->svc.number_temporal_layers > 1) { + if (cpi->svc.number_temporal_layers > 1 || + (cpi->svc.number_spatial_layers > 1 && !is_two_pass_svc(cpi))) { cpi->rc.frames_since_key = old_frame_since_key; cpi->rc.frames_to_key = old_frame_to_key; } @@ -290,12 +327,12 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) { CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; signed char *temp = cr->map; uint8_t *temp2 = cr->last_coded_q_map; - uint8_t *temp3 = cr->consec_zero_mv; + uint8_t *temp3 = cpi->consec_zero_mv; cr->map = lc->map; lc->map = temp; cr->last_coded_q_map = lc->last_coded_q_map; lc->last_coded_q_map = temp2; - cr->consec_zero_mv = lc->consec_zero_mv; + cpi->consec_zero_mv = lc->consec_zero_mv; lc->consec_zero_mv = temp3; cr->sb_index = lc->sb_index; } @@ -323,8 +360,8 @@ void vp9_save_layer_context(VP9_COMP *const cpi) { cr->map = temp; lc->last_coded_q_map = cr->last_coded_q_map; cr->last_coded_q_map = temp2; - lc->consec_zero_mv = cr->consec_zero_mv; - cr->consec_zero_mv = temp3; + lc->consec_zero_mv = cpi->consec_zero_mv; + cpi->consec_zero_mv = temp3; lc->sb_index = cr->sb_index; } } @@ -351,6 +388,8 @@ void vp9_inc_frame_in_layer(VP9_COMP *const cpi) { cpi->svc.number_temporal_layers]; ++lc->current_video_frame_in_layer; ++lc->frames_from_key_frame; + if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) + ++cpi->svc.current_superframe; } int vp9_is_upper_layer_key_frame(const VP9_COMP *const cpi) { @@ -402,7 +441,9 @@ static void set_flags_and_fb_idx_for_temporal_mode3(VP9_COMP *const cpi) { cpi->ref_frame_flags = VP9_LAST_FLAG; } else if (cpi->svc.layer_context[temporal_id].is_key_frame) { // base layer is a key frame. 
- cpi->ref_frame_flags = VP9_GOLD_FLAG; + cpi->ref_frame_flags = VP9_LAST_FLAG; + cpi->ext_refresh_last_frame = 0; + cpi->ext_refresh_golden_frame = 1; } else { cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; } @@ -417,7 +458,13 @@ static void set_flags_and_fb_idx_for_temporal_mode3(VP9_COMP *const cpi) { } else { if (frame_num_within_temporal_struct == 1) { // the first tl2 picture - if (!spatial_id) { + if (spatial_id == cpi->svc.number_spatial_layers - 1) { // top layer + cpi->ext_refresh_frame_flags_pending = 1; + if (!spatial_id) + cpi->ref_frame_flags = VP9_LAST_FLAG; + else + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } else if (!spatial_id) { cpi->ext_refresh_frame_flags_pending = 1; cpi->ext_refresh_alt_ref_frame = 1; cpi->ref_frame_flags = VP9_LAST_FLAG; @@ -425,32 +472,38 @@ static void set_flags_and_fb_idx_for_temporal_mode3(VP9_COMP *const cpi) { cpi->ext_refresh_frame_flags_pending = 1; cpi->ext_refresh_alt_ref_frame = 1; cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; - } else { // Top layer - cpi->ext_refresh_frame_flags_pending = 0; - cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; } } else { // The second tl2 picture - if (!spatial_id) { + if (spatial_id == cpi->svc.number_spatial_layers - 1) { // top layer cpi->ext_refresh_frame_flags_pending = 1; + if (!spatial_id) cpi->ref_frame_flags = VP9_LAST_FLAG; - cpi->ext_refresh_last_frame = 1; - } else if (spatial_id < cpi->svc.number_spatial_layers - 1) { + else + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } else if (!spatial_id) { cpi->ext_refresh_frame_flags_pending = 1; - cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; - cpi->ext_refresh_last_frame = 1; + cpi->ref_frame_flags = VP9_LAST_FLAG; + cpi->ext_refresh_alt_ref_frame = 1; } else { // top layer - cpi->ext_refresh_frame_flags_pending = 0; + cpi->ext_refresh_frame_flags_pending = 1; cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + cpi->ext_refresh_alt_ref_frame = 1; } } } if (temporal_id 
== 0) { cpi->lst_fb_idx = spatial_id; - if (spatial_id) + if (spatial_id) { + if (cpi->svc.layer_context[temporal_id].is_key_frame) { + cpi->lst_fb_idx = spatial_id - 1; + cpi->gld_fb_idx = spatial_id; + } else { cpi->gld_fb_idx = spatial_id - 1; - else + } + } else { cpi->gld_fb_idx = 0; + } cpi->alt_fb_idx = 0; } else if (temporal_id == 1) { cpi->lst_fb_idx = spatial_id; @@ -463,7 +516,7 @@ static void set_flags_and_fb_idx_for_temporal_mode3(VP9_COMP *const cpi) { } else { cpi->lst_fb_idx = cpi->svc.number_spatial_layers + spatial_id; cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1; - cpi->alt_fb_idx = 0; + cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id; } } @@ -485,7 +538,9 @@ static void set_flags_and_fb_idx_for_temporal_mode2(VP9_COMP *const cpi) { cpi->ref_frame_flags = VP9_LAST_FLAG; } else if (cpi->svc.layer_context[temporal_id].is_key_frame) { // base layer is a key frame. - cpi->ref_frame_flags = VP9_GOLD_FLAG; + cpi->ref_frame_flags = VP9_LAST_FLAG; + cpi->ext_refresh_last_frame = 0; + cpi->ext_refresh_golden_frame = 1; } else { cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; } @@ -501,10 +556,16 @@ static void set_flags_and_fb_idx_for_temporal_mode2(VP9_COMP *const cpi) { if (temporal_id == 0) { cpi->lst_fb_idx = spatial_id; - if (spatial_id) + if (spatial_id) { + if (cpi->svc.layer_context[temporal_id].is_key_frame) { + cpi->lst_fb_idx = spatial_id - 1; + cpi->gld_fb_idx = spatial_id; + } else { cpi->gld_fb_idx = spatial_id - 1; - else + } + } else { cpi->gld_fb_idx = 0; + } cpi->alt_fb_idx = 0; } else if (temporal_id == 1) { cpi->lst_fb_idx = spatial_id; @@ -526,20 +587,31 @@ static void set_flags_and_fb_idx_for_temporal_mode_noLayering( if (!spatial_id) { cpi->ref_frame_flags = VP9_LAST_FLAG; } else if (cpi->svc.layer_context[0].is_key_frame) { - cpi->ref_frame_flags = VP9_GOLD_FLAG; + cpi->ref_frame_flags = VP9_LAST_FLAG; + cpi->ext_refresh_last_frame = 0; + cpi->ext_refresh_golden_frame = 1; } else { 
cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; } cpi->lst_fb_idx = spatial_id; - if (spatial_id) + if (spatial_id) { + if (cpi->svc.layer_context[0].is_key_frame) { + cpi->lst_fb_idx = spatial_id - 1; + cpi->gld_fb_idx = spatial_id; + } else { cpi->gld_fb_idx = spatial_id - 1; - else + } + } else { cpi->gld_fb_idx = 0; + } } int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) { int width = 0, height = 0; LAYER_CONTEXT *lc = NULL; + if (cpi->svc.number_spatial_layers > 1) + cpi->svc.use_base_mv = 1; + cpi->svc.force_zero_mode_spatial_ref = 1; if (cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) { set_flags_and_fb_idx_for_temporal_mode3(cpi); @@ -557,6 +629,8 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) { // Note that the check (cpi->ext_refresh_frame_flags_pending == 0) is // needed to support the case where the frame flags may be passed in via // vpx_codec_encode(), which can be used for the temporal-only svc case. + // TODO(marpan): Consider adding an enc_config parameter to better handle + // this case. 
if (cpi->ext_refresh_frame_flags_pending == 0) { int sl; cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode; @@ -568,6 +642,9 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) { } } + if (cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode) + cpi->svc.rc_drop_superframe = 0; + lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id * cpi->svc.number_temporal_layers + cpi->svc.temporal_layer_id]; @@ -591,6 +668,8 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) { } #if CONFIG_SPATIAL_SVC +#define SMALL_FRAME_FB_IDX 7 + int vp9_svc_start_frame(VP9_COMP *const cpi) { int width = 0, height = 0; LAYER_CONTEXT *lc; @@ -701,7 +780,8 @@ int vp9_svc_start_frame(VP9_COMP *const cpi) { return 0; } -#endif +#undef SMALL_FRAME_FB_IDX +#endif // CONFIG_SPATIAL_SVC struct lookahead_entry *vp9_svc_lookahead_pop(VP9_COMP *const cpi, struct lookahead_ctx *ctx, @@ -736,3 +816,27 @@ void vp9_free_svc_cyclic_refresh(VP9_COMP *const cpi) { } } } + +// Reset on key frame: reset counters, references and buffer updates. 
+void vp9_svc_reset_key_frame(VP9_COMP *const cpi) { + int sl, tl; + SVC *const svc = &cpi->svc; + LAYER_CONTEXT *lc = NULL; + for (sl = 0; sl < svc->number_spatial_layers; ++sl) { + for (tl = 0; tl < svc->number_temporal_layers; ++tl) { + lc = &cpi->svc.layer_context[sl * svc->number_temporal_layers + tl]; + lc->current_video_frame_in_layer = 0; + lc->frames_from_key_frame = 0; + } + } + if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) { + set_flags_and_fb_idx_for_temporal_mode3(cpi); + } else if (svc->temporal_layering_mode == + VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) { + set_flags_and_fb_idx_for_temporal_mode_noLayering(cpi); + } else if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0101) { + set_flags_and_fb_idx_for_temporal_mode2(cpi); + } + vp9_update_temporal_layer_framerate(cpi); + vp9_restore_layer_context(cpi); +} diff --git a/libvpx/vp9/encoder/vp9_svc_layercontext.h b/libvpx/vp9/encoder/vp9_svc_layercontext.h index 694b5abdc..9f386fb08 100644 --- a/libvpx/vp9/encoder/vp9_svc_layercontext.h +++ b/libvpx/vp9/encoder/vp9_svc_layercontext.h @@ -56,6 +56,7 @@ typedef struct { int spatial_layer_to_encode; int first_spatial_layer_to_encode; + int rc_drop_superframe; // Workaround for multiple frame contexts enum { @@ -69,6 +70,8 @@ typedef struct { // Store scaled source frames to be used for temporal filter to generate // a alt ref frame. YV12_BUFFER_CONFIG scaled_frames[MAX_LAG_BUFFERS]; + // Temp buffer used for 2-stage down-sampling, for real-time mode. + YV12_BUFFER_CONFIG scaled_temp; // Layer context used for rate control in one pass temporal CBR mode or // two pass spatial mode. 
@@ -82,6 +85,10 @@ typedef struct { int ext_lst_fb_idx[VPX_MAX_LAYERS]; int ext_gld_fb_idx[VPX_MAX_LAYERS]; int ext_alt_fb_idx[VPX_MAX_LAYERS]; + int ref_frame_index[REF_FRAMES]; + int force_zero_mode_spatial_ref; + int current_superframe; + int use_base_mv; } SVC; struct VP9_COMP; @@ -129,6 +136,8 @@ int vp9_one_pass_cbr_svc_start_layer(struct VP9_COMP *const cpi); void vp9_free_svc_cyclic_refresh(struct VP9_COMP *const cpi); +void vp9_svc_reset_key_frame(struct VP9_COMP *const cpi); + #ifdef __cplusplus } // extern "C" #endif diff --git a/libvpx/vp9/encoder/vp9_temporal_filter.c b/libvpx/vp9/encoder/vp9_temporal_filter.c index 16f9c8573..b6323e048 100644 --- a/libvpx/vp9/encoder/vp9_temporal_filter.c +++ b/libvpx/vp9/encoder/vp9_temporal_filter.c @@ -45,8 +45,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, int x, int y) { const int which_mv = 0; const MV mv = { mv_row, mv_col }; - const InterpKernel *const kernel = - vp9_filter_kernels[xd->mi[0]->mbmi.interp_filter]; + const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP_SHARP]; enum mv_precision mv_precision_uv; int uv_stride; @@ -86,6 +85,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, return; } #endif // CONFIG_VP9_HIGHBITDEPTH + (void)xd; vp9_build_inter_predictor(y_mb_ptr, stride, &pred[0], 16, &mv, @@ -135,15 +135,38 @@ void vp9_temporal_filter_apply_c(uint8_t *frame1, for (i = 0, k = 0; i < block_height; i++) { for (j = 0; j < block_width; j++, k++) { - int src_byte = frame1[byte]; - int pixel_value = *frame2++; - - modifier = src_byte - pixel_value; - // This is an integer approximation of: - // float coeff = (3.0 * modifer * modifier) / pow(2, strength); - // modifier = (int)roundf(coeff > 16 ? 
0 : 16-coeff); - modifier *= modifier; - modifier *= 3; + int pixel_value = *frame2; + + // non-local mean approach + int diff_sse[9] = { 0 }; + int idx, idy, index = 0; + + for (idy = -1; idy <= 1; ++idy) { + for (idx = -1; idx <= 1; ++idx) { + int row = (int)i + idy; + int col = (int)j + idx; + + if (row >= 0 && row < (int)block_height && + col >= 0 && col < (int)block_width) { + int diff = frame1[byte + idy * (int)stride + idx] - + frame2[idy * (int)block_width + idx]; + diff_sse[index] = diff * diff; + ++index; + } + } + } + + assert(index > 0); + + modifier = 0; + for (idx = 0; idx < 9; ++idx) + modifier += diff_sse[idx]; + + modifier *= 3; + modifier /= index; + + ++frame2; + modifier += rounding; modifier >>= strength; @@ -182,15 +205,34 @@ void vp9_highbd_temporal_filter_apply_c(uint8_t *frame1_8, for (i = 0, k = 0; i < block_height; i++) { for (j = 0; j < block_width; j++, k++) { - int src_byte = frame1[byte]; - int pixel_value = *frame2++; - - modifier = src_byte - pixel_value; - // This is an integer approximation of: - // float coeff = (3.0 * modifer * modifier) / pow(2, strength); - // modifier = (int)roundf(coeff > 16 ? 
0 : 16-coeff); - modifier *= modifier; + int pixel_value = *frame2; + int diff_sse[9] = { 0 }; + int idx, idy, index = 0; + + for (idy = -1; idy <= 1; ++idy) { + for (idx = -1; idx <= 1; ++idx) { + int row = (int)i + idy; + int col = (int)j + idx; + + if (row >= 0 && row < (int)block_height && + col >= 0 && col < (int)block_width) { + int diff = frame1[byte + idy * (int)stride + idx] - + frame2[idy * (int)block_width + idx]; + diff_sse[index] = diff * diff; + ++index; + } + } + } + assert(index > 0); + + modifier = 0; + for (idx = 0; idx < 9; ++idx) + modifier += diff_sse[idx]; + modifier *= 3; + modifier /= index; + + ++frame2; modifier += rounding; modifier >>= strength; @@ -222,8 +264,8 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, int step_param; int sadpb = x->sadperbit16; int bestsme = INT_MAX; - int distortion; - unsigned int sse; + uint32_t distortion; + uint32_t sse; int cost_list[5]; MV best_ref_mv1 = {0, 0}; @@ -383,55 +425,57 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { int adj_strength = strength + 2 * (mbd->bd - 8); // Apply the filter (YUV) - vp9_highbd_temporal_filter_apply(f->y_buffer + mb_y_offset, - f->y_stride, - predictor, 16, 16, adj_strength, - filter_weight, - accumulator, count); - vp9_highbd_temporal_filter_apply(f->u_buffer + mb_uv_offset, - f->uv_stride, predictor + 256, - mb_uv_width, mb_uv_height, - adj_strength, - filter_weight, accumulator + 256, - count + 256); - vp9_highbd_temporal_filter_apply(f->v_buffer + mb_uv_offset, - f->uv_stride, predictor + 512, - mb_uv_width, mb_uv_height, - adj_strength, filter_weight, - accumulator + 512, count + 512); + vp9_highbd_temporal_filter_apply_c(f->y_buffer + mb_y_offset, + f->y_stride, + predictor, 16, 16, adj_strength, + filter_weight, + accumulator, count); + vp9_highbd_temporal_filter_apply_c(f->u_buffer + mb_uv_offset, + f->uv_stride, predictor + 256, + mb_uv_width, mb_uv_height, + adj_strength, 
filter_weight, + accumulator + 256, count + 256); + vp9_highbd_temporal_filter_apply_c(f->v_buffer + mb_uv_offset, + f->uv_stride, predictor + 512, + mb_uv_width, mb_uv_height, + adj_strength, filter_weight, + accumulator + 512, count + 512); } else { // Apply the filter (YUV) - vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride, + vp9_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride, + predictor, 16, 16, + strength, filter_weight, + accumulator, count); + vp9_temporal_filter_apply_c(f->u_buffer + mb_uv_offset, + f->uv_stride, + predictor + 256, + mb_uv_width, mb_uv_height, strength, + filter_weight, accumulator + 256, + count + 256); + vp9_temporal_filter_apply_c(f->v_buffer + mb_uv_offset, + f->uv_stride, + predictor + 512, + mb_uv_width, mb_uv_height, strength, + filter_weight, accumulator + 512, + count + 512); + } +#else + // Apply the filter (YUV) + // TODO(jingning): Need SIMD optimization for this. + vp9_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride, predictor, 16, 16, strength, filter_weight, accumulator, count); - vp9_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride, + vp9_temporal_filter_apply_c(f->u_buffer + mb_uv_offset, f->uv_stride, predictor + 256, mb_uv_width, mb_uv_height, strength, filter_weight, accumulator + 256, count + 256); - vp9_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride, + vp9_temporal_filter_apply_c(f->v_buffer + mb_uv_offset, f->uv_stride, predictor + 512, mb_uv_width, mb_uv_height, strength, filter_weight, accumulator + 512, count + 512); - } -#else - // Apply the filter (YUV) - vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride, - predictor, 16, 16, - strength, filter_weight, - accumulator, count); - vp9_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride, - predictor + 256, - mb_uv_width, mb_uv_height, strength, - filter_weight, accumulator + 256, - count + 256); - vp9_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride, 
- predictor + 512, - mb_uv_width, mb_uv_height, strength, - filter_weight, accumulator + 512, - count + 512); #endif // CONFIG_VP9_HIGHBITDEPTH } } diff --git a/libvpx/vp9/encoder/vp9_tokenize.c b/libvpx/vp9/encoder/vp9_tokenize.c index 6076e2a61..edec755dd 100644 --- a/libvpx/vp9/encoder/vp9_tokenize.c +++ b/libvpx/vp9/encoder/vp9_tokenize.c @@ -18,7 +18,6 @@ #include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_scan.h" -#include "vp9/common/vp9_seg_common.h" #include "vp9/encoder/vp9_cost.h" #include "vp9/encoder/vp9_encoder.h" @@ -50,6 +49,35 @@ static const TOKENVALUE dct_cat_lt_10_value_tokens[] = { const TOKENVALUE *vp9_dct_cat_lt_10_value_tokens = dct_cat_lt_10_value_tokens + (sizeof(dct_cat_lt_10_value_tokens) / sizeof(*dct_cat_lt_10_value_tokens)) / 2; +// The corresponding costs of the extrabits for the tokens in the above table +// are stored in the table below. The values are obtained from looking up the +// entry for the specified extrabits in the table corresponding to the token +// (as defined in cost element vp9_extra_bits) +// e.g. 
{9, 63} maps to cat5_cost[63 >> 1], {1, 1} maps to sign_cost[1 >> 1] +static const int dct_cat_lt_10_value_cost[] = { + 3773, 3750, 3704, 3681, 3623, 3600, 3554, 3531, + 3432, 3409, 3363, 3340, 3282, 3259, 3213, 3190, + 3136, 3113, 3067, 3044, 2986, 2963, 2917, 2894, + 2795, 2772, 2726, 2703, 2645, 2622, 2576, 2553, + 3197, 3116, 3058, 2977, 2881, 2800, + 2742, 2661, 2615, 2534, 2476, 2395, + 2299, 2218, 2160, 2079, + 2566, 2427, 2334, 2195, 2023, 1884, 1791, 1652, + 1893, 1696, 1453, 1256, 1229, 864, + 512, 512, 512, 512, 0, + 512, 512, 512, 512, + 864, 1229, 1256, 1453, 1696, 1893, + 1652, 1791, 1884, 2023, 2195, 2334, 2427, 2566, + 2079, 2160, 2218, 2299, 2395, 2476, 2534, 2615, + 2661, 2742, 2800, 2881, 2977, 3058, 3116, 3197, + 2553, 2576, 2622, 2645, 2703, 2726, 2772, 2795, + 2894, 2917, 2963, 2986, 3044, 3067, 3113, 3136, + 3190, 3213, 3259, 3282, 3340, 3363, 3409, 3432, + 3531, 3554, 3600, 3623, 3681, 3704, 3750, 3773, +}; +const int *vp9_dct_cat_lt_10_value_cost = dct_cat_lt_10_value_cost + + (sizeof(dct_cat_lt_10_value_cost) / sizeof(*dct_cat_lt_10_value_cost)) + / 2; // Array indices are identical to previously-existing CONTEXT_NODE indices const vpx_tree_index vp9_coef_tree[TREE_SIZE(ENTROPY_TOKENS)] = { @@ -67,303 +95,178 @@ const vpx_tree_index vp9_coef_tree[TREE_SIZE(ENTROPY_TOKENS)] = { }; static const int16_t zero_cost[] = {0}; -static const int16_t one_cost[] = {255, 257}; -static const int16_t two_cost[] = {255, 257}; -static const int16_t three_cost[] = {255, 257}; -static const int16_t four_cost[] = {255, 257}; -static const int16_t cat1_cost[] = {429, 431, 616, 618}; -static const int16_t cat2_cost[] = {624, 626, 727, 729, 848, 850, 951, 953}; -static const int16_t cat3_cost[] = { - 820, 822, 893, 895, 940, 942, 1013, 1015, 1096, 1098, 1169, 1171, 1216, 1218, - 1289, 1291 -}; -static const int16_t cat4_cost[] = { - 1032, 1034, 1075, 1077, 1105, 1107, 1148, 1150, 1194, 1196, 1237, 1239, - 1267, 1269, 1310, 1312, 1328, 1330, 1371, 1373, 1401, 
1403, 1444, 1446, - 1490, 1492, 1533, 1535, 1563, 1565, 1606, 1608 -}; -static const int16_t cat5_cost[] = { - 1269, 1271, 1283, 1285, 1306, 1308, 1320, - 1322, 1347, 1349, 1361, 1363, 1384, 1386, 1398, 1400, 1443, 1445, 1457, - 1459, 1480, 1482, 1494, 1496, 1521, 1523, 1535, 1537, 1558, 1560, 1572, - 1574, 1592, 1594, 1606, 1608, 1629, 1631, 1643, 1645, 1670, 1672, 1684, - 1686, 1707, 1709, 1721, 1723, 1766, 1768, 1780, 1782, 1803, 1805, 1817, - 1819, 1844, 1846, 1858, 1860, 1881, 1883, 1895, 1897 -}; +static const int16_t sign_cost[1] = {512}; +static const int16_t cat1_cost[1 << 1] = {864, 1229}; +static const int16_t cat2_cost[1 << 2] = {1256, 1453, 1696, 1893}; +static const int16_t cat3_cost[1 << 3] = {1652, 1791, 1884, 2023, + 2195, 2334, 2427, 2566}; +static const int16_t cat4_cost[1 << 4] = {2079, 2160, 2218, 2299, 2395, 2476, + 2534, 2615, 2661, 2742, 2800, 2881, + 2977, 3058, 3116, 3197}; +static const int16_t cat5_cost[1 << 5] = { + 2553, 2576, 2622, 2645, 2703, 2726, 2772, 2795, 2894, 2917, 2963, + 2986, 3044, 3067, 3113, 3136, 3190, 3213, 3259, 3282, 3340, 3363, + 3409, 3432, 3531, 3554, 3600, 3623, 3681, 3704, 3750, 3773}; const int16_t vp9_cat6_low_cost[256] = { - 1638, 1640, 1646, 1648, 1652, 1654, 1660, 1662, - 1670, 1672, 1678, 1680, 1684, 1686, 1692, 1694, 1711, 1713, 1719, 1721, - 1725, 1727, 1733, 1735, 1743, 1745, 1751, 1753, 1757, 1759, 1765, 1767, - 1787, 1789, 1795, 1797, 1801, 1803, 1809, 1811, 1819, 1821, 1827, 1829, - 1833, 1835, 1841, 1843, 1860, 1862, 1868, 1870, 1874, 1876, 1882, 1884, - 1892, 1894, 1900, 1902, 1906, 1908, 1914, 1916, 1940, 1942, 1948, 1950, - 1954, 1956, 1962, 1964, 1972, 1974, 1980, 1982, 1986, 1988, 1994, 1996, - 2013, 2015, 2021, 2023, 2027, 2029, 2035, 2037, 2045, 2047, 2053, 2055, - 2059, 2061, 2067, 2069, 2089, 2091, 2097, 2099, 2103, 2105, 2111, 2113, - 2121, 2123, 2129, 2131, 2135, 2137, 2143, 2145, 2162, 2164, 2170, 2172, - 2176, 2178, 2184, 2186, 2194, 2196, 2202, 2204, 2208, 2210, 2216, 2218, - 2082, 
2084, 2090, 2092, 2096, 2098, 2104, 2106, 2114, 2116, 2122, 2124, - 2128, 2130, 2136, 2138, 2155, 2157, 2163, 2165, 2169, 2171, 2177, 2179, - 2187, 2189, 2195, 2197, 2201, 2203, 2209, 2211, 2231, 2233, 2239, 2241, - 2245, 2247, 2253, 2255, 2263, 2265, 2271, 2273, 2277, 2279, 2285, 2287, - 2304, 2306, 2312, 2314, 2318, 2320, 2326, 2328, 2336, 2338, 2344, 2346, - 2350, 2352, 2358, 2360, 2384, 2386, 2392, 2394, 2398, 2400, 2406, 2408, - 2416, 2418, 2424, 2426, 2430, 2432, 2438, 2440, 2457, 2459, 2465, 2467, - 2471, 2473, 2479, 2481, 2489, 2491, 2497, 2499, 2503, 2505, 2511, 2513, - 2533, 2535, 2541, 2543, 2547, 2549, 2555, 2557, 2565, 2567, 2573, 2575, - 2579, 2581, 2587, 2589, 2606, 2608, 2614, 2616, 2620, 2622, 2628, 2630, - 2638, 2640, 2646, 2648, 2652, 2654, 2660, 2662 -}; -const int16_t vp9_cat6_high_cost[128] = { - 72, 892, 1183, 2003, 1448, 2268, 2559, 3379, - 1709, 2529, 2820, 3640, 3085, 3905, 4196, 5016, 2118, 2938, 3229, 4049, - 3494, 4314, 4605, 5425, 3755, 4575, 4866, 5686, 5131, 5951, 6242, 7062, - 2118, 2938, 3229, 4049, 3494, 4314, 4605, 5425, 3755, 4575, 4866, 5686, - 5131, 5951, 6242, 7062, 4164, 4984, 5275, 6095, 5540, 6360, 6651, 7471, - 5801, 6621, 6912, 7732, 7177, 7997, 8288, 9108, 2118, 2938, 3229, 4049, - 3494, 4314, 4605, 5425, 3755, 4575, 4866, 5686, 5131, 5951, 6242, 7062, - 4164, 4984, 5275, 6095, 5540, 6360, 6651, 7471, 5801, 6621, 6912, 7732, - 7177, 7997, 8288, 9108, 4164, 4984, 5275, 6095, 5540, 6360, 6651, 7471, - 5801, 6621, 6912, 7732, 7177, 7997, 8288, 9108, 6210, 7030, 7321, 8141, - 7586, 8406, 8697, 9517, 7847, 8667, 8958, 9778, 9223, 10043, 10334, 11154 -}; + 3378, 3390, 3401, 3413, 3435, 3447, 3458, 3470, 3517, 3529, 3540, 3552, + 3574, 3586, 3597, 3609, 3671, 3683, 3694, 3706, 3728, 3740, 3751, 3763, + 3810, 3822, 3833, 3845, 3867, 3879, 3890, 3902, 3973, 3985, 3996, 4008, + 4030, 4042, 4053, 4065, 4112, 4124, 4135, 4147, 4169, 4181, 4192, 4204, + 4266, 4278, 4289, 4301, 4323, 4335, 4346, 4358, 4405, 4417, 4428, 4440, + 4462, 
4474, 4485, 4497, 4253, 4265, 4276, 4288, 4310, 4322, 4333, 4345, + 4392, 4404, 4415, 4427, 4449, 4461, 4472, 4484, 4546, 4558, 4569, 4581, + 4603, 4615, 4626, 4638, 4685, 4697, 4708, 4720, 4742, 4754, 4765, 4777, + 4848, 4860, 4871, 4883, 4905, 4917, 4928, 4940, 4987, 4999, 5010, 5022, + 5044, 5056, 5067, 5079, 5141, 5153, 5164, 5176, 5198, 5210, 5221, 5233, + 5280, 5292, 5303, 5315, 5337, 5349, 5360, 5372, 4988, 5000, 5011, 5023, + 5045, 5057, 5068, 5080, 5127, 5139, 5150, 5162, 5184, 5196, 5207, 5219, + 5281, 5293, 5304, 5316, 5338, 5350, 5361, 5373, 5420, 5432, 5443, 5455, + 5477, 5489, 5500, 5512, 5583, 5595, 5606, 5618, 5640, 5652, 5663, 5675, + 5722, 5734, 5745, 5757, 5779, 5791, 5802, 5814, 5876, 5888, 5899, 5911, + 5933, 5945, 5956, 5968, 6015, 6027, 6038, 6050, 6072, 6084, 6095, 6107, + 5863, 5875, 5886, 5898, 5920, 5932, 5943, 5955, 6002, 6014, 6025, 6037, + 6059, 6071, 6082, 6094, 6156, 6168, 6179, 6191, 6213, 6225, 6236, 6248, + 6295, 6307, 6318, 6330, 6352, 6364, 6375, 6387, 6458, 6470, 6481, 6493, + 6515, 6527, 6538, 6550, 6597, 6609, 6620, 6632, 6654, 6666, 6677, 6689, + 6751, 6763, 6774, 6786, 6808, 6820, 6831, 6843, 6890, 6902, 6913, 6925, + 6947, 6959, 6970, 6982}; +const int vp9_cat6_high_cost[64] = { + 88, 2251, 2727, 4890, 3148, 5311, 5787, 7950, 3666, 5829, 6305, + 8468, 6726, 8889, 9365, 11528, 3666, 5829, 6305, 8468, 6726, 8889, + 9365, 11528, 7244, 9407, 9883, 12046, 10304, 12467, 12943, 15106, 3666, + 5829, 6305, 8468, 6726, 8889, 9365, 11528, 7244, 9407, 9883, 12046, + 10304, 12467, 12943, 15106, 7244, 9407, 9883, 12046, 10304, 12467, 12943, + 15106, 10822, 12985, 13461, 15624, 13882, 16045, 16521, 18684}; #if CONFIG_VP9_HIGHBITDEPTH -const int16_t vp9_cat6_high10_high_cost[512] = { - 74, 894, 1185, 2005, 1450, 2270, 2561, - 3381, 1711, 2531, 2822, 3642, 3087, 3907, 4198, 5018, 2120, 2940, 3231, - 4051, 3496, 4316, 4607, 5427, 3757, 4577, 4868, 5688, 5133, 5953, 6244, - 7064, 2120, 2940, 3231, 4051, 3496, 4316, 4607, 5427, 3757, 4577, 
4868, - 5688, 5133, 5953, 6244, 7064, 4166, 4986, 5277, 6097, 5542, 6362, 6653, - 7473, 5803, 6623, 6914, 7734, 7179, 7999, 8290, 9110, 2120, 2940, 3231, - 4051, 3496, 4316, 4607, 5427, 3757, 4577, 4868, 5688, 5133, 5953, 6244, - 7064, 4166, 4986, 5277, 6097, 5542, 6362, 6653, 7473, 5803, 6623, 6914, - 7734, 7179, 7999, 8290, 9110, 4166, 4986, 5277, 6097, 5542, 6362, 6653, - 7473, 5803, 6623, 6914, 7734, 7179, 7999, 8290, 9110, 6212, 7032, 7323, - 8143, 7588, 8408, 8699, 9519, 7849, 8669, 8960, 9780, 9225, 10045, 10336, - 11156, 2120, 2940, 3231, 4051, 3496, 4316, 4607, 5427, 3757, 4577, 4868, - 5688, 5133, 5953, 6244, 7064, 4166, 4986, 5277, 6097, 5542, 6362, 6653, - 7473, 5803, 6623, 6914, 7734, 7179, 7999, 8290, 9110, 4166, 4986, 5277, - 6097, 5542, 6362, 6653, 7473, 5803, 6623, 6914, 7734, 7179, 7999, 8290, - 9110, 6212, 7032, 7323, 8143, 7588, 8408, 8699, 9519, 7849, 8669, 8960, - 9780, 9225, 10045, 10336, 11156, 4166, 4986, 5277, 6097, 5542, 6362, 6653, - 7473, 5803, 6623, 6914, 7734, 7179, 7999, 8290, 9110, 6212, 7032, 7323, - 8143, 7588, 8408, 8699, 9519, 7849, 8669, 8960, 9780, 9225, 10045, 10336, - 11156, 6212, 7032, 7323, 8143, 7588, 8408, 8699, 9519, 7849, 8669, 8960, - 9780, 9225, 10045, 10336, 11156, 8258, 9078, 9369, 10189, 9634, 10454, - 10745, 11565, 9895, 10715, 11006, 11826, 11271, 12091, 12382, 13202, 2120, - 2940, 3231, 4051, 3496, 4316, 4607, 5427, 3757, 4577, 4868, 5688, 5133, - 5953, 6244, 7064, 4166, 4986, 5277, 6097, 5542, 6362, 6653, 7473, 5803, - 6623, 6914, 7734, 7179, 7999, 8290, 9110, 4166, 4986, 5277, 6097, 5542, - 6362, 6653, 7473, 5803, 6623, 6914, 7734, 7179, 7999, 8290, 9110, 6212, - 7032, 7323, 8143, 7588, 8408, 8699, 9519, 7849, 8669, 8960, 9780, 9225, - 10045, 10336, 11156, 4166, 4986, 5277, 6097, 5542, 6362, 6653, 7473, 5803, - 6623, 6914, 7734, 7179, 7999, 8290, 9110, 6212, 7032, 7323, 8143, 7588, - 8408, 8699, 9519, 7849, 8669, 8960, 9780, 9225, 10045, 10336, 11156, 6212, - 7032, 7323, 8143, 7588, 8408, 8699, 9519, 7849, 
8669, 8960, 9780, 9225, - 10045, 10336, 11156, 8258, 9078, 9369, 10189, 9634, 10454, 10745, 11565, - 9895, 10715, 11006, 11826, 11271, 12091, 12382, 13202, 4166, 4986, 5277, - 6097, 5542, 6362, 6653, 7473, 5803, 6623, 6914, 7734, 7179, 7999, 8290, - 9110, 6212, 7032, 7323, 8143, 7588, 8408, 8699, 9519, 7849, 8669, 8960, - 9780, 9225, 10045, 10336, 11156, 6212, 7032, 7323, 8143, 7588, 8408, 8699, - 9519, 7849, 8669, 8960, 9780, 9225, 10045, 10336, 11156, 8258, 9078, 9369, - 10189, 9634, 10454, 10745, 11565, 9895, 10715, 11006, 11826, 11271, 12091, - 12382, 13202, 6212, 7032, 7323, 8143, 7588, 8408, 8699, 9519, 7849, 8669, - 8960, 9780, 9225, 10045, 10336, 11156, 8258, 9078, 9369, 10189, 9634, 10454, - 10745, 11565, 9895, 10715, 11006, 11826, 11271, 12091, 12382, 13202, 8258, - 9078, 9369, 10189, 9634, 10454, 10745, 11565, 9895, 10715, 11006, 11826, - 11271, 12091, 12382, 13202, 10304, 11124, 11415, 12235, 11680, 12500, 12791, - 13611, 11941, 12761, 13052, 13872, 13317, 14137, 14428, 15248, -}; -const int16_t vp9_cat6_high12_high_cost[2048] = { - 76, 896, 1187, 2007, 1452, 2272, 2563, - 3383, 1713, 2533, 2824, 3644, 3089, 3909, 4200, 5020, 2122, 2942, 3233, - 4053, 3498, 4318, 4609, 5429, 3759, 4579, 4870, 5690, 5135, 5955, 6246, - 7066, 2122, 2942, 3233, 4053, 3498, 4318, 4609, 5429, 3759, 4579, 4870, - 5690, 5135, 5955, 6246, 7066, 4168, 4988, 5279, 6099, 5544, 6364, 6655, - 7475, 5805, 6625, 6916, 7736, 7181, 8001, 8292, 9112, 2122, 2942, 3233, - 4053, 3498, 4318, 4609, 5429, 3759, 4579, 4870, 5690, 5135, 5955, 6246, - 7066, 4168, 4988, 5279, 6099, 5544, 6364, 6655, 7475, 5805, 6625, 6916, - 7736, 7181, 8001, 8292, 9112, 4168, 4988, 5279, 6099, 5544, 6364, 6655, - 7475, 5805, 6625, 6916, 7736, 7181, 8001, 8292, 9112, 6214, 7034, 7325, - 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338, - 11158, 2122, 2942, 3233, 4053, 3498, 4318, 4609, 5429, 3759, 4579, 4870, - 5690, 5135, 5955, 6246, 7066, 4168, 4988, 5279, 6099, 5544, 6364, 6655, - 7475, 
5805, 6625, 6916, 7736, 7181, 8001, 8292, 9112, 4168, 4988, 5279, - 6099, 5544, 6364, 6655, 7475, 5805, 6625, 6916, 7736, 7181, 8001, 8292, - 9112, 6214, 7034, 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, - 9782, 9227, 10047, 10338, 11158, 4168, 4988, 5279, 6099, 5544, 6364, 6655, - 7475, 5805, 6625, 6916, 7736, 7181, 8001, 8292, 9112, 6214, 7034, 7325, - 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338, - 11158, 6214, 7034, 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, - 9782, 9227, 10047, 10338, 11158, 8260, 9080, 9371, 10191, 9636, 10456, - 10747, 11567, 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204, 2122, - 2942, 3233, 4053, 3498, 4318, 4609, 5429, 3759, 4579, 4870, 5690, 5135, - 5955, 6246, 7066, 4168, 4988, 5279, 6099, 5544, 6364, 6655, 7475, 5805, - 6625, 6916, 7736, 7181, 8001, 8292, 9112, 4168, 4988, 5279, 6099, 5544, - 6364, 6655, 7475, 5805, 6625, 6916, 7736, 7181, 8001, 8292, 9112, 6214, - 7034, 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, - 10047, 10338, 11158, 4168, 4988, 5279, 6099, 5544, 6364, 6655, 7475, 5805, - 6625, 6916, 7736, 7181, 8001, 8292, 9112, 6214, 7034, 7325, 8145, 7590, - 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338, 11158, 6214, - 7034, 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, - 10047, 10338, 11158, 8260, 9080, 9371, 10191, 9636, 10456, 10747, 11567, - 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204, 4168, 4988, 5279, - 6099, 5544, 6364, 6655, 7475, 5805, 6625, 6916, 7736, 7181, 8001, 8292, - 9112, 6214, 7034, 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, - 9782, 9227, 10047, 10338, 11158, 6214, 7034, 7325, 8145, 7590, 8410, 8701, - 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338, 11158, 8260, 9080, 9371, - 10191, 9636, 10456, 10747, 11567, 9897, 10717, 11008, 11828, 11273, 12093, - 12384, 13204, 6214, 7034, 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, - 8962, 9782, 9227, 10047, 10338, 11158, 8260, 9080, 
9371, 10191, 9636, 10456, - 10747, 11567, 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204, 8260, - 9080, 9371, 10191, 9636, 10456, 10747, 11567, 9897, 10717, 11008, 11828, - 11273, 12093, 12384, 13204, 10306, 11126, 11417, 12237, 11682, 12502, 12793, - 13613, 11943, 12763, 13054, 13874, 13319, 14139, 14430, 15250, 2122, 2942, - 3233, 4053, 3498, 4318, 4609, 5429, 3759, 4579, 4870, 5690, 5135, 5955, - 6246, 7066, 4168, 4988, 5279, 6099, 5544, 6364, 6655, 7475, 5805, 6625, - 6916, 7736, 7181, 8001, 8292, 9112, 4168, 4988, 5279, 6099, 5544, 6364, - 6655, 7475, 5805, 6625, 6916, 7736, 7181, 8001, 8292, 9112, 6214, 7034, - 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047, - 10338, 11158, 4168, 4988, 5279, 6099, 5544, 6364, 6655, 7475, 5805, 6625, - 6916, 7736, 7181, 8001, 8292, 9112, 6214, 7034, 7325, 8145, 7590, 8410, - 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338, 11158, 6214, 7034, - 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047, - 10338, 11158, 8260, 9080, 9371, 10191, 9636, 10456, 10747, 11567, 9897, - 10717, 11008, 11828, 11273, 12093, 12384, 13204, 4168, 4988, 5279, 6099, - 5544, 6364, 6655, 7475, 5805, 6625, 6916, 7736, 7181, 8001, 8292, 9112, - 6214, 7034, 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782, - 9227, 10047, 10338, 11158, 6214, 7034, 7325, 8145, 7590, 8410, 8701, 9521, - 7851, 8671, 8962, 9782, 9227, 10047, 10338, 11158, 8260, 9080, 9371, 10191, - 9636, 10456, 10747, 11567, 9897, 10717, 11008, 11828, 11273, 12093, 12384, - 13204, 6214, 7034, 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, - 9782, 9227, 10047, 10338, 11158, 8260, 9080, 9371, 10191, 9636, 10456, - 10747, 11567, 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204, 8260, - 9080, 9371, 10191, 9636, 10456, 10747, 11567, 9897, 10717, 11008, 11828, - 11273, 12093, 12384, 13204, 10306, 11126, 11417, 12237, 11682, 12502, 12793, - 13613, 11943, 12763, 13054, 13874, 13319, 14139, 14430, 15250, 4168, 4988, - 5279, 
6099, 5544, 6364, 6655, 7475, 5805, 6625, 6916, 7736, 7181, 8001, - 8292, 9112, 6214, 7034, 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, - 8962, 9782, 9227, 10047, 10338, 11158, 6214, 7034, 7325, 8145, 7590, 8410, - 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338, 11158, 8260, 9080, - 9371, 10191, 9636, 10456, 10747, 11567, 9897, 10717, 11008, 11828, 11273, - 12093, 12384, 13204, 6214, 7034, 7325, 8145, 7590, 8410, 8701, 9521, 7851, - 8671, 8962, 9782, 9227, 10047, 10338, 11158, 8260, 9080, 9371, 10191, 9636, - 10456, 10747, 11567, 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204, - 8260, 9080, 9371, 10191, 9636, 10456, 10747, 11567, 9897, 10717, 11008, - 11828, 11273, 12093, 12384, 13204, 10306, 11126, 11417, 12237, 11682, 12502, - 12793, 13613, 11943, 12763, 13054, 13874, 13319, 14139, 14430, 15250, 6214, - 7034, 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, - 10047, 10338, 11158, 8260, 9080, 9371, 10191, 9636, 10456, 10747, 11567, - 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204, 8260, 9080, 9371, - 10191, 9636, 10456, 10747, 11567, 9897, 10717, 11008, 11828, 11273, 12093, - 12384, 13204, 10306, 11126, 11417, 12237, 11682, 12502, 12793, 13613, 11943, - 12763, 13054, 13874, 13319, 14139, 14430, 15250, 8260, 9080, 9371, 10191, - 9636, 10456, 10747, 11567, 9897, 10717, 11008, 11828, 11273, 12093, 12384, - 13204, 10306, 11126, 11417, 12237, 11682, 12502, 12793, 13613, 11943, 12763, - 13054, 13874, 13319, 14139, 14430, 15250, 10306, 11126, 11417, 12237, 11682, - 12502, 12793, 13613, 11943, 12763, 13054, 13874, 13319, 14139, 14430, 15250, - 12352, 13172, 13463, 14283, 13728, 14548, 14839, 15659, 13989, 14809, 15100, - 15920, 15365, 16185, 16476, 17296, 2122, 2942, 3233, 4053, 3498, 4318, 4609, - 5429, 3759, 4579, 4870, 5690, 5135, 5955, 6246, 7066, 4168, 4988, 5279, - 6099, 5544, 6364, 6655, 7475, 5805, 6625, 6916, 7736, 7181, 8001, 8292, - 9112, 4168, 4988, 5279, 6099, 5544, 6364, 6655, 7475, 5805, 6625, 6916, - 7736, 7181, 
8001, 8292, 9112, 6214, 7034, 7325, 8145, 7590, 8410, 8701, - 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338, 11158, 4168, 4988, 5279, - 6099, 5544, 6364, 6655, 7475, 5805, 6625, 6916, 7736, 7181, 8001, 8292, - 9112, 6214, 7034, 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, - 9782, 9227, 10047, 10338, 11158, 6214, 7034, 7325, 8145, 7590, 8410, 8701, - 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338, 11158, 8260, 9080, 9371, - 10191, 9636, 10456, 10747, 11567, 9897, 10717, 11008, 11828, 11273, 12093, - 12384, 13204, 4168, 4988, 5279, 6099, 5544, 6364, 6655, 7475, 5805, 6625, - 6916, 7736, 7181, 8001, 8292, 9112, 6214, 7034, 7325, 8145, 7590, 8410, - 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338, 11158, 6214, 7034, - 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047, - 10338, 11158, 8260, 9080, 9371, 10191, 9636, 10456, 10747, 11567, 9897, - 10717, 11008, 11828, 11273, 12093, 12384, 13204, 6214, 7034, 7325, 8145, - 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338, 11158, - 8260, 9080, 9371, 10191, 9636, 10456, 10747, 11567, 9897, 10717, 11008, - 11828, 11273, 12093, 12384, 13204, 8260, 9080, 9371, 10191, 9636, 10456, - 10747, 11567, 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204, 10306, - 11126, 11417, 12237, 11682, 12502, 12793, 13613, 11943, 12763, 13054, 13874, - 13319, 14139, 14430, 15250, 4168, 4988, 5279, 6099, 5544, 6364, 6655, 7475, - 5805, 6625, 6916, 7736, 7181, 8001, 8292, 9112, 6214, 7034, 7325, 8145, - 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338, 11158, - 6214, 7034, 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782, - 9227, 10047, 10338, 11158, 8260, 9080, 9371, 10191, 9636, 10456, 10747, - 11567, 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204, 6214, 7034, - 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047, - 10338, 11158, 8260, 9080, 9371, 10191, 9636, 10456, 10747, 11567, 9897, - 10717, 11008, 11828, 11273, 12093, 12384, 
13204, 8260, 9080, 9371, 10191, - 9636, 10456, 10747, 11567, 9897, 10717, 11008, 11828, 11273, 12093, 12384, - 13204, 10306, 11126, 11417, 12237, 11682, 12502, 12793, 13613, 11943, 12763, - 13054, 13874, 13319, 14139, 14430, 15250, 6214, 7034, 7325, 8145, 7590, - 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338, 11158, 8260, - 9080, 9371, 10191, 9636, 10456, 10747, 11567, 9897, 10717, 11008, 11828, - 11273, 12093, 12384, 13204, 8260, 9080, 9371, 10191, 9636, 10456, 10747, - 11567, 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204, 10306, 11126, - 11417, 12237, 11682, 12502, 12793, 13613, 11943, 12763, 13054, 13874, 13319, - 14139, 14430, 15250, 8260, 9080, 9371, 10191, 9636, 10456, 10747, 11567, - 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204, 10306, 11126, 11417, - 12237, 11682, 12502, 12793, 13613, 11943, 12763, 13054, 13874, 13319, 14139, - 14430, 15250, 10306, 11126, 11417, 12237, 11682, 12502, 12793, 13613, 11943, - 12763, 13054, 13874, 13319, 14139, 14430, 15250, 12352, 13172, 13463, 14283, - 13728, 14548, 14839, 15659, 13989, 14809, 15100, 15920, 15365, 16185, 16476, - 17296, 4168, 4988, 5279, 6099, 5544, 6364, 6655, 7475, 5805, 6625, 6916, - 7736, 7181, 8001, 8292, 9112, 6214, 7034, 7325, 8145, 7590, 8410, 8701, - 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338, 11158, 6214, 7034, 7325, - 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338, - 11158, 8260, 9080, 9371, 10191, 9636, 10456, 10747, 11567, 9897, 10717, - 11008, 11828, 11273, 12093, 12384, 13204, 6214, 7034, 7325, 8145, 7590, - 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338, 11158, 8260, - 9080, 9371, 10191, 9636, 10456, 10747, 11567, 9897, 10717, 11008, 11828, - 11273, 12093, 12384, 13204, 8260, 9080, 9371, 10191, 9636, 10456, 10747, - 11567, 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204, 10306, 11126, - 11417, 12237, 11682, 12502, 12793, 13613, 11943, 12763, 13054, 13874, 13319, - 14139, 14430, 15250, 6214, 7034, 7325, 8145, 
7590, 8410, 8701, 9521, 7851, - 8671, 8962, 9782, 9227, 10047, 10338, 11158, 8260, 9080, 9371, 10191, 9636, - 10456, 10747, 11567, 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204, - 8260, 9080, 9371, 10191, 9636, 10456, 10747, 11567, 9897, 10717, 11008, - 11828, 11273, 12093, 12384, 13204, 10306, 11126, 11417, 12237, 11682, 12502, - 12793, 13613, 11943, 12763, 13054, 13874, 13319, 14139, 14430, 15250, 8260, - 9080, 9371, 10191, 9636, 10456, 10747, 11567, 9897, 10717, 11008, 11828, - 11273, 12093, 12384, 13204, 10306, 11126, 11417, 12237, 11682, 12502, 12793, - 13613, 11943, 12763, 13054, 13874, 13319, 14139, 14430, 15250, 10306, 11126, - 11417, 12237, 11682, 12502, 12793, 13613, 11943, 12763, 13054, 13874, 13319, - 14139, 14430, 15250, 12352, 13172, 13463, 14283, 13728, 14548, 14839, 15659, - 13989, 14809, 15100, 15920, 15365, 16185, 16476, 17296, 6214, 7034, 7325, - 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338, - 11158, 8260, 9080, 9371, 10191, 9636, 10456, 10747, 11567, 9897, 10717, - 11008, 11828, 11273, 12093, 12384, 13204, 8260, 9080, 9371, 10191, 9636, - 10456, 10747, 11567, 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204, - 10306, 11126, 11417, 12237, 11682, 12502, 12793, 13613, 11943, 12763, 13054, - 13874, 13319, 14139, 14430, 15250, 8260, 9080, 9371, 10191, 9636, 10456, - 10747, 11567, 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204, 10306, - 11126, 11417, 12237, 11682, 12502, 12793, 13613, 11943, 12763, 13054, 13874, - 13319, 14139, 14430, 15250, 10306, 11126, 11417, 12237, 11682, 12502, 12793, - 13613, 11943, 12763, 13054, 13874, 13319, 14139, 14430, 15250, 12352, 13172, - 13463, 14283, 13728, 14548, 14839, 15659, 13989, 14809, 15100, 15920, 15365, - 16185, 16476, 17296, 8260, 9080, 9371, 10191, 9636, 10456, 10747, 11567, - 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204, 10306, 11126, 11417, - 12237, 11682, 12502, 12793, 13613, 11943, 12763, 13054, 13874, 13319, 14139, - 14430, 15250, 10306, 11126, 
11417, 12237, 11682, 12502, 12793, 13613, 11943, - 12763, 13054, 13874, 13319, 14139, 14430, 15250, 12352, 13172, 13463, 14283, - 13728, 14548, 14839, 15659, 13989, 14809, 15100, 15920, 15365, 16185, 16476, - 17296, 10306, 11126, 11417, 12237, 11682, 12502, 12793, 13613, 11943, 12763, - 13054, 13874, 13319, 14139, 14430, 15250, 12352, 13172, 13463, 14283, 13728, - 14548, 14839, 15659, 13989, 14809, 15100, 15920, 15365, 16185, 16476, 17296, - 12352, 13172, 13463, 14283, 13728, 14548, 14839, 15659, 13989, 14809, 15100, - 15920, 15365, 16185, 16476, 17296, 14398, 15218, 15509, 16329, 15774, 16594, - 16885, 17705, 16035, 16855, 17146, 17966, 17411, 18231, 18522, 19342 -}; +const int vp9_cat6_high10_high_cost[256] = { + 94, 2257, 2733, 4896, 3154, 5317, 5793, 7956, 3672, 5835, 6311, + 8474, 6732, 8895, 9371, 11534, 3672, 5835, 6311, 8474, 6732, 8895, + 9371, 11534, 7250, 9413, 9889, 12052, 10310, 12473, 12949, 15112, 3672, + 5835, 6311, 8474, 6732, 8895, 9371, 11534, 7250, 9413, 9889, 12052, + 10310, 12473, 12949, 15112, 7250, 9413, 9889, 12052, 10310, 12473, 12949, + 15112, 10828, 12991, 13467, 15630, 13888, 16051, 16527, 18690, 4187, 6350, + 6826, 8989, 7247, 9410, 9886, 12049, 7765, 9928, 10404, 12567, 10825, + 12988, 13464, 15627, 7765, 9928, 10404, 12567, 10825, 12988, 13464, 15627, + 11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205, 7765, 9928, 10404, + 12567, 10825, 12988, 13464, 15627, 11343, 13506, 13982, 16145, 14403, 16566, + 17042, 19205, 11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205, 14921, + 17084, 17560, 19723, 17981, 20144, 20620, 22783, 4187, 6350, 6826, 8989, + 7247, 9410, 9886, 12049, 7765, 9928, 10404, 12567, 10825, 12988, 13464, + 15627, 7765, 9928, 10404, 12567, 10825, 12988, 13464, 15627, 11343, 13506, + 13982, 16145, 14403, 16566, 17042, 19205, 7765, 9928, 10404, 12567, 10825, + 12988, 13464, 15627, 11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205, + 11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205, 14921, 17084, 17560, 
+ 19723, 17981, 20144, 20620, 22783, 8280, 10443, 10919, 13082, 11340, 13503, + 13979, 16142, 11858, 14021, 14497, 16660, 14918, 17081, 17557, 19720, 11858, + 14021, 14497, 16660, 14918, 17081, 17557, 19720, 15436, 17599, 18075, 20238, + 18496, 20659, 21135, 23298, 11858, 14021, 14497, 16660, 14918, 17081, 17557, + 19720, 15436, 17599, 18075, 20238, 18496, 20659, 21135, 23298, 15436, 17599, + 18075, 20238, 18496, 20659, 21135, 23298, 19014, 21177, 21653, 23816, 22074, + 24237, 24713, 26876}; +const int vp9_cat6_high12_high_cost[1024] = { + 100, 2263, 2739, 4902, 3160, 5323, 5799, 7962, 3678, 5841, 6317, + 8480, 6738, 8901, 9377, 11540, 3678, 5841, 6317, 8480, 6738, 8901, + 9377, 11540, 7256, 9419, 9895, 12058, 10316, 12479, 12955, 15118, 3678, + 5841, 6317, 8480, 6738, 8901, 9377, 11540, 7256, 9419, 9895, 12058, + 10316, 12479, 12955, 15118, 7256, 9419, 9895, 12058, 10316, 12479, 12955, + 15118, 10834, 12997, 13473, 15636, 13894, 16057, 16533, 18696, 4193, 6356, + 6832, 8995, 7253, 9416, 9892, 12055, 7771, 9934, 10410, 12573, 10831, + 12994, 13470, 15633, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, + 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 7771, 9934, 10410, + 12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572, + 17048, 19211, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 14927, + 17090, 17566, 19729, 17987, 20150, 20626, 22789, 4193, 6356, 6832, 8995, + 7253, 9416, 9892, 12055, 7771, 9934, 10410, 12573, 10831, 12994, 13470, + 15633, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349, 13512, + 13988, 16151, 14409, 16572, 17048, 19211, 7771, 9934, 10410, 12573, 10831, + 12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, + 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 14927, 17090, 17566, + 19729, 17987, 20150, 20626, 22789, 8286, 10449, 10925, 13088, 11346, 13509, + 13985, 16148, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 11864, + 14027, 14503, 
16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, + 18502, 20665, 21141, 23304, 11864, 14027, 14503, 16666, 14924, 17087, 17563, + 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 15442, 17605, + 18081, 20244, 18502, 20665, 21141, 23304, 19020, 21183, 21659, 23822, 22080, + 24243, 24719, 26882, 4193, 6356, 6832, 8995, 7253, 9416, 9892, 12055, + 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 7771, 9934, 10410, + 12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572, + 17048, 19211, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349, + 13512, 13988, 16151, 14409, 16572, 17048, 19211, 11349, 13512, 13988, 16151, + 14409, 16572, 17048, 19211, 14927, 17090, 17566, 19729, 17987, 20150, 20626, + 22789, 8286, 10449, 10925, 13088, 11346, 13509, 13985, 16148, 11864, 14027, + 14503, 16666, 14924, 17087, 17563, 19726, 11864, 14027, 14503, 16666, 14924, + 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, + 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, + 20244, 18502, 20665, 21141, 23304, 15442, 17605, 18081, 20244, 18502, 20665, + 21141, 23304, 19020, 21183, 21659, 23822, 22080, 24243, 24719, 26882, 8286, + 10449, 10925, 13088, 11346, 13509, 13985, 16148, 11864, 14027, 14503, 16666, + 14924, 17087, 17563, 19726, 11864, 14027, 14503, 16666, 14924, 17087, 17563, + 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 11864, 14027, + 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, + 20665, 21141, 23304, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, + 19020, 21183, 21659, 23822, 22080, 24243, 24719, 26882, 12379, 14542, 15018, + 17181, 15439, 17602, 18078, 20241, 15957, 18120, 18596, 20759, 19017, 21180, + 21656, 23819, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, + 21698, 22174, 24337, 22595, 24758, 25234, 27397, 15957, 18120, 18596, 20759, + 19017, 21180, 21656, 23819, 19535, 21698, 22174, 
24337, 22595, 24758, 25234, + 27397, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 23113, 25276, + 25752, 27915, 26173, 28336, 28812, 30975, 4193, 6356, 6832, 8995, 7253, + 9416, 9892, 12055, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, + 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988, + 16151, 14409, 16572, 17048, 19211, 7771, 9934, 10410, 12573, 10831, 12994, + 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 11349, + 13512, 13988, 16151, 14409, 16572, 17048, 19211, 14927, 17090, 17566, 19729, + 17987, 20150, 20626, 22789, 8286, 10449, 10925, 13088, 11346, 13509, 13985, + 16148, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 11864, 14027, + 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, + 20665, 21141, 23304, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, + 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 15442, 17605, 18081, + 20244, 18502, 20665, 21141, 23304, 19020, 21183, 21659, 23822, 22080, 24243, + 24719, 26882, 8286, 10449, 10925, 13088, 11346, 13509, 13985, 16148, 11864, + 14027, 14503, 16666, 14924, 17087, 17563, 19726, 11864, 14027, 14503, 16666, + 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, + 23304, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, + 18081, 20244, 18502, 20665, 21141, 23304, 15442, 17605, 18081, 20244, 18502, + 20665, 21141, 23304, 19020, 21183, 21659, 23822, 22080, 24243, 24719, 26882, + 12379, 14542, 15018, 17181, 15439, 17602, 18078, 20241, 15957, 18120, 18596, + 20759, 19017, 21180, 21656, 23819, 15957, 18120, 18596, 20759, 19017, 21180, + 21656, 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 15957, + 18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698, 22174, 24337, + 22595, 24758, 25234, 27397, 19535, 21698, 22174, 24337, 22595, 24758, 25234, + 27397, 23113, 25276, 25752, 27915, 26173, 28336, 28812, 30975, 8286, 10449, + 10925, 
13088, 11346, 13509, 13985, 16148, 11864, 14027, 14503, 16666, 14924, + 17087, 17563, 19726, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, + 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 11864, 14027, 14503, + 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665, + 21141, 23304, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 19020, + 21183, 21659, 23822, 22080, 24243, 24719, 26882, 12379, 14542, 15018, 17181, + 15439, 17602, 18078, 20241, 15957, 18120, 18596, 20759, 19017, 21180, 21656, + 23819, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698, + 22174, 24337, 22595, 24758, 25234, 27397, 15957, 18120, 18596, 20759, 19017, + 21180, 21656, 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, + 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 23113, 25276, 25752, + 27915, 26173, 28336, 28812, 30975, 12379, 14542, 15018, 17181, 15439, 17602, + 18078, 20241, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 15957, + 18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698, 22174, 24337, + 22595, 24758, 25234, 27397, 15957, 18120, 18596, 20759, 19017, 21180, 21656, + 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 19535, 21698, + 22174, 24337, 22595, 24758, 25234, 27397, 23113, 25276, 25752, 27915, 26173, + 28336, 28812, 30975, 16472, 18635, 19111, 21274, 19532, 21695, 22171, 24334, + 20050, 22213, 22689, 24852, 23110, 25273, 25749, 27912, 20050, 22213, 22689, + 24852, 23110, 25273, 25749, 27912, 23628, 25791, 26267, 28430, 26688, 28851, + 29327, 31490, 20050, 22213, 22689, 24852, 23110, 25273, 25749, 27912, 23628, + 25791, 26267, 28430, 26688, 28851, 29327, 31490, 23628, 25791, 26267, 28430, + 26688, 28851, 29327, 31490, 27206, 29369, 29845, 32008, 30266, 32429, 32905, + 35068}; #endif const vp9_extra_bit vp9_extra_bits[ENTROPY_TOKENS] = { {0, 0, 0, zero_cost}, // ZERO_TOKEN - {0, 0, 1, one_cost}, // ONE_TOKEN - {0, 0, 2, two_cost}, // TWO_TOKEN - {0, 0, 3, 
three_cost}, // THREE_TOKEN - {0, 0, 4, four_cost}, // FOUR_TOKEN + {0, 0, 1, sign_cost}, // ONE_TOKEN + {0, 0, 2, sign_cost}, // TWO_TOKEN + {0, 0, 3, sign_cost}, // THREE_TOKEN + {0, 0, 4, sign_cost}, // FOUR_TOKEN {vp9_cat1_prob, 1, CAT1_MIN_VAL, cat1_cost}, // CATEGORY1_TOKEN {vp9_cat2_prob, 2, CAT2_MIN_VAL, cat2_cost}, // CATEGORY2_TOKEN {vp9_cat3_prob, 3, CAT3_MIN_VAL, cat3_cost}, // CATEGORY3_TOKEN @@ -375,32 +278,32 @@ const vp9_extra_bit vp9_extra_bits[ENTROPY_TOKENS] = { #if CONFIG_VP9_HIGHBITDEPTH const vp9_extra_bit vp9_extra_bits_high10[ENTROPY_TOKENS] = { - {0, 0, 0, zero_cost}, // ZERO - {0, 0, 1, one_cost}, // ONE - {0, 0, 2, two_cost}, // TWO - {0, 0, 3, three_cost}, // THREE - {0, 0, 4, four_cost}, // FOUR - {vp9_cat1_prob_high10, 1, CAT1_MIN_VAL, cat1_cost}, // CAT1 - {vp9_cat2_prob_high10, 2, CAT2_MIN_VAL, cat2_cost}, // CAT2 - {vp9_cat3_prob_high10, 3, CAT3_MIN_VAL, cat3_cost}, // CAT3 - {vp9_cat4_prob_high10, 4, CAT4_MIN_VAL, cat4_cost}, // CAT4 - {vp9_cat5_prob_high10, 5, CAT5_MIN_VAL, cat5_cost}, // CAT5 - {vp9_cat6_prob_high10, 16, CAT6_MIN_VAL, 0}, // CAT6 - {0, 0, 0, zero_cost} // EOB + {0, 0, 0, zero_cost}, // ZERO + {0, 0, 1, sign_cost}, // ONE + {0, 0, 2, sign_cost}, // TWO + {0, 0, 3, sign_cost}, // THREE + {0, 0, 4, sign_cost}, // FOUR + {vp9_cat1_prob, 1, CAT1_MIN_VAL, cat1_cost}, // CAT1 + {vp9_cat2_prob, 2, CAT2_MIN_VAL, cat2_cost}, // CAT2 + {vp9_cat3_prob, 3, CAT3_MIN_VAL, cat3_cost}, // CAT3 + {vp9_cat4_prob, 4, CAT4_MIN_VAL, cat4_cost}, // CAT4 + {vp9_cat5_prob, 5, CAT5_MIN_VAL, cat5_cost}, // CAT5 + {vp9_cat6_prob_high12 + 2, 16, CAT6_MIN_VAL, 0}, // CAT6 + {0, 0, 0, zero_cost} // EOB }; const vp9_extra_bit vp9_extra_bits_high12[ENTROPY_TOKENS] = { - {0, 0, 0, zero_cost}, // ZERO - {0, 0, 1, one_cost}, // ONE - {0, 0, 2, two_cost}, // TWO - {0, 0, 3, three_cost}, // THREE - {0, 0, 4, four_cost}, // FOUR - {vp9_cat1_prob_high12, 1, CAT1_MIN_VAL, cat1_cost}, // CAT1 - {vp9_cat2_prob_high12, 2, CAT2_MIN_VAL, cat2_cost}, // CAT2 
- {vp9_cat3_prob_high12, 3, CAT3_MIN_VAL, cat3_cost}, // CAT3 - {vp9_cat4_prob_high12, 4, CAT4_MIN_VAL, cat4_cost}, // CAT4 - {vp9_cat5_prob_high12, 5, CAT5_MIN_VAL, cat5_cost}, // CAT5 - {vp9_cat6_prob_high12, 18, CAT6_MIN_VAL, 0}, // CAT6 - {0, 0, 0, zero_cost} // EOB + {0, 0, 0, zero_cost}, // ZERO + {0, 0, 1, sign_cost}, // ONE + {0, 0, 2, sign_cost}, // TWO + {0, 0, 3, sign_cost}, // THREE + {0, 0, 4, sign_cost}, // FOUR + {vp9_cat1_prob, 1, CAT1_MIN_VAL, cat1_cost}, // CAT1 + {vp9_cat2_prob, 2, CAT2_MIN_VAL, cat2_cost}, // CAT2 + {vp9_cat3_prob, 3, CAT3_MIN_VAL, cat3_cost}, // CAT3 + {vp9_cat4_prob, 4, CAT4_MIN_VAL, cat4_cost}, // CAT4 + {vp9_cat5_prob, 5, CAT5_MIN_VAL, cat5_cost}, // CAT5 + {vp9_cat6_prob_high12, 18, CAT6_MIN_VAL, 0}, // CAT6 + {0, 0, 0, zero_cost} // EOB }; #endif @@ -431,35 +334,25 @@ static void set_entropy_context_b(int plane, int block, BLOCK_SIZE plane_bsize, } static INLINE void add_token(TOKENEXTRA **t, const vpx_prob *context_tree, - int32_t extra, uint8_t token, - uint8_t skip_eob_node, + int16_t token, EXTRABIT extra, unsigned int *counts) { + (*t)->context_tree = context_tree; (*t)->token = token; (*t)->extra = extra; - (*t)->context_tree = context_tree; - (*t)->skip_eob_node = skip_eob_node; (*t)++; ++counts[token]; } static INLINE void add_token_no_extra(TOKENEXTRA **t, const vpx_prob *context_tree, - uint8_t token, - uint8_t skip_eob_node, + int16_t token, unsigned int *counts) { - (*t)->token = token; (*t)->context_tree = context_tree; - (*t)->skip_eob_node = skip_eob_node; + (*t)->token = token; (*t)++; ++counts[token]; } -static INLINE int get_tx_eob(const struct segmentation *seg, int segment_id, - TX_SIZE tx_size) { - const int eob_max = 16 << (tx_size << 1); - return segfeature_active(seg, segment_id, SEG_LVL_SKIP) ? 
0 : eob_max; -} - static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct tokenize_b_args* const args = arg; @@ -471,17 +364,16 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, uint8_t token_cache[32 * 32]; struct macroblock_plane *p = &x->plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; - MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + MODE_INFO *mi = xd->mi[0]; int pt; /* near block/prev token context index */ int c; TOKENEXTRA *t = *tp; /* store tokens starting here */ int eob = p->eobs[block]; const PLANE_TYPE type = get_plane_type(plane); const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); - const int segment_id = mbmi->segment_id; const int16_t *scan, *nb; const scan_order *so; - const int ref = is_inter_block(mbmi); + const int ref = is_inter_block(mi); unsigned int (*const counts)[COEFF_CONTEXTS][ENTROPY_TOKENS] = td->rd_counts.coef_counts[tx_size][type][ref]; vpx_prob (*const coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] = @@ -489,7 +381,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, unsigned int (*const eob_branch)[COEFF_CONTEXTS] = td->counts->eob_branch[tx_size][type][ref]; const uint8_t *const band = get_band_translate(tx_size); - const int seg_eob = get_tx_eob(&cpi->common.seg, segment_id, tx_size); + const int tx_eob = 16 << (tx_size << 1); int16_t token; EXTRABIT extra; int aoff, loff; @@ -504,15 +396,13 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, while (c < eob) { int v = 0; - int skip_eob = 0; v = qcoeff[scan[c]]; + ++eob_branch[band[c]][pt]; while (!v) { - add_token_no_extra(&t, coef_probs[band[c]][pt], ZERO_TOKEN, skip_eob, + add_token_no_extra(&t, coef_probs[band[c]][pt], ZERO_TOKEN, counts[band[c]][pt]); - eob_branch[band[c]][pt] += !skip_eob; - skip_eob = 1; token_cache[scan[c]] = 0; ++c; pt = get_coef_context(nb, token_cache, c); @@ -521,18 +411,17 @@ static void tokenize_b(int plane, int block, 
BLOCK_SIZE plane_bsize, vp9_get_token_extra(v, &token, &extra); - add_token(&t, coef_probs[band[c]][pt], extra, (uint8_t)token, - (uint8_t)skip_eob, counts[band[c]][pt]); - eob_branch[band[c]][pt] += !skip_eob; + add_token(&t, coef_probs[band[c]][pt], token, extra, + counts[band[c]][pt]); token_cache[scan[c]] = vp9_pt_energy_class[token]; ++c; pt = get_coef_context(nb, token_cache, c); } - if (c < seg_eob) { - add_token_no_extra(&t, coef_probs[band[c]][pt], EOB_TOKEN, 0, - counts[band[c]][pt]); + if (c < tx_eob) { ++eob_branch[band[c]][pt]; + add_token_no_extra(&t, coef_probs[band[c]][pt], EOB_TOKEN, + counts[band[c]][pt]); } *tp = t; @@ -584,24 +473,26 @@ int vp9_has_high_freq_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { } void vp9_tokenize_sb(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t, - int dry_run, BLOCK_SIZE bsize) { - VP9_COMMON *const cm = &cpi->common; + int dry_run, int seg_skip, BLOCK_SIZE bsize) { MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + MODE_INFO *const mi = xd->mi[0]; const int ctx = vp9_get_skip_context(xd); - const int skip_inc = !segfeature_active(&cm->seg, mbmi->segment_id, - SEG_LVL_SKIP); struct tokenize_b_args arg = {cpi, td, t}; - if (mbmi->skip) { - if (!dry_run) - td->counts->skip[ctx][1] += skip_inc; + + if (seg_skip) { + assert(mi->skip); + } + + if (mi->skip) { + if (!dry_run && !seg_skip) + ++td->counts->skip[ctx][1]; reset_skip_context(xd, bsize); return; } if (!dry_run) { - td->counts->skip[ctx][0] += skip_inc; + ++td->counts->skip[ctx][0]; vp9_foreach_transformed_block(xd, bsize, tokenize_b, &arg); } else { vp9_foreach_transformed_block(xd, bsize, set_entropy_context_b, &arg); diff --git a/libvpx/vp9/encoder/vp9_tokenize.h b/libvpx/vp9/encoder/vp9_tokenize.h index c0f09c7b2..1caab2ac1 100644 --- a/libvpx/vp9/encoder/vp9_tokenize.h +++ b/libvpx/vp9/encoder/vp9_tokenize.h @@ -36,9 +36,8 @@ typedef struct { typedef struct { const vpx_prob *context_tree; 
+ int16_t token; EXTRABIT extra; - uint8_t token; - uint8_t skip_eob_node; } TOKENEXTRA; extern const vpx_tree_index vp9_coef_tree[]; @@ -52,7 +51,8 @@ struct VP9_COMP; struct ThreadData; void vp9_tokenize_sb(struct VP9_COMP *cpi, struct ThreadData *td, - TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize); + TOKENEXTRA **t, int dry_run, int seg_skip, + BLOCK_SIZE bsize); typedef struct { const vpx_prob *prob; @@ -75,26 +75,27 @@ extern const int16_t *vp9_dct_value_cost_ptr; */ extern const TOKENVALUE *vp9_dct_value_tokens_ptr; extern const TOKENVALUE *vp9_dct_cat_lt_10_value_tokens; +extern const int *vp9_dct_cat_lt_10_value_cost; extern const int16_t vp9_cat6_low_cost[256]; -extern const int16_t vp9_cat6_high_cost[128]; -extern const int16_t vp9_cat6_high10_high_cost[512]; -extern const int16_t vp9_cat6_high12_high_cost[2048]; -static INLINE int16_t vp9_get_cost(int16_t token, EXTRABIT extrabits, - const int16_t *cat6_high_table) { +extern const int vp9_cat6_high_cost[64]; +extern const int vp9_cat6_high10_high_cost[256]; +extern const int vp9_cat6_high12_high_cost[1024]; +static INLINE int vp9_get_cost(int16_t token, EXTRABIT extrabits, + const int *cat6_high_table) { if (token != CATEGORY6_TOKEN) - return vp9_extra_bits[token].cost[extrabits]; - return vp9_cat6_low_cost[extrabits & 0xff] - + cat6_high_table[extrabits >> 8]; + return vp9_extra_bits[token].cost[extrabits >> 1]; + return vp9_cat6_low_cost[(extrabits >> 1) & 0xff] + + cat6_high_table[extrabits >> 9]; } #if CONFIG_VP9_HIGHBITDEPTH -static INLINE const int16_t* vp9_get_high_cost_table(int bit_depth) { +static INLINE const int* vp9_get_high_cost_table(int bit_depth) { return bit_depth == 8 ? vp9_cat6_high_cost : (bit_depth == 10 ? 
vp9_cat6_high10_high_cost : vp9_cat6_high12_high_cost); } #else -static INLINE const int16_t* vp9_get_high_cost_table(int bit_depth) { +static INLINE const int* vp9_get_high_cost_table(int bit_depth) { (void) bit_depth; return vp9_cat6_high_cost; } @@ -118,6 +119,18 @@ static INLINE int16_t vp9_get_token(int v) { return vp9_dct_cat_lt_10_value_tokens[v].token; } +static INLINE int vp9_get_token_cost(int v, int16_t *token, + const int *cat6_high_table) { + if (v >= CAT6_MIN_VAL || v <= -CAT6_MIN_VAL) { + EXTRABIT extrabits; + *token = CATEGORY6_TOKEN; + extrabits = abs(v) - CAT6_MIN_VAL; + return vp9_cat6_low_cost[extrabits & 0xff] + + cat6_high_table[extrabits >> 8]; + } + *token = vp9_dct_cat_lt_10_value_tokens[v].token; + return vp9_dct_cat_lt_10_value_cost[v]; +} #ifdef __cplusplus } // extern "C" diff --git a/libvpx/vp9/encoder/x86/vp9_dct_sse2.c b/libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c index fa37b6fed..fa37b6fed 100644 --- a/libvpx/vp9/encoder/x86/vp9_dct_sse2.c +++ b/libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c diff --git a/libvpx/vp9/encoder/x86/vp9_dct_mmx.asm b/libvpx/vp9/encoder/x86/vp9_dct_mmx.asm deleted file mode 100644 index 7a7a6b655..000000000 --- a/libvpx/vp9/encoder/x86/vp9_dct_mmx.asm +++ /dev/null @@ -1,104 +0,0 @@ -; -; Copyright (c) 2014 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - -%define private_prefix vp9 - -%include "third_party/x86inc/x86inc.asm" - -SECTION .text - -%macro TRANSFORM_COLS 0 - paddw m0, m1 - movq m4, m0 - psubw m3, m2 - psubw m4, m3 - psraw m4, 1 - movq m5, m4 - psubw m5, m1 ;b1 - psubw m4, m2 ;c1 - psubw m0, m4 - paddw m3, m5 - ; m0 a0 - SWAP 1, 4 ; m1 c1 - SWAP 2, 3 ; m2 d1 - SWAP 3, 5 ; m3 b1 -%endmacro - -%macro TRANSPOSE_4X4 0 - movq m4, m0 - movq m5, m2 - punpcklwd m4, m1 - punpckhwd m0, m1 - punpcklwd m5, m3 - punpckhwd m2, m3 - movq m1, m4 - movq m3, m0 - punpckldq m1, m5 - punpckhdq m4, m5 - punpckldq m3, m2 - punpckhdq m0, m2 - SWAP 2, 3, 0, 1, 4 -%endmacro - -INIT_MMX mmx -cglobal fwht4x4, 3, 4, 8, input, output, stride - lea r3q, [inputq + strideq*4] - movq m0, [inputq] ;a1 - movq m1, [inputq + strideq*2] ;b1 - movq m2, [r3q] ;c1 - movq m3, [r3q + strideq*2] ;d1 - - TRANSFORM_COLS - TRANSPOSE_4X4 - TRANSFORM_COLS - TRANSPOSE_4X4 - - psllw m0, 2 - psllw m1, 2 - psllw m2, 2 - psllw m3, 2 - -%if CONFIG_VP9_HIGHBITDEPTH - pxor m4, m4 - pxor m5, m5 - pcmpgtw m4, m0 - pcmpgtw m5, m1 - movq m6, m0 - movq m7, m1 - punpcklwd m0, m4 - punpcklwd m1, m5 - punpckhwd m6, m4 - punpckhwd m7, m5 - movq [outputq], m0 - movq [outputq + 8], m6 - movq [outputq + 16], m1 - movq [outputq + 24], m7 - pxor m4, m4 - pxor m5, m5 - pcmpgtw m4, m2 - pcmpgtw m5, m3 - movq m6, m2 - movq m7, m3 - punpcklwd m2, m4 - punpcklwd m3, m5 - punpckhwd m6, m4 - punpckhwd m7, m5 - movq [outputq + 32], m2 - movq [outputq + 40], m6 - movq [outputq + 48], m3 - movq [outputq + 56], m7 -%else - movq [outputq], m0 - movq [outputq + 8], m1 - movq [outputq + 16], m2 - movq [outputq + 24], m3 -%endif - - RET diff --git a/libvpx/vp9/encoder/x86/vp9_dct_sse2.asm b/libvpx/vp9/encoder/x86/vp9_dct_sse2.asm new file mode 100644 index 000000000..ced37bd16 --- /dev/null +++ b/libvpx/vp9/encoder/x86/vp9_dct_sse2.asm @@ -0,0 +1,86 @@ +; +; Copyright (c) 2016 The WebM project authors. All Rights Reserved. 
+; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + +%define private_prefix vp9 + +%include "third_party/x86inc/x86inc.asm" + +SECTION .text + +%macro TRANSFORM_COLS 0 + paddw m0, m1 + movq m4, m0 + psubw m3, m2 + psubw m4, m3 + psraw m4, 1 + movq m5, m4 + psubw m5, m1 ;b1 + psubw m4, m2 ;c1 + psubw m0, m4 + paddw m3, m5 + ; m0 a0 + SWAP 1, 4 ; m1 c1 + SWAP 2, 3 ; m2 d1 + SWAP 3, 5 ; m3 b1 +%endmacro + +%macro TRANSPOSE_4X4 0 + ; 00 01 02 03 + ; 10 11 12 13 + ; 20 21 22 23 + ; 30 31 32 33 + punpcklwd m0, m1 ; 00 10 01 11 02 12 03 13 + punpcklwd m2, m3 ; 20 30 21 31 22 32 23 33 + mova m1, m0 + punpckldq m0, m2 ; 00 10 20 30 01 11 21 31 + punpckhdq m1, m2 ; 02 12 22 32 03 13 23 33 +%endmacro + +INIT_XMM sse2 +cglobal fwht4x4, 3, 4, 8, input, output, stride + lea r3q, [inputq + strideq*4] + movq m0, [inputq] ;a1 + movq m1, [inputq + strideq*2] ;b1 + movq m2, [r3q] ;c1 + movq m3, [r3q + strideq*2] ;d1 + + TRANSFORM_COLS + TRANSPOSE_4X4 + SWAP 1, 2 + psrldq m1, m0, 8 + psrldq m3, m2, 8 + TRANSFORM_COLS + TRANSPOSE_4X4 + + psllw m0, 2 + psllw m1, 2 + +%if CONFIG_VP9_HIGHBITDEPTH + ; sign extension + mova m2, m0 + mova m3, m1 + punpcklwd m0, m0 + punpcklwd m1, m1 + punpckhwd m2, m2 + punpckhwd m3, m3 + psrad m0, 16 + psrad m1, 16 + psrad m2, 16 + psrad m3, 16 + mova [outputq], m0 + mova [outputq + 16], m2 + mova [outputq + 32], m1 + mova [outputq + 48], m3 +%else + mova [outputq], m0 + mova [outputq + 16], m1 +%endif + + RET diff --git a/libvpx/vp9/encoder/x86/vp9_denoiser_sse2.c b/libvpx/vp9/encoder/x86/vp9_denoiser_sse2.c index bf7c7af77..883507af3 100644 --- a/libvpx/vp9/encoder/x86/vp9_denoiser_sse2.c +++ b/libvpx/vp9/encoder/x86/vp9_denoiser_sse2.c @@ -125,7 +125,7 @@ static INLINE __m128i 
vp9_denoiser_adj_16x1_sse2( return acc_diff; } -// Denoiser for 4xM and 8xM blocks. +// Denoise 8x8 and 8x16 blocks. static int vp9_denoiser_NxM_sse2_small( const uint8_t *sig, int sig_stride, const uint8_t *mc_running_avg_y, int mc_avg_y_stride, uint8_t *running_avg_y, int avg_y_stride, @@ -147,9 +147,9 @@ static int vp9_denoiser_NxM_sse2_small( const __m128i l32 = _mm_set1_epi8(2); // Difference between level 2 and level 1 is 1. const __m128i l21 = _mm_set1_epi8(1); - const uint8_t shift = (width == 4) ? 2 : 1; + const int b_height = (4 << b_height_log2_lookup[bs]) >> 1; - for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> shift); ++r) { + for (r = 0; r < b_height; ++r) { memcpy(sig_buffer[r], sig, width); memcpy(sig_buffer[r] + width, sig + sig_stride, width); memcpy(mc_running_buffer[r], mc_running_avg_y, width); @@ -157,18 +157,6 @@ static int vp9_denoiser_NxM_sse2_small( mc_running_avg_y + mc_avg_y_stride, width); memcpy(running_buffer[r], running_avg_y, width); memcpy(running_buffer[r] + width, running_avg_y + avg_y_stride, width); - if (width == 4) { - memcpy(sig_buffer[r] + width * 2, sig + sig_stride * 2, width); - memcpy(sig_buffer[r] + width * 3, sig + sig_stride * 3, width); - memcpy(mc_running_buffer[r] + width * 2, - mc_running_avg_y + mc_avg_y_stride * 2, width); - memcpy(mc_running_buffer[r] + width * 3, - mc_running_avg_y + mc_avg_y_stride * 3, width); - memcpy(running_buffer[r] + width * 2, - running_avg_y + avg_y_stride * 2, width); - memcpy(running_buffer[r] + width * 3, - running_avg_y + avg_y_stride * 3, width); - } acc_diff = vp9_denoiser_16x1_sse2(sig_buffer[r], mc_running_buffer[r], running_buffer[r], @@ -176,16 +164,10 @@ static int vp9_denoiser_NxM_sse2_small( &l3, &l32, &l21, acc_diff); memcpy(running_avg_y, running_buffer[r], width); memcpy(running_avg_y + avg_y_stride, running_buffer[r] + width, width); - if (width == 4) { - memcpy(running_avg_y + avg_y_stride * 2, - running_buffer[r] + width * 2, width); - memcpy(running_avg_y + 
avg_y_stride * 3, - running_buffer[r] + width * 3, width); - } // Update pointers for next iteration. - sig += (sig_stride << shift); - mc_running_avg_y += (mc_avg_y_stride << shift); - running_avg_y += (avg_y_stride << shift); + sig += (sig_stride << 1); + mc_running_avg_y += (mc_avg_y_stride << 1); + running_avg_y += (avg_y_stride << 1); } { @@ -207,22 +189,16 @@ static int vp9_denoiser_NxM_sse2_small( // Only apply the adjustment for max delta up to 3. if (delta < 4) { const __m128i k_delta = _mm_set1_epi8(delta); - running_avg_y -= avg_y_stride * (4 << b_height_log2_lookup[bs]); - for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> shift); ++r) { + running_avg_y -= avg_y_stride * (b_height << 1); + for (r = 0; r < b_height; ++r) { acc_diff = vp9_denoiser_adj_16x1_sse2( sig_buffer[r], mc_running_buffer[r], running_buffer[r], k_0, k_delta, acc_diff); memcpy(running_avg_y, running_buffer[r], width); memcpy(running_avg_y + avg_y_stride, running_buffer[r] + width, width); - if (width == 4) { - memcpy(running_avg_y + avg_y_stride * 2, - running_buffer[r] + width * 2, width); - memcpy(running_avg_y + avg_y_stride * 3, - running_buffer[r] + width * 3, width); - } // Update pointers for next iteration. - running_avg_y += (avg_y_stride << shift); + running_avg_y += (avg_y_stride << 1); } sum_diff = sum_diff_16x1(acc_diff); if (abs(sum_diff) > sum_diff_thresh) { @@ -236,7 +212,7 @@ static int vp9_denoiser_NxM_sse2_small( return FILTER_BLOCK; } -// Denoiser for 16xM, 32xM and 64xM blocks +// Denoise 16x16, 16x32, 32x16, 32x32, 32x64, 64x32 and 64x64 blocks. static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride, const uint8_t *mc_running_avg_y, int mc_avg_y_stride, @@ -260,38 +236,37 @@ static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride, const __m128i l32 = _mm_set1_epi8(2); // Difference between level 2 and level 1 is 1. 
const __m128i l21 = _mm_set1_epi8(1); + const int b_width = (4 << b_width_log2_lookup[bs]); + const int b_height = (4 << b_height_log2_lookup[bs]); + const int b_width_shift4 = b_width >> 4; - for (c = 0; c < 4; ++c) { - for (r = 0; r < 4; ++r) { + for (r = 0; r < 4; ++r) { + for (c = 0; c < b_width_shift4; ++c) { acc_diff[c][r] = _mm_setzero_si128(); } } - for (r = 0; r < (4 << b_height_log2_lookup[bs]); ++r) { - for (c = 0; c < (4 << b_width_log2_lookup[bs]); c += 16) { - acc_diff[c>>4][r>>4] = vp9_denoiser_16x1_sse2( + for (r = 0; r < b_height; ++r) { + for (c = 0; c < b_width_shift4; ++c) { + acc_diff[c][r>>4] = vp9_denoiser_16x1_sse2( sig, mc_running_avg_y, running_avg_y, &k_0, &k_4, - &k_8, &k_16, &l3, &l32, &l21, acc_diff[c>>4][r>>4]); + &k_8, &k_16, &l3, &l32, &l21, acc_diff[c][r>>4]); // Update pointers for next iteration. sig += 16; mc_running_avg_y += 16; running_avg_y += 16; } - if ((r + 1) % 16 == 0 || (bs == BLOCK_16X8 && r == 7)) { - for (c = 0; c < (4 << b_width_log2_lookup[bs]); c += 16) { - sum_diff += sum_diff_16x1(acc_diff[c>>4][r>>4]); + if ((r & 0xf) == 0xf || (bs == BLOCK_16X8 && r == 7)) { + for (c = 0; c < b_width_shift4; ++c) { + sum_diff += sum_diff_16x1(acc_diff[c][r>>4]); } } // Update pointers for next iteration. - sig = sig - 16 * ((4 << b_width_log2_lookup[bs]) >> 4) + sig_stride; - mc_running_avg_y = mc_running_avg_y - - 16 * ((4 << b_width_log2_lookup[bs]) >> 4) + - mc_avg_y_stride; - running_avg_y = running_avg_y - - 16 * ((4 << b_width_log2_lookup[bs]) >> 4) + - avg_y_stride; + sig = sig - b_width + sig_stride; + mc_running_avg_y = mc_running_avg_y - b_width + mc_avg_y_stride; + running_avg_y = running_avg_y - b_width + avg_y_stride; } { @@ -303,33 +278,29 @@ static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride, // Only apply the adjustment for max delta up to 3. 
if (delta < 4) { const __m128i k_delta = _mm_set1_epi8(delta); - sig -= sig_stride * (4 << b_height_log2_lookup[bs]); - mc_running_avg_y -= mc_avg_y_stride * (4 << b_height_log2_lookup[bs]); - running_avg_y -= avg_y_stride * (4 << b_height_log2_lookup[bs]); + sig -= sig_stride * b_height; + mc_running_avg_y -= mc_avg_y_stride * b_height; + running_avg_y -= avg_y_stride * b_height; sum_diff = 0; - for (r = 0; r < (4 << b_height_log2_lookup[bs]); ++r) { - for (c = 0; c < (4 << b_width_log2_lookup[bs]); c += 16) { - acc_diff[c>>4][r>>4] = vp9_denoiser_adj_16x1_sse2( + for (r = 0; r < b_height; ++r) { + for (c = 0; c < b_width_shift4; ++c) { + acc_diff[c][r>>4] = vp9_denoiser_adj_16x1_sse2( sig, mc_running_avg_y, running_avg_y, k_0, - k_delta, acc_diff[c>>4][r>>4]); + k_delta, acc_diff[c][r>>4]); // Update pointers for next iteration. sig += 16; mc_running_avg_y += 16; running_avg_y += 16; } - if ((r + 1) % 16 == 0 || (bs == BLOCK_16X8 && r == 7)) { - for (c = 0; c < (4 << b_width_log2_lookup[bs]); c += 16) { - sum_diff += sum_diff_16x1(acc_diff[c>>4][r>>4]); + if ((r & 0xf) == 0xf || (bs == BLOCK_16X8 && r == 7)) { + for (c = 0; c < b_width_shift4; ++c) { + sum_diff += sum_diff_16x1(acc_diff[c][r>>4]); } } - sig = sig - 16 * ((4 << b_width_log2_lookup[bs]) >> 4) + sig_stride; - mc_running_avg_y = mc_running_avg_y - - 16 * ((4 << b_width_log2_lookup[bs]) >> 4) + - mc_avg_y_stride; - running_avg_y = running_avg_y - - 16 * ((4 << b_width_log2_lookup[bs]) >> 4) + - avg_y_stride; + sig = sig - b_width + sig_stride; + mc_running_avg_y = mc_running_avg_y - b_width + mc_avg_y_stride; + running_avg_y = running_avg_y - b_width + avg_y_stride; } if (abs(sum_diff) > sum_diff_thresh) { return COPY_BLOCK; @@ -349,26 +320,21 @@ int vp9_denoiser_filter_sse2(const uint8_t *sig, int sig_stride, int increase_denoising, BLOCK_SIZE bs, int motion_magnitude) { - if (bs == BLOCK_4X4 || bs == BLOCK_4X8) { - return vp9_denoiser_NxM_sse2_small(sig, sig_stride, - mc_avg, mc_avg_stride, - avg, 
avg_stride, - increase_denoising, - bs, motion_magnitude, 4); - } else if (bs == BLOCK_8X4 || bs == BLOCK_8X8 || bs == BLOCK_8X16) { - return vp9_denoiser_NxM_sse2_small(sig, sig_stride, - mc_avg, mc_avg_stride, - avg, avg_stride, - increase_denoising, - bs, motion_magnitude, 8); - } else if (bs == BLOCK_16X8 || bs == BLOCK_16X16 || bs == BLOCK_16X32 || - bs == BLOCK_32X16|| bs == BLOCK_32X32 || bs == BLOCK_32X64 || - bs == BLOCK_64X32 || bs == BLOCK_64X64) { + // Rank by frequency of the block type to have an early termination. + if (bs == BLOCK_16X16 || bs == BLOCK_32X32 || bs == BLOCK_64X64 || + bs == BLOCK_16X32 || bs == BLOCK_16X8 || bs == BLOCK_32X16 || + bs == BLOCK_32X64 || bs == BLOCK_64X32) { return vp9_denoiser_NxM_sse2_big(sig, sig_stride, mc_avg, mc_avg_stride, avg, avg_stride, increase_denoising, bs, motion_magnitude); + } else if (bs == BLOCK_8X8 || bs == BLOCK_8X16) { + return vp9_denoiser_NxM_sse2_small(sig, sig_stride, + mc_avg, mc_avg_stride, + avg, avg_stride, + increase_denoising, + bs, motion_magnitude, 8); } else { return COPY_BLOCK; } diff --git a/libvpx/vp9/encoder/x86/vp9_diamond_search_sad_avx.c b/libvpx/vp9/encoder/x86/vp9_diamond_search_sad_avx.c new file mode 100644 index 000000000..cd3e87ec8 --- /dev/null +++ b/libvpx/vp9/encoder/x86/vp9_diamond_search_sad_avx.c @@ -0,0 +1,314 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#if defined(_MSC_VER) +# include <intrin.h> +#endif +#include <emmintrin.h> +#include <smmintrin.h> + +#include "vpx_dsp/vpx_dsp_common.h" +#include "vp9/encoder/vp9_encoder.h" +#include "vpx_ports/mem.h" + +#ifdef __GNUC__ +# define LIKELY(v) __builtin_expect(v, 1) +# define UNLIKELY(v) __builtin_expect(v, 0) +#else +# define LIKELY(v) (v) +# define UNLIKELY(v) (v) +#endif + +static INLINE int_mv pack_int_mv(int16_t row, int16_t col) { + int_mv result; + result.as_mv.row = row; + result.as_mv.col = col; + return result; +} + +static INLINE MV_JOINT_TYPE get_mv_joint(const int_mv mv) { + // This is simplified from the C implementation to utilise that + // x->nmvjointsadcost[1] == x->nmvjointsadcost[2] and + // x->nmvjointsadcost[1] == x->nmvjointsadcost[3] + return mv.as_int == 0 ? 0 : 1; +} + +static INLINE int mv_cost(const int_mv mv, + const int *joint_cost, int *const comp_cost[2]) { + return joint_cost[get_mv_joint(mv)] + + comp_cost[0][mv.as_mv.row] + comp_cost[1][mv.as_mv.col]; +} + +static int mvsad_err_cost(const MACROBLOCK *x, const int_mv mv, const MV *ref, + int sad_per_bit) { + const int_mv diff = pack_int_mv(mv.as_mv.row - ref->row, + mv.as_mv.col - ref->col); + return ROUND_POWER_OF_TWO((unsigned)mv_cost(diff, x->nmvjointsadcost, + x->nmvsadcost) * + sad_per_bit, VP9_PROB_COST_SHIFT); +} + +/***************************************************************************** + * This function utilizes 3 properties of the cost function lookup tables, * + * constructed in using 'cal_nmvjointsadcost' and 'cal_nmvsadcosts' in * + * vp9_encoder.c. 
* + * For the joint cost: * + * - mvjointsadcost[1] == mvjointsadcost[2] == mvjointsadcost[3] * + * For the component costs: * + * - For all i: mvsadcost[0][i] == mvsadcost[1][i] * + * (Equal costs for both components) * + * - For all i: mvsadcost[0][i] == mvsadcost[0][-i] * + * (Cost function is even) * + * If these do not hold, then this function cannot be used without * + * modification, in which case you can revert to using the C implementation, * + * which does not rely on these properties. * + *****************************************************************************/ +int vp9_diamond_search_sad_avx(const MACROBLOCK *x, + const search_site_config *cfg, + MV *ref_mv, MV *best_mv, int search_param, + int sad_per_bit, int *num00, + const vp9_variance_fn_ptr_t *fn_ptr, + const MV *center_mv) { + const int_mv maxmv = pack_int_mv(x->mv_row_max, x->mv_col_max); + const __m128i v_max_mv_w = _mm_set1_epi32(maxmv.as_int); + const int_mv minmv = pack_int_mv(x->mv_row_min, x->mv_col_min); + const __m128i v_min_mv_w = _mm_set1_epi32(minmv.as_int); + + const __m128i v_spb_d = _mm_set1_epi32(sad_per_bit); + + const __m128i v_joint_cost_0_d = _mm_set1_epi32(x->nmvjointsadcost[0]); + const __m128i v_joint_cost_1_d = _mm_set1_epi32(x->nmvjointsadcost[1]); + + // search_param determines the length of the initial step and hence the number + // of iterations. + // 0 = initial step (MAX_FIRST_STEP) pel + // 1 = (MAX_FIRST_STEP/2) pel, + // 2 = (MAX_FIRST_STEP/4) pel... 
+ const MV *ss_mv = &cfg->ss_mv[cfg->searches_per_step * search_param]; + const intptr_t *ss_os = &cfg->ss_os[cfg->searches_per_step * search_param]; + const int tot_steps = cfg->total_steps - search_param; + + const int_mv fcenter_mv = pack_int_mv(center_mv->row >> 3, + center_mv->col >> 3); + const __m128i vfcmv = _mm_set1_epi32(fcenter_mv.as_int); + + const int ref_row = clamp(ref_mv->row, minmv.as_mv.row, maxmv.as_mv.row); + const int ref_col = clamp(ref_mv->col, minmv.as_mv.col, maxmv.as_mv.col); + + int_mv bmv = pack_int_mv(ref_row, ref_col); + int_mv new_bmv = bmv; + __m128i v_bmv_w = _mm_set1_epi32(bmv.as_int); + + const int what_stride = x->plane[0].src.stride; + const int in_what_stride = x->e_mbd.plane[0].pre[0].stride; + const uint8_t *const what = x->plane[0].src.buf; + const uint8_t *const in_what = x->e_mbd.plane[0].pre[0].buf + + ref_row * in_what_stride + ref_col; + + // Work out the start point for the search + const uint8_t *best_address = in_what; + const uint8_t *new_best_address = best_address; +#if ARCH_X86_64 + __m128i v_ba_q = _mm_set1_epi64x((intptr_t)best_address); +#else + __m128i v_ba_d = _mm_set1_epi32((intptr_t)best_address); +#endif + + unsigned int best_sad; + int i, j, step; + + // Check the prerequisite cost function properties that are easy to check + // in an assert. See the function-level documentation for details on all + // prerequisites. 
+ assert(x->nmvjointsadcost[1] == x->nmvjointsadcost[2]); + assert(x->nmvjointsadcost[1] == x->nmvjointsadcost[3]); + + // Check the starting position + best_sad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride); + best_sad += mvsad_err_cost(x, bmv, &fcenter_mv.as_mv, sad_per_bit); + + *num00 = 0; + + for (i = 0, step = 0; step < tot_steps; step++) { + for (j = 0; j < cfg->searches_per_step; j += 4, i += 4) { + __m128i v_sad_d, v_cost_d, v_outside_d, v_inside_d, v_diff_mv_w; +#if ARCH_X86_64 + __m128i v_blocka[2]; +#else + __m128i v_blocka[1]; +#endif + + // Compute the candidate motion vectors + const __m128i v_ss_mv_w = _mm_loadu_si128((const __m128i *)&ss_mv[i]); + const __m128i v_these_mv_w = _mm_add_epi16(v_bmv_w, v_ss_mv_w); + // Clamp them to the search bounds + __m128i v_these_mv_clamp_w = v_these_mv_w; + v_these_mv_clamp_w = _mm_min_epi16(v_these_mv_clamp_w, v_max_mv_w); + v_these_mv_clamp_w = _mm_max_epi16(v_these_mv_clamp_w, v_min_mv_w); + // The ones that did not change are inside the search area + v_inside_d = _mm_cmpeq_epi32(v_these_mv_clamp_w, v_these_mv_w); + + // If none of them are inside, then move on + if (LIKELY(_mm_test_all_zeros(v_inside_d, v_inside_d))) { + continue; + } + + // The inverse mask indicates which of the MVs are outside + v_outside_d = _mm_xor_si128(v_inside_d, _mm_set1_epi8(0xff)); + // Shift right to keep the sign bit clear, we will use this later + // to set the cost to the maximum value. + v_outside_d = _mm_srli_epi32(v_outside_d, 1); + + // Compute the difference MV + v_diff_mv_w = _mm_sub_epi16(v_these_mv_clamp_w, vfcmv); + // We utilise the fact that the cost function is even, and use the + // absolute difference. This allows us to use unsigned indexes later + // and reduces cache pressure somewhat as only a half of the table + // is ever referenced. + v_diff_mv_w = _mm_abs_epi16(v_diff_mv_w); + + // Compute the SIMD pointer offsets. 
+ { +#if ARCH_X86_64 // sizeof(intptr_t) == 8 + // Load the offsets + __m128i v_bo10_q = _mm_loadu_si128((const __m128i *)&ss_os[i + 0]); + __m128i v_bo32_q = _mm_loadu_si128((const __m128i *)&ss_os[i + 2]); + // Set the ones falling outside to zero + v_bo10_q = _mm_and_si128(v_bo10_q, + _mm_cvtepi32_epi64(v_inside_d)); + v_bo32_q = _mm_and_si128(v_bo32_q, + _mm_unpackhi_epi32(v_inside_d, v_inside_d)); + // Compute the candidate addresses + v_blocka[0] = _mm_add_epi64(v_ba_q, v_bo10_q); + v_blocka[1] = _mm_add_epi64(v_ba_q, v_bo32_q); +#else // ARCH_X86 // sizeof(intptr_t) == 4 + __m128i v_bo_d = _mm_loadu_si128((const __m128i *)&ss_os[i]); + v_bo_d = _mm_and_si128(v_bo_d, v_inside_d); + v_blocka[0] = _mm_add_epi32(v_ba_d, v_bo_d); +#endif + } + + fn_ptr->sdx4df(what, what_stride, + (const uint8_t **)&v_blocka[0], in_what_stride, + (uint32_t*)&v_sad_d); + + // Look up the component cost of the residual motion vector + { + const int32_t row0 = _mm_extract_epi16(v_diff_mv_w, 0); + const int32_t col0 = _mm_extract_epi16(v_diff_mv_w, 1); + const int32_t row1 = _mm_extract_epi16(v_diff_mv_w, 2); + const int32_t col1 = _mm_extract_epi16(v_diff_mv_w, 3); + const int32_t row2 = _mm_extract_epi16(v_diff_mv_w, 4); + const int32_t col2 = _mm_extract_epi16(v_diff_mv_w, 5); + const int32_t row3 = _mm_extract_epi16(v_diff_mv_w, 6); + const int32_t col3 = _mm_extract_epi16(v_diff_mv_w, 7); + + // Note: This is a use case for vpgather in AVX2 + const uint32_t cost0 = x->nmvsadcost[0][row0] + x->nmvsadcost[0][col0]; + const uint32_t cost1 = x->nmvsadcost[0][row1] + x->nmvsadcost[0][col1]; + const uint32_t cost2 = x->nmvsadcost[0][row2] + x->nmvsadcost[0][col2]; + const uint32_t cost3 = x->nmvsadcost[0][row3] + x->nmvsadcost[0][col3]; + + __m128i v_cost_10_d, v_cost_32_d; + v_cost_10_d = _mm_cvtsi32_si128(cost0); + v_cost_10_d = _mm_insert_epi32(v_cost_10_d, cost1, 1); + v_cost_32_d = _mm_cvtsi32_si128(cost2); + v_cost_32_d = _mm_insert_epi32(v_cost_32_d, cost3, 1); + v_cost_d = 
_mm_unpacklo_epi64(v_cost_10_d, v_cost_32_d); + } + + // Now add in the joint cost + { + const __m128i v_sel_d = _mm_cmpeq_epi32(v_diff_mv_w, + _mm_setzero_si128()); + const __m128i v_joint_cost_d = _mm_blendv_epi8(v_joint_cost_1_d, + v_joint_cost_0_d, + v_sel_d); + v_cost_d = _mm_add_epi32(v_cost_d, v_joint_cost_d); + } + + // Multiply by sad_per_bit + v_cost_d = _mm_mullo_epi32(v_cost_d, v_spb_d); + // ROUND_POWER_OF_TWO(v_cost_d, VP9_PROB_COST_SHIFT) + v_cost_d = _mm_add_epi32(v_cost_d, + _mm_set1_epi32(1 << (VP9_PROB_COST_SHIFT - 1))); + v_cost_d = _mm_srai_epi32(v_cost_d, VP9_PROB_COST_SHIFT); + // Add the cost to the sad + v_sad_d = _mm_add_epi32(v_sad_d, v_cost_d); + + // Make the motion vectors outside the search area have max cost + // by or'ing in the comparison mask, this way the minimum search won't + // pick them. + v_sad_d = _mm_or_si128(v_sad_d, v_outside_d); + + // Find the minimum value and index horizontally in v_sad_d + { + // Try speculatively on 16 bits, so we can use the minpos intrinsic + const __m128i v_sad_w = _mm_packus_epi32(v_sad_d, v_sad_d); + const __m128i v_minp_w = _mm_minpos_epu16(v_sad_w); + + uint32_t local_best_sad = _mm_extract_epi16(v_minp_w, 0); + uint32_t local_best_idx = _mm_extract_epi16(v_minp_w, 1); + + // If the local best value is not saturated, just use it, otherwise + // find the horizontal minimum again the hard way on 32 bits. + // This is executed rarely. 
+ if (UNLIKELY(local_best_sad == 0xffff)) { + __m128i v_loval_d, v_hival_d, v_loidx_d, v_hiidx_d, v_sel_d; + + v_loval_d = v_sad_d; + v_loidx_d = _mm_set_epi32(3, 2, 1, 0); + v_hival_d = _mm_srli_si128(v_loval_d, 8); + v_hiidx_d = _mm_srli_si128(v_loidx_d, 8); + + v_sel_d = _mm_cmplt_epi32(v_hival_d, v_loval_d); + + v_loval_d = _mm_blendv_epi8(v_loval_d, v_hival_d, v_sel_d); + v_loidx_d = _mm_blendv_epi8(v_loidx_d, v_hiidx_d, v_sel_d); + v_hival_d = _mm_srli_si128(v_loval_d, 4); + v_hiidx_d = _mm_srli_si128(v_loidx_d, 4); + + v_sel_d = _mm_cmplt_epi32(v_hival_d, v_loval_d); + + v_loval_d = _mm_blendv_epi8(v_loval_d, v_hival_d, v_sel_d); + v_loidx_d = _mm_blendv_epi8(v_loidx_d, v_hiidx_d, v_sel_d); + + local_best_sad = _mm_extract_epi32(v_loval_d, 0); + local_best_idx = _mm_extract_epi32(v_loidx_d, 0); + } + + // Update the global minimum if the local minimum is smaller + if (LIKELY(local_best_sad < best_sad)) { + new_bmv = ((const int_mv *)&v_these_mv_w)[local_best_idx]; + new_best_address = ((const uint8_t **)v_blocka)[local_best_idx]; + + best_sad = local_best_sad; + } + } + } + + bmv = new_bmv; + best_address = new_best_address; + + v_bmv_w = _mm_set1_epi32(bmv.as_int); +#if ARCH_X86_64 + v_ba_q = _mm_set1_epi64x((intptr_t)best_address); +#else + v_ba_d = _mm_set1_epi32((intptr_t)best_address); +#endif + + if (UNLIKELY(best_address == in_what)) { + (*num00)++; + } + } + + *best_mv = bmv.as_mv; + return best_sad; +} diff --git a/libvpx/vp9/encoder/x86/vp9_frame_scale_ssse3.c b/libvpx/vp9/encoder/x86/vp9_frame_scale_ssse3.c new file mode 100644 index 000000000..38af3b13a --- /dev/null +++ b/libvpx/vp9/encoder/x86/vp9_frame_scale_ssse3.c @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2016 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#if defined(_MSC_VER) && _MSC_VER <= 1500 +// Need to include math.h before calling tmmintrin.h/intrin.h +// in certain versions of MSVS. +#include <math.h> +#endif +#include <tmmintrin.h> // SSSE3 + +#include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" +#include "./vpx_scale_rtcd.h" +#include "vpx_scale/yv12config.h" + +extern void vp9_scale_and_extend_frame_c(const YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst); + +static void downsample_2_to_1_ssse3(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + int w, int h) { + const __m128i mask = _mm_set1_epi16(0x00FF); + const int max_width = w & ~15; + int y; + for (y = 0; y < h; ++y) { + int x; + for (x = 0; x < max_width; x += 16) { + const __m128i a = _mm_loadu_si128((const __m128i *)(src + x * 2 + 0)); + const __m128i b = _mm_loadu_si128((const __m128i *)(src + x * 2 + 16)); + const __m128i a_and = _mm_and_si128(a, mask); + const __m128i b_and = _mm_and_si128(b, mask); + const __m128i c = _mm_packus_epi16(a_and, b_and); + _mm_storeu_si128((__m128i *)(dst + x), c); + } + for (; x < w; ++x) + dst[x] = src[x * 2]; + src += src_stride * 2; + dst += dst_stride; + } +} + +static INLINE __m128i filter(const __m128i *const a, const __m128i *const b, + const __m128i *const c, const __m128i *const d, + const __m128i *const e, const __m128i *const f, + const __m128i *const g, const __m128i *const h) { + const __m128i coeffs_ab = + _mm_set_epi8(6, -1, 6, -1, 6, -1, 6, -1, 6, -1, 6, -1, 6, -1, 6, -1); + const __m128i coeffs_cd = + _mm_set_epi8(78, -19, 78, -19, 78, -19, 78, -19, 78, -19, 78, -19, + 78, -19, 78, -19); + const __m128i const64_x16 = _mm_set1_epi16(64); + const __m128i ab = _mm_unpacklo_epi8(*a, *b); + const __m128i cd = _mm_unpacklo_epi8(*c, *d); + const __m128i fe = _mm_unpacklo_epi8(*f, *e); + const __m128i hg = _mm_unpacklo_epi8(*h, *g); + const __m128i ab_terms = 
_mm_maddubs_epi16(ab, coeffs_ab); + const __m128i cd_terms = _mm_maddubs_epi16(cd, coeffs_cd); + const __m128i fe_terms = _mm_maddubs_epi16(fe, coeffs_cd); + const __m128i hg_terms = _mm_maddubs_epi16(hg, coeffs_ab); + // can not overflow + const __m128i abcd_terms = _mm_add_epi16(ab_terms, cd_terms); + // can not overflow + const __m128i fehg_terms = _mm_add_epi16(fe_terms, hg_terms); + // can overflow, use saturating add + const __m128i terms = _mm_adds_epi16(abcd_terms, fehg_terms); + const __m128i round = _mm_adds_epi16(terms, const64_x16); + const __m128i shift = _mm_srai_epi16(round, 7); + return _mm_packus_epi16(shift, shift); +} + +static void eight_tap_row_ssse3(const uint8_t *src, uint8_t *dst, int w) { + const int max_width = w & ~7; + int x = 0; + for (; x < max_width; x += 8) { + const __m128i a = _mm_loadl_epi64((const __m128i *)(src + x + 0)); + const __m128i b = _mm_loadl_epi64((const __m128i *)(src + x + 1)); + const __m128i c = _mm_loadl_epi64((const __m128i *)(src + x + 2)); + const __m128i d = _mm_loadl_epi64((const __m128i *)(src + x + 3)); + const __m128i e = _mm_loadl_epi64((const __m128i *)(src + x + 4)); + const __m128i f = _mm_loadl_epi64((const __m128i *)(src + x + 5)); + const __m128i g = _mm_loadl_epi64((const __m128i *)(src + x + 6)); + const __m128i h = _mm_loadl_epi64((const __m128i *)(src + x + 7)); + const __m128i pack = filter(&a, &b, &c, &d, &e, &f, &g, &h); + _mm_storel_epi64((__m128i *)(dst + x), pack); + } +} + +static void upsample_1_to_2_ssse3(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + int dst_w, int dst_h) { + dst_w /= 2; + dst_h /= 2; + { + DECLARE_ALIGNED(16, uint8_t, tmp[1920 * 8]); + uint8_t *tmp0 = tmp + dst_w * 0; + uint8_t *tmp1 = tmp + dst_w * 1; + uint8_t *tmp2 = tmp + dst_w * 2; + uint8_t *tmp3 = tmp + dst_w * 3; + uint8_t *tmp4 = tmp + dst_w * 4; + uint8_t *tmp5 = tmp + dst_w * 5; + uint8_t *tmp6 = tmp + dst_w * 6; + uint8_t *tmp7 = tmp + dst_w * 7; + uint8_t *tmp8 = NULL; + 
const int max_width = dst_w & ~7; + int y; + eight_tap_row_ssse3(src - src_stride * 3 - 3, tmp0, dst_w); + eight_tap_row_ssse3(src - src_stride * 2 - 3, tmp1, dst_w); + eight_tap_row_ssse3(src - src_stride * 1 - 3, tmp2, dst_w); + eight_tap_row_ssse3(src + src_stride * 0 - 3, tmp3, dst_w); + eight_tap_row_ssse3(src + src_stride * 1 - 3, tmp4, dst_w); + eight_tap_row_ssse3(src + src_stride * 2 - 3, tmp5, dst_w); + eight_tap_row_ssse3(src + src_stride * 3 - 3, tmp6, dst_w); + for (y = 0; y < dst_h; y++) { + int x; + eight_tap_row_ssse3(src + src_stride * 4 - 3, tmp7, dst_w); + for (x = 0; x < max_width; x += 8) { + const __m128i A = _mm_loadl_epi64((const __m128i *)(src + x)); + const __m128i B = _mm_loadl_epi64((const __m128i *)(tmp3 + x)); + const __m128i AB = _mm_unpacklo_epi8(A, B); + __m128i C, D, CD; + _mm_storeu_si128((__m128i *)(dst + x * 2), AB); + { + const __m128i a = + _mm_loadl_epi64((const __m128i *)(src + x - src_stride * 3)); + const __m128i b = + _mm_loadl_epi64((const __m128i *)(src + x - src_stride * 2)); + const __m128i c = + _mm_loadl_epi64((const __m128i *)(src + x - src_stride * 1)); + const __m128i d = + _mm_loadl_epi64((const __m128i *)(src + x + src_stride * 0)); + const __m128i e = + _mm_loadl_epi64((const __m128i *)(src + x + src_stride * 1)); + const __m128i f = + _mm_loadl_epi64((const __m128i *)(src + x + src_stride * 2)); + const __m128i g = + _mm_loadl_epi64((const __m128i *)(src + x + src_stride * 3)); + const __m128i h = + _mm_loadl_epi64((const __m128i *)(src + x + src_stride * 4)); + C = filter(&a, &b, &c, &d, &e, &f, &g, &h); + } + { + const __m128i a = _mm_loadl_epi64((const __m128i *)(tmp0 + x)); + const __m128i b = _mm_loadl_epi64((const __m128i *)(tmp1 + x)); + const __m128i c = _mm_loadl_epi64((const __m128i *)(tmp2 + x)); + const __m128i d = _mm_loadl_epi64((const __m128i *)(tmp3 + x)); + const __m128i e = _mm_loadl_epi64((const __m128i *)(tmp4 + x)); + const __m128i f = _mm_loadl_epi64((const __m128i *)(tmp5 + x)); + const 
__m128i g = _mm_loadl_epi64((const __m128i *)(tmp6 + x)); + const __m128i h = _mm_loadl_epi64((const __m128i *)(tmp7 + x)); + D = filter(&a, &b, &c, &d, &e, &f, &g, &h); + } + CD = _mm_unpacklo_epi8(C, D); + _mm_storeu_si128((__m128i *)(dst + x * 2 + dst_stride), CD); + } + src += src_stride; + dst += dst_stride * 2; + tmp8 = tmp0; + tmp0 = tmp1; + tmp1 = tmp2; + tmp2 = tmp3; + tmp3 = tmp4; + tmp4 = tmp5; + tmp5 = tmp6; + tmp6 = tmp7; + tmp7 = tmp8; + } + } +} + +void vp9_scale_and_extend_frame_ssse3(const YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst) { + const int src_w = src->y_crop_width; + const int src_h = src->y_crop_height; + const int dst_w = dst->y_crop_width; + const int dst_h = dst->y_crop_height; + const int dst_uv_w = dst_w / 2; + const int dst_uv_h = dst_h / 2; + + if (dst_w * 2 == src_w && dst_h * 2 == src_h) { + downsample_2_to_1_ssse3(src->y_buffer, src->y_stride, + dst->y_buffer, dst->y_stride, dst_w, dst_h); + downsample_2_to_1_ssse3(src->u_buffer, src->uv_stride, + dst->u_buffer, dst->uv_stride, dst_uv_w, dst_uv_h); + downsample_2_to_1_ssse3(src->v_buffer, src->uv_stride, + dst->v_buffer, dst->uv_stride, dst_uv_w, dst_uv_h); + vpx_extend_frame_borders(dst); + } else if (dst_w == src_w * 2 && dst_h == src_h * 2) { + // The upsample() supports widths up to 1920 * 2. If greater, fall back + // to vp9_scale_and_extend_frame_c(). 
+ if (dst_w/2 <= 1920) { + upsample_1_to_2_ssse3(src->y_buffer, src->y_stride, + dst->y_buffer, dst->y_stride, dst_w, dst_h); + upsample_1_to_2_ssse3(src->u_buffer, src->uv_stride, + dst->u_buffer, dst->uv_stride, dst_uv_w, dst_uv_h); + upsample_1_to_2_ssse3(src->v_buffer, src->uv_stride, + dst->v_buffer, dst->uv_stride, dst_uv_w, dst_uv_h); + vpx_extend_frame_borders(dst); + } else { + vp9_scale_and_extend_frame_c(src, dst); + } + } else { + vp9_scale_and_extend_frame_c(src, dst); + } +} diff --git a/libvpx/vp9/vp9_cx_iface.c b/libvpx/vp9/vp9_cx_iface.c index 6ccba0f8c..10d68939d 100644 --- a/libvpx/vp9/vp9_cx_iface.c +++ b/libvpx/vp9/vp9_cx_iface.c @@ -14,6 +14,7 @@ #include "./vpx_config.h" #include "vpx/vpx_encoder.h" #include "vpx_ports/vpx_once.h" +#include "vpx_ports/system_state.h" #include "vpx/internal/vpx_codec_internal.h" #include "./vpx_version.h" #include "vp9/encoder/vp9_encoder.h" @@ -39,6 +40,7 @@ struct vp9_extracfg { unsigned int rc_max_inter_bitrate_pct; unsigned int gf_cbr_boost_pct; unsigned int lossless; + unsigned int target_level; unsigned int frame_parallel_decoding_mode; AQ_MODE aq_mode; unsigned int frame_periodic_boost; @@ -68,6 +70,7 @@ static struct vp9_extracfg default_extra_cfg = { 0, // rc_max_inter_bitrate_pct 0, // gf_cbr_boost_pct 0, // lossless + 255, // target_level 1, // frame_parallel_decoding_mode NO_AQ, // aq_mode 0, // frame_periodic_delta_q @@ -157,7 +160,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK(cfg, g_w, 1, 65535); // 16 bits available RANGE_CHECK(cfg, g_h, 1, 65535); // 16 bits available RANGE_CHECK(cfg, g_timebase.den, 1, 1000000000); - RANGE_CHECK(cfg, g_timebase.num, 1, cfg->g_timebase.den); + RANGE_CHECK(cfg, g_timebase.num, 1, 1000000000); RANGE_CHECK_HI(cfg, g_profile, 3); RANGE_CHECK_HI(cfg, rc_max_quantizer, 63); @@ -195,6 +198,17 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK(cfg, ss_number_layers, 1, VPX_SS_MAX_LAYERS); 
RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS); + { + unsigned int level = extra_cfg->target_level; + if (level != LEVEL_1 && level != LEVEL_1_1 && level != LEVEL_2 && + level != LEVEL_2_1 && level != LEVEL_3 && level != LEVEL_3_1 && + level != LEVEL_4 && level != LEVEL_4_1 && level != LEVEL_5 && + level != LEVEL_5_1 && level != LEVEL_5_2 && level != LEVEL_6 && + level != LEVEL_6_1 && level != LEVEL_6_2 && + level != LEVEL_UNKNOWN && level != LEVEL_MAX) + ERROR("target_level is invalid"); + } + if (cfg->ss_number_layers * cfg->ts_number_layers > VPX_MAX_LAYERS) ERROR("ss_number_layers * ts_number_layers is out of range"); if (cfg->ts_number_layers > 1) { @@ -485,7 +499,16 @@ static vpx_codec_err_t set_encoder_config( oxcf->content = extra_cfg->content; oxcf->tile_columns = extra_cfg->tile_columns; - oxcf->tile_rows = extra_cfg->tile_rows; + + // TODO(yunqing): The dependencies between row tiles cause error in multi- + // threaded encoding. For now, tile_rows is forced to be 0 in this case. + // The further fix can be done by adding synchronizations after a tile row + // is encoded. But this will hurt multi-threaded encoder performance. So, + // it is recommended to use tile-rows=0 while encoding with threads > 1. 
+ if (oxcf->max_threads > 1 && oxcf->tile_columns > 0) + oxcf->tile_rows = 0; + else + oxcf->tile_rows = extra_cfg->tile_rows; oxcf->error_resilient_mode = cfg->g_error_resilient; oxcf->frame_parallel_decoding_mode = extra_cfg->frame_parallel_decoding_mode; @@ -499,6 +522,8 @@ static vpx_codec_err_t set_encoder_config( oxcf->temporal_layering_mode = (enum vp9e_temporal_layering_mode) cfg->temporal_layering_mode; + oxcf->target_level = extra_cfg->target_level; + for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { #if CONFIG_SPATIAL_SVC oxcf->ss_enable_auto_arf[sl] = cfg->ss_enable_auto_alt_ref[sl]; @@ -525,6 +550,7 @@ static vpx_codec_err_t set_encoder_config( /* printf("Current VP9 Settings: \n"); printf("target_bandwidth: %d\n", oxcf->target_bandwidth); + printf("target_level: %d\n", oxcf->target_level); printf("noise_sensitivity: %d\n", oxcf->noise_sensitivity); printf("sharpness: %d\n", oxcf->sharpness); printf("cpu_used: %d\n", oxcf->cpu_used); @@ -774,6 +800,20 @@ static vpx_codec_err_t ctrl_set_frame_periodic_boost(vpx_codec_alg_priv_t *ctx, return update_extra_cfg(ctx, &extra_cfg); } +static vpx_codec_err_t ctrl_set_target_level(vpx_codec_alg_priv_t *ctx, + va_list args) { + struct vp9_extracfg extra_cfg = ctx->extra_cfg; + extra_cfg.target_level = CAST(VP9E_SET_TARGET_LEVEL, args); + return update_extra_cfg(ctx, &extra_cfg); +} + +static vpx_codec_err_t ctrl_get_level(vpx_codec_alg_priv_t *ctx, va_list args) { + int *const arg = va_arg(args, int *); + if (arg == NULL) return VPX_CODEC_INVALID_PARAM; + *arg = (int)vp9_get_level(&ctx->cpi->level_info.level_spec); + return VPX_CODEC_OK; +} + static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx, vpx_codec_priv_enc_mr_cfg_t *data) { vpx_codec_err_t res = VPX_CODEC_OK; @@ -865,6 +905,11 @@ static void pick_quickcompress_mode(vpx_codec_alg_priv_t *ctx, break; } + if (deadline == VPX_DL_REALTIME) { + ctx->oxcf.pass = 0; + new_mode = REALTIME; + } + if (ctx->oxcf.mode != new_mode) { ctx->oxcf.mode = new_mode; 
vp9_change_config(ctx->cpi, &ctx->oxcf); @@ -931,9 +976,6 @@ static int write_superframe_index(vpx_codec_alg_priv_t *ctx) { return index_sz; } -// vp9 uses 10,000,000 ticks/second as time stamp -#define TICKS_PER_SEC 10000000LL - static int64_t timebase_units_to_ticks(const vpx_rational_t *timebase, int64_t n) { return n * TICKS_PER_SEC * timebase->num / timebase->den; @@ -941,7 +983,7 @@ static int64_t timebase_units_to_ticks(const vpx_rational_t *timebase, static int64_t ticks_to_timebase_units(const vpx_rational_t *timebase, int64_t n) { - const int64_t round = TICKS_PER_SEC * timebase->num / 2 - 1; + const int64_t round = (int64_t)TICKS_PER_SEC * timebase->num / 2 - 1; return (n * timebase->den + round) / timebase->num / TICKS_PER_SEC; } @@ -963,28 +1005,30 @@ static vpx_codec_frame_flags_t get_frame_pkt_flags(const VP9_COMP *cpi, return flags; } +const size_t kMinCompressedSize = 8192; static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, const vpx_image_t *img, vpx_codec_pts_t pts, unsigned long duration, - vpx_enc_frame_flags_t flags, + vpx_enc_frame_flags_t enc_flags, unsigned long deadline) { - vpx_codec_err_t res = VPX_CODEC_OK; + volatile vpx_codec_err_t res = VPX_CODEC_OK; + volatile vpx_enc_frame_flags_t flags = enc_flags; VP9_COMP *const cpi = ctx->cpi; const vpx_rational_t *const timebase = &ctx->cfg.g_timebase; size_t data_sz; + if (cpi == NULL) return VPX_CODEC_INVALID_PARAM; + if (img != NULL) { res = validate_img(ctx, img); - // TODO(jzern) the checks related to cpi's validity should be treated as a - // failure condition, encoder setup is done fully in init() currently. - if (res == VPX_CODEC_OK && cpi != NULL) { + if (res == VPX_CODEC_OK) { // There's no codec control for multiple alt-refs so check the encoder // instance for its status to determine the compressed data size. data_sz = ctx->cfg.g_w * ctx->cfg.g_h * get_image_bps(img) / 8 * (cpi->multi_arf_allowed ? 
8 : 2); - if (data_sz < 4096) - data_sz = 4096; + if (data_sz < kMinCompressedSize) + data_sz = kMinCompressedSize; if (ctx->cx_data == NULL || ctx->cx_data_sz < data_sz) { ctx->cx_data_sz = data_sz; free(ctx->cx_data); @@ -1006,6 +1050,14 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_INVALID_PARAM; } + if (setjmp(cpi->common.error.jmp)) { + cpi->common.error.setjmp = 0; + res = update_error_state(ctx, &cpi->common.error); + vpx_clear_system_state(); + return res; + } + cpi->common.error.setjmp = 1; + vp9_apply_encoding_flags(cpi, flags); // Handle fixed keyframe intervals @@ -1017,8 +1069,7 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, } } - // Initialize the encoder instance on the first frame. - if (res == VPX_CODEC_OK && cpi != NULL) { + if (res == VPX_CODEC_OK) { unsigned int lib_flags = 0; YV12_BUFFER_CONFIG sd; int64_t dst_time_stamp = timebase_units_to_ticks(timebase, pts); @@ -1057,7 +1108,8 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, * the buffer size anyway. 
*/ if (cx_data_sz < ctx->cx_data_sz / 2) { - ctx->base.err_detail = "Compressed data buffer too small"; + vpx_internal_error(&cpi->common.error, VPX_CODEC_ERROR, + "Compressed data buffer too small"); return VPX_CODEC_ERROR; } } @@ -1175,6 +1227,7 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, } } + cpi->common.error.setjmp = 0; return res; } @@ -1393,8 +1446,8 @@ static vpx_codec_err_t ctrl_set_svc_parameters(vpx_codec_alg_priv_t *ctx, LAYER_IDS_TO_IDX(sl, tl, cpi->svc.number_temporal_layers); LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; - lc->max_q = params->max_quantizers[sl]; - lc->min_q = params->min_quantizers[sl]; + lc->max_q = params->max_quantizers[layer]; + lc->min_q = params->min_quantizers[layer]; lc->scaling_factor_num = params->scaling_factor_num[sl]; lc->scaling_factor_den = params->scaling_factor_den[sl]; } @@ -1496,6 +1549,7 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { {VP9E_SET_MAX_GF_INTERVAL, ctrl_set_max_gf_interval}, {VP9E_SET_SVC_REF_FRAME_CONFIG, ctrl_set_svc_ref_frame_config}, {VP9E_SET_RENDER_SIZE, ctrl_set_render_size}, + {VP9E_SET_TARGET_LEVEL, ctrl_set_target_level}, // Getters {VP8E_GET_LAST_QUANTIZER, ctrl_get_quantizer}, @@ -1503,6 +1557,7 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { {VP9_GET_REFERENCE, ctrl_get_reference}, {VP9E_GET_SVC_LAYER_ID, ctrl_get_svc_layer_id}, {VP9E_GET_ACTIVEMAP, ctrl_get_active_map}, + {VP9E_GET_LEVEL, ctrl_get_level}, { -1, NULL}, }; @@ -1555,7 +1610,7 @@ static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = { // keyframing settings (kf) VPX_KF_AUTO, // g_kfmode 0, // kf_min_dist - 9999, // kf_max_dist + 128, // kf_max_dist VPX_SS_DEFAULT_LAYERS, // ss_number_layers {0}, diff --git a/libvpx/vp9/vp9_dx_iface.c b/libvpx/vp9/vp9_dx_iface.c index be5d1600a..6531e2c61 100644 --- a/libvpx/vp9/vp9_dx_iface.c +++ b/libvpx/vp9/vp9_dx_iface.c @@ -127,7 +127,7 @@ static vpx_codec_err_t decoder_peek_si_internal(const uint8_t *data, vpx_decrypt_cb decrypt_cb, 
void *decrypt_state) { int intra_only_flag = 0; - uint8_t clear_buffer[9]; + uint8_t clear_buffer[10]; if (data + data_sz <= data) return VPX_CODEC_INVALID_PARAM; @@ -141,6 +141,11 @@ static vpx_codec_err_t decoder_peek_si_internal(const uint8_t *data, data = clear_buffer; } + // A maximum of 6 bits are needed to read the frame marker, profile and + // show_existing_frame. + if (data_sz < 1) + return VPX_CODEC_UNSUP_BITSTREAM; + { int show_frame; int error_resilient; @@ -154,15 +159,19 @@ static vpx_codec_err_t decoder_peek_si_internal(const uint8_t *data, if (profile >= MAX_PROFILES) return VPX_CODEC_UNSUP_BITSTREAM; - if ((profile >= 2 && data_sz <= 1) || data_sz < 1) - return VPX_CODEC_UNSUP_BITSTREAM; - if (vpx_rb_read_bit(&rb)) { // show an existing frame + // If profile is > 2 and show_existing_frame is true, then at least 1 more + // byte (6+3=9 bits) is needed. + if (profile > 2 && data_sz < 2) + return VPX_CODEC_UNSUP_BITSTREAM; vpx_rb_read_literal(&rb, 3); // Frame buffer to show. return VPX_CODEC_OK; } - if (data_sz <= 8) + // For the rest of the function, a maximum of 9 more bytes are needed + // (computed by taking the maximum possible bits needed in each case). Note + // that this has to be updated if we read any more bits in this function. 
+ if (data_sz < 10) return VPX_CODEC_UNSUP_BITSTREAM; si->is_kf = !vpx_rb_read_bit(&rb); diff --git a/libvpx/vp9/vp9cx.mk b/libvpx/vp9/vp9cx.mk index 25a176f81..5f3de8f8a 100644 --- a/libvpx/vp9/vp9cx.mk +++ b/libvpx/vp9/vp9cx.mk @@ -17,7 +17,6 @@ VP9_CX_SRCS_REMOVE-no += $(VP9_COMMON_SRCS_REMOVE-no) VP9_CX_SRCS-yes += vp9_cx_iface.c -VP9_CX_SRCS-yes += encoder/vp9_avg.c VP9_CX_SRCS-yes += encoder/vp9_bitstream.c VP9_CX_SRCS-yes += encoder/vp9_context_tree.c VP9_CX_SRCS-yes += encoder/vp9_context_tree.h @@ -76,12 +75,16 @@ VP9_CX_SRCS-yes += encoder/vp9_tokenize.c VP9_CX_SRCS-yes += encoder/vp9_treewriter.c VP9_CX_SRCS-yes += encoder/vp9_aq_variance.c VP9_CX_SRCS-yes += encoder/vp9_aq_variance.h +VP9_CX_SRCS-yes += encoder/vp9_aq_360.c +VP9_CX_SRCS-yes += encoder/vp9_aq_360.h VP9_CX_SRCS-yes += encoder/vp9_aq_cyclicrefresh.c VP9_CX_SRCS-yes += encoder/vp9_aq_cyclicrefresh.h VP9_CX_SRCS-yes += encoder/vp9_aq_complexity.c VP9_CX_SRCS-yes += encoder/vp9_aq_complexity.h VP9_CX_SRCS-yes += encoder/vp9_skin_detection.c VP9_CX_SRCS-yes += encoder/vp9_skin_detection.h +VP9_CX_SRCS-yes += encoder/vp9_noise_estimate.c +VP9_CX_SRCS-yes += encoder/vp9_noise_estimate.h ifeq ($(CONFIG_VP9_POSTPROC),yes) VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.h VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.c @@ -91,15 +94,15 @@ VP9_CX_SRCS-yes += encoder/vp9_temporal_filter.h VP9_CX_SRCS-yes += encoder/vp9_mbgraph.c VP9_CX_SRCS-yes += encoder/vp9_mbgraph.h -VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_avg_intrin_sse2.c VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_quantize_sse2.c +VP9_CX_SRCS-$(HAVE_AVX) += encoder/x86/vp9_diamond_search_sad_avx.c ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_block_error_intrin_sse2.c endif ifeq ($(CONFIG_USE_X86INC),yes) -VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.asm +VP9_CX_SRCS-$(HAVE_SSE2) += 
encoder/x86/vp9_dct_sse2.asm ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_error_sse2.asm VP9_CX_SRCS-$(HAVE_AVX) += encoder/x86/vp9_highbd_error_avx.asm @@ -111,12 +114,14 @@ endif ifeq ($(ARCH_X86_64),yes) ifeq ($(CONFIG_USE_X86INC),yes) VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3_x86_64.asm -VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_dct_ssse3_x86_64.asm endif endif -VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.c +VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_intrin_sse2.c VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_dct_ssse3.c +ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes) +VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_frame_scale_ssse3.c +endif ifeq ($(CONFIG_VP9_TEMPORAL_DENOISING),yes) VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_denoiser_sse2.c @@ -128,10 +133,8 @@ ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes) VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_dct_neon.c VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_error_neon.c endif -VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_avg_neon.c VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_quantize_neon.c -VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_avg_msa.c VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_error_msa.c VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct4x4_msa.c VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct8x8_msa.c diff --git a/libvpx/vpx/exports_enc b/libvpx/vpx/exports_enc index e4707ba10..914e36cd4 100644 --- a/libvpx/vpx/exports_enc +++ b/libvpx/vpx/exports_enc @@ -7,9 +7,3 @@ text vpx_codec_get_cx_data text vpx_codec_get_global_headers text vpx_codec_get_preview_frame text vpx_codec_set_cx_data_buf -text vpx_svc_dump_statistics -text vpx_svc_encode -text vpx_svc_get_message -text vpx_svc_init -text vpx_svc_release -text vpx_svc_set_options diff --git a/libvpx/vpx/exports_spatial_svc b/libvpx/vpx/exports_spatial_svc new file mode 100644 index 000000000..d258a1d61 --- /dev/null +++ b/libvpx/vpx/exports_spatial_svc @@ 
-0,0 +1,6 @@ +text vpx_svc_dump_statistics +text vpx_svc_encode +text vpx_svc_get_message +text vpx_svc_init +text vpx_svc_release +text vpx_svc_set_options diff --git a/libvpx/vpx/src/svc_encodeframe.c b/libvpx/vpx/src/svc_encodeframe.c index ff600830e..ef9b3528a 100644 --- a/libvpx/vpx/src/svc_encodeframe.c +++ b/libvpx/vpx/src/svc_encodeframe.c @@ -322,8 +322,7 @@ void assign_layer_bitrates(const SvcContext *svc_ctx, for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) { if (si->svc_params.scaling_factor_den[sl] > 0) { - alloc_ratio[sl] = (float)(si->svc_params.scaling_factor_num[sl] * - 1.0 / si->svc_params.scaling_factor_den[sl]); + alloc_ratio[sl] = (float)( pow(2, sl) ); total += alloc_ratio[sl]; } } @@ -334,9 +333,9 @@ void assign_layer_bitrates(const SvcContext *svc_ctx, alloc_ratio[sl] / total); if (svc_ctx->temporal_layering_mode == 3) { enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers] = - spatial_layer_target >> 1; + (spatial_layer_target*6)/10; // 60% enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 1] = - (spatial_layer_target >> 1) + (spatial_layer_target >> 2); + (spatial_layer_target*8)/10; // 80% enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 2] = spatial_layer_target; } else if (svc_ctx->temporal_layering_mode == 2 || @@ -385,7 +384,7 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, vpx_codec_iface_t *iface, vpx_codec_enc_cfg_t *enc_cfg) { vpx_codec_err_t res; - int i; + int i, sl , tl; SvcInternal_t *const si = get_svc_internal(svc_ctx); if (svc_ctx == NULL || codec_ctx == NULL || iface == NULL || enc_cfg == NULL) { @@ -398,11 +397,6 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, si->width = enc_cfg->g_w; si->height = enc_cfg->g_h; - if (enc_cfg->kf_max_dist < 2) { - svc_log(svc_ctx, SVC_LOG_ERROR, "key frame distance too small: %d\n", - enc_cfg->kf_max_dist); - return VPX_CODEC_INVALID_PARAM; - } si->kf_dist = enc_cfg->kf_max_dist; if 
(svc_ctx->spatial_layers == 0) @@ -423,11 +417,16 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, svc_ctx->temporal_layers = 2; } - for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) { - si->svc_params.max_quantizers[i] = MAX_QUANTIZER; - si->svc_params.min_quantizers[i] = 0; - si->svc_params.scaling_factor_num[i] = DEFAULT_SCALE_FACTORS_NUM[i]; - si->svc_params.scaling_factor_den[i] = DEFAULT_SCALE_FACTORS_DEN[i]; + for (sl = 0; sl < VPX_SS_MAX_LAYERS; ++sl) { + si->svc_params.scaling_factor_num[sl] = DEFAULT_SCALE_FACTORS_NUM[sl]; + si->svc_params.scaling_factor_den[sl] = DEFAULT_SCALE_FACTORS_DEN[sl]; + } + for (tl = 0; tl < svc_ctx->temporal_layers; ++tl) { + for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) { + i = sl * svc_ctx->temporal_layers + tl; + si->svc_params.max_quantizers[i] = MAX_QUANTIZER; + si->svc_params.min_quantizers[i] = 0; + } } // Parse aggregate command line options. Options must start with @@ -485,6 +484,7 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, enc_cfg->rc_buf_initial_sz = 500; enc_cfg->rc_buf_optimal_sz = 600; enc_cfg->rc_buf_sz = 1000; + enc_cfg->rc_dropframe_thresh = 0; } if (enc_cfg->g_error_resilient == 0 && si->use_multiple_frame_contexts == 0) @@ -571,6 +571,27 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, } #endif #endif + case VPX_CODEC_PSNR_PKT: + { +#if VPX_ENCODER_ABI_VERSION > (5 + VPX_CODEC_ABI_VERSION) + int j; + svc_log(svc_ctx, SVC_LOG_DEBUG, + "frame: %d, layer: %d, PSNR(Total/Y/U/V): " + "%2.3f %2.3f %2.3f %2.3f \n", + si->psnr_pkt_received, 0, + cx_pkt->data.layer_psnr[0].psnr[0], + cx_pkt->data.layer_psnr[0].psnr[1], + cx_pkt->data.layer_psnr[0].psnr[2], + cx_pkt->data.layer_psnr[0].psnr[3]); + for (j = 0; j < COMPONENTS; ++j) { + si->psnr_sum[0][j] += + cx_pkt->data.layer_psnr[0].psnr[j]; + si->sse_sum[0][j] += cx_pkt->data.layer_psnr[0].sse[j]; + } +#endif + } + ++si->psnr_pkt_received; + break; default: { break; } diff --git a/libvpx/vpx/vp8cx.h 
b/libvpx/vpx/vp8cx.h index bd99c6dc1..61882e650 100644 --- a/libvpx/vpx/vp8cx.h +++ b/libvpx/vpx/vp8cx.h @@ -45,15 +45,6 @@ extern vpx_codec_iface_t vpx_codec_vp9_cx_algo; extern vpx_codec_iface_t *vpx_codec_vp9_cx(void); /*!@} - end algorithm interface member group*/ -/*!\name Algorithm interface for VP10 - * - * This interface provides the capability to encode raw VP9 streams. - * @{ - */ -extern vpx_codec_iface_t vpx_codec_vp10_cx_algo; -extern vpx_codec_iface_t *vpx_codec_vp10_cx(void); -/*!@} - end algorithm interface member group*/ - /* * Algorithm Flags */ @@ -554,6 +545,21 @@ enum vp8e_enc_control_id { * Supported in codecs: VP9 */ VP9E_SET_RENDER_SIZE, + + /*!\brief Codec control function to set target level. + * + * 255: off (default); 0: only keep level stats; 10: target for level 1.0; + * 11: target for level 1.1; ... 62: target for level 6.2 + * + * Supported in codecs: VP9 + */ + VP9E_SET_TARGET_LEVEL, + + /*!\brief Codec control function to get bitstream level. + * + * Supported in codecs: VP9 + */ + VP9E_GET_LEVEL }; /*!\brief vpx 1-D scaling mode @@ -809,6 +815,12 @@ VPX_CTRL_USE_TYPE(VP9E_SET_SVC_REF_FRAME_CONFIG, vpx_svc_ref_frame_config_t *) VPX_CTRL_USE_TYPE(VP9E_SET_RENDER_SIZE, int *) #define VPX_CTRL_VP9E_SET_RENDER_SIZE +VPX_CTRL_USE_TYPE(VP9E_SET_TARGET_LEVEL, unsigned int) +#define VPX_CTRL_VP9E_SET_TARGET_LEVEL + +VPX_CTRL_USE_TYPE(VP9E_GET_LEVEL, int *) +#define VPX_CTRL_VP9E_GET_LEVEL + /*!\endcond */ /*! @} - end defgroup vp8_encoder */ #ifdef __cplusplus diff --git a/libvpx/vpx/vp8dx.h b/libvpx/vpx/vp8dx.h index 1f02fd595..67c97bb6c 100644 --- a/libvpx/vpx/vp8dx.h +++ b/libvpx/vpx/vp8dx.h @@ -46,15 +46,6 @@ extern vpx_codec_iface_t vpx_codec_vp9_dx_algo; extern vpx_codec_iface_t *vpx_codec_vp9_dx(void); /*!@} - end algorithm interface member group*/ -/*!\name Algorithm interface for VP10 - * - * This interface provides the capability to decode VP10 streams. 
- * @{ - */ -extern vpx_codec_iface_t vpx_codec_vp10_dx_algo; -extern vpx_codec_iface_t *vpx_codec_vp10_dx(void); -/*!@} - end algorithm interface member group*/ - /*!\enum vp8_dec_control_id * \brief VP8 decoder control functions * diff --git a/libvpx/vpx/vpx_image.h b/libvpx/vpx/vpx_image.h index e9e952c48..7958c6980 100644 --- a/libvpx/vpx/vpx_image.h +++ b/libvpx/vpx/vpx_image.h @@ -28,7 +28,7 @@ extern "C" { * types, removing or reassigning enums, adding/removing/rearranging * fields to structures */ -#define VPX_IMAGE_ABI_VERSION (3) /**<\hideinitializer*/ +#define VPX_IMAGE_ABI_VERSION (4) /**<\hideinitializer*/ #define VPX_IMG_FMT_PLANAR 0x100 /**< Image is a planar format. */ diff --git a/libvpx/vpx_dsp/add_noise.c b/libvpx/vpx_dsp/add_noise.c new file mode 100644 index 000000000..682b44419 --- /dev/null +++ b/libvpx/vpx_dsp/add_noise.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <stdlib.h> + +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" + +#include "vpx/vpx_integer.h" +#include "vpx_ports/mem.h" + +void vpx_plane_add_noise_c(uint8_t *start, char *noise, + char blackclamp[16], + char whiteclamp[16], + char bothclamp[16], + unsigned int width, unsigned int height, int pitch) { + unsigned int i, j; + + for (i = 0; i < height; i++) { + uint8_t *pos = start + i * pitch; + char *ref = (char *)(noise + (rand() & 0xff)); // NOLINT + + for (j = 0; j < width; j++) { + int v = pos[j]; + + v = clamp(v - blackclamp[0], 0, 255); + v = clamp(v + bothclamp[0], 0, 255); + v = clamp(v - whiteclamp[0], 0, 255); + + pos[j] = v + ref[j]; + } + } +} diff --git a/libvpx/vp9/encoder/arm/neon/vp9_avg_neon.c b/libvpx/vpx_dsp/arm/avg_neon.c index d569ec95d..e52958c54 100644 --- a/libvpx/vp9/encoder/arm/neon/vp9_avg_neon.c +++ b/libvpx/vpx_dsp/arm/avg_neon.c @@ -11,7 +11,7 @@ #include <arm_neon.h> #include <assert.h> -#include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "./vpx_config.h" #include "vpx/vpx_integer.h" @@ -24,7 +24,19 @@ static INLINE unsigned int horizontal_add_u16x8(const uint16x8_t v_16x8) { return vget_lane_u32(c, 0); } -unsigned int vp9_avg_8x8_neon(const uint8_t *s, int p) { +unsigned int vpx_avg_4x4_neon(const uint8_t *s, int p) { + uint16x8_t v_sum; + uint32x2_t v_s0 = vdup_n_u32(0); + uint32x2_t v_s1 = vdup_n_u32(0); + v_s0 = vld1_lane_u32((const uint32_t *)s, v_s0, 0); + v_s0 = vld1_lane_u32((const uint32_t *)(s + p), v_s0, 1); + v_s1 = vld1_lane_u32((const uint32_t *)(s + 2 * p), v_s1, 0); + v_s1 = vld1_lane_u32((const uint32_t *)(s + 3 * p), v_s1, 1); + v_sum = vaddl_u8(vreinterpret_u8_u32(v_s0), vreinterpret_u8_u32(v_s1)); + return (horizontal_add_u16x8(v_sum) + 8) >> 4; +} + +unsigned int vpx_avg_8x8_neon(const uint8_t *s, int p) { uint8x8_t v_s0 = vld1_u8(s); const uint8x8_t v_s1 = vld1_u8(s + p); uint16x8_t v_sum = vaddl_u8(v_s0, v_s1); @@ -50,7 +62,34 @@ unsigned int vp9_avg_8x8_neon(const uint8_t *s, 
int p) { return (horizontal_add_u16x8(v_sum) + 32) >> 6; } -void vp9_int_pro_row_neon(int16_t hbuf[16], uint8_t const *ref, +// coeff: 16 bits, dynamic range [-32640, 32640]. +// length: value range {16, 64, 256, 1024}. +int vpx_satd_neon(const int16_t *coeff, int length) { + const int16x4_t zero = vdup_n_s16(0); + int32x4_t accum = vdupq_n_s32(0); + + do { + const int16x8_t src0 = vld1q_s16(coeff); + const int16x8_t src8 = vld1q_s16(coeff + 8); + accum = vabal_s16(accum, vget_low_s16(src0), zero); + accum = vabal_s16(accum, vget_high_s16(src0), zero); + accum = vabal_s16(accum, vget_low_s16(src8), zero); + accum = vabal_s16(accum, vget_high_s16(src8), zero); + length -= 16; + coeff += 16; + } while (length != 0); + + { + // satd: 26 bits, dynamic range [-32640 * 1024, 32640 * 1024] + const int64x2_t s0 = vpaddlq_s32(accum); // cascading summation of 'accum'. + const int32x2_t s1 = vadd_s32(vreinterpret_s32_s64(vget_low_s64(s0)), + vreinterpret_s32_s64(vget_high_s64(s0))); + const int satd = vget_lane_s32(s1, 0); + return satd; + } +} + +void vpx_int_pro_row_neon(int16_t hbuf[16], uint8_t const *ref, const int ref_stride, const int height) { int i; uint16x8_t vec_sum_lo = vdupq_n_u16(0); @@ -103,7 +142,7 @@ void vp9_int_pro_row_neon(int16_t hbuf[16], uint8_t const *ref, vst1q_s16(hbuf, vreinterpretq_s16_u16(vec_sum_hi)); } -int16_t vp9_int_pro_col_neon(uint8_t const *ref, const int width) { +int16_t vpx_int_pro_col_neon(uint8_t const *ref, const int width) { int i; uint16x8_t vec_sum = vdupq_n_u16(0); @@ -119,7 +158,7 @@ int16_t vp9_int_pro_col_neon(uint8_t const *ref, const int width) { // ref, src = [0, 510] - max diff = 16-bits // bwl = {2, 3, 4}, width = {16, 32, 64} -int vp9_vector_var_neon(int16_t const *ref, int16_t const *src, const int bwl) { +int vpx_vector_var_neon(int16_t const *ref, int16_t const *src, const int bwl) { int width = 4 << bwl; int32x4_t sse = vdupq_n_s32(0); int16x8_t total = vdupq_n_s16(0); @@ -158,3 +197,60 @@ int 
vp9_vector_var_neon(int16_t const *ref, int16_t const *src, const int bwl) { return s - ((t * t) >> shift_factor); } } + +void vpx_minmax_8x8_neon(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + int *min, int *max) { + // Load and concatenate. + const uint8x16_t a01 = vcombine_u8(vld1_u8(a), + vld1_u8(a + a_stride)); + const uint8x16_t a23 = vcombine_u8(vld1_u8(a + 2 * a_stride), + vld1_u8(a + 3 * a_stride)); + const uint8x16_t a45 = vcombine_u8(vld1_u8(a + 4 * a_stride), + vld1_u8(a + 5 * a_stride)); + const uint8x16_t a67 = vcombine_u8(vld1_u8(a + 6 * a_stride), + vld1_u8(a + 7 * a_stride)); + + const uint8x16_t b01 = vcombine_u8(vld1_u8(b), + vld1_u8(b + b_stride)); + const uint8x16_t b23 = vcombine_u8(vld1_u8(b + 2 * b_stride), + vld1_u8(b + 3 * b_stride)); + const uint8x16_t b45 = vcombine_u8(vld1_u8(b + 4 * b_stride), + vld1_u8(b + 5 * b_stride)); + const uint8x16_t b67 = vcombine_u8(vld1_u8(b + 6 * b_stride), + vld1_u8(b + 7 * b_stride)); + + // Absolute difference. + const uint8x16_t ab01_diff = vabdq_u8(a01, b01); + const uint8x16_t ab23_diff = vabdq_u8(a23, b23); + const uint8x16_t ab45_diff = vabdq_u8(a45, b45); + const uint8x16_t ab67_diff = vabdq_u8(a67, b67); + + // Max values between the Q vectors. + const uint8x16_t ab0123_max = vmaxq_u8(ab01_diff, ab23_diff); + const uint8x16_t ab4567_max = vmaxq_u8(ab45_diff, ab67_diff); + const uint8x16_t ab0123_min = vminq_u8(ab01_diff, ab23_diff); + const uint8x16_t ab4567_min = vminq_u8(ab45_diff, ab67_diff); + + const uint8x16_t ab07_max = vmaxq_u8(ab0123_max, ab4567_max); + const uint8x16_t ab07_min = vminq_u8(ab0123_min, ab4567_min); + + // Split to D and start doing pairwise. + uint8x8_t ab_max = vmax_u8(vget_high_u8(ab07_max), vget_low_u8(ab07_max)); + uint8x8_t ab_min = vmin_u8(vget_high_u8(ab07_min), vget_low_u8(ab07_min)); + + // Enough runs of vpmax/min propogate the max/min values to every position. 
+ ab_max = vpmax_u8(ab_max, ab_max); + ab_min = vpmin_u8(ab_min, ab_min); + + ab_max = vpmax_u8(ab_max, ab_max); + ab_min = vpmin_u8(ab_min, ab_min); + + ab_max = vpmax_u8(ab_max, ab_max); + ab_min = vpmin_u8(ab_min, ab_min); + + *min = *max = 0; // Clear high bits + // Store directly to avoid costly neon->gpr transfer. + vst1_lane_u8((uint8_t *)max, ab_max, 0); + vst1_lane_u8((uint8_t *)min, ab_min, 0); +} diff --git a/libvpx/vpx_dsp/arm/hadamard_neon.c b/libvpx/vpx_dsp/arm/hadamard_neon.c new file mode 100644 index 000000000..21e3e3dba --- /dev/null +++ b/libvpx/vpx_dsp/arm/hadamard_neon.c @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2016 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <arm_neon.h> + +#include "./vpx_dsp_rtcd.h" + +static void hadamard8x8_one_pass(int16x8_t *a0, int16x8_t *a1, + int16x8_t *a2, int16x8_t *a3, + int16x8_t *a4, int16x8_t *a5, + int16x8_t *a6, int16x8_t *a7) { + const int16x8_t b0 = vaddq_s16(*a0, *a1); + const int16x8_t b1 = vsubq_s16(*a0, *a1); + const int16x8_t b2 = vaddq_s16(*a2, *a3); + const int16x8_t b3 = vsubq_s16(*a2, *a3); + const int16x8_t b4 = vaddq_s16(*a4, *a5); + const int16x8_t b5 = vsubq_s16(*a4, *a5); + const int16x8_t b6 = vaddq_s16(*a6, *a7); + const int16x8_t b7 = vsubq_s16(*a6, *a7); + + const int16x8_t c0 = vaddq_s16(b0, b2); + const int16x8_t c1 = vaddq_s16(b1, b3); + const int16x8_t c2 = vsubq_s16(b0, b2); + const int16x8_t c3 = vsubq_s16(b1, b3); + const int16x8_t c4 = vaddq_s16(b4, b6); + const int16x8_t c5 = vaddq_s16(b5, b7); + const int16x8_t c6 = vsubq_s16(b4, b6); + const int16x8_t c7 = vsubq_s16(b5, b7); + + *a0 = vaddq_s16(c0, c4); + *a1 = vsubq_s16(c2, c6); + *a2 = vsubq_s16(c0, c4); + *a3 = vaddq_s16(c2, c6); + *a4 = vaddq_s16(c3, c7); + *a5 = vsubq_s16(c3, c7); + *a6 = vsubq_s16(c1, c5); + *a7 = vaddq_s16(c1, c5); +} + +// TODO(johannkoenig): Make a transpose library and dedup with idct. Consider +// reversing transpose order which may make it easier for the compiler to +// reconcile the vtrn.64 moves. +static void transpose8x8(int16x8_t *a0, int16x8_t *a1, + int16x8_t *a2, int16x8_t *a3, + int16x8_t *a4, int16x8_t *a5, + int16x8_t *a6, int16x8_t *a7) { + // Swap 64 bit elements. 
Goes from: + // a0: 00 01 02 03 04 05 06 07 + // a1: 08 09 10 11 12 13 14 15 + // a2: 16 17 18 19 20 21 22 23 + // a3: 24 25 26 27 28 29 30 31 + // a4: 32 33 34 35 36 37 38 39 + // a5: 40 41 42 43 44 45 46 47 + // a6: 48 49 50 51 52 53 54 55 + // a7: 56 57 58 59 60 61 62 63 + // to: + // a04_lo: 00 01 02 03 32 33 34 35 + // a15_lo: 08 09 10 11 40 41 42 43 + // a26_lo: 16 17 18 19 48 49 50 51 + // a37_lo: 24 25 26 27 56 57 58 59 + // a04_hi: 04 05 06 07 36 37 38 39 + // a15_hi: 12 13 14 15 44 45 46 47 + // a26_hi: 20 21 22 23 52 53 54 55 + // a37_hi: 28 29 30 31 60 61 62 63 + const int16x8_t a04_lo = vcombine_s16(vget_low_s16(*a0), vget_low_s16(*a4)); + const int16x8_t a15_lo = vcombine_s16(vget_low_s16(*a1), vget_low_s16(*a5)); + const int16x8_t a26_lo = vcombine_s16(vget_low_s16(*a2), vget_low_s16(*a6)); + const int16x8_t a37_lo = vcombine_s16(vget_low_s16(*a3), vget_low_s16(*a7)); + const int16x8_t a04_hi = vcombine_s16(vget_high_s16(*a0), vget_high_s16(*a4)); + const int16x8_t a15_hi = vcombine_s16(vget_high_s16(*a1), vget_high_s16(*a5)); + const int16x8_t a26_hi = vcombine_s16(vget_high_s16(*a2), vget_high_s16(*a6)); + const int16x8_t a37_hi = vcombine_s16(vget_high_s16(*a3), vget_high_s16(*a7)); + + // Swap 32 bit elements resulting in: + // a0246_lo: + // 00 01 16 17 32 33 48 49 + // 02 03 18 19 34 35 50 51 + // a1357_lo: + // 08 09 24 25 40 41 56 57 + // 10 11 26 27 42 43 58 59 + // a0246_hi: + // 04 05 20 21 36 37 52 53 + // 06 07 22 23 38 39 54 55 + // a1657_hi: + // 12 13 28 29 44 45 60 61 + // 14 15 30 31 46 47 62 63 + const int32x4x2_t a0246_lo = vtrnq_s32(vreinterpretq_s32_s16(a04_lo), + vreinterpretq_s32_s16(a26_lo)); + const int32x4x2_t a1357_lo = vtrnq_s32(vreinterpretq_s32_s16(a15_lo), + vreinterpretq_s32_s16(a37_lo)); + const int32x4x2_t a0246_hi = vtrnq_s32(vreinterpretq_s32_s16(a04_hi), + vreinterpretq_s32_s16(a26_hi)); + const int32x4x2_t a1357_hi = vtrnq_s32(vreinterpretq_s32_s16(a15_hi), + vreinterpretq_s32_s16(a37_hi)); + + // Swap 16 bit 
elements resulting in: + // b0: + // 00 08 16 24 32 40 48 56 + // 01 09 17 25 33 41 49 57 + // b1: + // 02 10 18 26 34 42 50 58 + // 03 11 19 27 35 43 51 59 + // b2: + // 04 12 20 28 36 44 52 60 + // 05 13 21 29 37 45 53 61 + // b3: + // 06 14 22 30 38 46 54 62 + // 07 15 23 31 39 47 55 63 + const int16x8x2_t b0 = vtrnq_s16(vreinterpretq_s16_s32(a0246_lo.val[0]), + vreinterpretq_s16_s32(a1357_lo.val[0])); + const int16x8x2_t b1 = vtrnq_s16(vreinterpretq_s16_s32(a0246_lo.val[1]), + vreinterpretq_s16_s32(a1357_lo.val[1])); + const int16x8x2_t b2 = vtrnq_s16(vreinterpretq_s16_s32(a0246_hi.val[0]), + vreinterpretq_s16_s32(a1357_hi.val[0])); + const int16x8x2_t b3 = vtrnq_s16(vreinterpretq_s16_s32(a0246_hi.val[1]), + vreinterpretq_s16_s32(a1357_hi.val[1])); + + *a0 = b0.val[0]; + *a1 = b0.val[1]; + *a2 = b1.val[0]; + *a3 = b1.val[1]; + *a4 = b2.val[0]; + *a5 = b2.val[1]; + *a6 = b3.val[0]; + *a7 = b3.val[1]; +} + +void vpx_hadamard_8x8_neon(const int16_t *src_diff, int src_stride, + int16_t *coeff) { + int16x8_t a0 = vld1q_s16(src_diff); + int16x8_t a1 = vld1q_s16(src_diff + src_stride); + int16x8_t a2 = vld1q_s16(src_diff + 2 * src_stride); + int16x8_t a3 = vld1q_s16(src_diff + 3 * src_stride); + int16x8_t a4 = vld1q_s16(src_diff + 4 * src_stride); + int16x8_t a5 = vld1q_s16(src_diff + 5 * src_stride); + int16x8_t a6 = vld1q_s16(src_diff + 6 * src_stride); + int16x8_t a7 = vld1q_s16(src_diff + 7 * src_stride); + + hadamard8x8_one_pass(&a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7); + + transpose8x8(&a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7); + + hadamard8x8_one_pass(&a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7); + + // Skip the second transpose because it is not required. 
+ + vst1q_s16(coeff + 0, a0); + vst1q_s16(coeff + 8, a1); + vst1q_s16(coeff + 16, a2); + vst1q_s16(coeff + 24, a3); + vst1q_s16(coeff + 32, a4); + vst1q_s16(coeff + 40, a5); + vst1q_s16(coeff + 48, a6); + vst1q_s16(coeff + 56, a7); +} + +void vpx_hadamard_16x16_neon(const int16_t *src_diff, int src_stride, + int16_t *coeff) { + int i; + + /* Rearrange 16x16 to 8x32 and remove stride. + * Top left first. */ + vpx_hadamard_8x8_neon(src_diff + 0 + 0 * src_stride, src_stride, coeff + 0); + /* Top right. */ + vpx_hadamard_8x8_neon(src_diff + 8 + 0 * src_stride, src_stride, coeff + 64); + /* Bottom left. */ + vpx_hadamard_8x8_neon(src_diff + 0 + 8 * src_stride, src_stride, coeff + 128); + /* Bottom right. */ + vpx_hadamard_8x8_neon(src_diff + 8 + 8 * src_stride, src_stride, coeff + 192); + + for (i = 0; i < 64; i += 8) { + const int16x8_t a0 = vld1q_s16(coeff + 0); + const int16x8_t a1 = vld1q_s16(coeff + 64); + const int16x8_t a2 = vld1q_s16(coeff + 128); + const int16x8_t a3 = vld1q_s16(coeff + 192); + + const int16x8_t b0 = vhaddq_s16(a0, a1); + const int16x8_t b1 = vhsubq_s16(a0, a1); + const int16x8_t b2 = vhaddq_s16(a2, a3); + const int16x8_t b3 = vhsubq_s16(a2, a3); + + const int16x8_t c0 = vaddq_s16(b0, b2); + const int16x8_t c1 = vaddq_s16(b1, b3); + const int16x8_t c2 = vsubq_s16(b0, b2); + const int16x8_t c3 = vsubq_s16(b1, b3); + + vst1q_s16(coeff + 0, c0); + vst1q_s16(coeff + 64, c1); + vst1q_s16(coeff + 128, c2); + vst1q_s16(coeff + 192, c3); + + coeff += 8; + } +} diff --git a/libvpx/vpx_dsp/arm/loopfilter_4_neon.asm b/libvpx/vpx_dsp/arm/loopfilter_4_neon.asm index e45e34cd4..937115898 100644 --- a/libvpx/vpx_dsp/arm/loopfilter_4_neon.asm +++ b/libvpx/vpx_dsp/arm/loopfilter_4_neon.asm @@ -16,37 +16,28 @@ ; Currently vpx only works on iterations 8 at a time. The vp8 loop filter ; works on 16 iterations at a time. -; TODO(fgalligan): See about removing the count code as this function is only -; called with a count of 1. 
; ; void vpx_lpf_horizontal_4_neon(uint8_t *s, ; int p /* pitch */, ; const uint8_t *blimit, ; const uint8_t *limit, -; const uint8_t *thresh, -; int count) +; const uint8_t *thresh) ; ; r0 uint8_t *s, ; r1 int p, /* pitch */ ; r2 const uint8_t *blimit, ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, -; sp+4 int count |vpx_lpf_horizontal_4_neon| PROC push {lr} vld1.8 {d0[]}, [r2] ; duplicate *blimit - ldr r12, [sp, #8] ; load count ldr r2, [sp, #4] ; load thresh add r1, r1, r1 ; double pitch - cmp r12, #0 - beq end_vpx_lf_h_edge - vld1.8 {d1[]}, [r3] ; duplicate *limit vld1.8 {d2[]}, [r2] ; duplicate *thresh -count_lf_h_loop sub r2, r0, r1, lsl #1 ; move src pointer down by 4 lines add r3, r2, r1, lsr #1 ; set to 3 lines down @@ -69,47 +60,34 @@ count_lf_h_loop vst1.u8 {d6}, [r2@64], r1 ; store oq0 vst1.u8 {d7}, [r3@64], r1 ; store oq1 - add r0, r0, #8 - subs r12, r12, #1 - bne count_lf_h_loop - -end_vpx_lf_h_edge pop {pc} ENDP ; |vpx_lpf_horizontal_4_neon| ; Currently vpx only works on iterations 8 at a time. The vp8 loop filter ; works on 16 iterations at a time. -; TODO(fgalligan): See about removing the count code as this function is only -; called with a count of 1. 
; ; void vpx_lpf_vertical_4_neon(uint8_t *s, ; int p /* pitch */, ; const uint8_t *blimit, ; const uint8_t *limit, -; const uint8_t *thresh, -; int count) +; const uint8_t *thresh) ; ; r0 uint8_t *s, ; r1 int p, /* pitch */ ; r2 const uint8_t *blimit, ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, -; sp+4 int count |vpx_lpf_vertical_4_neon| PROC push {lr} vld1.8 {d0[]}, [r2] ; duplicate *blimit - ldr r12, [sp, #8] ; load count vld1.8 {d1[]}, [r3] ; duplicate *limit ldr r3, [sp, #4] ; load thresh sub r2, r0, #4 ; move s pointer down by 4 columns - cmp r12, #0 - beq end_vpx_lf_v_edge vld1.8 {d2[]}, [r3] ; duplicate *thresh -count_lf_v_loop vld1.u8 {d3}, [r2], r1 ; load s data vld1.u8 {d4}, [r2], r1 vld1.u8 {d5}, [r2], r1 @@ -149,12 +127,6 @@ count_lf_v_loop vst4.8 {d4[6], d5[6], d6[6], d7[6]}, [r0], r1 vst4.8 {d4[7], d5[7], d6[7], d7[7]}, [r0] - add r0, r0, r1, lsl #3 ; s += pitch * 8 - subs r12, r12, #1 - subne r2, r0, #4 ; move s pointer down by 4 columns - bne count_lf_v_loop - -end_vpx_lf_v_edge pop {pc} ENDP ; |vpx_lpf_vertical_4_neon| diff --git a/libvpx/vpx_dsp/arm/loopfilter_4_neon.c b/libvpx/vpx_dsp/arm/loopfilter_4_neon.c index 7ad411aea..7f3ee70b9 100644 --- a/libvpx/vpx_dsp/arm/loopfilter_4_neon.c +++ b/libvpx/vpx_dsp/arm/loopfilter_4_neon.c @@ -115,22 +115,18 @@ void vpx_lpf_horizontal_4_neon( int pitch, const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh, - int count) { + const uint8_t *thresh) { int i; uint8_t *s, *psrc; uint8x8_t dblimit, dlimit, dthresh; uint8x8_t d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8; - if (count == 0) // end_vpx_lf_h_edge - return; - dblimit = vld1_u8(blimit); dlimit = vld1_u8(limit); dthresh = vld1_u8(thresh); psrc = src - (pitch << 2); - for (i = 0; i < count; i++) { + for (i = 0; i < 1; i++) { s = psrc + i * 8; d3u8 = vld1_u8(s); @@ -170,8 +166,7 @@ void vpx_lpf_vertical_4_neon( int pitch, const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh, - int count) { + const uint8_t 
*thresh) { int i, pitch8; uint8_t *s; uint8x8_t dblimit, dlimit, dthresh; @@ -181,15 +176,12 @@ void vpx_lpf_vertical_4_neon( uint8x8x2_t d2tmp8, d2tmp9, d2tmp10, d2tmp11; uint8x8x4_t d4Result; - if (count == 0) // end_vpx_lf_h_edge - return; - dblimit = vld1_u8(blimit); dlimit = vld1_u8(limit); dthresh = vld1_u8(thresh); pitch8 = pitch * 8; - for (i = 0; i < count; i++, src += pitch8) { + for (i = 0; i < 1; i++, src += pitch8) { s = src - (i + 1) * 4; d3u8 = vld1_u8(s); diff --git a/libvpx/vpx_dsp/arm/loopfilter_8_neon.asm b/libvpx/vpx_dsp/arm/loopfilter_8_neon.asm index e81734c04..a2f20e15f 100644 --- a/libvpx/vpx_dsp/arm/loopfilter_8_neon.asm +++ b/libvpx/vpx_dsp/arm/loopfilter_8_neon.asm @@ -16,35 +16,26 @@ ; Currently vpx only works on iterations 8 at a time. The vp8 loop filter ; works on 16 iterations at a time. -; TODO(fgalligan): See about removing the count code as this function is only -; called with a count of 1. ; ; void vpx_lpf_horizontal_8_neon(uint8_t *s, int p, ; const uint8_t *blimit, ; const uint8_t *limit, -; const uint8_t *thresh, -; int count) +; const uint8_t *thresh) ; r0 uint8_t *s, ; r1 int p, /* pitch */ ; r2 const uint8_t *blimit, ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, -; sp+4 int count |vpx_lpf_horizontal_8_neon| PROC push {r4-r5, lr} vld1.8 {d0[]}, [r2] ; duplicate *blimit - ldr r12, [sp, #16] ; load count ldr r2, [sp, #12] ; load thresh add r1, r1, r1 ; double pitch - cmp r12, #0 - beq end_vpx_mblf_h_edge - vld1.8 {d1[]}, [r3] ; duplicate *limit vld1.8 {d2[]}, [r2] ; duplicate *thresh -count_mblf_h_loop sub r3, r0, r1, lsl #1 ; move src pointer down by 4 lines add r2, r3, r1, lsr #1 ; set to 3 lines down @@ -69,11 +60,6 @@ count_mblf_h_loop vst1.u8 {d4}, [r2@64], r1 ; store oq1 vst1.u8 {d5}, [r3@64], r1 ; store oq2 - add r0, r0, #8 - subs r12, r12, #1 - bne count_mblf_h_loop - -end_vpx_mblf_h_edge pop {r4-r5, pc} ENDP ; |vpx_lpf_horizontal_8_neon| @@ -82,30 +68,24 @@ end_vpx_mblf_h_edge ; int pitch, ; const uint8_t 
*blimit, ; const uint8_t *limit, -; const uint8_t *thresh, -; int count) +; const uint8_t *thresh) ; ; r0 uint8_t *s, ; r1 int pitch, ; r2 const uint8_t *blimit, ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, -; sp+4 int count |vpx_lpf_vertical_8_neon| PROC push {r4-r5, lr} vld1.8 {d0[]}, [r2] ; duplicate *blimit - ldr r12, [sp, #16] ; load count vld1.8 {d1[]}, [r3] ; duplicate *limit ldr r3, [sp, #12] ; load thresh sub r2, r0, #4 ; move s pointer down by 4 columns - cmp r12, #0 - beq end_vpx_mblf_v_edge vld1.8 {d2[]}, [r3] ; duplicate *thresh -count_mblf_v_loop vld1.u8 {d3}, [r2], r1 ; load s data vld1.u8 {d4}, [r2], r1 vld1.u8 {d5}, [r2], r1 @@ -156,12 +136,6 @@ count_mblf_v_loop vst2.8 {d4[6], d5[6]}, [r3], r1 vst2.8 {d4[7], d5[7]}, [r3] - add r0, r0, r1, lsl #3 ; s += pitch * 8 - subs r12, r12, #1 - subne r2, r0, #4 ; move s pointer down by 4 columns - bne count_mblf_v_loop - -end_vpx_mblf_v_edge pop {r4-r5, pc} ENDP ; |vpx_lpf_vertical_8_neon| diff --git a/libvpx/vpx_dsp/arm/loopfilter_8_neon.c b/libvpx/vpx_dsp/arm/loopfilter_8_neon.c index a887e2ee5..ec3757380 100644 --- a/libvpx/vpx_dsp/arm/loopfilter_8_neon.c +++ b/libvpx/vpx_dsp/arm/loopfilter_8_neon.c @@ -268,23 +268,19 @@ void vpx_lpf_horizontal_8_neon( int pitch, const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh, - int count) { + const uint8_t *thresh) { int i; uint8_t *s, *psrc; uint8x8_t dblimit, dlimit, dthresh; uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8; uint8x8_t d16u8, d17u8, d18u8; - if (count == 0) // end_vpx_mblf_h_edge - return; - dblimit = vld1_u8(blimit); dlimit = vld1_u8(limit); dthresh = vld1_u8(thresh); psrc = src - (pitch << 2); - for (i = 0; i < count; i++) { + for (i = 0; i < 1; i++) { s = psrc + i * 8; d3u8 = vld1_u8(s); @@ -328,8 +324,7 @@ void vpx_lpf_vertical_8_neon( int pitch, const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh, - int count) { + const uint8_t *thresh) { int i; uint8_t *s; uint8x8_t dblimit, dlimit, dthresh; 
@@ -341,14 +336,11 @@ void vpx_lpf_vertical_8_neon( uint8x8x4_t d4Result; uint8x8x2_t d2Result; - if (count == 0) - return; - dblimit = vld1_u8(blimit); dlimit = vld1_u8(limit); dthresh = vld1_u8(thresh); - for (i = 0; i < count; i++) { + for (i = 0; i < 1; i++) { s = src + (i * (pitch << 3)) - 4; d3u8 = vld1_u8(s); diff --git a/libvpx/vpx_dsp/arm/loopfilter_mb_neon.asm b/libvpx/vpx_dsp/arm/loopfilter_mb_neon.asm index 20d9cfb11..d5da7a840 100644 --- a/libvpx/vpx_dsp/arm/loopfilter_mb_neon.asm +++ b/libvpx/vpx_dsp/arm/loopfilter_mb_neon.asm @@ -8,27 +8,28 @@ ; be found in the AUTHORS file in the root of the source tree. ; - EXPORT |vpx_lpf_horizontal_16_neon| + EXPORT |vpx_lpf_horizontal_edge_8_neon| + EXPORT |vpx_lpf_horizontal_edge_16_neon| EXPORT |vpx_lpf_vertical_16_neon| ARM AREA ||.text||, CODE, READONLY, ALIGN=2 -; void vpx_lpf_horizontal_16_neon(uint8_t *s, int p, -; const uint8_t *blimit, -; const uint8_t *limit, -; const uint8_t *thresh -; int count) +; void mb_lpf_horizontal_edge(uint8_t *s, int p, +; const uint8_t *blimit, +; const uint8_t *limit, +; const uint8_t *thresh, +; int count) ; r0 uint8_t *s, ; r1 int p, /* pitch */ ; r2 const uint8_t *blimit, ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, -|vpx_lpf_horizontal_16_neon| PROC +; r12 int count +|mb_lpf_horizontal_edge| PROC push {r4-r8, lr} vpush {d8-d15} ldr r4, [sp, #88] ; load thresh - ldr r12, [sp, #92] ; load count h_count vld1.8 {d16[]}, [r2] ; load *blimit @@ -115,7 +116,35 @@ h_next vpop {d8-d15} pop {r4-r8, pc} - ENDP ; |vpx_lpf_horizontal_16_neon| + ENDP ; |mb_lpf_horizontal_edge| + +; void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch, +; const uint8_t *blimit, +; const uint8_t *limit, +; const uint8_t *thresh) +; r0 uint8_t *s, +; r1 int pitch, +; r2 const uint8_t *blimit, +; r3 const uint8_t *limit, +; sp const uint8_t *thresh +|vpx_lpf_horizontal_edge_8_neon| PROC + mov r12, #1 + b mb_lpf_horizontal_edge + ENDP ; |vpx_lpf_horizontal_edge_8_neon| + +; void 
vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch, +; const uint8_t *blimit, +; const uint8_t *limit, +; const uint8_t *thresh) +; r0 uint8_t *s, +; r1 int pitch, +; r2 const uint8_t *blimit, +; r3 const uint8_t *limit, +; sp const uint8_t *thresh +|vpx_lpf_horizontal_edge_16_neon| PROC + mov r12, #2 + b mb_lpf_horizontal_edge + ENDP ; |vpx_lpf_horizontal_edge_16_neon| ; void vpx_lpf_vertical_16_neon(uint8_t *s, int p, ; const uint8_t *blimit, diff --git a/libvpx/vpx_dsp/arm/loopfilter_neon.c b/libvpx/vpx_dsp/arm/loopfilter_neon.c index eff87d29b..aa31f2935 100644 --- a/libvpx/vpx_dsp/arm/loopfilter_neon.c +++ b/libvpx/vpx_dsp/arm/loopfilter_neon.c @@ -21,8 +21,8 @@ void vpx_lpf_vertical_4_dual_neon(uint8_t *s, int p, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0, 1); - vpx_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1); + vpx_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0); + vpx_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1); } #if HAVE_NEON_ASM @@ -33,8 +33,8 @@ void vpx_lpf_horizontal_8_dual_neon(uint8_t *s, int p /* pitch */, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0, 1); - vpx_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1, 1); + vpx_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0); + vpx_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1); } void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int p, @@ -44,8 +44,8 @@ void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int p, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0, 1); - vpx_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1); + vpx_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0); + vpx_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1); } void 
vpx_lpf_vertical_16_dual_neon(uint8_t *s, int p, diff --git a/libvpx/vp9/encoder/vp9_avg.c b/libvpx/vpx_dsp/avg.c index a9a4c3050..a8c996663 100644 --- a/libvpx/vp9/encoder/vp9_avg.c +++ b/libvpx/vpx_dsp/avg.c @@ -7,11 +7,12 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ -#include "./vp9_rtcd.h" -#include "vp9/common/vp9_common.h" +#include <stdlib.h> + +#include "./vpx_dsp_rtcd.h" #include "vpx_ports/mem.h" -unsigned int vp9_avg_8x8_c(const uint8_t *s, int p) { +unsigned int vpx_avg_8x8_c(const uint8_t *s, int p) { int i, j; int sum = 0; for (i = 0; i < 8; ++i, s+=p) @@ -20,7 +21,7 @@ unsigned int vp9_avg_8x8_c(const uint8_t *s, int p) { return (sum + 32) >> 6; } -unsigned int vp9_avg_4x4_c(const uint8_t *s, int p) { +unsigned int vpx_avg_4x4_c(const uint8_t *s, int p) { int i, j; int sum = 0; for (i = 0; i < 4; ++i, s+=p) @@ -61,7 +62,9 @@ static void hadamard_col8(const int16_t *src_diff, int src_stride, coeff[5] = c3 - c7; } -void vp9_hadamard_8x8_c(int16_t const *src_diff, int src_stride, +// The order of the output coeff of the hadamard is not important. For +// optimization purposes the final transpose may be skipped. 
+void vpx_hadamard_8x8_c(const int16_t *src_diff, int src_stride, int16_t *coeff) { int idx; int16_t buffer[64]; @@ -84,14 +87,14 @@ void vp9_hadamard_8x8_c(int16_t const *src_diff, int src_stride, } // In place 16x16 2D Hadamard transform -void vp9_hadamard_16x16_c(int16_t const *src_diff, int src_stride, +void vpx_hadamard_16x16_c(const int16_t *src_diff, int src_stride, int16_t *coeff) { int idx; for (idx = 0; idx < 4; ++idx) { // src_diff: 9 bit, dynamic range [-255, 255] - int16_t const *src_ptr = src_diff + (idx >> 1) * 8 * src_stride + const int16_t *src_ptr = src_diff + (idx >> 1) * 8 * src_stride + (idx & 0x01) * 8; - vp9_hadamard_8x8_c(src_ptr, src_stride, coeff + idx * 64); + vpx_hadamard_8x8_c(src_ptr, src_stride, coeff + idx * 64); } // coeff: 15 bit, dynamic range [-16320, 16320] @@ -117,19 +120,19 @@ void vp9_hadamard_16x16_c(int16_t const *src_diff, int src_stride, // coeff: 16 bits, dynamic range [-32640, 32640]. // length: value range {16, 64, 256, 1024}. -int16_t vp9_satd_c(const int16_t *coeff, int length) { +int vpx_satd_c(const int16_t *coeff, int length) { int i; int satd = 0; for (i = 0; i < length; ++i) satd += abs(coeff[i]); // satd: 26 bits, dynamic range [-32640 * 1024, 32640 * 1024] - return (int16_t)satd; + return satd; } // Integer projection onto row vectors. // height: value range {16, 32, 64}. -void vp9_int_pro_row_c(int16_t hbuf[16], uint8_t const *ref, +void vpx_int_pro_row_c(int16_t hbuf[16], const uint8_t *ref, const int ref_stride, const int height) { int idx; const int norm_factor = height >> 1; @@ -146,7 +149,7 @@ void vp9_int_pro_row_c(int16_t hbuf[16], uint8_t const *ref, } // width: value range {16, 32, 64}. 
-int16_t vp9_int_pro_col_c(uint8_t const *ref, const int width) { +int16_t vpx_int_pro_col_c(const uint8_t *ref, const int width) { int idx; int16_t sum = 0; // sum: 14 bit, dynamic range [0, 16320] @@ -158,7 +161,7 @@ int16_t vp9_int_pro_col_c(uint8_t const *ref, const int width) { // ref: [0 - 510] // src: [0 - 510] // bwl: {2, 3, 4} -int vp9_vector_var_c(int16_t const *ref, int16_t const *src, +int vpx_vector_var_c(const int16_t *ref, const int16_t *src, const int bwl) { int i; int width = 4 << bwl; @@ -175,7 +178,7 @@ int vp9_vector_var_c(int16_t const *ref, int16_t const *src, return var; } -void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, +void vpx_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max) { int i, j; *min = 255; @@ -190,7 +193,7 @@ void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, } #if CONFIG_VP9_HIGHBITDEPTH -unsigned int vp9_highbd_avg_8x8_c(const uint8_t *s8, int p) { +unsigned int vpx_highbd_avg_8x8_c(const uint8_t *s8, int p) { int i, j; int sum = 0; const uint16_t* s = CONVERT_TO_SHORTPTR(s8); @@ -200,7 +203,7 @@ unsigned int vp9_highbd_avg_8x8_c(const uint8_t *s8, int p) { return (sum + 32) >> 6; } -unsigned int vp9_highbd_avg_4x4_c(const uint8_t *s8, int p) { +unsigned int vpx_highbd_avg_4x4_c(const uint8_t *s8, int p) { int i, j; int sum = 0; const uint16_t* s = CONVERT_TO_SHORTPTR(s8); @@ -210,7 +213,7 @@ unsigned int vp9_highbd_avg_4x4_c(const uint8_t *s8, int p) { return (sum + 8) >> 4; } -void vp9_highbd_minmax_8x8_c(const uint8_t *s8, int p, const uint8_t *d8, +void vpx_highbd_minmax_8x8_c(const uint8_t *s8, int p, const uint8_t *d8, int dp, int *min, int *max) { int i, j; const uint16_t* s = CONVERT_TO_SHORTPTR(s8); diff --git a/libvpx/vpx_dsp/bitreader.c b/libvpx/vpx_dsp/bitreader.c index 6ad806ac3..8140e78e7 100644 --- a/libvpx/vpx_dsp/bitreader.c +++ b/libvpx/vpx_dsp/bitreader.c @@ -69,7 +69,7 @@ void vpx_reader_fill(vpx_reader *r) { buffer += (bits >> 
3); value = r->value | (nv << (shift & 0x7)); } else { - const int bits_over = (int)(shift + CHAR_BIT - bits_left); + const int bits_over = (int)(shift + CHAR_BIT - (int)bits_left); int loop_end = 0; if (bits_over >= 0) { count += LOTS_OF_BITS; diff --git a/libvpx/vpx_dsp/bitreader.h b/libvpx/vpx_dsp/bitreader.h index e817c8b0c..9a441b410 100644 --- a/libvpx/vpx_dsp/bitreader.h +++ b/libvpx/vpx_dsp/bitreader.h @@ -98,7 +98,7 @@ static INLINE int vpx_read(vpx_reader *r, int prob) { } { - register unsigned int shift = vpx_norm[range]; + register int shift = vpx_norm[range]; range <<= shift; value <<= shift; count -= shift; diff --git a/libvpx/vpx_dsp/bitreader_buffer.c b/libvpx/vpx_dsp/bitreader_buffer.c index bb917263e..d7b55cf9f 100644 --- a/libvpx/vpx_dsp/bitreader_buffer.c +++ b/libvpx/vpx_dsp/bitreader_buffer.c @@ -45,7 +45,7 @@ int vpx_rb_read_inv_signed_literal(struct vpx_read_bit_buffer *rb, int bits) { #if CONFIG_MISC_FIXES const int nbits = sizeof(unsigned) * 8 - bits - 1; - const unsigned value = vpx_rb_read_literal(rb, bits + 1) << nbits; + const unsigned value = (unsigned)vpx_rb_read_literal(rb, bits + 1) << nbits; return ((int) value) >> nbits; #else return vpx_rb_read_signed_literal(rb, bits); diff --git a/libvpx/vpx_dsp/bitwriter.h b/libvpx/vpx_dsp/bitwriter.h index f6ca9b916..d904997af 100644 --- a/libvpx/vpx_dsp/bitwriter.h +++ b/libvpx/vpx_dsp/bitwriter.h @@ -35,7 +35,7 @@ static INLINE void vpx_write(vpx_writer *br, int bit, int probability) { int count = br->count; unsigned int range = br->range; unsigned int lowvalue = br->lowvalue; - register unsigned int shift; + register int shift; split = 1 + (((range - 1) * probability) >> 8); diff --git a/libvpx/vpx_dsp/fwd_txfm.c b/libvpx/vpx_dsp/fwd_txfm.c index 7baaa8b0d..4c0d5db83 100644 --- a/libvpx/vpx_dsp/fwd_txfm.c +++ b/libvpx/vpx_dsp/fwd_txfm.c @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ +#include "./vpx_dsp_rtcd.h" #include "vpx_dsp/fwd_txfm.h" void vpx_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) { @@ -85,7 +86,6 @@ void vpx_fdct4x4_1_c(const int16_t *input, tran_low_t *output, int stride) { sum += input[r * stride + c]; output[0] = sum << 1; - output[1] = 0; } void vpx_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride) { @@ -182,7 +182,6 @@ void vpx_fdct8x8_1_c(const int16_t *input, tran_low_t *output, int stride) { sum += input[r * stride + c]; output[0] = sum; - output[1] = 0; } void vpx_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) { @@ -367,13 +366,12 @@ void vpx_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) { void vpx_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride) { int r, c; - tran_low_t sum = 0; + int sum = 0; for (r = 0; r < 16; ++r) for (c = 0; c < 16; ++c) sum += input[r * stride + c]; - output[0] = sum >> 1; - output[1] = 0; + output[0] = (tran_low_t)(sum >> 1); } static INLINE tran_high_t dct_32_round(tran_high_t input) { @@ -771,13 +769,12 @@ void vpx_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) { void vpx_fdct32x32_1_c(const int16_t *input, tran_low_t *output, int stride) { int r, c; - tran_low_t sum = 0; + int sum = 0; for (r = 0; r < 32; ++r) for (c = 0; c < 32; ++c) sum += input[r * stride + c]; - output[0] = sum >> 3; - output[1] = 0; + output[0] = (tran_low_t)(sum >> 3); } #if CONFIG_VP9_HIGHBITDEPTH diff --git a/libvpx/vpx_dsp/intrapred.c b/libvpx/vpx_dsp/intrapred.c index a9669e512..cc4a74bd2 100644 --- a/libvpx/vpx_dsp/intrapred.c +++ b/libvpx/vpx_dsp/intrapred.c @@ -44,6 +44,7 @@ static INLINE void d207_predictor(uint8_t *dst, ptrdiff_t stride, int bs, dst[r * stride + c] = dst[(r + 1) * stride + c - 2]; } +#if CONFIG_MISC_FIXES static INLINE void d207e_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int r, c; @@ -58,6 +59,7 @@ static INLINE void 
d207e_predictor(uint8_t *dst, ptrdiff_t stride, int bs, dst += stride; } } +#endif // CONFIG_MISC_FIXES static INLINE void d63_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { @@ -76,6 +78,7 @@ static INLINE void d63_predictor(uint8_t *dst, ptrdiff_t stride, int bs, } } +#if CONFIG_MISC_FIXES static INLINE void d63e_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int r, c; @@ -89,6 +92,7 @@ static INLINE void d63e_predictor(uint8_t *dst, ptrdiff_t stride, int bs, dst += stride; } } +#endif // CONFIG_MISC_FIXES static INLINE void d45_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { @@ -109,6 +113,7 @@ static INLINE void d45_predictor(uint8_t *dst, ptrdiff_t stride, int bs, } } +#if CONFIG_MISC_FIXES static INLINE void d45e_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int r, c; @@ -121,6 +126,7 @@ static INLINE void d45e_predictor(uint8_t *dst, ptrdiff_t stride, int bs, dst += stride; } } +#endif // CONFIG_MISC_FIXES static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { @@ -152,20 +158,29 @@ static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs, static INLINE void d135_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { - int r, c; - dst[0] = AVG3(left[0], above[-1], above[0]); - for (c = 1; c < bs; c++) - dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); + int i; +#if defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ > 7 + // silence a spurious -Warray-bounds warning, possibly related to: + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56273 + uint8_t border[69]; +#else + uint8_t border[32 + 32 - 1]; // outer border from bottom-left to top-right +#endif - dst[stride] = AVG3(above[-1], left[0], left[1]); - for (r = 2; r < bs; ++r) - dst[r * stride] 
= AVG3(left[r - 2], left[r - 1], left[r]); + // dst(bs, bs - 2)[0], i.e., border starting at bottom-left + for (i = 0; i < bs - 2; ++i) { + border[i] = AVG3(left[bs - 3 - i], left[bs - 2 - i], left[bs - 1 - i]); + } + border[bs - 2] = AVG3(above[-1], left[0], left[1]); + border[bs - 1] = AVG3(left[0], above[-1], above[0]); + border[bs - 0] = AVG3(above[-1], above[0], above[1]); + // dst[0][2, size), i.e., remaining top border ascending + for (i = 0; i < bs - 2; ++i) { + border[bs + 1 + i] = AVG3(above[i], above[i + 1], above[i + 2]); + } - dst += stride; - for (r = 1; r < bs; ++r) { - for (c = 1; c < bs; c++) - dst[c] = dst[-stride + c - 1]; - dst += stride; + for (i = 0; i < bs; ++i) { + memcpy(dst + i * stride, border + bs - 1 - i, bs); } } @@ -311,6 +326,7 @@ void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, const int K = above[2]; const int L = above[3]; const int M = above[4]; + (void)left; dst[0] = AVG3(H, I, J); dst[1] = AVG3(I, J, K); @@ -528,6 +544,7 @@ static INLINE void highbd_d207_predictor(uint16_t *dst, ptrdiff_t stride, } } +#if CONFIG_MISC_FIXES static INLINE void highbd_d207e_predictor(uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd) { @@ -544,6 +561,7 @@ static INLINE void highbd_d207e_predictor(uint16_t *dst, ptrdiff_t stride, dst += stride; } } +#endif // CONFIG_MISC_FIXES static INLINE void highbd_d63_predictor(uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, @@ -579,6 +597,7 @@ static INLINE void highbd_d45_predictor(uint16_t *dst, ptrdiff_t stride, int bs, } } +#if CONFIG_MISC_FIXES static INLINE void highbd_d45e_predictor(uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd) { @@ -593,6 +612,7 @@ static INLINE void highbd_d45e_predictor(uint16_t *dst, ptrdiff_t stride, dst += stride; } } +#endif // CONFIG_MISC_FIXES static INLINE void highbd_d117_predictor(uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, diff --git 
a/libvpx/vpx_dsp/inv_txfm.c b/libvpx/vpx_dsp/inv_txfm.c index 5f3cfddbd..e18d31d7a 100644 --- a/libvpx/vpx_dsp/inv_txfm.c +++ b/libvpx/vpx_dsp/inv_txfm.c @@ -11,6 +11,7 @@ #include <math.h> #include <string.h> +#include "./vpx_dsp_rtcd.h" #include "vpx_dsp/inv_txfm.h" void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { @@ -34,10 +35,10 @@ void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { c1 = e1 - c1; a1 -= b1; d1 += c1; - op[0] = WRAPLOW(a1, 8); - op[1] = WRAPLOW(b1, 8); - op[2] = WRAPLOW(c1, 8); - op[3] = WRAPLOW(d1, 8); + op[0] = WRAPLOW(a1); + op[1] = WRAPLOW(b1); + op[2] = WRAPLOW(c1); + op[3] = WRAPLOW(d1); ip += 4; op += 4; } @@ -55,10 +56,10 @@ void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { c1 = e1 - c1; a1 -= b1; d1 += c1; - dest[stride * 0] = clip_pixel_add(dest[stride * 0], a1); - dest[stride * 1] = clip_pixel_add(dest[stride * 1], b1); - dest[stride * 2] = clip_pixel_add(dest[stride * 2], c1); - dest[stride * 3] = clip_pixel_add(dest[stride * 3], d1); + dest[stride * 0] = clip_pixel_add(dest[stride * 0], WRAPLOW(a1)); + dest[stride * 1] = clip_pixel_add(dest[stride * 1], WRAPLOW(b1)); + dest[stride * 2] = clip_pixel_add(dest[stride * 2], WRAPLOW(c1)); + dest[stride * 3] = clip_pixel_add(dest[stride * 3], WRAPLOW(d1)); ip++; dest++; @@ -75,8 +76,8 @@ void vpx_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) { a1 = ip[0] >> UNIT_QUANT_SHIFT; e1 = a1 >> 1; a1 -= e1; - op[0] = WRAPLOW(a1, 8); - op[1] = op[2] = op[3] = WRAPLOW(e1, 8); + op[0] = WRAPLOW(a1); + op[1] = op[2] = op[3] = WRAPLOW(e1); ip = tmp; for (i = 0; i < 4; i++) { @@ -97,18 +98,18 @@ void idct4_c(const tran_low_t *input, tran_low_t *output) { // stage 1 temp1 = (input[0] + input[2]) * cospi_16_64; temp2 = (input[0] - input[2]) * cospi_16_64; - step[0] = WRAPLOW(dct_const_round_shift(temp1), 8); - step[1] = WRAPLOW(dct_const_round_shift(temp2), 8); + step[0] = 
WRAPLOW(dct_const_round_shift(temp1)); + step[1] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; - step[2] = WRAPLOW(dct_const_round_shift(temp1), 8); - step[3] = WRAPLOW(dct_const_round_shift(temp2), 8); + step[2] = WRAPLOW(dct_const_round_shift(temp1)); + step[3] = WRAPLOW(dct_const_round_shift(temp2)); // stage 2 - output[0] = WRAPLOW(step[0] + step[3], 8); - output[1] = WRAPLOW(step[1] + step[2], 8); - output[2] = WRAPLOW(step[1] - step[2], 8); - output[3] = WRAPLOW(step[0] - step[3], 8); + output[0] = WRAPLOW(step[0] + step[3]); + output[1] = WRAPLOW(step[1] + step[2]); + output[2] = WRAPLOW(step[1] - step[2]); + output[3] = WRAPLOW(step[0] - step[3]); } void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { @@ -140,8 +141,8 @@ void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride) { int i; tran_high_t a1; - tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); - out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); + tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); + out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); a1 = ROUND_POWER_OF_TWO(out, 4); for (i = 0; i < 4; i++) { @@ -163,48 +164,48 @@ void idct8_c(const tran_low_t *input, tran_low_t *output) { step1[3] = input[6]; temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64; temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64; - step1[4] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[7] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[4] = WRAPLOW(dct_const_round_shift(temp1)); + step1[7] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64; temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64; - step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[5] = 
WRAPLOW(dct_const_round_shift(temp1)); + step1[6] = WRAPLOW(dct_const_round_shift(temp2)); // stage 2 temp1 = (step1[0] + step1[2]) * cospi_16_64; temp2 = (step1[0] - step1[2]) * cospi_16_64; - step2[0] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[1] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[0] = WRAPLOW(dct_const_round_shift(temp1)); + step2[1] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step1[1] * cospi_24_64 - step1[3] * cospi_8_64; temp2 = step1[1] * cospi_8_64 + step1[3] * cospi_24_64; - step2[2] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[3] = WRAPLOW(dct_const_round_shift(temp2), 8); - step2[4] = WRAPLOW(step1[4] + step1[5], 8); - step2[5] = WRAPLOW(step1[4] - step1[5], 8); - step2[6] = WRAPLOW(-step1[6] + step1[7], 8); - step2[7] = WRAPLOW(step1[6] + step1[7], 8); + step2[2] = WRAPLOW(dct_const_round_shift(temp1)); + step2[3] = WRAPLOW(dct_const_round_shift(temp2)); + step2[4] = WRAPLOW(step1[4] + step1[5]); + step2[5] = WRAPLOW(step1[4] - step1[5]); + step2[6] = WRAPLOW(-step1[6] + step1[7]); + step2[7] = WRAPLOW(step1[6] + step1[7]); // stage 3 - step1[0] = WRAPLOW(step2[0] + step2[3], 8); - step1[1] = WRAPLOW(step2[1] + step2[2], 8); - step1[2] = WRAPLOW(step2[1] - step2[2], 8); - step1[3] = WRAPLOW(step2[0] - step2[3], 8); + step1[0] = WRAPLOW(step2[0] + step2[3]); + step1[1] = WRAPLOW(step2[1] + step2[2]); + step1[2] = WRAPLOW(step2[1] - step2[2]); + step1[3] = WRAPLOW(step2[0] - step2[3]); step1[4] = step2[4]; temp1 = (step2[6] - step2[5]) * cospi_16_64; temp2 = (step2[5] + step2[6]) * cospi_16_64; - step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[5] = WRAPLOW(dct_const_round_shift(temp1)); + step1[6] = WRAPLOW(dct_const_round_shift(temp2)); step1[7] = step2[7]; // stage 4 - output[0] = WRAPLOW(step1[0] + step1[7], 8); - output[1] = WRAPLOW(step1[1] + step1[6], 8); - output[2] = WRAPLOW(step1[2] + step1[5], 8); - output[3] = WRAPLOW(step1[3] + step1[4], 
8); - output[4] = WRAPLOW(step1[3] - step1[4], 8); - output[5] = WRAPLOW(step1[2] - step1[5], 8); - output[6] = WRAPLOW(step1[1] - step1[6], 8); - output[7] = WRAPLOW(step1[0] - step1[7], 8); + output[0] = WRAPLOW(step1[0] + step1[7]); + output[1] = WRAPLOW(step1[1] + step1[6]); + output[2] = WRAPLOW(step1[2] + step1[5]); + output[3] = WRAPLOW(step1[3] + step1[4]); + output[4] = WRAPLOW(step1[3] - step1[4]); + output[5] = WRAPLOW(step1[2] - step1[5]); + output[6] = WRAPLOW(step1[1] - step1[6]); + output[7] = WRAPLOW(step1[0] - step1[7]); } void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) { @@ -235,8 +236,8 @@ void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) { void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { int i, j; tran_high_t a1; - tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); - out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); + tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); + out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); a1 = ROUND_POWER_OF_TWO(out, 5); for (j = 0; j < 8; ++j) { for (i = 0; i < 8; ++i) @@ -265,7 +266,7 @@ void iadst4_c(const tran_low_t *input, tran_low_t *output) { s4 = sinpi_1_9 * x2; s5 = sinpi_2_9 * x3; s6 = sinpi_4_9 * x3; - s7 = x0 - x2 + x3; + s7 = WRAPLOW(x0 - x2 + x3); s0 = s0 + s3 + s5; s1 = s1 - s4 - s6; @@ -276,10 +277,10 @@ void iadst4_c(const tran_low_t *input, tran_low_t *output) { // The overall dynamic range is 14b (input) + 14b (multiplication scaling) // + 1b (addition) = 29b. // Hence the output bit depth is 15b. 
- output[0] = WRAPLOW(dct_const_round_shift(s0 + s3), 8); - output[1] = WRAPLOW(dct_const_round_shift(s1 + s3), 8); - output[2] = WRAPLOW(dct_const_round_shift(s2), 8); - output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3), 8); + output[0] = WRAPLOW(dct_const_round_shift(s0 + s3)); + output[1] = WRAPLOW(dct_const_round_shift(s1 + s3)); + output[2] = WRAPLOW(dct_const_round_shift(s2)); + output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3)); } void iadst8_c(const tran_low_t *input, tran_low_t *output) { @@ -310,14 +311,14 @@ void iadst8_c(const tran_low_t *input, tran_low_t *output) { s6 = (int)(cospi_26_64 * x6 + cospi_6_64 * x7); s7 = (int)(cospi_6_64 * x6 - cospi_26_64 * x7); - x0 = WRAPLOW(dct_const_round_shift(s0 + s4), 8); - x1 = WRAPLOW(dct_const_round_shift(s1 + s5), 8); - x2 = WRAPLOW(dct_const_round_shift(s2 + s6), 8); - x3 = WRAPLOW(dct_const_round_shift(s3 + s7), 8); - x4 = WRAPLOW(dct_const_round_shift(s0 - s4), 8); - x5 = WRAPLOW(dct_const_round_shift(s1 - s5), 8); - x6 = WRAPLOW(dct_const_round_shift(s2 - s6), 8); - x7 = WRAPLOW(dct_const_round_shift(s3 - s7), 8); + x0 = WRAPLOW(dct_const_round_shift(s0 + s4)); + x1 = WRAPLOW(dct_const_round_shift(s1 + s5)); + x2 = WRAPLOW(dct_const_round_shift(s2 + s6)); + x3 = WRAPLOW(dct_const_round_shift(s3 + s7)); + x4 = WRAPLOW(dct_const_round_shift(s0 - s4)); + x5 = WRAPLOW(dct_const_round_shift(s1 - s5)); + x6 = WRAPLOW(dct_const_round_shift(s2 - s6)); + x7 = WRAPLOW(dct_const_round_shift(s3 - s7)); // stage 2 s0 = (int)x0; @@ -329,14 +330,14 @@ void iadst8_c(const tran_low_t *input, tran_low_t *output) { s6 = (int)(-cospi_24_64 * x6 + cospi_8_64 * x7); s7 = (int)(cospi_8_64 * x6 + cospi_24_64 * x7); - x0 = WRAPLOW(s0 + s2, 8); - x1 = WRAPLOW(s1 + s3, 8); - x2 = WRAPLOW(s0 - s2, 8); - x3 = WRAPLOW(s1 - s3, 8); - x4 = WRAPLOW(dct_const_round_shift(s4 + s6), 8); - x5 = WRAPLOW(dct_const_round_shift(s5 + s7), 8); - x6 = WRAPLOW(dct_const_round_shift(s4 - s6), 8); - x7 = WRAPLOW(dct_const_round_shift(s5 - 
s7), 8); + x0 = WRAPLOW(s0 + s2); + x1 = WRAPLOW(s1 + s3); + x2 = WRAPLOW(s0 - s2); + x3 = WRAPLOW(s1 - s3); + x4 = WRAPLOW(dct_const_round_shift(s4 + s6)); + x5 = WRAPLOW(dct_const_round_shift(s5 + s7)); + x6 = WRAPLOW(dct_const_round_shift(s4 - s6)); + x7 = WRAPLOW(dct_const_round_shift(s5 - s7)); // stage 3 s2 = (int)(cospi_16_64 * (x2 + x3)); @@ -344,19 +345,19 @@ void iadst8_c(const tran_low_t *input, tran_low_t *output) { s6 = (int)(cospi_16_64 * (x6 + x7)); s7 = (int)(cospi_16_64 * (x6 - x7)); - x2 = WRAPLOW(dct_const_round_shift(s2), 8); - x3 = WRAPLOW(dct_const_round_shift(s3), 8); - x6 = WRAPLOW(dct_const_round_shift(s6), 8); - x7 = WRAPLOW(dct_const_round_shift(s7), 8); - - output[0] = WRAPLOW(x0, 8); - output[1] = WRAPLOW(-x4, 8); - output[2] = WRAPLOW(x6, 8); - output[3] = WRAPLOW(-x2, 8); - output[4] = WRAPLOW(x3, 8); - output[5] = WRAPLOW(-x7, 8); - output[6] = WRAPLOW(x5, 8); - output[7] = WRAPLOW(-x1, 8); + x2 = WRAPLOW(dct_const_round_shift(s2)); + x3 = WRAPLOW(dct_const_round_shift(s3)); + x6 = WRAPLOW(dct_const_round_shift(s6)); + x7 = WRAPLOW(dct_const_round_shift(s7)); + + output[0] = WRAPLOW(x0); + output[1] = WRAPLOW(-x4); + output[2] = WRAPLOW(x6); + output[3] = WRAPLOW(-x2); + output[4] = WRAPLOW(x3); + output[5] = WRAPLOW(-x7); + output[6] = WRAPLOW(x5); + output[7] = WRAPLOW(-x1); } void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) { @@ -419,23 +420,23 @@ void idct16_c(const tran_low_t *input, tran_low_t *output) { temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; - step2[8] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[15] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[8] = WRAPLOW(dct_const_round_shift(temp1)); + step2[15] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; - step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8); - 
step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[9] = WRAPLOW(dct_const_round_shift(temp1)); + step2[14] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; - step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[10] = WRAPLOW(dct_const_round_shift(temp1)); + step2[13] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; - step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[11] = WRAPLOW(dct_const_round_shift(temp1)); + step2[12] = WRAPLOW(dct_const_round_shift(temp2)); // stage 3 step1[0] = step2[0]; @@ -445,109 +446,109 @@ void idct16_c(const tran_low_t *input, tran_low_t *output) { temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; - step1[4] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[7] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[4] = WRAPLOW(dct_const_round_shift(temp1)); + step1[7] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; - step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8); - - step1[8] = WRAPLOW(step2[8] + step2[9], 8); - step1[9] = WRAPLOW(step2[8] - step2[9], 8); - step1[10] = WRAPLOW(-step2[10] + step2[11], 8); - step1[11] = WRAPLOW(step2[10] + step2[11], 8); - step1[12] = WRAPLOW(step2[12] + step2[13], 8); - step1[13] = WRAPLOW(step2[12] - step2[13], 8); - step1[14] = WRAPLOW(-step2[14] + step2[15], 8); - step1[15] = WRAPLOW(step2[14] + step2[15], 8); + step1[5] = WRAPLOW(dct_const_round_shift(temp1)); + step1[6] = WRAPLOW(dct_const_round_shift(temp2)); + + 
step1[8] = WRAPLOW(step2[8] + step2[9]); + step1[9] = WRAPLOW(step2[8] - step2[9]); + step1[10] = WRAPLOW(-step2[10] + step2[11]); + step1[11] = WRAPLOW(step2[10] + step2[11]); + step1[12] = WRAPLOW(step2[12] + step2[13]); + step1[13] = WRAPLOW(step2[12] - step2[13]); + step1[14] = WRAPLOW(-step2[14] + step2[15]); + step1[15] = WRAPLOW(step2[14] + step2[15]); // stage 4 temp1 = (step1[0] + step1[1]) * cospi_16_64; temp2 = (step1[0] - step1[1]) * cospi_16_64; - step2[0] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[1] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[0] = WRAPLOW(dct_const_round_shift(temp1)); + step2[1] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; - step2[2] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[3] = WRAPLOW(dct_const_round_shift(temp2), 8); - step2[4] = WRAPLOW(step1[4] + step1[5], 8); - step2[5] = WRAPLOW(step1[4] - step1[5], 8); - step2[6] = WRAPLOW(-step1[6] + step1[7], 8); - step2[7] = WRAPLOW(step1[6] + step1[7], 8); + step2[2] = WRAPLOW(dct_const_round_shift(temp1)); + step2[3] = WRAPLOW(dct_const_round_shift(temp2)); + step2[4] = WRAPLOW(step1[4] + step1[5]); + step2[5] = WRAPLOW(step1[4] - step1[5]); + step2[6] = WRAPLOW(-step1[6] + step1[7]); + step2[7] = WRAPLOW(step1[6] + step1[7]); step2[8] = step1[8]; step2[15] = step1[15]; temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; - step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[9] = WRAPLOW(dct_const_round_shift(temp1)); + step2[14] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; - step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[10] = 
WRAPLOW(dct_const_round_shift(temp1)); + step2[13] = WRAPLOW(dct_const_round_shift(temp2)); step2[11] = step1[11]; step2[12] = step1[12]; // stage 5 - step1[0] = WRAPLOW(step2[0] + step2[3], 8); - step1[1] = WRAPLOW(step2[1] + step2[2], 8); - step1[2] = WRAPLOW(step2[1] - step2[2], 8); - step1[3] = WRAPLOW(step2[0] - step2[3], 8); + step1[0] = WRAPLOW(step2[0] + step2[3]); + step1[1] = WRAPLOW(step2[1] + step2[2]); + step1[2] = WRAPLOW(step2[1] - step2[2]); + step1[3] = WRAPLOW(step2[0] - step2[3]); step1[4] = step2[4]; temp1 = (step2[6] - step2[5]) * cospi_16_64; temp2 = (step2[5] + step2[6]) * cospi_16_64; - step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[5] = WRAPLOW(dct_const_round_shift(temp1)); + step1[6] = WRAPLOW(dct_const_round_shift(temp2)); step1[7] = step2[7]; - step1[8] = WRAPLOW(step2[8] + step2[11], 8); - step1[9] = WRAPLOW(step2[9] + step2[10], 8); - step1[10] = WRAPLOW(step2[9] - step2[10], 8); - step1[11] = WRAPLOW(step2[8] - step2[11], 8); - step1[12] = WRAPLOW(-step2[12] + step2[15], 8); - step1[13] = WRAPLOW(-step2[13] + step2[14], 8); - step1[14] = WRAPLOW(step2[13] + step2[14], 8); - step1[15] = WRAPLOW(step2[12] + step2[15], 8); + step1[8] = WRAPLOW(step2[8] + step2[11]); + step1[9] = WRAPLOW(step2[9] + step2[10]); + step1[10] = WRAPLOW(step2[9] - step2[10]); + step1[11] = WRAPLOW(step2[8] - step2[11]); + step1[12] = WRAPLOW(-step2[12] + step2[15]); + step1[13] = WRAPLOW(-step2[13] + step2[14]); + step1[14] = WRAPLOW(step2[13] + step2[14]); + step1[15] = WRAPLOW(step2[12] + step2[15]); // stage 6 - step2[0] = WRAPLOW(step1[0] + step1[7], 8); - step2[1] = WRAPLOW(step1[1] + step1[6], 8); - step2[2] = WRAPLOW(step1[2] + step1[5], 8); - step2[3] = WRAPLOW(step1[3] + step1[4], 8); - step2[4] = WRAPLOW(step1[3] - step1[4], 8); - step2[5] = WRAPLOW(step1[2] - step1[5], 8); - step2[6] = WRAPLOW(step1[1] - step1[6], 8); - step2[7] = WRAPLOW(step1[0] - step1[7], 8); + step2[0] = 
WRAPLOW(step1[0] + step1[7]); + step2[1] = WRAPLOW(step1[1] + step1[6]); + step2[2] = WRAPLOW(step1[2] + step1[5]); + step2[3] = WRAPLOW(step1[3] + step1[4]); + step2[4] = WRAPLOW(step1[3] - step1[4]); + step2[5] = WRAPLOW(step1[2] - step1[5]); + step2[6] = WRAPLOW(step1[1] - step1[6]); + step2[7] = WRAPLOW(step1[0] - step1[7]); step2[8] = step1[8]; step2[9] = step1[9]; temp1 = (-step1[10] + step1[13]) * cospi_16_64; temp2 = (step1[10] + step1[13]) * cospi_16_64; - step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[10] = WRAPLOW(dct_const_round_shift(temp1)); + step2[13] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = (-step1[11] + step1[12]) * cospi_16_64; temp2 = (step1[11] + step1[12]) * cospi_16_64; - step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[11] = WRAPLOW(dct_const_round_shift(temp1)); + step2[12] = WRAPLOW(dct_const_round_shift(temp2)); step2[14] = step1[14]; step2[15] = step1[15]; // stage 7 - output[0] = WRAPLOW(step2[0] + step2[15], 8); - output[1] = WRAPLOW(step2[1] + step2[14], 8); - output[2] = WRAPLOW(step2[2] + step2[13], 8); - output[3] = WRAPLOW(step2[3] + step2[12], 8); - output[4] = WRAPLOW(step2[4] + step2[11], 8); - output[5] = WRAPLOW(step2[5] + step2[10], 8); - output[6] = WRAPLOW(step2[6] + step2[9], 8); - output[7] = WRAPLOW(step2[7] + step2[8], 8); - output[8] = WRAPLOW(step2[7] - step2[8], 8); - output[9] = WRAPLOW(step2[6] - step2[9], 8); - output[10] = WRAPLOW(step2[5] - step2[10], 8); - output[11] = WRAPLOW(step2[4] - step2[11], 8); - output[12] = WRAPLOW(step2[3] - step2[12], 8); - output[13] = WRAPLOW(step2[2] - step2[13], 8); - output[14] = WRAPLOW(step2[1] - step2[14], 8); - output[15] = WRAPLOW(step2[0] - step2[15], 8); + output[0] = WRAPLOW(step2[0] + step2[15]); + output[1] = WRAPLOW(step2[1] + step2[14]); + output[2] = WRAPLOW(step2[2] + step2[13]); + output[3] = WRAPLOW(step2[3] + 
step2[12]); + output[4] = WRAPLOW(step2[4] + step2[11]); + output[5] = WRAPLOW(step2[5] + step2[10]); + output[6] = WRAPLOW(step2[6] + step2[9]); + output[7] = WRAPLOW(step2[7] + step2[8]); + output[8] = WRAPLOW(step2[7] - step2[8]); + output[9] = WRAPLOW(step2[6] - step2[9]); + output[10] = WRAPLOW(step2[5] - step2[10]); + output[11] = WRAPLOW(step2[4] - step2[11]); + output[12] = WRAPLOW(step2[3] - step2[12]); + output[13] = WRAPLOW(step2[2] - step2[13]); + output[14] = WRAPLOW(step2[1] - step2[14]); + output[15] = WRAPLOW(step2[0] - step2[15]); } void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, @@ -624,22 +625,22 @@ void iadst16_c(const tran_low_t *input, tran_low_t *output) { s14 = x14 * cospi_29_64 + x15 * cospi_3_64; s15 = x14 * cospi_3_64 - x15 * cospi_29_64; - x0 = WRAPLOW(dct_const_round_shift(s0 + s8), 8); - x1 = WRAPLOW(dct_const_round_shift(s1 + s9), 8); - x2 = WRAPLOW(dct_const_round_shift(s2 + s10), 8); - x3 = WRAPLOW(dct_const_round_shift(s3 + s11), 8); - x4 = WRAPLOW(dct_const_round_shift(s4 + s12), 8); - x5 = WRAPLOW(dct_const_round_shift(s5 + s13), 8); - x6 = WRAPLOW(dct_const_round_shift(s6 + s14), 8); - x7 = WRAPLOW(dct_const_round_shift(s7 + s15), 8); - x8 = WRAPLOW(dct_const_round_shift(s0 - s8), 8); - x9 = WRAPLOW(dct_const_round_shift(s1 - s9), 8); - x10 = WRAPLOW(dct_const_round_shift(s2 - s10), 8); - x11 = WRAPLOW(dct_const_round_shift(s3 - s11), 8); - x12 = WRAPLOW(dct_const_round_shift(s4 - s12), 8); - x13 = WRAPLOW(dct_const_round_shift(s5 - s13), 8); - x14 = WRAPLOW(dct_const_round_shift(s6 - s14), 8); - x15 = WRAPLOW(dct_const_round_shift(s7 - s15), 8); + x0 = WRAPLOW(dct_const_round_shift(s0 + s8)); + x1 = WRAPLOW(dct_const_round_shift(s1 + s9)); + x2 = WRAPLOW(dct_const_round_shift(s2 + s10)); + x3 = WRAPLOW(dct_const_round_shift(s3 + s11)); + x4 = WRAPLOW(dct_const_round_shift(s4 + s12)); + x5 = WRAPLOW(dct_const_round_shift(s5 + s13)); + x6 = WRAPLOW(dct_const_round_shift(s6 + s14)); + x7 = 
WRAPLOW(dct_const_round_shift(s7 + s15)); + x8 = WRAPLOW(dct_const_round_shift(s0 - s8)); + x9 = WRAPLOW(dct_const_round_shift(s1 - s9)); + x10 = WRAPLOW(dct_const_round_shift(s2 - s10)); + x11 = WRAPLOW(dct_const_round_shift(s3 - s11)); + x12 = WRAPLOW(dct_const_round_shift(s4 - s12)); + x13 = WRAPLOW(dct_const_round_shift(s5 - s13)); + x14 = WRAPLOW(dct_const_round_shift(s6 - s14)); + x15 = WRAPLOW(dct_const_round_shift(s7 - s15)); // stage 2 s0 = x0; @@ -659,22 +660,22 @@ void iadst16_c(const tran_low_t *input, tran_low_t *output) { s14 = - x14 * cospi_12_64 + x15 * cospi_20_64; s15 = x14 * cospi_20_64 + x15 * cospi_12_64; - x0 = WRAPLOW(s0 + s4, 8); - x1 = WRAPLOW(s1 + s5, 8); - x2 = WRAPLOW(s2 + s6, 8); - x3 = WRAPLOW(s3 + s7, 8); - x4 = WRAPLOW(s0 - s4, 8); - x5 = WRAPLOW(s1 - s5, 8); - x6 = WRAPLOW(s2 - s6, 8); - x7 = WRAPLOW(s3 - s7, 8); - x8 = WRAPLOW(dct_const_round_shift(s8 + s12), 8); - x9 = WRAPLOW(dct_const_round_shift(s9 + s13), 8); - x10 = WRAPLOW(dct_const_round_shift(s10 + s14), 8); - x11 = WRAPLOW(dct_const_round_shift(s11 + s15), 8); - x12 = WRAPLOW(dct_const_round_shift(s8 - s12), 8); - x13 = WRAPLOW(dct_const_round_shift(s9 - s13), 8); - x14 = WRAPLOW(dct_const_round_shift(s10 - s14), 8); - x15 = WRAPLOW(dct_const_round_shift(s11 - s15), 8); + x0 = WRAPLOW(s0 + s4); + x1 = WRAPLOW(s1 + s5); + x2 = WRAPLOW(s2 + s6); + x3 = WRAPLOW(s3 + s7); + x4 = WRAPLOW(s0 - s4); + x5 = WRAPLOW(s1 - s5); + x6 = WRAPLOW(s2 - s6); + x7 = WRAPLOW(s3 - s7); + x8 = WRAPLOW(dct_const_round_shift(s8 + s12)); + x9 = WRAPLOW(dct_const_round_shift(s9 + s13)); + x10 = WRAPLOW(dct_const_round_shift(s10 + s14)); + x11 = WRAPLOW(dct_const_round_shift(s11 + s15)); + x12 = WRAPLOW(dct_const_round_shift(s8 - s12)); + x13 = WRAPLOW(dct_const_round_shift(s9 - s13)); + x14 = WRAPLOW(dct_const_round_shift(s10 - s14)); + x15 = WRAPLOW(dct_const_round_shift(s11 - s15)); // stage 3 s0 = x0; @@ -694,22 +695,22 @@ void iadst16_c(const tran_low_t *input, tran_low_t *output) { s14 = - 
x14 * cospi_24_64 + x15 * cospi_8_64; s15 = x14 * cospi_8_64 + x15 * cospi_24_64; - x0 = WRAPLOW(check_range(s0 + s2), 8); - x1 = WRAPLOW(check_range(s1 + s3), 8); - x2 = WRAPLOW(check_range(s0 - s2), 8); - x3 = WRAPLOW(check_range(s1 - s3), 8); - x4 = WRAPLOW(dct_const_round_shift(s4 + s6), 8); - x5 = WRAPLOW(dct_const_round_shift(s5 + s7), 8); - x6 = WRAPLOW(dct_const_round_shift(s4 - s6), 8); - x7 = WRAPLOW(dct_const_round_shift(s5 - s7), 8); - x8 = WRAPLOW(check_range(s8 + s10), 8); - x9 = WRAPLOW(check_range(s9 + s11), 8); - x10 = WRAPLOW(check_range(s8 - s10), 8); - x11 = WRAPLOW(check_range(s9 - s11), 8); - x12 = WRAPLOW(dct_const_round_shift(s12 + s14), 8); - x13 = WRAPLOW(dct_const_round_shift(s13 + s15), 8); - x14 = WRAPLOW(dct_const_round_shift(s12 - s14), 8); - x15 = WRAPLOW(dct_const_round_shift(s13 - s15), 8); + x0 = WRAPLOW(s0 + s2); + x1 = WRAPLOW(s1 + s3); + x2 = WRAPLOW(s0 - s2); + x3 = WRAPLOW(s1 - s3); + x4 = WRAPLOW(dct_const_round_shift(s4 + s6)); + x5 = WRAPLOW(dct_const_round_shift(s5 + s7)); + x6 = WRAPLOW(dct_const_round_shift(s4 - s6)); + x7 = WRAPLOW(dct_const_round_shift(s5 - s7)); + x8 = WRAPLOW(s8 + s10); + x9 = WRAPLOW(s9 + s11); + x10 = WRAPLOW(s8 - s10); + x11 = WRAPLOW(s9 - s11); + x12 = WRAPLOW(dct_const_round_shift(s12 + s14)); + x13 = WRAPLOW(dct_const_round_shift(s13 + s15)); + x14 = WRAPLOW(dct_const_round_shift(s12 - s14)); + x15 = WRAPLOW(dct_const_round_shift(s13 - s15)); // stage 4 s2 = (- cospi_16_64) * (x2 + x3); @@ -721,31 +722,31 @@ void iadst16_c(const tran_low_t *input, tran_low_t *output) { s14 = (- cospi_16_64) * (x14 + x15); s15 = cospi_16_64 * (x14 - x15); - x2 = WRAPLOW(dct_const_round_shift(s2), 8); - x3 = WRAPLOW(dct_const_round_shift(s3), 8); - x6 = WRAPLOW(dct_const_round_shift(s6), 8); - x7 = WRAPLOW(dct_const_round_shift(s7), 8); - x10 = WRAPLOW(dct_const_round_shift(s10), 8); - x11 = WRAPLOW(dct_const_round_shift(s11), 8); - x14 = WRAPLOW(dct_const_round_shift(s14), 8); - x15 = 
WRAPLOW(dct_const_round_shift(s15), 8); - - output[0] = WRAPLOW(x0, 8); - output[1] = WRAPLOW(-x8, 8); - output[2] = WRAPLOW(x12, 8); - output[3] = WRAPLOW(-x4, 8); - output[4] = WRAPLOW(x6, 8); - output[5] = WRAPLOW(x14, 8); - output[6] = WRAPLOW(x10, 8); - output[7] = WRAPLOW(x2, 8); - output[8] = WRAPLOW(x3, 8); - output[9] = WRAPLOW(x11, 8); - output[10] = WRAPLOW(x15, 8); - output[11] = WRAPLOW(x7, 8); - output[12] = WRAPLOW(x5, 8); - output[13] = WRAPLOW(-x13, 8); - output[14] = WRAPLOW(x9, 8); - output[15] = WRAPLOW(-x1, 8); + x2 = WRAPLOW(dct_const_round_shift(s2)); + x3 = WRAPLOW(dct_const_round_shift(s3)); + x6 = WRAPLOW(dct_const_round_shift(s6)); + x7 = WRAPLOW(dct_const_round_shift(s7)); + x10 = WRAPLOW(dct_const_round_shift(s10)); + x11 = WRAPLOW(dct_const_round_shift(s11)); + x14 = WRAPLOW(dct_const_round_shift(s14)); + x15 = WRAPLOW(dct_const_round_shift(s15)); + + output[0] = WRAPLOW(x0); + output[1] = WRAPLOW(-x8); + output[2] = WRAPLOW(x12); + output[3] = WRAPLOW(-x4); + output[4] = WRAPLOW(x6); + output[5] = WRAPLOW(x14); + output[6] = WRAPLOW(x10); + output[7] = WRAPLOW(x2); + output[8] = WRAPLOW(x3); + output[9] = WRAPLOW(x11); + output[10] = WRAPLOW(x15); + output[11] = WRAPLOW(x7); + output[12] = WRAPLOW(x5); + output[13] = WRAPLOW(-x13); + output[14] = WRAPLOW(x9); + output[15] = WRAPLOW(-x1); } void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, @@ -778,8 +779,8 @@ void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { int i, j; tran_high_t a1; - tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); - out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); + tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); + out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); a1 = ROUND_POWER_OF_TWO(out, 6); for (j = 0; j < 16; ++j) { for (i = 0; i < 16; ++i) @@ -812,43 +813,43 @@ void 
idct32_c(const tran_low_t *input, tran_low_t *output) { temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64; temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64; - step1[16] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[31] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[16] = WRAPLOW(dct_const_round_shift(temp1)); + step1[31] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64; temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64; - step1[17] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[30] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[17] = WRAPLOW(dct_const_round_shift(temp1)); + step1[30] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64; temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64; - step1[18] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[29] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[18] = WRAPLOW(dct_const_round_shift(temp1)); + step1[29] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64; temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64; - step1[19] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[28] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[19] = WRAPLOW(dct_const_round_shift(temp1)); + step1[28] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64; temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64; - step1[20] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[27] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[20] = WRAPLOW(dct_const_round_shift(temp1)); + step1[27] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64; temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64; - step1[21] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[26] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[21] = 
WRAPLOW(dct_const_round_shift(temp1)); + step1[26] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64; temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64; - step1[22] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[25] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[22] = WRAPLOW(dct_const_round_shift(temp1)); + step1[25] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64; temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64; - step1[23] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[24] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[23] = WRAPLOW(dct_const_round_shift(temp1)); + step1[24] = WRAPLOW(dct_const_round_shift(temp2)); // stage 2 step2[0] = step1[0]; @@ -862,40 +863,40 @@ void idct32_c(const tran_low_t *input, tran_low_t *output) { temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; - step2[8] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[15] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[8] = WRAPLOW(dct_const_round_shift(temp1)); + step2[15] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; - step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[9] = WRAPLOW(dct_const_round_shift(temp1)); + step2[14] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; - step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[10] = WRAPLOW(dct_const_round_shift(temp1)); + step2[13] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; - 
step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8); - - step2[16] = WRAPLOW(step1[16] + step1[17], 8); - step2[17] = WRAPLOW(step1[16] - step1[17], 8); - step2[18] = WRAPLOW(-step1[18] + step1[19], 8); - step2[19] = WRAPLOW(step1[18] + step1[19], 8); - step2[20] = WRAPLOW(step1[20] + step1[21], 8); - step2[21] = WRAPLOW(step1[20] - step1[21], 8); - step2[22] = WRAPLOW(-step1[22] + step1[23], 8); - step2[23] = WRAPLOW(step1[22] + step1[23], 8); - step2[24] = WRAPLOW(step1[24] + step1[25], 8); - step2[25] = WRAPLOW(step1[24] - step1[25], 8); - step2[26] = WRAPLOW(-step1[26] + step1[27], 8); - step2[27] = WRAPLOW(step1[26] + step1[27], 8); - step2[28] = WRAPLOW(step1[28] + step1[29], 8); - step2[29] = WRAPLOW(step1[28] - step1[29], 8); - step2[30] = WRAPLOW(-step1[30] + step1[31], 8); - step2[31] = WRAPLOW(step1[30] + step1[31], 8); + step2[11] = WRAPLOW(dct_const_round_shift(temp1)); + step2[12] = WRAPLOW(dct_const_round_shift(temp2)); + + step2[16] = WRAPLOW(step1[16] + step1[17]); + step2[17] = WRAPLOW(step1[16] - step1[17]); + step2[18] = WRAPLOW(-step1[18] + step1[19]); + step2[19] = WRAPLOW(step1[18] + step1[19]); + step2[20] = WRAPLOW(step1[20] + step1[21]); + step2[21] = WRAPLOW(step1[20] - step1[21]); + step2[22] = WRAPLOW(-step1[22] + step1[23]); + step2[23] = WRAPLOW(step1[22] + step1[23]); + step2[24] = WRAPLOW(step1[24] + step1[25]); + step2[25] = WRAPLOW(step1[24] - step1[25]); + step2[26] = WRAPLOW(-step1[26] + step1[27]); + step2[27] = WRAPLOW(step1[26] + step1[27]); + step2[28] = WRAPLOW(step1[28] + step1[29]); + step2[29] = WRAPLOW(step1[28] - step1[29]); + step2[30] = WRAPLOW(-step1[30] + step1[31]); + step2[31] = WRAPLOW(step1[30] + step1[31]); // stage 3 step1[0] = step2[0]; @@ -905,42 +906,42 @@ void idct32_c(const tran_low_t *input, tran_low_t *output) { temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; - step1[4] = 
WRAPLOW(dct_const_round_shift(temp1), 8); - step1[7] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[4] = WRAPLOW(dct_const_round_shift(temp1)); + step1[7] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; - step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8); - - step1[8] = WRAPLOW(step2[8] + step2[9], 8); - step1[9] = WRAPLOW(step2[8] - step2[9], 8); - step1[10] = WRAPLOW(-step2[10] + step2[11], 8); - step1[11] = WRAPLOW(step2[10] + step2[11], 8); - step1[12] = WRAPLOW(step2[12] + step2[13], 8); - step1[13] = WRAPLOW(step2[12] - step2[13], 8); - step1[14] = WRAPLOW(-step2[14] + step2[15], 8); - step1[15] = WRAPLOW(step2[14] + step2[15], 8); + step1[5] = WRAPLOW(dct_const_round_shift(temp1)); + step1[6] = WRAPLOW(dct_const_round_shift(temp2)); + + step1[8] = WRAPLOW(step2[8] + step2[9]); + step1[9] = WRAPLOW(step2[8] - step2[9]); + step1[10] = WRAPLOW(-step2[10] + step2[11]); + step1[11] = WRAPLOW(step2[10] + step2[11]); + step1[12] = WRAPLOW(step2[12] + step2[13]); + step1[13] = WRAPLOW(step2[12] - step2[13]); + step1[14] = WRAPLOW(-step2[14] + step2[15]); + step1[15] = WRAPLOW(step2[14] + step2[15]); step1[16] = step2[16]; step1[31] = step2[31]; temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64; temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64; - step1[17] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[30] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[17] = WRAPLOW(dct_const_round_shift(temp1)); + step1[30] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64; temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64; - step1[18] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[29] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[18] = WRAPLOW(dct_const_round_shift(temp1)); + step1[29] = 
WRAPLOW(dct_const_round_shift(temp2)); step1[19] = step2[19]; step1[20] = step2[20]; temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64; temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64; - step1[21] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[26] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[21] = WRAPLOW(dct_const_round_shift(temp1)); + step1[26] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64; temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64; - step1[22] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[25] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[22] = WRAPLOW(dct_const_round_shift(temp1)); + step1[25] = WRAPLOW(dct_const_round_shift(temp2)); step1[23] = step2[23]; step1[24] = step2[24]; step1[27] = step2[27]; @@ -949,87 +950,87 @@ void idct32_c(const tran_low_t *input, tran_low_t *output) { // stage 4 temp1 = (step1[0] + step1[1]) * cospi_16_64; temp2 = (step1[0] - step1[1]) * cospi_16_64; - step2[0] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[1] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[0] = WRAPLOW(dct_const_round_shift(temp1)); + step2[1] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; - step2[2] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[3] = WRAPLOW(dct_const_round_shift(temp2), 8); - step2[4] = WRAPLOW(step1[4] + step1[5], 8); - step2[5] = WRAPLOW(step1[4] - step1[5], 8); - step2[6] = WRAPLOW(-step1[6] + step1[7], 8); - step2[7] = WRAPLOW(step1[6] + step1[7], 8); + step2[2] = WRAPLOW(dct_const_round_shift(temp1)); + step2[3] = WRAPLOW(dct_const_round_shift(temp2)); + step2[4] = WRAPLOW(step1[4] + step1[5]); + step2[5] = WRAPLOW(step1[4] - step1[5]); + step2[6] = WRAPLOW(-step1[6] + step1[7]); + step2[7] = WRAPLOW(step1[6] + step1[7]); step2[8] = step1[8]; step2[15] = step1[15]; temp1 = -step1[9] * cospi_8_64 + step1[14] * 
cospi_24_64; temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; - step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[9] = WRAPLOW(dct_const_round_shift(temp1)); + step2[14] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; - step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[10] = WRAPLOW(dct_const_round_shift(temp1)); + step2[13] = WRAPLOW(dct_const_round_shift(temp2)); step2[11] = step1[11]; step2[12] = step1[12]; - step2[16] = WRAPLOW(step1[16] + step1[19], 8); - step2[17] = WRAPLOW(step1[17] + step1[18], 8); - step2[18] = WRAPLOW(step1[17] - step1[18], 8); - step2[19] = WRAPLOW(step1[16] - step1[19], 8); - step2[20] = WRAPLOW(-step1[20] + step1[23], 8); - step2[21] = WRAPLOW(-step1[21] + step1[22], 8); - step2[22] = WRAPLOW(step1[21] + step1[22], 8); - step2[23] = WRAPLOW(step1[20] + step1[23], 8); - - step2[24] = WRAPLOW(step1[24] + step1[27], 8); - step2[25] = WRAPLOW(step1[25] + step1[26], 8); - step2[26] = WRAPLOW(step1[25] - step1[26], 8); - step2[27] = WRAPLOW(step1[24] - step1[27], 8); - step2[28] = WRAPLOW(-step1[28] + step1[31], 8); - step2[29] = WRAPLOW(-step1[29] + step1[30], 8); - step2[30] = WRAPLOW(step1[29] + step1[30], 8); - step2[31] = WRAPLOW(step1[28] + step1[31], 8); + step2[16] = WRAPLOW(step1[16] + step1[19]); + step2[17] = WRAPLOW(step1[17] + step1[18]); + step2[18] = WRAPLOW(step1[17] - step1[18]); + step2[19] = WRAPLOW(step1[16] - step1[19]); + step2[20] = WRAPLOW(-step1[20] + step1[23]); + step2[21] = WRAPLOW(-step1[21] + step1[22]); + step2[22] = WRAPLOW(step1[21] + step1[22]); + step2[23] = WRAPLOW(step1[20] + step1[23]); + + step2[24] = WRAPLOW(step1[24] + step1[27]); + step2[25] = WRAPLOW(step1[25] + step1[26]); + step2[26] = WRAPLOW(step1[25] - step1[26]); + step2[27] = WRAPLOW(step1[24] 
- step1[27]); + step2[28] = WRAPLOW(-step1[28] + step1[31]); + step2[29] = WRAPLOW(-step1[29] + step1[30]); + step2[30] = WRAPLOW(step1[29] + step1[30]); + step2[31] = WRAPLOW(step1[28] + step1[31]); // stage 5 - step1[0] = WRAPLOW(step2[0] + step2[3], 8); - step1[1] = WRAPLOW(step2[1] + step2[2], 8); - step1[2] = WRAPLOW(step2[1] - step2[2], 8); - step1[3] = WRAPLOW(step2[0] - step2[3], 8); + step1[0] = WRAPLOW(step2[0] + step2[3]); + step1[1] = WRAPLOW(step2[1] + step2[2]); + step1[2] = WRAPLOW(step2[1] - step2[2]); + step1[3] = WRAPLOW(step2[0] - step2[3]); step1[4] = step2[4]; temp1 = (step2[6] - step2[5]) * cospi_16_64; temp2 = (step2[5] + step2[6]) * cospi_16_64; - step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[5] = WRAPLOW(dct_const_round_shift(temp1)); + step1[6] = WRAPLOW(dct_const_round_shift(temp2)); step1[7] = step2[7]; - step1[8] = WRAPLOW(step2[8] + step2[11], 8); - step1[9] = WRAPLOW(step2[9] + step2[10], 8); - step1[10] = WRAPLOW(step2[9] - step2[10], 8); - step1[11] = WRAPLOW(step2[8] - step2[11], 8); - step1[12] = WRAPLOW(-step2[12] + step2[15], 8); - step1[13] = WRAPLOW(-step2[13] + step2[14], 8); - step1[14] = WRAPLOW(step2[13] + step2[14], 8); - step1[15] = WRAPLOW(step2[12] + step2[15], 8); + step1[8] = WRAPLOW(step2[8] + step2[11]); + step1[9] = WRAPLOW(step2[9] + step2[10]); + step1[10] = WRAPLOW(step2[9] - step2[10]); + step1[11] = WRAPLOW(step2[8] - step2[11]); + step1[12] = WRAPLOW(-step2[12] + step2[15]); + step1[13] = WRAPLOW(-step2[13] + step2[14]); + step1[14] = WRAPLOW(step2[13] + step2[14]); + step1[15] = WRAPLOW(step2[12] + step2[15]); step1[16] = step2[16]; step1[17] = step2[17]; temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64; temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64; - step1[18] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[29] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[18] = WRAPLOW(dct_const_round_shift(temp1)); + 
step1[29] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64; temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64; - step1[19] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[28] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[19] = WRAPLOW(dct_const_round_shift(temp1)); + step1[28] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64; temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64; - step1[20] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[27] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[20] = WRAPLOW(dct_const_round_shift(temp1)); + step1[27] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64; temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64; - step1[21] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[26] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[21] = WRAPLOW(dct_const_round_shift(temp1)); + step1[26] = WRAPLOW(dct_const_round_shift(temp2)); step1[22] = step2[22]; step1[23] = step2[23]; step1[24] = step2[24]; @@ -1038,62 +1039,62 @@ void idct32_c(const tran_low_t *input, tran_low_t *output) { step1[31] = step2[31]; // stage 6 - step2[0] = WRAPLOW(step1[0] + step1[7], 8); - step2[1] = WRAPLOW(step1[1] + step1[6], 8); - step2[2] = WRAPLOW(step1[2] + step1[5], 8); - step2[3] = WRAPLOW(step1[3] + step1[4], 8); - step2[4] = WRAPLOW(step1[3] - step1[4], 8); - step2[5] = WRAPLOW(step1[2] - step1[5], 8); - step2[6] = WRAPLOW(step1[1] - step1[6], 8); - step2[7] = WRAPLOW(step1[0] - step1[7], 8); + step2[0] = WRAPLOW(step1[0] + step1[7]); + step2[1] = WRAPLOW(step1[1] + step1[6]); + step2[2] = WRAPLOW(step1[2] + step1[5]); + step2[3] = WRAPLOW(step1[3] + step1[4]); + step2[4] = WRAPLOW(step1[3] - step1[4]); + step2[5] = WRAPLOW(step1[2] - step1[5]); + step2[6] = WRAPLOW(step1[1] - step1[6]); + step2[7] = WRAPLOW(step1[0] - step1[7]); step2[8] = step1[8]; step2[9] = step1[9]; 
temp1 = (-step1[10] + step1[13]) * cospi_16_64; temp2 = (step1[10] + step1[13]) * cospi_16_64; - step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[10] = WRAPLOW(dct_const_round_shift(temp1)); + step2[13] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = (-step1[11] + step1[12]) * cospi_16_64; temp2 = (step1[11] + step1[12]) * cospi_16_64; - step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8); + step2[11] = WRAPLOW(dct_const_round_shift(temp1)); + step2[12] = WRAPLOW(dct_const_round_shift(temp2)); step2[14] = step1[14]; step2[15] = step1[15]; - step2[16] = WRAPLOW(step1[16] + step1[23], 8); - step2[17] = WRAPLOW(step1[17] + step1[22], 8); - step2[18] = WRAPLOW(step1[18] + step1[21], 8); - step2[19] = WRAPLOW(step1[19] + step1[20], 8); - step2[20] = WRAPLOW(step1[19] - step1[20], 8); - step2[21] = WRAPLOW(step1[18] - step1[21], 8); - step2[22] = WRAPLOW(step1[17] - step1[22], 8); - step2[23] = WRAPLOW(step1[16] - step1[23], 8); - - step2[24] = WRAPLOW(-step1[24] + step1[31], 8); - step2[25] = WRAPLOW(-step1[25] + step1[30], 8); - step2[26] = WRAPLOW(-step1[26] + step1[29], 8); - step2[27] = WRAPLOW(-step1[27] + step1[28], 8); - step2[28] = WRAPLOW(step1[27] + step1[28], 8); - step2[29] = WRAPLOW(step1[26] + step1[29], 8); - step2[30] = WRAPLOW(step1[25] + step1[30], 8); - step2[31] = WRAPLOW(step1[24] + step1[31], 8); + step2[16] = WRAPLOW(step1[16] + step1[23]); + step2[17] = WRAPLOW(step1[17] + step1[22]); + step2[18] = WRAPLOW(step1[18] + step1[21]); + step2[19] = WRAPLOW(step1[19] + step1[20]); + step2[20] = WRAPLOW(step1[19] - step1[20]); + step2[21] = WRAPLOW(step1[18] - step1[21]); + step2[22] = WRAPLOW(step1[17] - step1[22]); + step2[23] = WRAPLOW(step1[16] - step1[23]); + + step2[24] = WRAPLOW(-step1[24] + step1[31]); + step2[25] = WRAPLOW(-step1[25] + step1[30]); + step2[26] = WRAPLOW(-step1[26] + step1[29]); + step2[27] = 
WRAPLOW(-step1[27] + step1[28]); + step2[28] = WRAPLOW(step1[27] + step1[28]); + step2[29] = WRAPLOW(step1[26] + step1[29]); + step2[30] = WRAPLOW(step1[25] + step1[30]); + step2[31] = WRAPLOW(step1[24] + step1[31]); // stage 7 - step1[0] = WRAPLOW(step2[0] + step2[15], 8); - step1[1] = WRAPLOW(step2[1] + step2[14], 8); - step1[2] = WRAPLOW(step2[2] + step2[13], 8); - step1[3] = WRAPLOW(step2[3] + step2[12], 8); - step1[4] = WRAPLOW(step2[4] + step2[11], 8); - step1[5] = WRAPLOW(step2[5] + step2[10], 8); - step1[6] = WRAPLOW(step2[6] + step2[9], 8); - step1[7] = WRAPLOW(step2[7] + step2[8], 8); - step1[8] = WRAPLOW(step2[7] - step2[8], 8); - step1[9] = WRAPLOW(step2[6] - step2[9], 8); - step1[10] = WRAPLOW(step2[5] - step2[10], 8); - step1[11] = WRAPLOW(step2[4] - step2[11], 8); - step1[12] = WRAPLOW(step2[3] - step2[12], 8); - step1[13] = WRAPLOW(step2[2] - step2[13], 8); - step1[14] = WRAPLOW(step2[1] - step2[14], 8); - step1[15] = WRAPLOW(step2[0] - step2[15], 8); + step1[0] = WRAPLOW(step2[0] + step2[15]); + step1[1] = WRAPLOW(step2[1] + step2[14]); + step1[2] = WRAPLOW(step2[2] + step2[13]); + step1[3] = WRAPLOW(step2[3] + step2[12]); + step1[4] = WRAPLOW(step2[4] + step2[11]); + step1[5] = WRAPLOW(step2[5] + step2[10]); + step1[6] = WRAPLOW(step2[6] + step2[9]); + step1[7] = WRAPLOW(step2[7] + step2[8]); + step1[8] = WRAPLOW(step2[7] - step2[8]); + step1[9] = WRAPLOW(step2[6] - step2[9]); + step1[10] = WRAPLOW(step2[5] - step2[10]); + step1[11] = WRAPLOW(step2[4] - step2[11]); + step1[12] = WRAPLOW(step2[3] - step2[12]); + step1[13] = WRAPLOW(step2[2] - step2[13]); + step1[14] = WRAPLOW(step2[1] - step2[14]); + step1[15] = WRAPLOW(step2[0] - step2[15]); step1[16] = step2[16]; step1[17] = step2[17]; @@ -1101,58 +1102,58 @@ void idct32_c(const tran_low_t *input, tran_low_t *output) { step1[19] = step2[19]; temp1 = (-step2[20] + step2[27]) * cospi_16_64; temp2 = (step2[20] + step2[27]) * cospi_16_64; - step1[20] = WRAPLOW(dct_const_round_shift(temp1), 8); - 
step1[27] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[20] = WRAPLOW(dct_const_round_shift(temp1)); + step1[27] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = (-step2[21] + step2[26]) * cospi_16_64; temp2 = (step2[21] + step2[26]) * cospi_16_64; - step1[21] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[26] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[21] = WRAPLOW(dct_const_round_shift(temp1)); + step1[26] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = (-step2[22] + step2[25]) * cospi_16_64; temp2 = (step2[22] + step2[25]) * cospi_16_64; - step1[22] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[25] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[22] = WRAPLOW(dct_const_round_shift(temp1)); + step1[25] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = (-step2[23] + step2[24]) * cospi_16_64; temp2 = (step2[23] + step2[24]) * cospi_16_64; - step1[23] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[24] = WRAPLOW(dct_const_round_shift(temp2), 8); + step1[23] = WRAPLOW(dct_const_round_shift(temp1)); + step1[24] = WRAPLOW(dct_const_round_shift(temp2)); step1[28] = step2[28]; step1[29] = step2[29]; step1[30] = step2[30]; step1[31] = step2[31]; // final stage - output[0] = WRAPLOW(step1[0] + step1[31], 8); - output[1] = WRAPLOW(step1[1] + step1[30], 8); - output[2] = WRAPLOW(step1[2] + step1[29], 8); - output[3] = WRAPLOW(step1[3] + step1[28], 8); - output[4] = WRAPLOW(step1[4] + step1[27], 8); - output[5] = WRAPLOW(step1[5] + step1[26], 8); - output[6] = WRAPLOW(step1[6] + step1[25], 8); - output[7] = WRAPLOW(step1[7] + step1[24], 8); - output[8] = WRAPLOW(step1[8] + step1[23], 8); - output[9] = WRAPLOW(step1[9] + step1[22], 8); - output[10] = WRAPLOW(step1[10] + step1[21], 8); - output[11] = WRAPLOW(step1[11] + step1[20], 8); - output[12] = WRAPLOW(step1[12] + step1[19], 8); - output[13] = WRAPLOW(step1[13] + step1[18], 8); - output[14] = WRAPLOW(step1[14] + step1[17], 8); - output[15] = WRAPLOW(step1[15] + step1[16], 8); - output[16] 
= WRAPLOW(step1[15] - step1[16], 8); - output[17] = WRAPLOW(step1[14] - step1[17], 8); - output[18] = WRAPLOW(step1[13] - step1[18], 8); - output[19] = WRAPLOW(step1[12] - step1[19], 8); - output[20] = WRAPLOW(step1[11] - step1[20], 8); - output[21] = WRAPLOW(step1[10] - step1[21], 8); - output[22] = WRAPLOW(step1[9] - step1[22], 8); - output[23] = WRAPLOW(step1[8] - step1[23], 8); - output[24] = WRAPLOW(step1[7] - step1[24], 8); - output[25] = WRAPLOW(step1[6] - step1[25], 8); - output[26] = WRAPLOW(step1[5] - step1[26], 8); - output[27] = WRAPLOW(step1[4] - step1[27], 8); - output[28] = WRAPLOW(step1[3] - step1[28], 8); - output[29] = WRAPLOW(step1[2] - step1[29], 8); - output[30] = WRAPLOW(step1[1] - step1[30], 8); - output[31] = WRAPLOW(step1[0] - step1[31], 8); + output[0] = WRAPLOW(step1[0] + step1[31]); + output[1] = WRAPLOW(step1[1] + step1[30]); + output[2] = WRAPLOW(step1[2] + step1[29]); + output[3] = WRAPLOW(step1[3] + step1[28]); + output[4] = WRAPLOW(step1[4] + step1[27]); + output[5] = WRAPLOW(step1[5] + step1[26]); + output[6] = WRAPLOW(step1[6] + step1[25]); + output[7] = WRAPLOW(step1[7] + step1[24]); + output[8] = WRAPLOW(step1[8] + step1[23]); + output[9] = WRAPLOW(step1[9] + step1[22]); + output[10] = WRAPLOW(step1[10] + step1[21]); + output[11] = WRAPLOW(step1[11] + step1[20]); + output[12] = WRAPLOW(step1[12] + step1[19]); + output[13] = WRAPLOW(step1[13] + step1[18]); + output[14] = WRAPLOW(step1[14] + step1[17]); + output[15] = WRAPLOW(step1[15] + step1[16]); + output[16] = WRAPLOW(step1[15] - step1[16]); + output[17] = WRAPLOW(step1[14] - step1[17]); + output[18] = WRAPLOW(step1[13] - step1[18]); + output[19] = WRAPLOW(step1[12] - step1[19]); + output[20] = WRAPLOW(step1[11] - step1[20]); + output[21] = WRAPLOW(step1[10] - step1[21]); + output[22] = WRAPLOW(step1[9] - step1[22]); + output[23] = WRAPLOW(step1[8] - step1[23]); + output[24] = WRAPLOW(step1[7] - step1[24]); + output[25] = WRAPLOW(step1[6] - step1[25]); + output[26] = 
WRAPLOW(step1[5] - step1[26]); + output[27] = WRAPLOW(step1[4] - step1[27]); + output[28] = WRAPLOW(step1[3] - step1[28]); + output[29] = WRAPLOW(step1[2] - step1[29]); + output[30] = WRAPLOW(step1[1] - step1[30]); + output[31] = WRAPLOW(step1[0] - step1[31]); } void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, @@ -1194,6 +1195,33 @@ void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, } } +void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, + int stride) { + tran_low_t out[32 * 32] = {0}; + tran_low_t *outptr = out; + int i, j; + tran_low_t temp_in[32], temp_out[32]; + + // Rows + // only upper-left 16x16 has non-zero coeff + for (i = 0; i < 16; ++i) { + idct32_c(input, outptr); + input += 32; + outptr += 32; + } + + // Columns + for (i = 0; i < 32; ++i) { + for (j = 0; j < 32; ++j) + temp_in[j] = out[j * 32 + i]; + idct32_c(temp_in, temp_out); + for (j = 0; j < 32; ++j) { + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], + ROUND_POWER_OF_TWO(temp_out[j], 6)); + } + } +} + void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, int stride) { tran_low_t out[32 * 32] = {0}; @@ -1225,8 +1253,8 @@ void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { int i, j; tran_high_t a1; - tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); - out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); + tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); + out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); a1 = ROUND_POWER_OF_TWO(out, 6); for (j = 0; j < 32; ++j) { @@ -1260,10 +1288,10 @@ void vpx_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, c1 = e1 - c1; a1 -= b1; d1 += c1; - op[0] = WRAPLOW(a1, bd); - op[1] = WRAPLOW(b1, bd); - op[2] = WRAPLOW(c1, bd); - op[3] = WRAPLOW(d1, bd); + op[0] = HIGHBD_WRAPLOW(a1, bd); + op[1] = HIGHBD_WRAPLOW(b1, bd); + op[2] = HIGHBD_WRAPLOW(c1, bd); + op[3] = 
HIGHBD_WRAPLOW(d1, bd); ip += 4; op += 4; } @@ -1281,10 +1309,14 @@ void vpx_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, c1 = e1 - c1; a1 -= b1; d1 += c1; - dest[stride * 0] = highbd_clip_pixel_add(dest[stride * 0], a1, bd); - dest[stride * 1] = highbd_clip_pixel_add(dest[stride * 1], b1, bd); - dest[stride * 2] = highbd_clip_pixel_add(dest[stride * 2], c1, bd); - dest[stride * 3] = highbd_clip_pixel_add(dest[stride * 3], d1, bd); + dest[stride * 0] = highbd_clip_pixel_add(dest[stride * 0], + HIGHBD_WRAPLOW(a1, bd), bd); + dest[stride * 1] = highbd_clip_pixel_add(dest[stride * 1], + HIGHBD_WRAPLOW(b1, bd), bd); + dest[stride * 2] = highbd_clip_pixel_add(dest[stride * 2], + HIGHBD_WRAPLOW(c1, bd), bd); + dest[stride * 3] = highbd_clip_pixel_add(dest[stride * 3], + HIGHBD_WRAPLOW(d1, bd), bd); ip++; dest++; @@ -1304,8 +1336,8 @@ void vpx_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8, a1 = ip[0] >> UNIT_QUANT_SHIFT; e1 = a1 >> 1; a1 -= e1; - op[0] = WRAPLOW(a1, bd); - op[1] = op[2] = op[3] = WRAPLOW(e1, bd); + op[0] = HIGHBD_WRAPLOW(a1, bd); + op[1] = op[2] = op[3] = HIGHBD_WRAPLOW(e1, bd); ip = tmp; for (i = 0; i < 4; i++) { @@ -1331,18 +1363,18 @@ void vpx_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd) { // stage 1 temp1 = (input[0] + input[2]) * cospi_16_64; temp2 = (input[0] - input[2]) * cospi_16_64; - step[0] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step[1] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; - step[2] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step[3] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step[3] = 
HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); // stage 2 - output[0] = WRAPLOW(step[0] + step[3], bd); - output[1] = WRAPLOW(step[1] + step[2], bd); - output[2] = WRAPLOW(step[1] - step[2], bd); - output[3] = WRAPLOW(step[0] - step[3], bd); + output[0] = HIGHBD_WRAPLOW(step[0] + step[3], bd); + output[1] = HIGHBD_WRAPLOW(step[1] + step[2], bd); + output[2] = HIGHBD_WRAPLOW(step[1] - step[2], bd); + output[3] = HIGHBD_WRAPLOW(step[0] - step[3], bd); } void vpx_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, @@ -1376,11 +1408,11 @@ void vpx_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8, int dest_stride, int bd) { int i; tran_high_t a1; - tran_low_t out = WRAPLOW( - highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); + tran_low_t out = HIGHBD_WRAPLOW( + highbd_dct_const_round_shift(input[0] * cospi_16_64), bd); uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); + out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd); a1 = ROUND_POWER_OF_TWO(out, 4); for (i = 0; i < 4; i++) { @@ -1402,39 +1434,39 @@ void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd) { step1[3] = input[6]; temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64; temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64; - step1[4] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[7] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[4] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[7] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64; temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64; - step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[6] = 
HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); // stage 2 & stage 3 - even half vpx_highbd_idct4_c(step1, step1, bd); // stage 2 - odd half - step2[4] = WRAPLOW(step1[4] + step1[5], bd); - step2[5] = WRAPLOW(step1[4] - step1[5], bd); - step2[6] = WRAPLOW(-step1[6] + step1[7], bd); - step2[7] = WRAPLOW(step1[6] + step1[7], bd); + step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd); + step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd); + step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd); + step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd); // stage 3 - odd half step1[4] = step2[4]; temp1 = (step2[6] - step2[5]) * cospi_16_64; temp2 = (step2[5] + step2[6]) * cospi_16_64; - step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); step1[7] = step2[7]; // stage 4 - output[0] = WRAPLOW(step1[0] + step1[7], bd); - output[1] = WRAPLOW(step1[1] + step1[6], bd); - output[2] = WRAPLOW(step1[2] + step1[5], bd); - output[3] = WRAPLOW(step1[3] + step1[4], bd); - output[4] = WRAPLOW(step1[3] - step1[4], bd); - output[5] = WRAPLOW(step1[2] - step1[5], bd); - output[6] = WRAPLOW(step1[1] - step1[6], bd); - output[7] = WRAPLOW(step1[0] - step1[7], bd); + output[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd); + output[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd); + output[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd); + output[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd); + output[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd); + output[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd); + output[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd); + output[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd); } void vpx_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, @@ -1468,10 +1500,10 @@ void vpx_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8, int 
stride, int bd) { int i, j; tran_high_t a1; - tran_low_t out = WRAPLOW( - highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); + tran_low_t out = HIGHBD_WRAPLOW( + highbd_dct_const_round_shift(input[0] * cospi_16_64), bd); uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); + out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd); a1 = ROUND_POWER_OF_TWO(out, 5); for (j = 0; j < 8; ++j) { for (i = 0; i < 8; ++i) @@ -1501,7 +1533,7 @@ void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) { s4 = sinpi_1_9 * x2; s5 = sinpi_2_9 * x3; s6 = sinpi_4_9 * x3; - s7 = (tran_high_t)(x0 - x2 + x3); + s7 = (tran_high_t)HIGHBD_WRAPLOW(x0 - x2 + x3, bd); s0 = s0 + s3 + s5; s1 = s1 - s4 - s6; @@ -1512,10 +1544,10 @@ void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) { // The overall dynamic range is 14b (input) + 14b (multiplication scaling) // + 1b (addition) = 29b. // Hence the output bit depth is 15b. 
- output[0] = WRAPLOW(highbd_dct_const_round_shift(s0 + s3, bd), bd); - output[1] = WRAPLOW(highbd_dct_const_round_shift(s1 + s3, bd), bd); - output[2] = WRAPLOW(highbd_dct_const_round_shift(s2, bd), bd); - output[3] = WRAPLOW(highbd_dct_const_round_shift(s0 + s1 - s3, bd), bd); + output[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s3), bd); + output[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s3), bd); + output[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2), bd); + output[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s1 - s3), bd); } void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) { @@ -1546,14 +1578,14 @@ void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) { s6 = cospi_26_64 * x6 + cospi_6_64 * x7; s7 = cospi_6_64 * x6 - cospi_26_64 * x7; - x0 = WRAPLOW(highbd_dct_const_round_shift(s0 + s4, bd), bd); - x1 = WRAPLOW(highbd_dct_const_round_shift(s1 + s5, bd), bd); - x2 = WRAPLOW(highbd_dct_const_round_shift(s2 + s6, bd), bd); - x3 = WRAPLOW(highbd_dct_const_round_shift(s3 + s7, bd), bd); - x4 = WRAPLOW(highbd_dct_const_round_shift(s0 - s4, bd), bd); - x5 = WRAPLOW(highbd_dct_const_round_shift(s1 - s5, bd), bd); - x6 = WRAPLOW(highbd_dct_const_round_shift(s2 - s6, bd), bd); - x7 = WRAPLOW(highbd_dct_const_round_shift(s3 - s7, bd), bd); + x0 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s4), bd); + x1 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s5), bd); + x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 + s6), bd); + x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 + s7), bd); + x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 - s4), bd); + x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 - s5), bd); + x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 - s6), bd); + x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 - s7), bd); // stage 2 s0 = x0; @@ -1565,14 +1597,14 @@ void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) { s6 
= -cospi_24_64 * x6 + cospi_8_64 * x7; s7 = cospi_8_64 * x6 + cospi_24_64 * x7; - x0 = WRAPLOW(s0 + s2, bd); - x1 = WRAPLOW(s1 + s3, bd); - x2 = WRAPLOW(s0 - s2, bd); - x3 = WRAPLOW(s1 - s3, bd); - x4 = WRAPLOW(highbd_dct_const_round_shift(s4 + s6, bd), bd); - x5 = WRAPLOW(highbd_dct_const_round_shift(s5 + s7, bd), bd); - x6 = WRAPLOW(highbd_dct_const_round_shift(s4 - s6, bd), bd); - x7 = WRAPLOW(highbd_dct_const_round_shift(s5 - s7, bd), bd); + x0 = HIGHBD_WRAPLOW(s0 + s2, bd); + x1 = HIGHBD_WRAPLOW(s1 + s3, bd); + x2 = HIGHBD_WRAPLOW(s0 - s2, bd); + x3 = HIGHBD_WRAPLOW(s1 - s3, bd); + x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 + s6), bd); + x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 + s7), bd); + x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 - s6), bd); + x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 - s7), bd); // stage 3 s2 = cospi_16_64 * (x2 + x3); @@ -1580,19 +1612,19 @@ void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) { s6 = cospi_16_64 * (x6 + x7); s7 = cospi_16_64 * (x6 - x7); - x2 = WRAPLOW(highbd_dct_const_round_shift(s2, bd), bd); - x3 = WRAPLOW(highbd_dct_const_round_shift(s3, bd), bd); - x6 = WRAPLOW(highbd_dct_const_round_shift(s6, bd), bd); - x7 = WRAPLOW(highbd_dct_const_round_shift(s7, bd), bd); - - output[0] = WRAPLOW(x0, bd); - output[1] = WRAPLOW(-x4, bd); - output[2] = WRAPLOW(x6, bd); - output[3] = WRAPLOW(-x2, bd); - output[4] = WRAPLOW(x3, bd); - output[5] = WRAPLOW(-x7, bd); - output[6] = WRAPLOW(x5, bd); - output[7] = WRAPLOW(-x1, bd); + x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2), bd); + x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3), bd); + x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6), bd); + x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7), bd); + + output[0] = HIGHBD_WRAPLOW(x0, bd); + output[1] = HIGHBD_WRAPLOW(-x4, bd); + output[2] = HIGHBD_WRAPLOW(x6, bd); + output[3] = HIGHBD_WRAPLOW(-x2, bd); + output[4] = HIGHBD_WRAPLOW(x3, bd); + output[5] = 
HIGHBD_WRAPLOW(-x7, bd); + output[6] = HIGHBD_WRAPLOW(x5, bd); + output[7] = HIGHBD_WRAPLOW(-x1, bd); } void vpx_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8, @@ -1657,23 +1689,23 @@ void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) { temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; - step2[8] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[15] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[8] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[15] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; - step2[9] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[14] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; - step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; - step2[11] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[12] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); // stage 3 step1[0] = step2[0]; @@ -1683,109 +1715,109 @@ void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) { temp1 = 
step2[4] * cospi_28_64 - step2[7] * cospi_4_64; temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; - step1[4] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[7] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[4] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[7] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; - step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); - - step1[8] = WRAPLOW(step2[8] + step2[9], bd); - step1[9] = WRAPLOW(step2[8] - step2[9], bd); - step1[10] = WRAPLOW(-step2[10] + step2[11], bd); - step1[11] = WRAPLOW(step2[10] + step2[11], bd); - step1[12] = WRAPLOW(step2[12] + step2[13], bd); - step1[13] = WRAPLOW(step2[12] - step2[13], bd); - step1[14] = WRAPLOW(-step2[14] + step2[15], bd); - step1[15] = WRAPLOW(step2[14] + step2[15], bd); + step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + + step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[9], bd); + step1[9] = HIGHBD_WRAPLOW(step2[8] - step2[9], bd); + step1[10] = HIGHBD_WRAPLOW(-step2[10] + step2[11], bd); + step1[11] = HIGHBD_WRAPLOW(step2[10] + step2[11], bd); + step1[12] = HIGHBD_WRAPLOW(step2[12] + step2[13], bd); + step1[13] = HIGHBD_WRAPLOW(step2[12] - step2[13], bd); + step1[14] = HIGHBD_WRAPLOW(-step2[14] + step2[15], bd); + step1[15] = HIGHBD_WRAPLOW(step2[14] + step2[15], bd); // stage 4 temp1 = (step1[0] + step1[1]) * cospi_16_64; temp2 = (step1[0] - step1[1]) * cospi_16_64; - step2[0] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[1] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 
= step1[2] * cospi_24_64 - step1[3] * cospi_8_64; temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; - step2[2] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[3] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); - step2[4] = WRAPLOW(step1[4] + step1[5], bd); - step2[5] = WRAPLOW(step1[4] - step1[5], bd); - step2[6] = WRAPLOW(-step1[6] + step1[7], bd); - step2[7] = WRAPLOW(step1[6] + step1[7], bd); + step2[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd); + step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd); + step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd); + step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd); step2[8] = step1[8]; step2[15] = step1[15]; temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; - step2[9] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[14] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; - step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); step2[11] = step1[11]; step2[12] = step1[12]; // stage 5 - step1[0] = WRAPLOW(step2[0] + step2[3], bd); - step1[1] = WRAPLOW(step2[1] + step2[2], bd); - step1[2] = WRAPLOW(step2[1] - step2[2], bd); - step1[3] = WRAPLOW(step2[0] - step2[3], bd); + step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[3], bd); + step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[2], bd); + step1[2] = 
HIGHBD_WRAPLOW(step2[1] - step2[2], bd); + step1[3] = HIGHBD_WRAPLOW(step2[0] - step2[3], bd); step1[4] = step2[4]; temp1 = (step2[6] - step2[5]) * cospi_16_64; temp2 = (step2[5] + step2[6]) * cospi_16_64; - step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); step1[7] = step2[7]; - step1[8] = WRAPLOW(step2[8] + step2[11], bd); - step1[9] = WRAPLOW(step2[9] + step2[10], bd); - step1[10] = WRAPLOW(step2[9] - step2[10], bd); - step1[11] = WRAPLOW(step2[8] - step2[11], bd); - step1[12] = WRAPLOW(-step2[12] + step2[15], bd); - step1[13] = WRAPLOW(-step2[13] + step2[14], bd); - step1[14] = WRAPLOW(step2[13] + step2[14], bd); - step1[15] = WRAPLOW(step2[12] + step2[15], bd); + step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[11], bd); + step1[9] = HIGHBD_WRAPLOW(step2[9] + step2[10], bd); + step1[10] = HIGHBD_WRAPLOW(step2[9] - step2[10], bd); + step1[11] = HIGHBD_WRAPLOW(step2[8] - step2[11], bd); + step1[12] = HIGHBD_WRAPLOW(-step2[12] + step2[15], bd); + step1[13] = HIGHBD_WRAPLOW(-step2[13] + step2[14], bd); + step1[14] = HIGHBD_WRAPLOW(step2[13] + step2[14], bd); + step1[15] = HIGHBD_WRAPLOW(step2[12] + step2[15], bd); // stage 6 - step2[0] = WRAPLOW(step1[0] + step1[7], bd); - step2[1] = WRAPLOW(step1[1] + step1[6], bd); - step2[2] = WRAPLOW(step1[2] + step1[5], bd); - step2[3] = WRAPLOW(step1[3] + step1[4], bd); - step2[4] = WRAPLOW(step1[3] - step1[4], bd); - step2[5] = WRAPLOW(step1[2] - step1[5], bd); - step2[6] = WRAPLOW(step1[1] - step1[6], bd); - step2[7] = WRAPLOW(step1[0] - step1[7], bd); + step2[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd); + step2[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd); + step2[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd); + step2[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd); + step2[4] = HIGHBD_WRAPLOW(step1[3] - 
step1[4], bd); + step2[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd); + step2[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd); + step2[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd); step2[8] = step1[8]; step2[9] = step1[9]; temp1 = (-step1[10] + step1[13]) * cospi_16_64; temp2 = (step1[10] + step1[13]) * cospi_16_64; - step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = (-step1[11] + step1[12]) * cospi_16_64; temp2 = (step1[11] + step1[12]) * cospi_16_64; - step2[11] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[12] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); step2[14] = step1[14]; step2[15] = step1[15]; // stage 7 - output[0] = WRAPLOW(step2[0] + step2[15], bd); - output[1] = WRAPLOW(step2[1] + step2[14], bd); - output[2] = WRAPLOW(step2[2] + step2[13], bd); - output[3] = WRAPLOW(step2[3] + step2[12], bd); - output[4] = WRAPLOW(step2[4] + step2[11], bd); - output[5] = WRAPLOW(step2[5] + step2[10], bd); - output[6] = WRAPLOW(step2[6] + step2[9], bd); - output[7] = WRAPLOW(step2[7] + step2[8], bd); - output[8] = WRAPLOW(step2[7] - step2[8], bd); - output[9] = WRAPLOW(step2[6] - step2[9], bd); - output[10] = WRAPLOW(step2[5] - step2[10], bd); - output[11] = WRAPLOW(step2[4] - step2[11], bd); - output[12] = WRAPLOW(step2[3] - step2[12], bd); - output[13] = WRAPLOW(step2[2] - step2[13], bd); - output[14] = WRAPLOW(step2[1] - step2[14], bd); - output[15] = WRAPLOW(step2[0] - step2[15], bd); + output[0] = HIGHBD_WRAPLOW(step2[0] + step2[15], bd); + output[1] = HIGHBD_WRAPLOW(step2[1] + step2[14], bd); + output[2] = HIGHBD_WRAPLOW(step2[2] + step2[13], bd); + output[3] = 
HIGHBD_WRAPLOW(step2[3] + step2[12], bd); + output[4] = HIGHBD_WRAPLOW(step2[4] + step2[11], bd); + output[5] = HIGHBD_WRAPLOW(step2[5] + step2[10], bd); + output[6] = HIGHBD_WRAPLOW(step2[6] + step2[9], bd); + output[7] = HIGHBD_WRAPLOW(step2[7] + step2[8], bd); + output[8] = HIGHBD_WRAPLOW(step2[7] - step2[8], bd); + output[9] = HIGHBD_WRAPLOW(step2[6] - step2[9], bd); + output[10] = HIGHBD_WRAPLOW(step2[5] - step2[10], bd); + output[11] = HIGHBD_WRAPLOW(step2[4] - step2[11], bd); + output[12] = HIGHBD_WRAPLOW(step2[3] - step2[12], bd); + output[13] = HIGHBD_WRAPLOW(step2[2] - step2[13], bd); + output[14] = HIGHBD_WRAPLOW(step2[1] - step2[14], bd); + output[15] = HIGHBD_WRAPLOW(step2[0] - step2[15], bd); } void vpx_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, @@ -1861,22 +1893,22 @@ void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) { s14 = x14 * cospi_29_64 + x15 * cospi_3_64; s15 = x14 * cospi_3_64 - x15 * cospi_29_64; - x0 = WRAPLOW(highbd_dct_const_round_shift(s0 + s8, bd), bd); - x1 = WRAPLOW(highbd_dct_const_round_shift(s1 + s9, bd), bd); - x2 = WRAPLOW(highbd_dct_const_round_shift(s2 + s10, bd), bd); - x3 = WRAPLOW(highbd_dct_const_round_shift(s3 + s11, bd), bd); - x4 = WRAPLOW(highbd_dct_const_round_shift(s4 + s12, bd), bd); - x5 = WRAPLOW(highbd_dct_const_round_shift(s5 + s13, bd), bd); - x6 = WRAPLOW(highbd_dct_const_round_shift(s6 + s14, bd), bd); - x7 = WRAPLOW(highbd_dct_const_round_shift(s7 + s15, bd), bd); - x8 = WRAPLOW(highbd_dct_const_round_shift(s0 - s8, bd), bd); - x9 = WRAPLOW(highbd_dct_const_round_shift(s1 - s9, bd), bd); - x10 = WRAPLOW(highbd_dct_const_round_shift(s2 - s10, bd), bd); - x11 = WRAPLOW(highbd_dct_const_round_shift(s3 - s11, bd), bd); - x12 = WRAPLOW(highbd_dct_const_round_shift(s4 - s12, bd), bd); - x13 = WRAPLOW(highbd_dct_const_round_shift(s5 - s13, bd), bd); - x14 = WRAPLOW(highbd_dct_const_round_shift(s6 - s14, bd), bd); - x15 = WRAPLOW(highbd_dct_const_round_shift(s7 - 
s15, bd), bd); + x0 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s8), bd); + x1 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s9), bd); + x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 + s10), bd); + x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 + s11), bd); + x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 + s12), bd); + x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 + s13), bd); + x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6 + s14), bd); + x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7 + s15), bd); + x8 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 - s8), bd); + x9 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 - s9), bd); + x10 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 - s10), bd); + x11 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 - s11), bd); + x12 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 - s12), bd); + x13 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 - s13), bd); + x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6 - s14), bd); + x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7 - s15), bd); // stage 2 s0 = x0; @@ -1896,22 +1928,22 @@ void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) { s14 = -x14 * cospi_12_64 + x15 * cospi_20_64; s15 = x14 * cospi_20_64 + x15 * cospi_12_64; - x0 = WRAPLOW(s0 + s4, bd); - x1 = WRAPLOW(s1 + s5, bd); - x2 = WRAPLOW(s2 + s6, bd); - x3 = WRAPLOW(s3 + s7, bd); - x4 = WRAPLOW(s0 - s4, bd); - x5 = WRAPLOW(s1 - s5, bd); - x6 = WRAPLOW(s2 - s6, bd); - x7 = WRAPLOW(s3 - s7, bd); - x8 = WRAPLOW(highbd_dct_const_round_shift(s8 + s12, bd), bd); - x9 = WRAPLOW(highbd_dct_const_round_shift(s9 + s13, bd), bd); - x10 = WRAPLOW(highbd_dct_const_round_shift(s10 + s14, bd), bd); - x11 = WRAPLOW(highbd_dct_const_round_shift(s11 + s15, bd), bd); - x12 = WRAPLOW(highbd_dct_const_round_shift(s8 - s12, bd), bd); - x13 = WRAPLOW(highbd_dct_const_round_shift(s9 - s13, bd), bd); - x14 = WRAPLOW(highbd_dct_const_round_shift(s10 - s14, bd), bd); - x15 
= WRAPLOW(highbd_dct_const_round_shift(s11 - s15, bd), bd); + x0 = HIGHBD_WRAPLOW(s0 + s4, bd); + x1 = HIGHBD_WRAPLOW(s1 + s5, bd); + x2 = HIGHBD_WRAPLOW(s2 + s6, bd); + x3 = HIGHBD_WRAPLOW(s3 + s7, bd); + x4 = HIGHBD_WRAPLOW(s0 - s4, bd); + x5 = HIGHBD_WRAPLOW(s1 - s5, bd); + x6 = HIGHBD_WRAPLOW(s2 - s6, bd); + x7 = HIGHBD_WRAPLOW(s3 - s7, bd); + x8 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s8 + s12), bd); + x9 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s9 + s13), bd); + x10 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s10 + s14), bd); + x11 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s11 + s15), bd); + x12 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s8 - s12), bd); + x13 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s9 - s13), bd); + x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s10 - s14), bd); + x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s11 - s15), bd); // stage 3 s0 = x0; @@ -1931,22 +1963,22 @@ void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) { s14 = -x14 * cospi_24_64 + x15 * cospi_8_64; s15 = x14 * cospi_8_64 + x15 * cospi_24_64; - x0 = WRAPLOW(s0 + s2, bd); - x1 = WRAPLOW(s1 + s3, bd); - x2 = WRAPLOW(s0 - s2, bd); - x3 = WRAPLOW(s1 - s3, bd); - x4 = WRAPLOW(highbd_dct_const_round_shift(s4 + s6, bd), bd); - x5 = WRAPLOW(highbd_dct_const_round_shift(s5 + s7, bd), bd); - x6 = WRAPLOW(highbd_dct_const_round_shift(s4 - s6, bd), bd); - x7 = WRAPLOW(highbd_dct_const_round_shift(s5 - s7, bd), bd); - x8 = WRAPLOW(s8 + s10, bd); - x9 = WRAPLOW(s9 + s11, bd); - x10 = WRAPLOW(s8 - s10, bd); - x11 = WRAPLOW(s9 - s11, bd); - x12 = WRAPLOW(highbd_dct_const_round_shift(s12 + s14, bd), bd); - x13 = WRAPLOW(highbd_dct_const_round_shift(s13 + s15, bd), bd); - x14 = WRAPLOW(highbd_dct_const_round_shift(s12 - s14, bd), bd); - x15 = WRAPLOW(highbd_dct_const_round_shift(s13 - s15, bd), bd); + x0 = HIGHBD_WRAPLOW(s0 + s2, bd); + x1 = HIGHBD_WRAPLOW(s1 + s3, bd); + x2 = HIGHBD_WRAPLOW(s0 - s2, bd); + x3 = HIGHBD_WRAPLOW(s1 - 
s3, bd); + x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 + s6), bd); + x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 + s7), bd); + x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 - s6), bd); + x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 - s7), bd); + x8 = HIGHBD_WRAPLOW(s8 + s10, bd); + x9 = HIGHBD_WRAPLOW(s9 + s11, bd); + x10 = HIGHBD_WRAPLOW(s8 - s10, bd); + x11 = HIGHBD_WRAPLOW(s9 - s11, bd); + x12 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s12 + s14), bd); + x13 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s13 + s15), bd); + x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s12 - s14), bd); + x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s13 - s15), bd); // stage 4 s2 = (- cospi_16_64) * (x2 + x3); @@ -1958,31 +1990,31 @@ void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) { s14 = (- cospi_16_64) * (x14 + x15); s15 = cospi_16_64 * (x14 - x15); - x2 = WRAPLOW(highbd_dct_const_round_shift(s2, bd), bd); - x3 = WRAPLOW(highbd_dct_const_round_shift(s3, bd), bd); - x6 = WRAPLOW(highbd_dct_const_round_shift(s6, bd), bd); - x7 = WRAPLOW(highbd_dct_const_round_shift(s7, bd), bd); - x10 = WRAPLOW(highbd_dct_const_round_shift(s10, bd), bd); - x11 = WRAPLOW(highbd_dct_const_round_shift(s11, bd), bd); - x14 = WRAPLOW(highbd_dct_const_round_shift(s14, bd), bd); - x15 = WRAPLOW(highbd_dct_const_round_shift(s15, bd), bd); - - output[0] = WRAPLOW(x0, bd); - output[1] = WRAPLOW(-x8, bd); - output[2] = WRAPLOW(x12, bd); - output[3] = WRAPLOW(-x4, bd); - output[4] = WRAPLOW(x6, bd); - output[5] = WRAPLOW(x14, bd); - output[6] = WRAPLOW(x10, bd); - output[7] = WRAPLOW(x2, bd); - output[8] = WRAPLOW(x3, bd); - output[9] = WRAPLOW(x11, bd); - output[10] = WRAPLOW(x15, bd); - output[11] = WRAPLOW(x7, bd); - output[12] = WRAPLOW(x5, bd); - output[13] = WRAPLOW(-x13, bd); - output[14] = WRAPLOW(x9, bd); - output[15] = WRAPLOW(-x1, bd); + x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2), bd); + x3 = 
HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3), bd); + x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6), bd); + x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7), bd); + x10 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s10), bd); + x11 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s11), bd); + x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s14), bd); + x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s15), bd); + + output[0] = HIGHBD_WRAPLOW(x0, bd); + output[1] = HIGHBD_WRAPLOW(-x8, bd); + output[2] = HIGHBD_WRAPLOW(x12, bd); + output[3] = HIGHBD_WRAPLOW(-x4, bd); + output[4] = HIGHBD_WRAPLOW(x6, bd); + output[5] = HIGHBD_WRAPLOW(x14, bd); + output[6] = HIGHBD_WRAPLOW(x10, bd); + output[7] = HIGHBD_WRAPLOW(x2, bd); + output[8] = HIGHBD_WRAPLOW(x3, bd); + output[9] = HIGHBD_WRAPLOW(x11, bd); + output[10] = HIGHBD_WRAPLOW(x15, bd); + output[11] = HIGHBD_WRAPLOW(x7, bd); + output[12] = HIGHBD_WRAPLOW(x5, bd); + output[13] = HIGHBD_WRAPLOW(-x13, bd); + output[14] = HIGHBD_WRAPLOW(x9, bd); + output[15] = HIGHBD_WRAPLOW(-x1, bd); } void vpx_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8, @@ -2017,11 +2049,11 @@ void vpx_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int bd) { int i, j; tran_high_t a1; - tran_low_t out = WRAPLOW( - highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); + tran_low_t out = HIGHBD_WRAPLOW( + highbd_dct_const_round_shift(input[0] * cospi_16_64), bd); uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); + out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd); a1 = ROUND_POWER_OF_TWO(out, 6); for (j = 0; j < 16; ++j) { for (i = 0; i < 16; ++i) @@ -2056,43 +2088,43 @@ static void highbd_idct32_c(const tran_low_t *input, temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64; temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64; - step1[16] = WRAPLOW(highbd_dct_const_round_shift(temp1, 
bd), bd); - step1[31] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[16] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[31] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64; temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64; - step1[17] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[30] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[17] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[30] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64; temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64; - step1[18] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[29] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[18] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[29] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64; temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64; - step1[19] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[28] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[19] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[28] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64; temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64; - step1[20] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[27] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[20] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[27] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64; temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64; - step1[21] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[26] = 
WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64; temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64; - step1[22] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[25] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[22] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[25] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64; temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64; - step1[23] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[24] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[23] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[24] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); // stage 2 step2[0] = step1[0]; @@ -2106,40 +2138,40 @@ static void highbd_idct32_c(const tran_low_t *input, temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; - step2[8] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[15] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[8] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[15] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; - step2[9] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[14] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; - 
step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; - step2[11] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[12] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); - - step2[16] = WRAPLOW(step1[16] + step1[17], bd); - step2[17] = WRAPLOW(step1[16] - step1[17], bd); - step2[18] = WRAPLOW(-step1[18] + step1[19], bd); - step2[19] = WRAPLOW(step1[18] + step1[19], bd); - step2[20] = WRAPLOW(step1[20] + step1[21], bd); - step2[21] = WRAPLOW(step1[20] - step1[21], bd); - step2[22] = WRAPLOW(-step1[22] + step1[23], bd); - step2[23] = WRAPLOW(step1[22] + step1[23], bd); - step2[24] = WRAPLOW(step1[24] + step1[25], bd); - step2[25] = WRAPLOW(step1[24] - step1[25], bd); - step2[26] = WRAPLOW(-step1[26] + step1[27], bd); - step2[27] = WRAPLOW(step1[26] + step1[27], bd); - step2[28] = WRAPLOW(step1[28] + step1[29], bd); - step2[29] = WRAPLOW(step1[28] - step1[29], bd); - step2[30] = WRAPLOW(-step1[30] + step1[31], bd); - step2[31] = WRAPLOW(step1[30] + step1[31], bd); + step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + + step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[17], bd); + step2[17] = HIGHBD_WRAPLOW(step1[16] - step1[17], bd); + step2[18] = HIGHBD_WRAPLOW(-step1[18] + step1[19], bd); + step2[19] = HIGHBD_WRAPLOW(step1[18] + step1[19], bd); + step2[20] = HIGHBD_WRAPLOW(step1[20] + step1[21], bd); + step2[21] = HIGHBD_WRAPLOW(step1[20] - step1[21], bd); + step2[22] = HIGHBD_WRAPLOW(-step1[22] + step1[23], bd); + step2[23] = HIGHBD_WRAPLOW(step1[22] + step1[23], bd); + step2[24] = HIGHBD_WRAPLOW(step1[24] + step1[25], bd); + 
step2[25] = HIGHBD_WRAPLOW(step1[24] - step1[25], bd); + step2[26] = HIGHBD_WRAPLOW(-step1[26] + step1[27], bd); + step2[27] = HIGHBD_WRAPLOW(step1[26] + step1[27], bd); + step2[28] = HIGHBD_WRAPLOW(step1[28] + step1[29], bd); + step2[29] = HIGHBD_WRAPLOW(step1[28] - step1[29], bd); + step2[30] = HIGHBD_WRAPLOW(-step1[30] + step1[31], bd); + step2[31] = HIGHBD_WRAPLOW(step1[30] + step1[31], bd); // stage 3 step1[0] = step2[0]; @@ -2149,42 +2181,42 @@ static void highbd_idct32_c(const tran_low_t *input, temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; - step1[4] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[7] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[4] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[7] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; - step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); - - step1[8] = WRAPLOW(step2[8] + step2[9], bd); - step1[9] = WRAPLOW(step2[8] - step2[9], bd); - step1[10] = WRAPLOW(-step2[10] + step2[11], bd); - step1[11] = WRAPLOW(step2[10] + step2[11], bd); - step1[12] = WRAPLOW(step2[12] + step2[13], bd); - step1[13] = WRAPLOW(step2[12] - step2[13], bd); - step1[14] = WRAPLOW(-step2[14] + step2[15], bd); - step1[15] = WRAPLOW(step2[14] + step2[15], bd); + step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + + step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[9], bd); + step1[9] = HIGHBD_WRAPLOW(step2[8] - step2[9], bd); + step1[10] = HIGHBD_WRAPLOW(-step2[10] + step2[11], bd); + step1[11] = HIGHBD_WRAPLOW(step2[10] + step2[11], bd); + step1[12] = HIGHBD_WRAPLOW(step2[12] + step2[13], bd); + step1[13] = 
HIGHBD_WRAPLOW(step2[12] - step2[13], bd); + step1[14] = HIGHBD_WRAPLOW(-step2[14] + step2[15], bd); + step1[15] = HIGHBD_WRAPLOW(step2[14] + step2[15], bd); step1[16] = step2[16]; step1[31] = step2[31]; temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64; temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64; - step1[17] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[30] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[17] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[30] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64; temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64; - step1[18] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[29] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[18] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[29] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); step1[19] = step2[19]; step1[20] = step2[20]; temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64; temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64; - step1[21] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[26] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64; temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64; - step1[22] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[25] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[22] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[25] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); step1[23] = step2[23]; step1[24] = step2[24]; step1[27] = step2[27]; @@ -2193,87 +2225,87 @@ static void highbd_idct32_c(const tran_low_t *input, // stage 4 temp1 
= (step1[0] + step1[1]) * cospi_16_64; temp2 = (step1[0] - step1[1]) * cospi_16_64; - step2[0] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[1] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; - step2[2] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[3] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); - step2[4] = WRAPLOW(step1[4] + step1[5], bd); - step2[5] = WRAPLOW(step1[4] - step1[5], bd); - step2[6] = WRAPLOW(-step1[6] + step1[7], bd); - step2[7] = WRAPLOW(step1[6] + step1[7], bd); + step2[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd); + step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd); + step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd); + step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd); step2[8] = step1[8]; step2[15] = step1[15]; temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; - step2[9] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[14] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; - step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); step2[11] = 
step1[11]; step2[12] = step1[12]; - step2[16] = WRAPLOW(step1[16] + step1[19], bd); - step2[17] = WRAPLOW(step1[17] + step1[18], bd); - step2[18] = WRAPLOW(step1[17] - step1[18], bd); - step2[19] = WRAPLOW(step1[16] - step1[19], bd); - step2[20] = WRAPLOW(-step1[20] + step1[23], bd); - step2[21] = WRAPLOW(-step1[21] + step1[22], bd); - step2[22] = WRAPLOW(step1[21] + step1[22], bd); - step2[23] = WRAPLOW(step1[20] + step1[23], bd); - - step2[24] = WRAPLOW(step1[24] + step1[27], bd); - step2[25] = WRAPLOW(step1[25] + step1[26], bd); - step2[26] = WRAPLOW(step1[25] - step1[26], bd); - step2[27] = WRAPLOW(step1[24] - step1[27], bd); - step2[28] = WRAPLOW(-step1[28] + step1[31], bd); - step2[29] = WRAPLOW(-step1[29] + step1[30], bd); - step2[30] = WRAPLOW(step1[29] + step1[30], bd); - step2[31] = WRAPLOW(step1[28] + step1[31], bd); + step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[19], bd); + step2[17] = HIGHBD_WRAPLOW(step1[17] + step1[18], bd); + step2[18] = HIGHBD_WRAPLOW(step1[17] - step1[18], bd); + step2[19] = HIGHBD_WRAPLOW(step1[16] - step1[19], bd); + step2[20] = HIGHBD_WRAPLOW(-step1[20] + step1[23], bd); + step2[21] = HIGHBD_WRAPLOW(-step1[21] + step1[22], bd); + step2[22] = HIGHBD_WRAPLOW(step1[21] + step1[22], bd); + step2[23] = HIGHBD_WRAPLOW(step1[20] + step1[23], bd); + + step2[24] = HIGHBD_WRAPLOW(step1[24] + step1[27], bd); + step2[25] = HIGHBD_WRAPLOW(step1[25] + step1[26], bd); + step2[26] = HIGHBD_WRAPLOW(step1[25] - step1[26], bd); + step2[27] = HIGHBD_WRAPLOW(step1[24] - step1[27], bd); + step2[28] = HIGHBD_WRAPLOW(-step1[28] + step1[31], bd); + step2[29] = HIGHBD_WRAPLOW(-step1[29] + step1[30], bd); + step2[30] = HIGHBD_WRAPLOW(step1[29] + step1[30], bd); + step2[31] = HIGHBD_WRAPLOW(step1[28] + step1[31], bd); // stage 5 - step1[0] = WRAPLOW(step2[0] + step2[3], bd); - step1[1] = WRAPLOW(step2[1] + step2[2], bd); - step1[2] = WRAPLOW(step2[1] - step2[2], bd); - step1[3] = WRAPLOW(step2[0] - step2[3], bd); + step1[0] = HIGHBD_WRAPLOW(step2[0] + 
step2[3], bd); + step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[2], bd); + step1[2] = HIGHBD_WRAPLOW(step2[1] - step2[2], bd); + step1[3] = HIGHBD_WRAPLOW(step2[0] - step2[3], bd); step1[4] = step2[4]; temp1 = (step2[6] - step2[5]) * cospi_16_64; temp2 = (step2[5] + step2[6]) * cospi_16_64; - step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); step1[7] = step2[7]; - step1[8] = WRAPLOW(step2[8] + step2[11], bd); - step1[9] = WRAPLOW(step2[9] + step2[10], bd); - step1[10] = WRAPLOW(step2[9] - step2[10], bd); - step1[11] = WRAPLOW(step2[8] - step2[11], bd); - step1[12] = WRAPLOW(-step2[12] + step2[15], bd); - step1[13] = WRAPLOW(-step2[13] + step2[14], bd); - step1[14] = WRAPLOW(step2[13] + step2[14], bd); - step1[15] = WRAPLOW(step2[12] + step2[15], bd); + step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[11], bd); + step1[9] = HIGHBD_WRAPLOW(step2[9] + step2[10], bd); + step1[10] = HIGHBD_WRAPLOW(step2[9] - step2[10], bd); + step1[11] = HIGHBD_WRAPLOW(step2[8] - step2[11], bd); + step1[12] = HIGHBD_WRAPLOW(-step2[12] + step2[15], bd); + step1[13] = HIGHBD_WRAPLOW(-step2[13] + step2[14], bd); + step1[14] = HIGHBD_WRAPLOW(step2[13] + step2[14], bd); + step1[15] = HIGHBD_WRAPLOW(step2[12] + step2[15], bd); step1[16] = step2[16]; step1[17] = step2[17]; temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64; temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64; - step1[18] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[29] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[18] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[29] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64; temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64; - 
step1[19] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[28] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[19] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[28] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64; temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64; - step1[20] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[27] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[20] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[27] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64; temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64; - step1[21] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[26] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); step1[22] = step2[22]; step1[23] = step2[23]; step1[24] = step2[24]; @@ -2282,62 +2314,62 @@ static void highbd_idct32_c(const tran_low_t *input, step1[31] = step2[31]; // stage 6 - step2[0] = WRAPLOW(step1[0] + step1[7], bd); - step2[1] = WRAPLOW(step1[1] + step1[6], bd); - step2[2] = WRAPLOW(step1[2] + step1[5], bd); - step2[3] = WRAPLOW(step1[3] + step1[4], bd); - step2[4] = WRAPLOW(step1[3] - step1[4], bd); - step2[5] = WRAPLOW(step1[2] - step1[5], bd); - step2[6] = WRAPLOW(step1[1] - step1[6], bd); - step2[7] = WRAPLOW(step1[0] - step1[7], bd); + step2[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd); + step2[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd); + step2[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd); + step2[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd); + step2[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd); + step2[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd); + step2[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], 
bd); + step2[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd); step2[8] = step1[8]; step2[9] = step1[9]; temp1 = (-step1[10] + step1[13]) * cospi_16_64; temp2 = (step1[10] + step1[13]) * cospi_16_64; - step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = (-step1[11] + step1[12]) * cospi_16_64; temp2 = (step1[11] + step1[12]) * cospi_16_64; - step2[11] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step2[12] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); step2[14] = step1[14]; step2[15] = step1[15]; - step2[16] = WRAPLOW(step1[16] + step1[23], bd); - step2[17] = WRAPLOW(step1[17] + step1[22], bd); - step2[18] = WRAPLOW(step1[18] + step1[21], bd); - step2[19] = WRAPLOW(step1[19] + step1[20], bd); - step2[20] = WRAPLOW(step1[19] - step1[20], bd); - step2[21] = WRAPLOW(step1[18] - step1[21], bd); - step2[22] = WRAPLOW(step1[17] - step1[22], bd); - step2[23] = WRAPLOW(step1[16] - step1[23], bd); - - step2[24] = WRAPLOW(-step1[24] + step1[31], bd); - step2[25] = WRAPLOW(-step1[25] + step1[30], bd); - step2[26] = WRAPLOW(-step1[26] + step1[29], bd); - step2[27] = WRAPLOW(-step1[27] + step1[28], bd); - step2[28] = WRAPLOW(step1[27] + step1[28], bd); - step2[29] = WRAPLOW(step1[26] + step1[29], bd); - step2[30] = WRAPLOW(step1[25] + step1[30], bd); - step2[31] = WRAPLOW(step1[24] + step1[31], bd); + step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[23], bd); + step2[17] = HIGHBD_WRAPLOW(step1[17] + step1[22], bd); + step2[18] = HIGHBD_WRAPLOW(step1[18] + step1[21], bd); + step2[19] = HIGHBD_WRAPLOW(step1[19] + step1[20], bd); + step2[20] = HIGHBD_WRAPLOW(step1[19] - step1[20], bd); + step2[21] = 
HIGHBD_WRAPLOW(step1[18] - step1[21], bd); + step2[22] = HIGHBD_WRAPLOW(step1[17] - step1[22], bd); + step2[23] = HIGHBD_WRAPLOW(step1[16] - step1[23], bd); + + step2[24] = HIGHBD_WRAPLOW(-step1[24] + step1[31], bd); + step2[25] = HIGHBD_WRAPLOW(-step1[25] + step1[30], bd); + step2[26] = HIGHBD_WRAPLOW(-step1[26] + step1[29], bd); + step2[27] = HIGHBD_WRAPLOW(-step1[27] + step1[28], bd); + step2[28] = HIGHBD_WRAPLOW(step1[27] + step1[28], bd); + step2[29] = HIGHBD_WRAPLOW(step1[26] + step1[29], bd); + step2[30] = HIGHBD_WRAPLOW(step1[25] + step1[30], bd); + step2[31] = HIGHBD_WRAPLOW(step1[24] + step1[31], bd); // stage 7 - step1[0] = WRAPLOW(step2[0] + step2[15], bd); - step1[1] = WRAPLOW(step2[1] + step2[14], bd); - step1[2] = WRAPLOW(step2[2] + step2[13], bd); - step1[3] = WRAPLOW(step2[3] + step2[12], bd); - step1[4] = WRAPLOW(step2[4] + step2[11], bd); - step1[5] = WRAPLOW(step2[5] + step2[10], bd); - step1[6] = WRAPLOW(step2[6] + step2[9], bd); - step1[7] = WRAPLOW(step2[7] + step2[8], bd); - step1[8] = WRAPLOW(step2[7] - step2[8], bd); - step1[9] = WRAPLOW(step2[6] - step2[9], bd); - step1[10] = WRAPLOW(step2[5] - step2[10], bd); - step1[11] = WRAPLOW(step2[4] - step2[11], bd); - step1[12] = WRAPLOW(step2[3] - step2[12], bd); - step1[13] = WRAPLOW(step2[2] - step2[13], bd); - step1[14] = WRAPLOW(step2[1] - step2[14], bd); - step1[15] = WRAPLOW(step2[0] - step2[15], bd); + step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[15], bd); + step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[14], bd); + step1[2] = HIGHBD_WRAPLOW(step2[2] + step2[13], bd); + step1[3] = HIGHBD_WRAPLOW(step2[3] + step2[12], bd); + step1[4] = HIGHBD_WRAPLOW(step2[4] + step2[11], bd); + step1[5] = HIGHBD_WRAPLOW(step2[5] + step2[10], bd); + step1[6] = HIGHBD_WRAPLOW(step2[6] + step2[9], bd); + step1[7] = HIGHBD_WRAPLOW(step2[7] + step2[8], bd); + step1[8] = HIGHBD_WRAPLOW(step2[7] - step2[8], bd); + step1[9] = HIGHBD_WRAPLOW(step2[6] - step2[9], bd); + step1[10] = HIGHBD_WRAPLOW(step2[5] - step2[10], 
bd); + step1[11] = HIGHBD_WRAPLOW(step2[4] - step2[11], bd); + step1[12] = HIGHBD_WRAPLOW(step2[3] - step2[12], bd); + step1[13] = HIGHBD_WRAPLOW(step2[2] - step2[13], bd); + step1[14] = HIGHBD_WRAPLOW(step2[1] - step2[14], bd); + step1[15] = HIGHBD_WRAPLOW(step2[0] - step2[15], bd); step1[16] = step2[16]; step1[17] = step2[17]; @@ -2345,58 +2377,58 @@ static void highbd_idct32_c(const tran_low_t *input, step1[19] = step2[19]; temp1 = (-step2[20] + step2[27]) * cospi_16_64; temp2 = (step2[20] + step2[27]) * cospi_16_64; - step1[20] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[27] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[20] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[27] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = (-step2[21] + step2[26]) * cospi_16_64; temp2 = (step2[21] + step2[26]) * cospi_16_64; - step1[21] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[26] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = (-step2[22] + step2[25]) * cospi_16_64; temp2 = (step2[22] + step2[25]) * cospi_16_64; - step1[22] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[25] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[22] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[25] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); temp1 = (-step2[23] + step2[24]) * cospi_16_64; temp2 = (step2[23] + step2[24]) * cospi_16_64; - step1[23] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); - step1[24] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); + step1[23] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[24] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); step1[28] = step2[28]; step1[29] = step2[29]; step1[30] = step2[30]; 
step1[31] = step2[31]; // final stage - output[0] = WRAPLOW(step1[0] + step1[31], bd); - output[1] = WRAPLOW(step1[1] + step1[30], bd); - output[2] = WRAPLOW(step1[2] + step1[29], bd); - output[3] = WRAPLOW(step1[3] + step1[28], bd); - output[4] = WRAPLOW(step1[4] + step1[27], bd); - output[5] = WRAPLOW(step1[5] + step1[26], bd); - output[6] = WRAPLOW(step1[6] + step1[25], bd); - output[7] = WRAPLOW(step1[7] + step1[24], bd); - output[8] = WRAPLOW(step1[8] + step1[23], bd); - output[9] = WRAPLOW(step1[9] + step1[22], bd); - output[10] = WRAPLOW(step1[10] + step1[21], bd); - output[11] = WRAPLOW(step1[11] + step1[20], bd); - output[12] = WRAPLOW(step1[12] + step1[19], bd); - output[13] = WRAPLOW(step1[13] + step1[18], bd); - output[14] = WRAPLOW(step1[14] + step1[17], bd); - output[15] = WRAPLOW(step1[15] + step1[16], bd); - output[16] = WRAPLOW(step1[15] - step1[16], bd); - output[17] = WRAPLOW(step1[14] - step1[17], bd); - output[18] = WRAPLOW(step1[13] - step1[18], bd); - output[19] = WRAPLOW(step1[12] - step1[19], bd); - output[20] = WRAPLOW(step1[11] - step1[20], bd); - output[21] = WRAPLOW(step1[10] - step1[21], bd); - output[22] = WRAPLOW(step1[9] - step1[22], bd); - output[23] = WRAPLOW(step1[8] - step1[23], bd); - output[24] = WRAPLOW(step1[7] - step1[24], bd); - output[25] = WRAPLOW(step1[6] - step1[25], bd); - output[26] = WRAPLOW(step1[5] - step1[26], bd); - output[27] = WRAPLOW(step1[4] - step1[27], bd); - output[28] = WRAPLOW(step1[3] - step1[28], bd); - output[29] = WRAPLOW(step1[2] - step1[29], bd); - output[30] = WRAPLOW(step1[1] - step1[30], bd); - output[31] = WRAPLOW(step1[0] - step1[31], bd); + output[0] = HIGHBD_WRAPLOW(step1[0] + step1[31], bd); + output[1] = HIGHBD_WRAPLOW(step1[1] + step1[30], bd); + output[2] = HIGHBD_WRAPLOW(step1[2] + step1[29], bd); + output[3] = HIGHBD_WRAPLOW(step1[3] + step1[28], bd); + output[4] = HIGHBD_WRAPLOW(step1[4] + step1[27], bd); + output[5] = HIGHBD_WRAPLOW(step1[5] + step1[26], bd); + output[6] = 
HIGHBD_WRAPLOW(step1[6] + step1[25], bd); + output[7] = HIGHBD_WRAPLOW(step1[7] + step1[24], bd); + output[8] = HIGHBD_WRAPLOW(step1[8] + step1[23], bd); + output[9] = HIGHBD_WRAPLOW(step1[9] + step1[22], bd); + output[10] = HIGHBD_WRAPLOW(step1[10] + step1[21], bd); + output[11] = HIGHBD_WRAPLOW(step1[11] + step1[20], bd); + output[12] = HIGHBD_WRAPLOW(step1[12] + step1[19], bd); + output[13] = HIGHBD_WRAPLOW(step1[13] + step1[18], bd); + output[14] = HIGHBD_WRAPLOW(step1[14] + step1[17], bd); + output[15] = HIGHBD_WRAPLOW(step1[15] + step1[16], bd); + output[16] = HIGHBD_WRAPLOW(step1[15] - step1[16], bd); + output[17] = HIGHBD_WRAPLOW(step1[14] - step1[17], bd); + output[18] = HIGHBD_WRAPLOW(step1[13] - step1[18], bd); + output[19] = HIGHBD_WRAPLOW(step1[12] - step1[19], bd); + output[20] = HIGHBD_WRAPLOW(step1[11] - step1[20], bd); + output[21] = HIGHBD_WRAPLOW(step1[10] - step1[21], bd); + output[22] = HIGHBD_WRAPLOW(step1[9] - step1[22], bd); + output[23] = HIGHBD_WRAPLOW(step1[8] - step1[23], bd); + output[24] = HIGHBD_WRAPLOW(step1[7] - step1[24], bd); + output[25] = HIGHBD_WRAPLOW(step1[6] - step1[25], bd); + output[26] = HIGHBD_WRAPLOW(step1[5] - step1[26], bd); + output[27] = HIGHBD_WRAPLOW(step1[4] - step1[27], bd); + output[28] = HIGHBD_WRAPLOW(step1[3] - step1[28], bd); + output[29] = HIGHBD_WRAPLOW(step1[2] - step1[29], bd); + output[30] = HIGHBD_WRAPLOW(step1[1] - step1[30], bd); + output[31] = HIGHBD_WRAPLOW(step1[0] - step1[31], bd); } void vpx_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, @@ -2472,9 +2504,9 @@ void vpx_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8, int a1; uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - tran_low_t out = WRAPLOW( - highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); - out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); + tran_low_t out = HIGHBD_WRAPLOW( + highbd_dct_const_round_shift(input[0] * cospi_16_64), bd); + out = 
HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd); a1 = ROUND_POWER_OF_TWO(out, 6); for (j = 0; j < 32; ++j) { diff --git a/libvpx/vpx_dsp/inv_txfm.h b/libvpx/vpx_dsp/inv_txfm.h index 23588139e..9cfe1be3a 100644 --- a/libvpx/vpx_dsp/inv_txfm.h +++ b/libvpx/vpx_dsp/inv_txfm.h @@ -21,7 +21,7 @@ extern "C" { #endif -static INLINE tran_low_t check_range(tran_high_t input) { +static INLINE tran_high_t check_range(tran_high_t input) { #if CONFIG_COEFFICIENT_RANGE_CHECKING // For valid VP9 input streams, intermediate stage coefficients should always // stay within the range of a signed 16 bit integer. Coefficients can go out @@ -32,17 +32,17 @@ static INLINE tran_low_t check_range(tran_high_t input) { assert(INT16_MIN <= input); assert(input <= INT16_MAX); #endif // CONFIG_COEFFICIENT_RANGE_CHECKING - return (tran_low_t)input; + return input; } -static INLINE tran_low_t dct_const_round_shift(tran_high_t input) { +static INLINE tran_high_t dct_const_round_shift(tran_high_t input) { tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); - return check_range(rv); + return (tran_high_t)rv; } #if CONFIG_VP9_HIGHBITDEPTH -static INLINE tran_low_t highbd_check_range(tran_high_t input, - int bd) { +static INLINE tran_high_t highbd_check_range(tran_high_t input, + int bd) { #if CONFIG_COEFFICIENT_RANGE_CHECKING // For valid highbitdepth VP9 streams, intermediate stage coefficients will // stay within the ranges: @@ -56,13 +56,12 @@ static INLINE tran_low_t highbd_check_range(tran_high_t input, (void) int_min; #endif // CONFIG_COEFFICIENT_RANGE_CHECKING (void) bd; - return (tran_low_t)input; + return input; } -static INLINE tran_low_t highbd_dct_const_round_shift(tran_high_t input, - int bd) { +static INLINE tran_high_t highbd_dct_const_round_shift(tran_high_t input) { tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); - return highbd_check_range(rv, bd); + return (tran_high_t)rv; } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -83,9 +82,20 @@ static INLINE 
tran_low_t highbd_dct_const_round_shift(tran_high_t input, // bd of 10 uses trans_low with 18bits, need to remove 14bits // bd of 12 uses trans_low with 20bits, need to remove 12bits // bd of x uses trans_low with 8+x bits, need to remove 24-x bits -#define WRAPLOW(x, bd) ((((int32_t)(x)) << (24 - bd)) >> (24 - bd)) -#else -#define WRAPLOW(x, bd) ((int32_t)(x)) + +#define WRAPLOW(x) ((((int32_t)check_range(x)) << 16) >> 16) +#if CONFIG_VP9_HIGHBITDEPTH +#define HIGHBD_WRAPLOW(x, bd) \ + ((((int32_t)highbd_check_range((x), bd)) << (24 - bd)) >> (24 - bd)) +#endif // CONFIG_VP9_HIGHBITDEPTH + +#else // CONFIG_EMULATE_HARDWARE + +#define WRAPLOW(x) ((int32_t)check_range(x)) +#if CONFIG_VP9_HIGHBITDEPTH +#define HIGHBD_WRAPLOW(x, bd) \ + ((int32_t)highbd_check_range((x), bd)) +#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_EMULATE_HARDWARE void idct4_c(const tran_low_t *input, tran_low_t *output); @@ -107,14 +117,14 @@ void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd); static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans, int bd) { - trans = WRAPLOW(trans, bd); - return clip_pixel_highbd(WRAPLOW(dest + trans, bd), bd); + trans = HIGHBD_WRAPLOW(trans, bd); + return clip_pixel_highbd(dest + (int)trans, bd); } #endif static INLINE uint8_t clip_pixel_add(uint8_t dest, tran_high_t trans) { - trans = WRAPLOW(trans, 8); - return clip_pixel(WRAPLOW(dest + trans, 8)); + trans = WRAPLOW(trans); + return clip_pixel(dest + (int)trans); } #ifdef __cplusplus } // extern "C" diff --git a/libvpx/vpx_dsp/loopfilter.c b/libvpx/vpx_dsp/loopfilter.c index 66f4d9576..645a1ab95 100644 --- a/libvpx/vpx_dsp/loopfilter.c +++ b/libvpx/vpx_dsp/loopfilter.c @@ -11,6 +11,7 @@ #include <stdlib.h> #include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_ports/mem.h" @@ -119,12 +120,12 @@ static INLINE void filter4(int8_t mask, uint8_t thresh, uint8_t *op1, void vpx_lpf_horizontal_4_c(uint8_t *s, 
int p /* pitch */, const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh, int count) { + const uint8_t *thresh) { int i; // loop filter designed to work using chars so that we can make maximum use // of 8 bit simd instructions. - for (i = 0; i < 8 * count; ++i) { + for (i = 0; i < 8; ++i) { const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p]; const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p]; const int8_t mask = filter_mask(*limit, *blimit, @@ -138,18 +139,17 @@ void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1); - vpx_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1); + vpx_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0); + vpx_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1); } void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { + const uint8_t *limit, const uint8_t *thresh) { int i; // loop filter designed to work using chars so that we can make maximum use // of 8 bit simd instructions. 
- for (i = 0; i < 8 * count; ++i) { + for (i = 0; i < 8; ++i) { const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; const int8_t mask = filter_mask(*limit, *blimit, @@ -163,9 +163,8 @@ void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1); - vpx_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, - thresh1, 1); + vpx_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0); + vpx_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1); } static INLINE void filter8(int8_t mask, uint8_t thresh, uint8_t flat, @@ -190,13 +189,12 @@ static INLINE void filter8(int8_t mask, uint8_t thresh, uint8_t flat, } void vpx_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { + const uint8_t *limit, const uint8_t *thresh) { int i; // loop filter designed to work using chars so that we can make maximum use // of 8 bit simd instructions. 
- for (i = 0; i < 8 * count; ++i) { + for (i = 0; i < 8; ++i) { const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p]; const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p]; @@ -213,16 +211,15 @@ void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1); - vpx_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1); + vpx_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0); + vpx_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1); } void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { + const uint8_t *limit, const uint8_t *thresh) { int i; - for (i = 0; i < 8 * count; ++i) { + for (i = 0; i < 8; ++i) { const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; const int8_t mask = filter_mask(*limit, *blimit, @@ -238,9 +235,8 @@ void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1); - vpx_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1, - thresh1, 1); + vpx_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0); + vpx_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1); } static INLINE void filter16(int8_t mask, uint8_t thresh, @@ -294,9 +290,9 @@ static INLINE void filter16(int8_t mask, uint8_t thresh, } } -void vpx_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { +static void mb_lpf_horizontal_edge_w(uint8_t *s, int p, const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, int 
count) { int i; // loop filter designed to work using chars so that we can make maximum use @@ -320,6 +316,16 @@ void vpx_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit, } } +void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int p, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh) { + mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1); +} + +void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int p, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh) { + mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2); +} + static void mb_lpf_vertical_edge_w(uint8_t *s, int p, const uint8_t *blimit, const uint8_t *limit, @@ -450,12 +456,12 @@ static INLINE void highbd_filter4(int8_t mask, uint8_t thresh, uint16_t *op1, void vpx_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */, const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh, int count, int bd) { + const uint8_t *thresh, int bd) { int i; // loop filter designed to work using chars so that we can make maximum use // of 8 bit simd instructions. - for (i = 0; i < 8 * count; ++i) { + for (i = 0; i < 8; ++i) { const uint16_t p3 = s[-4 * p]; const uint16_t p2 = s[-3 * p]; const uint16_t p1 = s[-2 * p]; @@ -479,18 +485,18 @@ void vpx_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int p, const uint8_t *limit1, const uint8_t *thresh1, int bd) { - vpx_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1, bd); - vpx_highbd_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1, bd); + vpx_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, bd); + vpx_highbd_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, bd); } void vpx_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, - int count, int bd) { + int bd) { int i; // loop filter designed to work using chars so that we can make maximum use // of 8 bit simd instructions. 
- for (i = 0; i < 8 * count; ++i) { + for (i = 0; i < 8; ++i) { const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; const int8_t mask = highbd_filter_mask(*limit, *blimit, @@ -508,9 +514,9 @@ void vpx_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch, const uint8_t *limit1, const uint8_t *thresh1, int bd) { - vpx_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1, bd); + vpx_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, bd); vpx_highbd_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, - thresh1, 1, bd); + thresh1, bd); } static INLINE void highbd_filter8(int8_t mask, uint8_t thresh, uint8_t flat, @@ -536,12 +542,12 @@ static INLINE void highbd_filter8(int8_t mask, uint8_t thresh, uint8_t flat, void vpx_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, - int count, int bd) { + int bd) { int i; // loop filter designed to work using chars so that we can make maximum use // of 8 bit simd instructions. 
- for (i = 0; i < 8 * count; ++i) { + for (i = 0; i < 8; ++i) { const uint16_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p]; const uint16_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p]; @@ -564,16 +570,16 @@ void vpx_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int p, const uint8_t *limit1, const uint8_t *thresh1, int bd) { - vpx_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1, bd); - vpx_highbd_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1, bd); + vpx_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, bd); + vpx_highbd_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, bd); } void vpx_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, - int count, int bd) { + int bd) { int i; - for (i = 0; i < 8 * count; ++i) { + for (i = 0; i < 8; ++i) { const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; const int8_t mask = highbd_filter_mask(*limit, *blimit, @@ -596,9 +602,9 @@ void vpx_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch, const uint8_t *limit1, const uint8_t *thresh1, int bd) { - vpx_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1, bd); + vpx_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, bd); vpx_highbd_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1, - thresh1, 1, bd); + thresh1, bd); } static INLINE void highbd_filter16(int8_t mask, uint8_t thresh, @@ -664,9 +670,11 @@ static INLINE void highbd_filter16(int8_t mask, uint8_t thresh, } } -void vpx_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count, int bd) { +static void highbd_mb_lpf_horizontal_edge_w(uint16_t *s, int p, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, + int count, int bd) { int i; // loop filter designed to work using chars so that we can make maximum use @@ -698,6 
+706,20 @@ void vpx_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit, } } +void vpx_highbd_lpf_horizontal_edge_8_c(uint16_t *s, int p, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, int bd) { + highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1, bd); +} + +void vpx_highbd_lpf_horizontal_edge_16_c(uint16_t *s, int p, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, int bd) { + highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2, bd); +} + static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, diff --git a/libvpx/vpx_dsp/mips/add_noise_msa.c b/libvpx/vpx_dsp/mips/add_noise_msa.c new file mode 100644 index 000000000..366770c0d --- /dev/null +++ b/libvpx/vpx_dsp/mips/add_noise_msa.c @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <stdlib.h> +#include "./macros_msa.h" + +void vpx_plane_add_noise_msa(uint8_t *start_ptr, char *noise, + char blackclamp[16], char whiteclamp[16], + char bothclamp[16], uint32_t width, + uint32_t height, int32_t pitch) { + uint32_t i, j; + + for (i = 0; i < height / 2; ++i) { + uint8_t *pos0_ptr = start_ptr + (2 * i) * pitch; + int8_t *ref0_ptr = (int8_t *)(noise + (rand() & 0xff)); + uint8_t *pos1_ptr = start_ptr + (2 * i + 1) * pitch; + int8_t *ref1_ptr = (int8_t *)(noise + (rand() & 0xff)); + for (j = width / 16; j--;) { + v16i8 temp00_s, temp01_s; + v16u8 temp00, temp01, black_clamp, white_clamp; + v16u8 pos0, ref0, pos1, ref1; + v16i8 const127 = __msa_ldi_b(127); + + pos0 = LD_UB(pos0_ptr); + ref0 = LD_UB(ref0_ptr); + pos1 = LD_UB(pos1_ptr); + ref1 = LD_UB(ref1_ptr); + black_clamp = (v16u8)__msa_fill_b(blackclamp[0]); + white_clamp = (v16u8)__msa_fill_b(whiteclamp[0]); + temp00 = (pos0 < black_clamp); + pos0 = __msa_bmnz_v(pos0, black_clamp, temp00); + temp01 = (pos1 < black_clamp); + pos1 = __msa_bmnz_v(pos1, black_clamp, temp01); + XORI_B2_128_UB(pos0, pos1); + temp00_s = __msa_adds_s_b((v16i8)white_clamp, const127); + temp00 = (v16u8)(temp00_s < pos0); + pos0 = (v16u8)__msa_bmnz_v((v16u8)pos0, (v16u8)temp00_s, temp00); + temp01_s = __msa_adds_s_b((v16i8)white_clamp, const127); + temp01 = (temp01_s < pos1); + pos1 = (v16u8)__msa_bmnz_v((v16u8)pos1, (v16u8)temp01_s, temp01); + XORI_B2_128_UB(pos0, pos1); + pos0 += ref0; + ST_UB(pos0, pos0_ptr); + pos1 += ref1; + ST_UB(pos1, pos1_ptr); + pos0_ptr += 16; + pos1_ptr += 16; + ref0_ptr += 16; + ref1_ptr += 16; + } + } +} diff --git a/libvpx/vp9/encoder/mips/msa/vp9_avg_msa.c b/libvpx/vpx_dsp/mips/avg_msa.c index 611adb1a2..52a24ed37 100644 --- a/libvpx/vp9/encoder/mips/msa/vp9_avg_msa.c +++ b/libvpx/vpx_dsp/mips/avg_msa.c @@ -8,10 +8,10 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/macros_msa.h" -uint32_t vp9_avg_8x8_msa(const uint8_t *src, int32_t src_stride) { +uint32_t vpx_avg_8x8_msa(const uint8_t *src, int32_t src_stride) { uint32_t sum_out; v16u8 src0, src1, src2, src3, src4, src5, src6, src7; v8u16 sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7; @@ -33,7 +33,7 @@ uint32_t vp9_avg_8x8_msa(const uint8_t *src, int32_t src_stride) { return sum_out; } -uint32_t vp9_avg_4x4_msa(const uint8_t *src, int32_t src_stride) { +uint32_t vpx_avg_4x4_msa(const uint8_t *src, int32_t src_stride) { uint32_t sum_out; uint32_t src0, src1, src2, src3; v16u8 vec = { 0 }; diff --git a/libvpx/vpx_dsp/mips/fwd_dct32x32_msa.c b/libvpx/vpx_dsp/mips/fwd_dct32x32_msa.c index 2115a348c..f29c14b3d 100644 --- a/libvpx/vpx_dsp/mips/fwd_dct32x32_msa.c +++ b/libvpx/vpx_dsp/mips/fwd_dct32x32_msa.c @@ -933,23 +933,21 @@ void vpx_fdct32x32_rd_msa(const int16_t *input, int16_t *out, } void vpx_fdct32x32_1_msa(const int16_t *input, int16_t *out, int32_t stride) { - out[1] = 0; - - out[0] = LD_HADD(input, stride); - out[0] += LD_HADD(input + 8, stride); - out[0] += LD_HADD(input + 16, stride); - out[0] += LD_HADD(input + 24, stride); - out[0] += LD_HADD(input + 32 * 8, stride); - out[0] += LD_HADD(input + 32 * 8 + 8, stride); - out[0] += LD_HADD(input + 32 * 8 + 16, stride); - out[0] += LD_HADD(input + 32 * 8 + 24, stride); - out[0] += LD_HADD(input + 32 * 16, stride); - out[0] += LD_HADD(input + 32 * 16 + 8, stride); - out[0] += LD_HADD(input + 32 * 16 + 16, stride); - out[0] += LD_HADD(input + 32 * 16 + 24, stride); - out[0] += LD_HADD(input + 32 * 24, stride); - out[0] += LD_HADD(input + 32 * 24 + 8, stride); - out[0] += LD_HADD(input + 32 * 24 + 16, stride); - out[0] += LD_HADD(input + 32 * 24 + 24, stride); - out[0] >>= 3; + int sum = LD_HADD(input, stride); + sum += LD_HADD(input + 8, stride); + sum += LD_HADD(input + 16, stride); + sum += LD_HADD(input + 24, stride); + sum += LD_HADD(input + 
32 * 8, stride); + sum += LD_HADD(input + 32 * 8 + 8, stride); + sum += LD_HADD(input + 32 * 8 + 16, stride); + sum += LD_HADD(input + 32 * 8 + 24, stride); + sum += LD_HADD(input + 32 * 16, stride); + sum += LD_HADD(input + 32 * 16 + 8, stride); + sum += LD_HADD(input + 32 * 16 + 16, stride); + sum += LD_HADD(input + 32 * 16 + 24, stride); + sum += LD_HADD(input + 32 * 24, stride); + sum += LD_HADD(input + 32 * 24 + 8, stride); + sum += LD_HADD(input + 32 * 24 + 16, stride); + sum += LD_HADD(input + 32 * 24 + 24, stride); + out[0] = (int16_t)(sum >> 3); } diff --git a/libvpx/vpx_dsp/mips/fwd_txfm_msa.c b/libvpx/vpx_dsp/mips/fwd_txfm_msa.c index f66dd5fce..0dd141f41 100644 --- a/libvpx/vpx_dsp/mips/fwd_txfm_msa.c +++ b/libvpx/vpx_dsp/mips/fwd_txfm_msa.c @@ -237,11 +237,9 @@ void vpx_fdct16x16_msa(const int16_t *input, int16_t *output, } void vpx_fdct16x16_1_msa(const int16_t *input, int16_t *out, int32_t stride) { - out[1] = 0; - - out[0] = LD_HADD(input, stride); - out[0] += LD_HADD(input + 8, stride); - out[0] += LD_HADD(input + 16 * 8, stride); - out[0] += LD_HADD(input + 16 * 8 + 8, stride); - out[0] >>= 1; + int sum = LD_HADD(input, stride); + sum += LD_HADD(input + 8, stride); + sum += LD_HADD(input + 16 * 8, stride); + sum += LD_HADD(input + 16 * 8 + 8, stride); + out[0] = (int16_t)(sum >> 1); } diff --git a/libvpx/vpx_dsp/mips/loopfilter_16_msa.c b/libvpx/vpx_dsp/mips/loopfilter_16_msa.c index b7c9f7bd0..a6c581d72 100644 --- a/libvpx/vpx_dsp/mips/loopfilter_16_msa.c +++ b/libvpx/vpx_dsp/mips/loopfilter_16_msa.c @@ -423,11 +423,11 @@ void vpx_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch, } } -void vpx_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch, - const uint8_t *b_limit_ptr, - const uint8_t *limit_ptr, - const uint8_t *thresh_ptr, - int32_t count) { +static void mb_lpf_horizontal_edge(uint8_t *src, int32_t pitch, + const uint8_t *b_limit_ptr, + const uint8_t *limit_ptr, + const uint8_t *thresh_ptr, + int32_t count) { if (1 == count) { 
uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d; uint64_t dword0, dword1; @@ -648,6 +648,20 @@ void vpx_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch, } } +void vpx_lpf_horizontal_edge_8_msa(uint8_t *src, int32_t pitch, + const uint8_t *b_limit_ptr, + const uint8_t *limit_ptr, + const uint8_t *thresh_ptr) { + mb_lpf_horizontal_edge(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr, 1); +} + +void vpx_lpf_horizontal_edge_16_msa(uint8_t *src, int32_t pitch, + const uint8_t *b_limit_ptr, + const uint8_t *limit_ptr, + const uint8_t *thresh_ptr) { + mb_lpf_horizontal_edge(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr, 2); +} + static void transpose_16x8_to_8x16(uint8_t *input, int32_t in_pitch, uint8_t *output, int32_t out_pitch) { v16u8 p7_org, p6_org, p5_org, p4_org, p3_org, p2_org, p1_org, p0_org; diff --git a/libvpx/vpx_dsp/mips/loopfilter_4_msa.c b/libvpx/vpx_dsp/mips/loopfilter_4_msa.c index daf5f38bf..936347031 100644 --- a/libvpx/vpx_dsp/mips/loopfilter_4_msa.c +++ b/libvpx/vpx_dsp/mips/loopfilter_4_msa.c @@ -13,14 +13,11 @@ void vpx_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, - const uint8_t *thresh_ptr, - int32_t count) { + const uint8_t *thresh_ptr) { uint64_t p1_d, p0_d, q0_d, q1_d; v16u8 mask, hev, flat, thresh, b_limit, limit; v16u8 p3, p2, p1, p0, q3, q2, q1, q0, p1_out, p0_out, q0_out, q1_out; - (void)count; - /* load vector elements */ LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3); @@ -74,14 +71,11 @@ void vpx_lpf_horizontal_4_dual_msa(uint8_t *src, int32_t pitch, void vpx_lpf_vertical_4_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, - const uint8_t *thresh_ptr, - int32_t count) { + const uint8_t *thresh_ptr) { v16u8 mask, hev, flat, limit, thresh, b_limit; v16u8 p3, p2, p1, p0, q3, q2, q1, q0; v8i16 vec0, vec1, vec2, vec3; - (void)count; - LD_UB8((src - 4), pitch, p3, p2, p1, p0, q0, q1, q2, q3); thresh = 
(v16u8)__msa_fill_b(*thresh_ptr); diff --git a/libvpx/vpx_dsp/mips/loopfilter_8_msa.c b/libvpx/vpx_dsp/mips/loopfilter_8_msa.c index 00b6db550..5b22bd002 100644 --- a/libvpx/vpx_dsp/mips/loopfilter_8_msa.c +++ b/libvpx/vpx_dsp/mips/loopfilter_8_msa.c @@ -13,8 +13,7 @@ void vpx_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, - const uint8_t *thresh_ptr, - int32_t count) { + const uint8_t *thresh_ptr) { uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d; v16u8 mask, hev, flat, thresh, b_limit, limit; v16u8 p3, p2, p1, p0, q3, q2, q1, q0; @@ -23,8 +22,6 @@ void vpx_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch, v8u16 p3_r, p2_r, p1_r, p0_r, q3_r, q2_r, q1_r, q0_r; v16i8 zero = { 0 }; - (void)count; - /* load vector elements */ LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3); @@ -161,8 +158,7 @@ void vpx_lpf_horizontal_8_dual_msa(uint8_t *src, int32_t pitch, void vpx_lpf_vertical_8_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, - const uint8_t *thresh_ptr, - int32_t count) { + const uint8_t *thresh_ptr) { v16u8 p3, p2, p1, p0, q3, q2, q1, q0; v16u8 p1_out, p0_out, q0_out, q1_out; v16u8 flat, mask, hev, thresh, b_limit, limit; @@ -171,8 +167,6 @@ void vpx_lpf_vertical_8_msa(uint8_t *src, int32_t pitch, v16u8 zero = { 0 }; v8i16 vec0, vec1, vec2, vec3, vec4; - (void)count; - /* load vector elements */ LD_UB8(src - 4, pitch, p3, p2, p1, p0, q0, q1, q2, q3); diff --git a/libvpx/vpx_dsp/mips/loopfilter_filters_dspr2.c b/libvpx/vpx_dsp/mips/loopfilter_filters_dspr2.c index 99a96d89b..8414b9ed5 100644 --- a/libvpx/vpx_dsp/mips/loopfilter_filters_dspr2.c +++ b/libvpx/vpx_dsp/mips/loopfilter_filters_dspr2.c @@ -23,8 +23,7 @@ void vpx_lpf_horizontal_4_dspr2(unsigned char *s, int pitch, const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh, - int count) { + const uint8_t *thresh) { uint8_t i; uint32_t mask; uint32_t hev; @@ -117,8 +116,7 @@ void 
vpx_lpf_vertical_4_dspr2(unsigned char *s, int pitch, const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh, - int count) { + const uint8_t *thresh) { uint8_t i; uint32_t mask, hev; uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; @@ -313,8 +311,8 @@ void vpx_lpf_horizontal_4_dual_dspr2(uint8_t *s, int p /* pitch */, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0, 1); - vpx_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1, 1); + vpx_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0); + vpx_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1); } void vpx_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */, @@ -324,8 +322,8 @@ void vpx_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0, 1); - vpx_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1, 1); + vpx_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0); + vpx_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1); } void vpx_lpf_vertical_4_dual_dspr2(uint8_t *s, int p, @@ -335,8 +333,8 @@ void vpx_lpf_vertical_4_dual_dspr2(uint8_t *s, int p, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0, 1); - vpx_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, 1); + vpx_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0); + vpx_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1); } void vpx_lpf_vertical_8_dual_dspr2(uint8_t *s, int p, @@ -346,9 +344,8 @@ void vpx_lpf_vertical_8_dual_dspr2(uint8_t *s, int p, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0, 1); - vpx_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, - 1); + vpx_lpf_vertical_8_dspr2(s, p, 
blimit0, limit0, thresh0); + vpx_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1); } void vpx_lpf_vertical_16_dual_dspr2(uint8_t *s, int p, diff --git a/libvpx/vpx_dsp/mips/loopfilter_mb_dspr2.c b/libvpx/vpx_dsp/mips/loopfilter_mb_dspr2.c index 4138f5697..dd0545eed 100644 --- a/libvpx/vpx_dsp/mips/loopfilter_mb_dspr2.c +++ b/libvpx/vpx_dsp/mips/loopfilter_mb_dspr2.c @@ -23,8 +23,7 @@ void vpx_lpf_horizontal_8_dspr2(unsigned char *s, int pitch, const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh, - int count) { + const uint8_t *thresh) { uint32_t mask; uint32_t hev, flat; uint8_t i; @@ -322,8 +321,7 @@ void vpx_lpf_vertical_8_dspr2(unsigned char *s, int pitch, const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh, - int count) { + const uint8_t *thresh) { uint8_t i; uint32_t mask, hev, flat; uint8_t *s1, *s2, *s3, *s4; diff --git a/libvpx/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c b/libvpx/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c index 8a4865073..85e167ca0 100644 --- a/libvpx/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c +++ b/libvpx/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c @@ -19,12 +19,12 @@ #include "vpx_mem/vpx_mem.h" #if HAVE_DSPR2 -void vpx_lpf_horizontal_16_dspr2(unsigned char *s, - int pitch, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, - int count) { +static void mb_lpf_horizontal_edge(unsigned char *s, + int pitch, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, + int count) { uint32_t mask; uint32_t hev, flat, flat2; uint8_t i; @@ -791,4 +791,18 @@ void vpx_lpf_horizontal_16_dspr2(unsigned char *s, s = s + 4; } } + +void vpx_lpf_horizontal_edge_8_dspr2(unsigned char *s, int pitch, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh) { + mb_lpf_horizontal_edge(s, pitch, blimit, limit, thresh, 1); +} + +void vpx_lpf_horizontal_edge_16_dspr2(unsigned char *s, int pitch, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh) { + 
mb_lpf_horizontal_edge(s, pitch, blimit, limit, thresh, 2); +} #endif // #if HAVE_DSPR2 diff --git a/libvpx/vpx_dsp/psnrhvs.c b/libvpx/vpx_dsp/psnrhvs.c index 300170579..0ffa1b251 100644 --- a/libvpx/vpx_dsp/psnrhvs.c +++ b/libvpx/vpx_dsp/psnrhvs.c @@ -200,6 +200,8 @@ static double calc_psnrhvs(const unsigned char *_src, int _systride, } } } + if (pixels <=0) + return 0; ret /= pixels; return ret; } diff --git a/libvpx/vpx_dsp/quantize.c b/libvpx/vpx_dsp/quantize.c index e4e741a90..80fcd66b0 100644 --- a/libvpx/vpx_dsp/quantize.c +++ b/libvpx/vpx_dsp/quantize.c @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include "./vpx_dsp_rtcd.h" #include "vpx_dsp/quantize.h" #include "vpx_mem/vpx_mem.h" @@ -52,7 +53,7 @@ void vpx_highbd_quantize_dc(const tran_low_t *coeff_ptr, const int coeff_sign = (coeff >> 31); const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; const int64_t tmp = abs_coeff + round_ptr[0]; - const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 16); + const int abs_qcoeff = (int)((tmp * quant) >> 16); qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr; if (abs_qcoeff) @@ -108,7 +109,7 @@ void vpx_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, const int coeff_sign = (coeff >> 31); const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 1); - const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 15); + const int abs_qcoeff = (int)((tmp * quant) >> 15); qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / 2; if (abs_qcoeff) diff --git a/libvpx/vpx_dsp/sad.c b/libvpx/vpx_dsp/sad.c index c0c3ff996..f1f951f14 100644 --- a/libvpx/vpx_dsp/sad.c +++ b/libvpx/vpx_dsp/sad.c @@ -33,47 +33,6 @@ static INLINE unsigned int sad(const uint8_t *a, int a_stride, return sad; } -// TODO(johannkoenig): this moved to vpx_dsp, should be 
able to clean this up. -/* Remove dependency on vp9 variance function by duplicating vp9_comp_avg_pred. - * The function averages every corresponding element of the buffers and stores - * the value in a third buffer, comp_pred. - * pred and comp_pred are assumed to have stride = width - * In the usage below comp_pred is a local array. - */ -static INLINE void avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, - int height, const uint8_t *ref, int ref_stride) { - int i, j; - - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - const int tmp = pred[j] + ref[j]; - comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); - } - comp_pred += width; - pred += width; - ref += ref_stride; - } -} - -#if CONFIG_VP9_HIGHBITDEPTH -static INLINE void highbd_avg_pred(uint16_t *comp_pred, const uint8_t *pred8, - int width, int height, const uint8_t *ref8, - int ref_stride) { - int i, j; - uint16_t *pred = CONVERT_TO_SHORTPTR(pred8); - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - const int tmp = pred[j] + ref[j]; - comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); - } - comp_pred += width; - pred += width; - ref += ref_stride; - } -} -#endif // CONFIG_VP9_HIGHBITDEPTH - #define sadMxN(m, n) \ unsigned int vpx_sad##m##x##n##_c(const uint8_t *src, int src_stride, \ const uint8_t *ref, int ref_stride) { \ @@ -83,7 +42,7 @@ unsigned int vpx_sad##m##x##n##_avg_c(const uint8_t *src, int src_stride, \ const uint8_t *ref, int ref_stride, \ const uint8_t *second_pred) { \ uint8_t comp_pred[m * n]; \ - avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \ + vpx_comp_avg_pred_c(comp_pred, second_pred, m, n, ref, ref_stride); \ return sad(src, src_stride, comp_pred, m, m, n); \ } @@ -221,7 +180,7 @@ unsigned int vpx_highbd_sad##m##x##n##_avg_c(const uint8_t *src, \ int ref_stride, \ const uint8_t *second_pred) { \ uint16_t comp_pred[m * n]; \ - highbd_avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \ + 
vpx_highbd_comp_avg_pred_c(comp_pred, second_pred, m, n, ref, ref_stride); \ return highbd_sadb(src, src_stride, comp_pred, m, m, n); \ } diff --git a/libvpx/vpx_dsp/variance.c b/libvpx/vpx_dsp/variance.c index e8bddb0a0..d960c5435 100644 --- a/libvpx/vpx_dsp/variance.c +++ b/libvpx/vpx_dsp/variance.c @@ -275,7 +275,7 @@ void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, #if CONFIG_VP9_HIGHBITDEPTH static void highbd_variance64(const uint8_t *a8, int a_stride, const uint8_t *b8, int b_stride, - int w, int h, uint64_t *sse, uint64_t *sum) { + int w, int h, uint64_t *sse, int64_t *sum) { int i, j; uint16_t *a = CONVERT_TO_SHORTPTR(a8); @@ -298,7 +298,7 @@ static void highbd_8_variance(const uint8_t *a8, int a_stride, const uint8_t *b8, int b_stride, int w, int h, uint32_t *sse, int *sum) { uint64_t sse_long = 0; - uint64_t sum_long = 0; + int64_t sum_long = 0; highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); *sse = (uint32_t)sse_long; *sum = (int)sum_long; @@ -308,7 +308,7 @@ static void highbd_10_variance(const uint8_t *a8, int a_stride, const uint8_t *b8, int b_stride, int w, int h, uint32_t *sse, int *sum) { uint64_t sse_long = 0; - uint64_t sum_long = 0; + int64_t sum_long = 0; highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4); *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2); @@ -318,7 +318,7 @@ static void highbd_12_variance(const uint8_t *a8, int a_stride, const uint8_t *b8, int b_stride, int w, int h, uint32_t *sse, int *sum) { uint64_t sse_long = 0; - uint64_t sum_long = 0; + int64_t sum_long = 0; highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8); *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4); @@ -341,8 +341,10 @@ uint32_t vpx_highbd_10_variance##W##x##H##_c(const uint8_t *a, \ int b_stride, \ uint32_t *sse) { \ int sum; \ + int64_t var; \ highbd_10_variance(a, a_stride, b, 
b_stride, W, H, sse, &sum); \ - return *sse - (((int64_t)sum * sum) / (W * H)); \ + var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \ + return (var >= 0) ? (uint32_t)var : 0; \ } \ \ uint32_t vpx_highbd_12_variance##W##x##H##_c(const uint8_t *a, \ @@ -351,8 +353,10 @@ uint32_t vpx_highbd_12_variance##W##x##H##_c(const uint8_t *a, \ int b_stride, \ uint32_t *sse) { \ int sum; \ + int64_t var; \ highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ - return *sse - (((int64_t)sum * sum) / (W * H)); \ + var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \ + return (var >= 0) ? (uint32_t)var : 0; \ } #define HIGHBD_GET_VAR(S) \ diff --git a/libvpx/vpx_dsp/variance.h b/libvpx/vpx_dsp/variance.h index cd0fd9878..c18d9b48f 100644 --- a/libvpx/vpx_dsp/variance.h +++ b/libvpx/vpx_dsp/variance.h @@ -74,7 +74,7 @@ typedef struct variance_vtable { } vp8_variance_fn_ptr_t; #endif // CONFIG_VP8 -#if CONFIG_VP9 || CONFIG_VP10 +#if CONFIG_VP9 typedef struct vp9_variance_vtable { vpx_sad_fn_t sdf; vpx_sad_avg_fn_t sdaf; @@ -85,7 +85,7 @@ typedef struct vp9_variance_vtable { vpx_sad_multi_fn_t sdx8f; vpx_sad_multi_d_fn_t sdx4df; } vp9_variance_fn_ptr_t; -#endif // CONFIG_VP9 || CONFIG_VP10 +#endif // CONFIG_VP9 #ifdef __cplusplus } // extern "C" diff --git a/libvpx/vpx_dsp/vpx_dsp.mk b/libvpx/vpx_dsp/vpx_dsp.mk index 9620eaa03..84b529136 100644 --- a/libvpx/vpx_dsp/vpx_dsp.mk +++ b/libvpx/vpx_dsp/vpx_dsp.mk @@ -52,6 +52,12 @@ DSP_SRCS-$(HAVE_SSE2) += x86/highbd_intrapred_sse2.asm endif # CONFIG_USE_X86INC endif # CONFIG_VP9_HIGHBITDEPTH +ifneq ($(filter yes,$(CONFIG_POSTPROC) $(CONFIG_VP9_POSTPROC)),) +DSP_SRCS-yes += add_noise.c +DSP_SRCS-$(HAVE_MSA) += mips/add_noise_msa.c +DSP_SRCS-$(HAVE_SSE2) += x86/add_noise_sse2.asm +endif # CONFIG_POSTPROC + DSP_SRCS-$(HAVE_NEON_ASM) += arm/intrapred_neon_asm$(ASM) DSP_SRCS-$(HAVE_NEON) += arm/intrapred_neon.c DSP_SRCS-$(HAVE_MSA) += mips/intrapred_msa.c @@ -128,7 +134,6 @@ DSP_SRCS-yes += loopfilter.c 
DSP_SRCS-$(ARCH_X86)$(ARCH_X86_64) += x86/loopfilter_sse2.c DSP_SRCS-$(HAVE_AVX2) += x86/loopfilter_avx2.c -DSP_SRCS-$(HAVE_MMX) += x86/loopfilter_mmx.asm DSP_SRCS-$(HAVE_NEON) += arm/loopfilter_neon.c ifeq ($(HAVE_NEON_ASM),yes) @@ -164,7 +169,7 @@ DSP_SRCS-yes += txfm_common.h DSP_SRCS-$(HAVE_SSE2) += x86/txfm_common_sse2.h DSP_SRCS-$(HAVE_MSA) += mips/txfm_macros_msa.h # forward transform -ifneq ($(filter yes,$(CONFIG_VP9_ENCODER) $(CONFIG_VP10_ENCODER)),) +ifeq ($(CONFIG_VP9_ENCODER),yes) DSP_SRCS-yes += fwd_txfm.c DSP_SRCS-yes += fwd_txfm.h DSP_SRCS-$(HAVE_SSE2) += x86/fwd_txfm_sse2.h @@ -182,10 +187,10 @@ DSP_SRCS-$(HAVE_NEON) += arm/fwd_txfm_neon.c DSP_SRCS-$(HAVE_MSA) += mips/fwd_txfm_msa.h DSP_SRCS-$(HAVE_MSA) += mips/fwd_txfm_msa.c DSP_SRCS-$(HAVE_MSA) += mips/fwd_dct32x32_msa.c -endif # CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER +endif # CONFIG_VP9_ENCODER # inverse transform -ifneq ($(filter yes,$(CONFIG_VP9) $(CONFIG_VP10)),) +ifeq ($(CONFIG_VP9),yes) DSP_SRCS-yes += inv_txfm.h DSP_SRCS-yes += inv_txfm.c DSP_SRCS-$(HAVE_SSE2) += x86/inv_txfm_sse2.h @@ -235,10 +240,10 @@ DSP_SRCS-$(HAVE_DSPR2) += mips/itrans16_dspr2.c DSP_SRCS-$(HAVE_DSPR2) += mips/itrans32_dspr2.c DSP_SRCS-$(HAVE_DSPR2) += mips/itrans32_cols_dspr2.c endif # CONFIG_VP9_HIGHBITDEPTH -endif # CONFIG_VP9 || CONFIG_VP10 +endif # CONFIG_VP9 # quantization -ifneq ($(filter yes, $(CONFIG_VP9_ENCODER) $(CONFIG_VP10_ENCODER)),) +ifeq ($(CONFIG_VP9_ENCODER),yes) DSP_SRCS-yes += quantize.c DSP_SRCS-yes += quantize.h @@ -252,7 +257,20 @@ DSP_SRCS-$(HAVE_SSSE3) += x86/quantize_ssse3_x86_64.asm DSP_SRCS-$(HAVE_AVX) += x86/quantize_avx_x86_64.asm endif endif -endif # CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER + +# avg +DSP_SRCS-yes += avg.c +DSP_SRCS-$(HAVE_SSE2) += x86/avg_intrin_sse2.c +DSP_SRCS-$(HAVE_NEON) += arm/avg_neon.c +DSP_SRCS-$(HAVE_MSA) += mips/avg_msa.c +DSP_SRCS-$(HAVE_NEON) += arm/hadamard_neon.c +ifeq ($(ARCH_X86_64),yes) +ifeq ($(CONFIG_USE_X86INC),yes) +DSP_SRCS-$(HAVE_SSSE3) += 
x86/avg_ssse3_x86_64.asm +endif +endif + +endif # CONFIG_VP9_ENCODER ifeq ($(CONFIG_ENCODERS),yes) DSP_SRCS-yes += sad.c @@ -266,7 +284,6 @@ DSP_SRCS-$(HAVE_NEON) += arm/subtract_neon.c DSP_SRCS-$(HAVE_MSA) += mips/sad_msa.c DSP_SRCS-$(HAVE_MSA) += mips/subtract_msa.c -DSP_SRCS-$(HAVE_MMX) += x86/sad_mmx.asm DSP_SRCS-$(HAVE_SSE3) += x86/sad_sse3.asm DSP_SRCS-$(HAVE_SSSE3) += x86/sad_ssse3.asm DSP_SRCS-$(HAVE_SSE4_1) += x86/sad_sse4.asm @@ -304,8 +321,6 @@ DSP_SRCS-$(HAVE_NEON) += arm/variance_neon.c DSP_SRCS-$(HAVE_MSA) += mips/variance_msa.c DSP_SRCS-$(HAVE_MSA) += mips/sub_pixel_variance_msa.c -DSP_SRCS-$(HAVE_MMX) += x86/variance_mmx.c -DSP_SRCS-$(HAVE_MMX) += x86/variance_impl_mmx.asm DSP_SRCS-$(HAVE_SSE) += x86/variance_sse2.c DSP_SRCS-$(HAVE_SSE2) += x86/variance_sse2.c # Contains SSE2 and SSSE3 DSP_SRCS-$(HAVE_SSE2) += x86/halfpix_variance_sse2.c diff --git a/libvpx/vpx_dsp/vpx_dsp_common.h b/libvpx/vpx_dsp/vpx_dsp_common.h index a9e180e79..a1d0a51ef 100644 --- a/libvpx/vpx_dsp/vpx_dsp_common.h +++ b/libvpx/vpx_dsp/vpx_dsp_common.h @@ -8,12 +8,11 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VPX_DSP_COMMON_H_ -#define VPX_DSP_COMMON_H_ +#ifndef VPX_DSP_VPX_DSP_COMMON_H_ +#define VPX_DSP_VPX_DSP_COMMON_H_ #include "./vpx_config.h" #include "vpx/vpx_integer.h" -#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_ports/mem.h" #ifdef __cplusplus @@ -67,4 +66,4 @@ static INLINE uint16_t clip_pixel_highbd(int val, int bd) { } // extern "C" #endif -#endif // VPX_DSP_COMMON_H_ +#endif // VPX_DSP_VPX_DSP_COMMON_H_ diff --git a/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl b/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl index b369b0548..37239a195 100644 --- a/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -55,13 +55,13 @@ if ($opts{arch} eq "x86_64") { # add_proto qw/void vpx_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d207_predictor_4x4/, "$ssse3_x86inc"; +specialize qw/vpx_d207_predictor_4x4/, "$sse2_x86inc"; add_proto qw/void vpx_d207e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d207e_predictor_4x4/; add_proto qw/void vpx_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d45_predictor_4x4 neon/, "$ssse3_x86inc"; +specialize qw/vpx_d45_predictor_4x4 neon/, "$sse2_x86inc"; add_proto qw/void vpx_d45e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d45e_predictor_4x4/; @@ -76,7 +76,7 @@ add_proto qw/void vpx_d63f_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, co specialize qw/vpx_d63f_predictor_4x4/; add_proto qw/void vpx_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_h_predictor_4x4 neon dspr2 msa/, "$ssse3_x86inc"; +specialize qw/vpx_h_predictor_4x4 neon dspr2 msa/, "$sse2_x86inc"; add_proto qw/void vpx_he_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t 
*left"; specialize qw/vpx_he_predictor_4x4/; @@ -91,25 +91,25 @@ add_proto qw/void vpx_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, co specialize qw/vpx_d153_predictor_4x4/, "$ssse3_x86inc"; add_proto qw/void vpx_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_v_predictor_4x4 neon msa/, "$sse_x86inc"; +specialize qw/vpx_v_predictor_4x4 neon msa/, "$sse2_x86inc"; add_proto qw/void vpx_ve_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_ve_predictor_4x4/; add_proto qw/void vpx_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_tm_predictor_4x4 neon dspr2 msa/, "$sse_x86inc"; +specialize qw/vpx_tm_predictor_4x4 neon dspr2 msa/, "$sse2_x86inc"; add_proto qw/void vpx_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_dc_predictor_4x4 dspr2 msa neon/, "$sse_x86inc"; +specialize qw/vpx_dc_predictor_4x4 dspr2 msa neon/, "$sse2_x86inc"; add_proto qw/void vpx_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_dc_top_predictor_4x4 msa neon/, "$sse_x86inc"; +specialize qw/vpx_dc_top_predictor_4x4 msa neon/, "$sse2_x86inc"; add_proto qw/void vpx_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_dc_left_predictor_4x4 msa neon/, "$sse_x86inc"; +specialize qw/vpx_dc_left_predictor_4x4 msa neon/, "$sse2_x86inc"; add_proto qw/void vpx_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_dc_128_predictor_4x4 msa neon/, "$sse_x86inc"; +specialize qw/vpx_dc_128_predictor_4x4 msa neon/, "$sse2_x86inc"; add_proto qw/void vpx_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const 
uint8_t *left"; specialize qw/vpx_d207_predictor_8x8/, "$ssse3_x86inc"; @@ -118,7 +118,7 @@ add_proto qw/void vpx_d207e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, c specialize qw/vpx_d207e_predictor_8x8/; add_proto qw/void vpx_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d45_predictor_8x8 neon/, "$ssse3_x86inc"; +specialize qw/vpx_d45_predictor_8x8 neon/, "$sse2_x86inc"; add_proto qw/void vpx_d45e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d45e_predictor_8x8/; @@ -130,7 +130,7 @@ add_proto qw/void vpx_d63e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, co specialize qw/vpx_d63e_predictor_8x8/; add_proto qw/void vpx_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_h_predictor_8x8 neon dspr2 msa/, "$ssse3_x86inc"; +specialize qw/vpx_h_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc"; add_proto qw/void vpx_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d117_predictor_8x8/; @@ -142,22 +142,22 @@ add_proto qw/void vpx_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, co specialize qw/vpx_d153_predictor_8x8/, "$ssse3_x86inc"; add_proto qw/void vpx_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_v_predictor_8x8 neon msa/, "$sse_x86inc"; +specialize qw/vpx_v_predictor_8x8 neon msa/, "$sse2_x86inc"; add_proto qw/void vpx_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_tm_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc"; add_proto qw/void vpx_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_dc_predictor_8x8 dspr2 neon msa/, "$sse_x86inc"; +specialize qw/vpx_dc_predictor_8x8 dspr2 neon 
msa/, "$sse2_x86inc"; add_proto qw/void vpx_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_dc_top_predictor_8x8 neon msa/, "$sse_x86inc"; +specialize qw/vpx_dc_top_predictor_8x8 neon msa/, "$sse2_x86inc"; add_proto qw/void vpx_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_dc_left_predictor_8x8 neon msa/, "$sse_x86inc"; +specialize qw/vpx_dc_left_predictor_8x8 neon msa/, "$sse2_x86inc"; add_proto qw/void vpx_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_dc_128_predictor_8x8 neon msa/, "$sse_x86inc"; +specialize qw/vpx_dc_128_predictor_8x8 neon msa/, "$sse2_x86inc"; add_proto qw/void vpx_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d207_predictor_16x16/, "$ssse3_x86inc"; @@ -178,7 +178,7 @@ add_proto qw/void vpx_d63e_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, specialize qw/vpx_d63e_predictor_16x16/; add_proto qw/void vpx_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_h_predictor_16x16 neon dspr2 msa/, "$ssse3_x86inc"; +specialize qw/vpx_h_predictor_16x16 neon dspr2 msa/, "$sse2_x86inc"; add_proto qw/void vpx_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d117_predictor_16x16/; @@ -226,7 +226,7 @@ add_proto qw/void vpx_d63e_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, specialize qw/vpx_d63e_predictor_32x32/; add_proto qw/void vpx_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_h_predictor_32x32 neon msa/, "$ssse3_x86inc"; +specialize qw/vpx_h_predictor_32x32 neon msa/, "$sse2_x86inc"; add_proto qw/void vpx_d117_predictor_32x32/, "uint8_t *dst, 
ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d117_predictor_32x32/; @@ -241,7 +241,7 @@ add_proto qw/void vpx_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, con specialize qw/vpx_v_predictor_32x32 neon msa/, "$sse2_x86inc"; add_proto qw/void vpx_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_tm_predictor_32x32 neon msa/, "$sse2_x86_64_x86inc"; +specialize qw/vpx_tm_predictor_32x32 neon msa/, "$sse2_x86inc"; add_proto qw/void vpx_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_dc_predictor_32x32 msa neon/, "$sse2_x86inc"; @@ -288,13 +288,13 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_highbd_d153_predictor_4x4/; add_proto qw/void vpx_highbd_v_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_v_predictor_4x4/, "$sse_x86inc"; + specialize qw/vpx_highbd_v_predictor_4x4/, "$sse2_x86inc"; add_proto qw/void vpx_highbd_tm_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_tm_predictor_4x4/, "$sse_x86inc"; + specialize qw/vpx_highbd_tm_predictor_4x4/, "$sse2_x86inc"; add_proto qw/void vpx_highbd_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_predictor_4x4/, "$sse_x86inc"; + specialize qw/vpx_highbd_dc_predictor_4x4/, "$sse2_x86inc"; add_proto qw/void vpx_highbd_dc_top_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; specialize qw/vpx_highbd_dc_top_predictor_4x4/; @@ -387,7 +387,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_highbd_v_predictor_16x16/, "$sse2_x86inc"; add_proto qw/void vpx_highbd_tm_predictor_16x16/, 
"uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_tm_predictor_16x16/, "$sse2_x86_64_x86inc"; + specialize qw/vpx_highbd_tm_predictor_16x16/, "$sse2_x86inc"; add_proto qw/void vpx_highbd_dc_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; specialize qw/vpx_highbd_dc_predictor_16x16/, "$sse2_x86inc"; @@ -435,10 +435,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_highbd_v_predictor_32x32/, "$sse2_x86inc"; add_proto qw/void vpx_highbd_tm_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_tm_predictor_32x32/, "$sse2_x86_64_x86inc"; + specialize qw/vpx_highbd_tm_predictor_32x32/, "$sse2_x86inc"; add_proto qw/void vpx_highbd_dc_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_predictor_32x32/, "$sse2_x86_64_x86inc"; + specialize qw/vpx_highbd_dc_predictor_32x32/, "$sse2_x86inc"; add_proto qw/void vpx_highbd_dc_top_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; specialize qw/vpx_highbd_dc_top_predictor_32x32/; @@ -535,32 +535,36 @@ add_proto qw/void vpx_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8 specialize qw/vpx_lpf_vertical_16_dual sse2 neon_asm dspr2 msa/; $vpx_lpf_vertical_16_dual_neon_asm=vpx_lpf_vertical_16_dual_neon; -add_proto qw/void vpx_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +add_proto qw/void vpx_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; specialize qw/vpx_lpf_vertical_8 sse2 neon dspr2 msa/; add_proto qw/void vpx_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const 
uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; specialize qw/vpx_lpf_vertical_8_dual sse2 neon_asm dspr2 msa/; $vpx_lpf_vertical_8_dual_neon_asm=vpx_lpf_vertical_8_dual_neon; -add_proto qw/void vpx_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; -specialize qw/vpx_lpf_vertical_4 mmx neon dspr2 msa/; +add_proto qw/void vpx_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; +specialize qw/vpx_lpf_vertical_4 sse2 neon dspr2 msa/; add_proto qw/void vpx_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; specialize qw/vpx_lpf_vertical_4_dual sse2 neon dspr2 msa/; -add_proto qw/void vpx_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; -specialize qw/vpx_lpf_horizontal_16 sse2 avx2 neon_asm dspr2 msa/; -$vpx_lpf_horizontal_16_neon_asm=vpx_lpf_horizontal_16_neon; +add_proto qw/void vpx_lpf_horizontal_edge_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; +specialize qw/vpx_lpf_horizontal_edge_8 sse2 avx2 neon_asm dspr2 msa/; +$vpx_lpf_horizontal_edge_8_neon_asm=vpx_lpf_horizontal_edge_8_neon; -add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +add_proto qw/void vpx_lpf_horizontal_edge_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; +specialize qw/vpx_lpf_horizontal_edge_16 sse2 avx2 neon_asm dspr2 msa/; +$vpx_lpf_horizontal_edge_16_neon_asm=vpx_lpf_horizontal_edge_16_neon; + +add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t 
*thresh"; specialize qw/vpx_lpf_horizontal_8 sse2 neon dspr2 msa/; add_proto qw/void vpx_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; specialize qw/vpx_lpf_horizontal_8_dual sse2 neon_asm dspr2 msa/; $vpx_lpf_horizontal_8_dual_neon_asm=vpx_lpf_horizontal_8_dual_neon; -add_proto qw/void vpx_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; -specialize qw/vpx_lpf_horizontal_4 mmx neon dspr2 msa/; +add_proto qw/void vpx_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; +specialize qw/vpx_lpf_horizontal_4 sse2 neon dspr2 msa/; add_proto qw/void vpx_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; specialize qw/vpx_lpf_horizontal_4_dual sse2 neon dspr2 msa/; @@ -572,28 +576,31 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vpx_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; specialize qw/vpx_highbd_lpf_vertical_16_dual sse2/; - add_proto qw/void vpx_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; + add_proto qw/void vpx_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; specialize qw/vpx_highbd_lpf_vertical_8 sse2/; add_proto qw/void vpx_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; specialize qw/vpx_highbd_lpf_vertical_8_dual 
sse2/; - add_proto qw/void vpx_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; + add_proto qw/void vpx_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; specialize qw/vpx_highbd_lpf_vertical_4 sse2/; add_proto qw/void vpx_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; specialize qw/vpx_highbd_lpf_vertical_4_dual sse2/; - add_proto qw/void vpx_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; - specialize qw/vpx_highbd_lpf_horizontal_16 sse2/; + add_proto qw/void vpx_highbd_lpf_horizontal_edge_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; + specialize qw/vpx_highbd_lpf_horizontal_edge_8 sse2/; + + add_proto qw/void vpx_highbd_lpf_horizontal_edge_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; + specialize qw/vpx_highbd_lpf_horizontal_edge_16 sse2/; - add_proto qw/void vpx_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; + add_proto qw/void vpx_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; specialize qw/vpx_highbd_lpf_horizontal_8 sse2/; add_proto qw/void vpx_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; specialize qw/vpx_highbd_lpf_horizontal_8_dual sse2/; - add_proto qw/void vpx_highbd_lpf_horizontal_4/, "uint16_t 
*s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; + add_proto qw/void vpx_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; specialize qw/vpx_highbd_lpf_horizontal_4 sse2/; add_proto qw/void vpx_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; @@ -607,7 +614,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # # Forward transform # -if ((vpx_config("CONFIG_VP9_ENCODER") eq "yes") || (vpx_config("CONFIG_VP10_ENCODER") eq "yes")) { +if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") { if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vpx_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride"; specialize qw/vpx_fdct4x4 sse2/; @@ -687,11 +694,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vpx_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride"; specialize qw/vpx_fdct32x32_1 sse2 msa/; } # CONFIG_VP9_HIGHBITDEPTH -} # CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER +} # CONFIG_VP9_ENCODER # # Inverse transform -if ((vpx_config("CONFIG_VP9") eq "yes") || (vpx_config("CONFIG_VP10") eq "yes")) { +if (vpx_config("CONFIG_VP9") eq "yes") { if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # Note as optimized versions of these functions are added we need to add a check to ensure # that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only. 
@@ -699,7 +706,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_iwht4x4_1_add/; add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_iwht4x4_16_add/; + specialize qw/vpx_iwht4x4_16_add/, "$sse2_x86inc"; add_proto qw/void vpx_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; specialize qw/vpx_highbd_idct4x4_1_add/; @@ -754,12 +761,15 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; specialize qw/vpx_idct32x32_1024_add/; + add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct32x32_135_add/; + add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; specialize qw/vpx_idct32x32_34_add/; add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; specialize qw/vpx_idct32x32_1_add/; - + add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; specialize qw/vpx_highbd_idct4x4_16_add/; @@ -782,10 +792,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_idct4x4_1_add sse2/; add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_idct8x8_64_add sse2/; + specialize qw/vpx_idct8x8_64_add sse2/, "$ssse3_x86_64_x86inc"; add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_idct8x8_12_add sse2/; + specialize qw/vpx_idct8x8_12_add sse2/, "$ssse3_x86_64_x86inc"; add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; specialize qw/vpx_idct8x8_1_add sse2/; @@ -800,10 +810,15 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize 
qw/vpx_idct16x16_1_add sse2/; add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_idct32x32_1024_add sse2/; + specialize qw/vpx_idct32x32_1024_add sse2/, "$ssse3_x86_64_x86inc"; + + add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct32x32_135_add sse2/, "$ssse3_x86_64_x86inc"; + # Need to add 135 eob idct32x32 implementations. + $vpx_idct32x32_135_add_sse2=vpx_idct32x32_1024_add_sse2; add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_idct32x32_34_add sse2/; + specialize qw/vpx_idct32x32_34_add sse2/, "$ssse3_x86_64_x86inc"; add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; specialize qw/vpx_idct32x32_1_add sse2/; @@ -853,6 +868,9 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; specialize qw/vpx_idct32x32_1024_add/; + add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct32x32_135_add/; + add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; specialize qw/vpx_idct32x32_34_add/; @@ -890,12 +908,20 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_idct16x16_10_add sse2 neon dspr2 msa/; add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_idct32x32_1024_add sse2 neon dspr2 msa/; + specialize qw/vpx_idct32x32_1024_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc"; + + add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct32x32_135_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc"; + # Need to add 135 eob idct32x32 
implementations. + $vpx_idct32x32_135_add_sse2=vpx_idct32x32_1024_add_sse2; + $vpx_idct32x32_135_add_neon=vpx_idct32x32_1024_add_neon; + $vpx_idct32x32_135_add_dspr2=vpx_idct32x32_1024_add_dspr2; + $vpx_idct32x32_135_add_msa=vpx_idct32x32_1024_add_msa; add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_idct32x32_34_add sse2 neon_asm dspr2 msa/; + specialize qw/vpx_idct32x32_34_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc"; # Need to add 34 eob idct32x32 neon implementation. - $vpx_idct32x32_34_add_neon_asm=vpx_idct32x32_1024_add_neon; + $vpx_idct32x32_34_add_neon=vpx_idct32x32_1024_add_neon; add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; specialize qw/vpx_idct32x32_1_add sse2 neon dspr2 msa/; @@ -907,12 +933,12 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_iwht4x4_16_add msa/, "$sse2_x86inc"; } # CONFIG_EMULATE_HARDWARE } # CONFIG_VP9_HIGHBITDEPTH -} # CONFIG_VP9 || CONFIG_VP10 +} # CONFIG_VP9 # # Quantization # -if ((vpx_config("CONFIG_VP9_ENCODER") eq "yes") || (vpx_config("CONFIG_VP10_ENCODER") eq "yes")) { +if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") { add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vpx_quantize_b sse2/, "$ssse3_x86_64_x86inc", "$avx_x86_64_x86inc"; @@ -926,7 +952,7 @@ if ((vpx_config("CONFIG_VP9_ENCODER") eq "yes") || (vpx_config("CONFIG_VP10_ENCO add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t 
*qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vpx_highbd_quantize_b_32x32 sse2/; } # CONFIG_VP9_HIGHBITDEPTH -} # CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER +} # CONFIG_VP9_ENCODER if (vpx_config("CONFIG_ENCODERS") eq "yes") { # @@ -957,29 +983,58 @@ add_proto qw/unsigned int vpx_sad16x32/, "const uint8_t *src_ptr, int src_stride specialize qw/vpx_sad16x32 msa/, "$sse2_x86inc"; add_proto qw/unsigned int vpx_sad16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad16x16 mmx media neon msa/, "$sse2_x86inc"; +specialize qw/vpx_sad16x16 media neon msa/, "$sse2_x86inc"; add_proto qw/unsigned int vpx_sad16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad16x8 mmx neon msa/, "$sse2_x86inc"; +specialize qw/vpx_sad16x8 neon msa/, "$sse2_x86inc"; add_proto qw/unsigned int vpx_sad8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad8x16 mmx neon msa/, "$sse2_x86inc"; +specialize qw/vpx_sad8x16 neon msa/, "$sse2_x86inc"; add_proto qw/unsigned int vpx_sad8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad8x8 mmx neon msa/, "$sse2_x86inc"; +specialize qw/vpx_sad8x8 neon msa/, "$sse2_x86inc"; add_proto qw/unsigned int vpx_sad8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vpx_sad8x4 msa/, "$sse2_x86inc"; add_proto qw/unsigned int vpx_sad4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad4x8 msa/, "$sse_x86inc"; +specialize qw/vpx_sad4x8 msa/, "$sse2_x86inc"; add_proto qw/unsigned int vpx_sad4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad4x4 mmx neon msa/, "$sse_x86inc"; +specialize 
qw/vpx_sad4x4 neon msa/, "$sse2_x86inc"; # # Avg # +if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") { + add_proto qw/unsigned int vpx_avg_8x8/, "const uint8_t *, int p"; + specialize qw/vpx_avg_8x8 sse2 neon msa/; + + add_proto qw/unsigned int vpx_avg_4x4/, "const uint8_t *, int p"; + specialize qw/vpx_avg_4x4 sse2 neon msa/; + + add_proto qw/void vpx_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max"; + specialize qw/vpx_minmax_8x8 sse2 neon/; + + add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff"; + specialize qw/vpx_hadamard_8x8 sse2 neon/, "$ssse3_x86_64_x86inc"; + + add_proto qw/void vpx_hadamard_16x16/, "const int16_t *src_diff, int src_stride, int16_t *coeff"; + specialize qw/vpx_hadamard_16x16 sse2 neon/; + + add_proto qw/int vpx_satd/, "const int16_t *coeff, int length"; + specialize qw/vpx_satd sse2 neon/; + + add_proto qw/void vpx_int_pro_row/, "int16_t *hbuf, const uint8_t *ref, const int ref_stride, const int height"; + specialize qw/vpx_int_pro_row sse2 neon/; + + add_proto qw/int16_t vpx_int_pro_col/, "const uint8_t *ref, const int width"; + specialize qw/vpx_int_pro_col sse2 neon/; + + add_proto qw/int vpx_vector_var/, "const int16_t *ref, const int16_t *src, const int bwl"; + specialize qw/vpx_vector_var neon sse2/; +} # CONFIG_VP9_ENCODER + add_proto qw/unsigned int vpx_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vpx_sad64x64_avg avx2 msa/, "$sse2_x86inc"; @@ -1014,10 +1069,10 @@ add_proto qw/unsigned int vpx_sad8x4_avg/, "const uint8_t *src_ptr, int src_stri specialize qw/vpx_sad8x4_avg msa/, "$sse2_x86inc"; add_proto qw/unsigned int vpx_sad4x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vpx_sad4x8_avg msa/, "$sse_x86inc"; +specialize qw/vpx_sad4x8_avg msa/, "$sse2_x86inc"; add_proto 
qw/unsigned int vpx_sad4x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vpx_sad4x4_avg msa/, "$sse_x86inc"; +specialize qw/vpx_sad4x4_avg msa/, "$sse2_x86inc"; # # Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally @@ -1109,10 +1164,10 @@ add_proto qw/void vpx_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const specialize qw/vpx_sad8x4x4d msa/, "$sse2_x86inc"; add_proto qw/void vpx_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array"; -specialize qw/vpx_sad4x8x4d msa/, "$sse_x86inc"; +specialize qw/vpx_sad4x8x4d msa/, "$sse2_x86inc"; add_proto qw/void vpx_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array"; -specialize qw/vpx_sad4x4x4d msa/, "$sse_x86inc"; +specialize qw/vpx_sad4x4x4d msa/, "$sse2_x86inc"; # # Structured Similarity (SSIM) @@ -1177,6 +1232,13 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # # Avg # + add_proto qw/unsigned int vpx_highbd_avg_8x8/, "const uint8_t *, int p"; + specialize qw/vpx_highbd_avg_8x8/; + add_proto qw/unsigned int vpx_highbd_avg_4x4/, "const uint8_t *, int p"; + specialize qw/vpx_highbd_avg_4x4/; + add_proto qw/void vpx_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max"; + specialize qw/vpx_highbd_minmax_8x8/; + add_proto qw/unsigned int vpx_highbd_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vpx_highbd_sad64x64_avg/, "$sse2_x86inc"; @@ -1345,16 +1407,16 @@ add_proto qw/unsigned int vpx_variance16x32/, "const uint8_t *src_ptr, int sourc specialize qw/vpx_variance16x32 sse2 msa/; add_proto qw/unsigned int vpx_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - 
specialize qw/vpx_variance16x16 mmx sse2 avx2 media neon msa/; + specialize qw/vpx_variance16x16 sse2 avx2 media neon msa/; add_proto qw/unsigned int vpx_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance16x8 mmx sse2 neon msa/; + specialize qw/vpx_variance16x8 sse2 neon msa/; add_proto qw/unsigned int vpx_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance8x16 mmx sse2 neon msa/; + specialize qw/vpx_variance8x16 sse2 neon msa/; add_proto qw/unsigned int vpx_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance8x8 mmx sse2 media neon msa/; + specialize qw/vpx_variance8x8 sse2 media neon msa/; add_proto qw/unsigned int vpx_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vpx_variance8x4 sse2 msa/; @@ -1363,7 +1425,7 @@ add_proto qw/unsigned int vpx_variance4x8/, "const uint8_t *src_ptr, int source_ specialize qw/vpx_variance4x8 sse2 msa/; add_proto qw/unsigned int vpx_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance4x4 mmx sse2 msa/; + specialize qw/vpx_variance4x4 sse2 msa/; # # Specialty Variance @@ -1372,10 +1434,10 @@ add_proto qw/void vpx_get16x16var/, "const uint8_t *src_ptr, int source_stride, specialize qw/vpx_get16x16var sse2 avx2 neon msa/; add_proto qw/void vpx_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; - specialize qw/vpx_get8x8var mmx sse2 neon msa/; + specialize qw/vpx_get8x8var sse2 neon msa/; add_proto qw/unsigned int vpx_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int 
recon_stride, unsigned int *sse"; - specialize qw/vpx_mse16x16 mmx sse2 avx2 media neon msa/; + specialize qw/vpx_mse16x16 sse2 avx2 media neon msa/; add_proto qw/unsigned int vpx_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; specialize qw/vpx_mse16x8 sse2 msa/; @@ -1387,7 +1449,7 @@ add_proto qw/unsigned int vpx_mse8x8/, "const uint8_t *src_ptr, int source_stri specialize qw/vpx_mse8x8 sse2 msa/; add_proto qw/unsigned int vpx_get_mb_ss/, "const int16_t *"; - specialize qw/vpx_get_mb_ss mmx sse2 msa/; + specialize qw/vpx_get_mb_ss sse2 msa/; add_proto qw/unsigned int vpx_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride"; specialize qw/vpx_get4x4sse_cs neon msa/; @@ -1416,25 +1478,25 @@ add_proto qw/uint32_t vpx_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int specialize qw/vpx_sub_pixel_variance16x32 msa/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/uint32_t vpx_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance16x16 mmx media neon msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_variance16x16 media neon msa/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/uint32_t vpx_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance16x8 mmx msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_variance16x8 msa/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/uint32_t vpx_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance8x16 mmx msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize 
qw/vpx_sub_pixel_variance8x16 msa/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/uint32_t vpx_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance8x8 mmx media neon msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_variance8x8 media neon msa/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/uint32_t vpx_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_sub_pixel_variance8x4 msa/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/uint32_t vpx_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance4x8 msa/, "$sse_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_variance4x8 msa/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/uint32_t vpx_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance4x4 mmx msa/, "$sse_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_variance4x4 msa/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_sub_pixel_avg_variance64x64 avx2 msa/, "$sse2_x86inc", "$ssse3_x86inc"; @@ -1470,22 +1532,22 @@ add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, i specialize qw/vpx_sub_pixel_avg_variance8x4 msa/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t 
*ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_sub_pixel_avg_variance4x8 msa/, "$sse_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_avg_variance4x8 msa/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_sub_pixel_avg_variance4x4 msa/, "$sse_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_avg_variance4x4 msa/, "$sse2_x86inc", "$ssse3_x86inc"; # # Specialty Subpixel # add_proto qw/uint32_t vpx_variance_halfpixvar16x16_h/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_variance_halfpixvar16x16_h mmx sse2 media/; + specialize qw/vpx_variance_halfpixvar16x16_h sse2 media/; add_proto qw/uint32_t vpx_variance_halfpixvar16x16_v/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_variance_halfpixvar16x16_v mmx sse2 media/; + specialize qw/vpx_variance_halfpixvar16x16_v sse2 media/; add_proto qw/uint32_t vpx_variance_halfpixvar16x16_hv/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_variance_halfpixvar16x16_hv mmx sse2 media/; + specialize qw/vpx_variance_halfpixvar16x16_hv sse2 media/; if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/unsigned int vpx_highbd_12_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; @@ -1845,6 +1907,15 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, 
const uint8_t *second_pred"; } # CONFIG_VP9_HIGHBITDEPTH + +# +# Post Processing +# +if (vpx_config("CONFIG_POSTPROC") eq "yes" || vpx_config("CONFIG_VP9_POSTPROC") eq "yes") { + add_proto qw/void vpx_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch"; + specialize qw/vpx_plane_add_noise sse2 msa/; +} + } # CONFIG_ENCODERS || CONFIG_POSTPROC || CONFIG_VP9_POSTPROC 1; diff --git a/libvpx/vpx_dsp/x86/add_noise_sse2.asm b/libvpx/vpx_dsp/x86/add_noise_sse2.asm new file mode 100644 index 000000000..ff61b19ba --- /dev/null +++ b/libvpx/vpx_dsp/x86/add_noise_sse2.asm @@ -0,0 +1,83 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
+; + + +%include "vpx_ports/x86_abi_support.asm" + +;void vpx_plane_add_noise_sse2(unsigned char *start, unsigned char *noise, +; unsigned char blackclamp[16], +; unsigned char whiteclamp[16], +; unsigned char bothclamp[16], +; unsigned int width, unsigned int height, +; int pitch) +global sym(vpx_plane_add_noise_sse2) PRIVATE +sym(vpx_plane_add_noise_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 8 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ; get the clamps in registers + mov rdx, arg(2) ; blackclamp + movdqu xmm3, [rdx] + mov rdx, arg(3) ; whiteclamp + movdqu xmm4, [rdx] + mov rdx, arg(4) ; bothclamp + movdqu xmm5, [rdx] + +.addnoise_loop: + call sym(LIBVPX_RAND) WRT_PLT + mov rcx, arg(1) ;noise + and rax, 0xff + add rcx, rax + + mov rdi, rcx + movsxd rcx, dword arg(5) ;[Width] + mov rsi, arg(0) ;Pos + xor rax,rax + +.addnoise_nextset: + movdqu xmm1,[rsi+rax] ; get the source + + psubusb xmm1, xmm3 ; subtract black clamp + paddusb xmm1, xmm5 ; add both clamp + psubusb xmm1, xmm4 ; subtract whiteclamp + + movdqu xmm2,[rdi+rax] ; get the noise for this line + paddb xmm1,xmm2 ; add it in + movdqu [rsi+rax],xmm1 ; store the result + + add rax,16 ; move to the next line + + cmp rax, rcx + jl .addnoise_nextset + + movsxd rax, dword arg(7) ; Pitch + add arg(0), rax ; Start += Pitch + sub dword arg(6), 1 ; Height -= 1 + jg .addnoise_loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + +SECTION_RODATA +align 16 +rd42: + times 8 dw 0x04 +four8s: + times 4 dd 8 diff --git a/libvpx/vp9/encoder/x86/vp9_avg_intrin_sse2.c b/libvpx/vpx_dsp/x86/avg_intrin_sse2.c index 4531d794a..f9af6cf97 100644 --- a/libvpx/vp9/encoder/x86/vp9_avg_intrin_sse2.c +++ b/libvpx/vpx_dsp/x86/avg_intrin_sse2.c @@ -10,10 +10,10 @@ #include <emmintrin.h> -#include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "vpx_ports/mem.h" -void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, +void vpx_minmax_8x8_sse2(const 
uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max) { __m128i u0, s0, d0, diff, maxabsdiff, minabsdiff, negdiff, absdiff0, absdiff; u0 = _mm_setzero_si128(); @@ -91,7 +91,7 @@ void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, *min = _mm_extract_epi16(minabsdiff, 0); } -unsigned int vp9_avg_8x8_sse2(const uint8_t *s, int p) { +unsigned int vpx_avg_8x8_sse2(const uint8_t *s, int p) { __m128i s0, s1, u0; unsigned int avg = 0; u0 = _mm_setzero_si128(); @@ -118,7 +118,7 @@ unsigned int vp9_avg_8x8_sse2(const uint8_t *s, int p) { return (avg + 32) >> 6; } -unsigned int vp9_avg_4x4_sse2(const uint8_t *s, int p) { +unsigned int vpx_avg_4x4_sse2(const uint8_t *s, int p) { __m128i s0, s1, u0; unsigned int avg = 0; u0 = _mm_setzero_si128(); @@ -212,7 +212,7 @@ static void hadamard_col8_sse2(__m128i *in, int iter) { } } -void vp9_hadamard_8x8_sse2(int16_t const *src_diff, int src_stride, +void vpx_hadamard_8x8_sse2(int16_t const *src_diff, int src_stride, int16_t *coeff) { __m128i src[8]; src[0] = _mm_load_si128((const __m128i *)src_diff); @@ -244,13 +244,13 @@ void vp9_hadamard_8x8_sse2(int16_t const *src_diff, int src_stride, _mm_store_si128((__m128i *)coeff, src[7]); } -void vp9_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride, +void vpx_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride, int16_t *coeff) { int idx; for (idx = 0; idx < 4; ++idx) { int16_t const *src_ptr = src_diff + (idx >> 1) * 8 * src_stride + (idx & 0x01) * 8; - vp9_hadamard_8x8_sse2(src_ptr, src_stride, coeff + idx * 64); + vpx_hadamard_8x8_sse2(src_ptr, src_stride, coeff + idx * 64); } for (idx = 0; idx < 64; idx += 8) { @@ -283,34 +283,33 @@ void vp9_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride, } } -int16_t vp9_satd_sse2(const int16_t *coeff, int length) { +int vpx_satd_sse2(const int16_t *coeff, int length) { int i; - __m128i sum = _mm_load_si128((const __m128i *)coeff); - __m128i sign = _mm_srai_epi16(sum, 15); - __m128i val = 
_mm_xor_si128(sum, sign); - sum = _mm_sub_epi16(val, sign); - coeff += 8; - - for (i = 8; i < length; i += 8) { - __m128i src_line = _mm_load_si128((const __m128i *)coeff); - sign = _mm_srai_epi16(src_line, 15); - val = _mm_xor_si128(src_line, sign); - val = _mm_sub_epi16(val, sign); - sum = _mm_add_epi16(sum, val); + const __m128i zero = _mm_setzero_si128(); + __m128i accum = zero; + + for (i = 0; i < length; i += 8) { + const __m128i src_line = _mm_load_si128((const __m128i *)coeff); + const __m128i inv = _mm_sub_epi16(zero, src_line); + const __m128i abs = _mm_max_epi16(src_line, inv); // abs(src_line) + const __m128i abs_lo = _mm_unpacklo_epi16(abs, zero); + const __m128i abs_hi = _mm_unpackhi_epi16(abs, zero); + const __m128i sum = _mm_add_epi32(abs_lo, abs_hi); + accum = _mm_add_epi32(accum, sum); coeff += 8; } - val = _mm_srli_si128(sum, 8); - sum = _mm_add_epi16(sum, val); - val = _mm_srli_epi64(sum, 32); - sum = _mm_add_epi16(sum, val); - val = _mm_srli_epi32(sum, 16); - sum = _mm_add_epi16(sum, val); + { // cascading summation of accum + __m128i hi = _mm_srli_si128(accum, 8); + accum = _mm_add_epi32(accum, hi); + hi = _mm_srli_epi64(accum, 32); + accum = _mm_add_epi32(accum, hi); + } - return _mm_extract_epi16(sum, 0); + return _mm_cvtsi128_si32(accum); } -void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref, +void vpx_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref, const int ref_stride, const int height) { int idx; __m128i zero = _mm_setzero_si128(); @@ -359,7 +358,7 @@ void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref, _mm_storeu_si128((__m128i *)hbuf, s1); } -int16_t vp9_int_pro_col_sse2(uint8_t const *ref, const int width) { +int16_t vpx_int_pro_col_sse2(uint8_t const *ref, const int width) { __m128i zero = _mm_setzero_si128(); __m128i src_line = _mm_load_si128((const __m128i *)ref); __m128i s0 = _mm_sad_epu8(src_line, zero); @@ -379,7 +378,7 @@ int16_t vp9_int_pro_col_sse2(uint8_t const *ref, const int width) { return 
_mm_extract_epi16(s0, 0); } -int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src, +int vpx_vector_var_sse2(int16_t const *ref, int16_t const *src, const int bwl) { int idx; int width = 4 << bwl; diff --git a/libvpx/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm b/libvpx/vpx_dsp/x86/avg_ssse3_x86_64.asm index 74c52df19..26412e8e4 100644 --- a/libvpx/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm +++ b/libvpx/vpx_dsp/x86/avg_ssse3_x86_64.asm @@ -8,11 +8,11 @@ ; be found in the AUTHORS file in the root of the source tree. ; -%define private_prefix vp9 +%define private_prefix vpx %include "third_party/x86inc/x86inc.asm" -; This file provides SSSE3 version of the forward transformation. Part +; This file provides SSSE3 version of the hadamard transformation. Part ; of the macro definitions are originally derived from the ffmpeg project. ; The current version applies to x86 64-bit only. diff --git a/libvpx/vpx_dsp/x86/convolve.h b/libvpx/vpx_dsp/x86/convolve.h index b6fbfcf92..7e43eb7c7 100644 --- a/libvpx/vpx_dsp/x86/convolve.h +++ b/libvpx/vpx_dsp/x86/convolve.h @@ -33,7 +33,7 @@ typedef void filter8_1dfunction ( int w, int h) { \ assert(filter[3] != 128); \ assert(step_q4 == 16); \ - if (filter[0] || filter[1] || filter[2]) { \ + if (filter[0] | filter[1] | filter[2]) { \ while (w >= 16) { \ vpx_filter_block1d16_##dir##8_##avg##opt(src_start, \ src_stride, \ @@ -45,27 +45,20 @@ typedef void filter8_1dfunction ( dst += 16; \ w -= 16; \ } \ - while (w >= 8) { \ + if (w == 8) { \ vpx_filter_block1d8_##dir##8_##avg##opt(src_start, \ src_stride, \ dst, \ dst_stride, \ h, \ filter); \ - src += 8; \ - dst += 8; \ - w -= 8; \ - } \ - while (w >= 4) { \ + } else if (w == 4) { \ vpx_filter_block1d4_##dir##8_##avg##opt(src_start, \ src_stride, \ dst, \ dst_stride, \ h, \ filter); \ - src += 4; \ - dst += 4; \ - w -= 4; \ } \ } else { \ while (w >= 16) { \ @@ -79,27 +72,20 @@ typedef void filter8_1dfunction ( dst += 16; \ w -= 16; \ } \ - while (w >= 8) { \ + if (w == 8) { \ 
vpx_filter_block1d8_##dir##2_##avg##opt(src, \ src_stride, \ dst, \ dst_stride, \ h, \ filter); \ - src += 8; \ - dst += 8; \ - w -= 8; \ - } \ - while (w >= 4) { \ + } else if (w == 4) { \ vpx_filter_block1d4_##dir##2_##avg##opt(src, \ src_stride, \ dst, \ dst_stride, \ h, \ filter); \ - src += 4; \ - dst += 4; \ - w -= 4; \ } \ } \ } @@ -116,8 +102,7 @@ void vpx_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ assert(h <= 64); \ assert(x_step_q4 == 16); \ assert(y_step_q4 == 16); \ - if (filter_x[0] || filter_x[1] || filter_x[2]|| \ - filter_y[0] || filter_y[1] || filter_y[2]) { \ + if (filter_x[0] | filter_x[1] | filter_x[2]) { \ DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \ vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \ filter_x, x_step_q4, filter_y, y_step_q4, \ @@ -161,7 +146,7 @@ typedef void highbd_filter8_1dfunction ( if (step_q4 == 16 && filter[3] != 128) { \ uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \ - if (filter[0] || filter[1] || filter[2]) { \ + if (filter[0] | filter[1] | filter[2]) { \ while (w >= 16) { \ vpx_highbd_filter_block1d16_##dir##8_##avg##opt(src_start, \ src_stride, \ @@ -253,8 +238,7 @@ void vpx_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ assert(w <= 64); \ assert(h <= 64); \ if (x_step_q4 == 16 && y_step_q4 == 16) { \ - if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \ - filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \ + if ((filter_x[0] | filter_x[1] | filter_x[2]) || filter_x[3] == 128) { \ DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \ vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \ CONVERT_TO_BYTEPTR(fdata2), 64, \ diff --git a/libvpx/vpx_dsp/x86/fwd_dct32x32_impl_avx2.h b/libvpx/vpx_dsp/x86/fwd_dct32x32_impl_avx2.h index 4df39dff8..951af3a62 100644 --- a/libvpx/vpx_dsp/x86/fwd_dct32x32_impl_avx2.h +++ b/libvpx/vpx_dsp/x86/fwd_dct32x32_impl_avx2.h 
@@ -10,6 +10,7 @@ #include <immintrin.h> // AVX2 +#include "./vpx_dsp_rtcd.h" #include "vpx_dsp/txfm_common.h" #define pair256_set_epi16(a, b) \ diff --git a/libvpx/vpx_dsp/x86/fwd_txfm_sse2.c b/libvpx/vpx_dsp/x86/fwd_txfm_sse2.c index bca72e874..3e4f49bd9 100644 --- a/libvpx/vpx_dsp/x86/fwd_txfm_sse2.c +++ b/libvpx/vpx_dsp/x86/fwd_txfm_sse2.c @@ -11,6 +11,7 @@ #include <emmintrin.h> // SSE2 #include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_dsp/x86/fwd_txfm_sse2.h" @@ -40,7 +41,7 @@ void vpx_fdct4x4_1_sse2(const int16_t *input, tran_low_t *output, int stride) { in1 = _mm_add_epi32(tmp, in0); in0 = _mm_slli_epi32(in1, 1); - store_output(&in0, output); + output[0] = (tran_low_t)_mm_cvtsi128_si32(in0); } void vpx_fdct8x8_1_sse2(const int16_t *input, tran_low_t *output, int stride) { @@ -80,7 +81,7 @@ void vpx_fdct8x8_1_sse2(const int16_t *input, tran_low_t *output, int stride) { in0 = _mm_srli_si128(sum, 8); in1 = _mm_add_epi32(sum, in0); - store_output(&in1, output); + output[0] = (tran_low_t)_mm_cvtsi128_si32(in1); } void vpx_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output, @@ -91,40 +92,39 @@ void vpx_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output, int i; for (i = 0; i < 2; ++i) { - input += 8 * i; - in0 = _mm_load_si128((const __m128i *)(input + 0 * stride)); - in1 = _mm_load_si128((const __m128i *)(input + 1 * stride)); - in2 = _mm_load_si128((const __m128i *)(input + 2 * stride)); - in3 = _mm_load_si128((const __m128i *)(input + 3 * stride)); + in0 = _mm_load_si128((const __m128i *)(input + 0 * stride + 0)); + in1 = _mm_load_si128((const __m128i *)(input + 0 * stride + 8)); + in2 = _mm_load_si128((const __m128i *)(input + 1 * stride + 0)); + in3 = _mm_load_si128((const __m128i *)(input + 1 * stride + 8)); u0 = _mm_add_epi16(in0, in1); u1 = _mm_add_epi16(in2, in3); sum = _mm_add_epi16(sum, u0); - in0 = _mm_load_si128((const __m128i *)(input + 4 * stride)); - in1 = _mm_load_si128((const 
__m128i *)(input + 5 * stride)); - in2 = _mm_load_si128((const __m128i *)(input + 6 * stride)); - in3 = _mm_load_si128((const __m128i *)(input + 7 * stride)); + in0 = _mm_load_si128((const __m128i *)(input + 2 * stride + 0)); + in1 = _mm_load_si128((const __m128i *)(input + 2 * stride + 8)); + in2 = _mm_load_si128((const __m128i *)(input + 3 * stride + 0)); + in3 = _mm_load_si128((const __m128i *)(input + 3 * stride + 8)); sum = _mm_add_epi16(sum, u1); u0 = _mm_add_epi16(in0, in1); u1 = _mm_add_epi16(in2, in3); sum = _mm_add_epi16(sum, u0); - in0 = _mm_load_si128((const __m128i *)(input + 8 * stride)); - in1 = _mm_load_si128((const __m128i *)(input + 9 * stride)); - in2 = _mm_load_si128((const __m128i *)(input + 10 * stride)); - in3 = _mm_load_si128((const __m128i *)(input + 11 * stride)); + in0 = _mm_load_si128((const __m128i *)(input + 4 * stride + 0)); + in1 = _mm_load_si128((const __m128i *)(input + 4 * stride + 8)); + in2 = _mm_load_si128((const __m128i *)(input + 5 * stride + 0)); + in3 = _mm_load_si128((const __m128i *)(input + 5 * stride + 8)); sum = _mm_add_epi16(sum, u1); u0 = _mm_add_epi16(in0, in1); u1 = _mm_add_epi16(in2, in3); sum = _mm_add_epi16(sum, u0); - in0 = _mm_load_si128((const __m128i *)(input + 12 * stride)); - in1 = _mm_load_si128((const __m128i *)(input + 13 * stride)); - in2 = _mm_load_si128((const __m128i *)(input + 14 * stride)); - in3 = _mm_load_si128((const __m128i *)(input + 15 * stride)); + in0 = _mm_load_si128((const __m128i *)(input + 6 * stride + 0)); + in1 = _mm_load_si128((const __m128i *)(input + 6 * stride + 8)); + in2 = _mm_load_si128((const __m128i *)(input + 7 * stride + 0)); + in3 = _mm_load_si128((const __m128i *)(input + 7 * stride + 8)); sum = _mm_add_epi16(sum, u1); u0 = _mm_add_epi16(in0, in1); @@ -132,6 +132,7 @@ void vpx_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output, sum = _mm_add_epi16(sum, u0); sum = _mm_add_epi16(sum, u1); + input += 8 * stride; } u0 = _mm_setzero_si128(); @@ -149,7 +150,7 @@ void 
vpx_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output, in1 = _mm_add_epi32(sum, in0); in1 = _mm_srai_epi32(in1, 1); - store_output(&in1, output); + output[0] = (tran_low_t)_mm_cvtsi128_si32(in1); } void vpx_fdct32x32_1_sse2(const int16_t *input, tran_low_t *output, @@ -221,7 +222,7 @@ void vpx_fdct32x32_1_sse2(const int16_t *input, tran_low_t *output, in1 = _mm_add_epi32(sum, in0); in1 = _mm_srai_epi32(in1, 3); - store_output(&in1, output); + output[0] = (tran_low_t)_mm_cvtsi128_si32(in1); } #define DCT_HIGH_BIT_DEPTH 0 diff --git a/libvpx/vpx_dsp/x86/halfpix_variance_sse2.c b/libvpx/vpx_dsp/x86/halfpix_variance_sse2.c index 5782155bf..4a8fb6df7 100644 --- a/libvpx/vpx_dsp/x86/halfpix_variance_sse2.c +++ b/libvpx/vpx_dsp/x86/halfpix_variance_sse2.c @@ -8,6 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include <assert.h> + #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" @@ -40,7 +42,9 @@ uint32_t vpx_variance_halfpixvar16x16_h_sse2(const unsigned char *src, &xsum0, &xxsum0); *sse = xxsum0; - return (xxsum0 - (((uint32_t)xsum0 * xsum0) >> 8)); + assert(xsum0 <= 255 * 16 * 16); + assert(xsum0 >= -255 * 16 * 16); + return (xxsum0 - ((uint32_t)((int64_t)xsum0 * xsum0) >> 8)); } uint32_t vpx_variance_halfpixvar16x16_v_sse2(const unsigned char *src, @@ -54,7 +58,9 @@ uint32_t vpx_variance_halfpixvar16x16_v_sse2(const unsigned char *src, &xsum0, &xxsum0); *sse = xxsum0; - return (xxsum0 - (((uint32_t)xsum0 * xsum0) >> 8)); + assert(xsum0 <= 255 * 16 * 16); + assert(xsum0 >= -255 * 16 * 16); + return (xxsum0 - ((uint32_t)((int64_t)xsum0 * xsum0) >> 8)); } @@ -70,5 +76,7 @@ uint32_t vpx_variance_halfpixvar16x16_hv_sse2(const unsigned char *src, &xsum0, &xxsum0); *sse = xxsum0; - return (xxsum0 - (((uint32_t)xsum0 * xsum0) >> 8)); + assert(xsum0 <= 255 * 16 * 16); + assert(xsum0 >= -255 * 16 * 16); + return (xxsum0 - ((uint32_t)((int64_t)xsum0 * xsum0) >> 8)); } diff --git 
a/libvpx/vpx_dsp/x86/highbd_intrapred_sse2.asm b/libvpx/vpx_dsp/x86/highbd_intrapred_sse2.asm index b12d29c0a..c61b62104 100644 --- a/libvpx/vpx_dsp/x86/highbd_intrapred_sse2.asm +++ b/libvpx/vpx_dsp/x86/highbd_intrapred_sse2.asm @@ -17,24 +17,20 @@ pw_16: times 4 dd 16 pw_32: times 4 dd 32 SECTION .text -INIT_MMX sse +INIT_XMM sse2 cglobal highbd_dc_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset GET_GOT goffsetq movq m0, [aboveq] movq m2, [leftq] - DEFINE_ARGS dst, stride, one - mov oned, 0x0001 - pxor m1, m1 - movd m3, oned - pshufw m3, m3, 0x0 paddw m0, m2 - pmaddwd m0, m3 - packssdw m0, m1 - pmaddwd m0, m3 + pshuflw m1, m0, 0xe + paddw m0, m1 + pshuflw m1, m0, 0x1 + paddw m0, m1 paddw m0, [GLOBAL(pw_4)] psraw m0, 3 - pshufw m0, m0, 0x0 + pshuflw m0, m0, 0x0 movq [dstq ], m0 movq [dstq+strideq*2], m0 lea dstq, [dstq+strideq*4] @@ -122,30 +118,29 @@ cglobal highbd_dc_predictor_16x16, 4, 5, 5, dst, stride, above, left, goffset RESTORE_GOT REP_RET -%if ARCH_X86_64 INIT_XMM sse2 -cglobal highbd_dc_predictor_32x32, 4, 5, 9, dst, stride, above, left, goffset +cglobal highbd_dc_predictor_32x32, 4, 5, 7, dst, stride, above, left, goffset GET_GOT goffsetq - pxor m1, m1 mova m0, [aboveq] mova m2, [aboveq+16] mova m3, [aboveq+32] mova m4, [aboveq+48] - mova m5, [leftq] - mova m6, [leftq+16] - mova m7, [leftq+32] - mova m8, [leftq+48] + paddw m0, m2 + paddw m3, m4 + mova m2, [leftq] + mova m4, [leftq+16] + mova m5, [leftq+32] + mova m6, [leftq+48] + paddw m2, m4 + paddw m5, m6 + paddw m0, m3 + paddw m2, m5 + pxor m1, m1 + paddw m0, m2 DEFINE_ARGS dst, stride, stride3, lines4 lea stride3q, [strideq*3] mov lines4d, 8 - paddw m0, m2 - paddw m0, m3 - paddw m0, m4 - paddw m0, m5 - paddw m0, m6 - paddw m0, m7 - paddw m0, m8 movhlps m2, m0 paddw m0, m2 punpcklwd m0, m1 @@ -181,9 +176,8 @@ cglobal highbd_dc_predictor_32x32, 4, 5, 9, dst, stride, above, left, goffset RESTORE_GOT REP_RET -%endif -INIT_MMX sse +INIT_XMM sse2 cglobal highbd_v_predictor_4x4, 3, 3, 1, dst, 
stride, above movq m0, [aboveq] movq [dstq ], m0 @@ -261,43 +255,44 @@ cglobal highbd_v_predictor_32x32, 3, 4, 4, dst, stride, above jnz .loop REP_RET -INIT_MMX sse -cglobal highbd_tm_predictor_4x4, 5, 6, 5, dst, stride, above, left, bps, one +INIT_XMM sse2 +cglobal highbd_tm_predictor_4x4, 5, 5, 6, dst, stride, above, left, bps movd m1, [aboveq-2] movq m0, [aboveq] - pshufw m1, m1, 0x0 + pshuflw m1, m1, 0x0 + movlhps m0, m0 ; t1 t2 t3 t4 t1 t2 t3 t4 + movlhps m1, m1 ; tl tl tl tl tl tl tl tl ; Get the values to compute the maximum value at this bit depth - mov oned, 1 - movd m3, oned + pcmpeqw m3, m3 movd m4, bpsd - pshufw m3, m3, 0x0 - DEFINE_ARGS dst, stride, line, left - mov lineq, -2 - mova m2, m3 + psubw m0, m1 ; t1-tl t2-tl t3-tl t4-tl psllw m3, m4 - add leftq, 8 - psubw m3, m2 ; max possible value - pxor m4, m4 ; min possible value - psubw m0, m1 -.loop: - movq m1, [leftq+lineq*4] - movq m2, [leftq+lineq*4+2] - pshufw m1, m1, 0x0 - pshufw m2, m2, 0x0 - paddw m1, m0 + pcmpeqw m2, m2 + pxor m4, m4 ; min possible value + pxor m3, m2 ; max possible value + mova m1, [leftq] + pshuflw m2, m1, 0x0 + pshuflw m5, m1, 0x55 + movlhps m2, m5 ; l1 l1 l1 l1 l2 l2 l2 l2 paddw m2, m0 ;Clamp to the bit-depth - pminsw m1, m3 pminsw m2, m3 - pmaxsw m1, m4 pmaxsw m2, m4 ;Store the values - movq [dstq ], m1 - movq [dstq+strideq*2], m2 + movq [dstq ], m2 + movhpd [dstq+strideq*2], m2 lea dstq, [dstq+strideq*4] - inc lineq - jnz .loop - REP_RET + pshuflw m2, m1, 0xaa + pshuflw m5, m1, 0xff + movlhps m2, m5 + paddw m2, m0 + ;Clamp to the bit-depth + pminsw m2, m3 + pmaxsw m2, m4 + ;Store the values + movq [dstq ], m2 + movhpd [dstq+strideq*2], m2 + RET INIT_XMM sse2 cglobal highbd_tm_predictor_8x8, 5, 6, 5, dst, stride, above, left, bps, one @@ -343,63 +338,55 @@ cglobal highbd_tm_predictor_8x8, 5, 6, 5, dst, stride, above, left, bps, one jnz .loop REP_RET -%if ARCH_X86_64 INIT_XMM sse2 -cglobal highbd_tm_predictor_16x16, 5, 6, 9, dst, stride, above, left, bps, one +cglobal 
highbd_tm_predictor_16x16, 5, 5, 8, dst, stride, above, left, bps movd m2, [aboveq-2] mova m0, [aboveq] mova m1, [aboveq+16] pshuflw m2, m2, 0x0 ; Get the values to compute the maximum value at this bit depth - mov oned, 1 - pxor m7, m7 - pxor m8, m8 - pinsrw m7, oned, 0 - pinsrw m8, bpsd, 0 - pshuflw m7, m7, 0x0 + pcmpeqw m3, m3 + movd m4, bpsd + punpcklqdq m2, m2 + psllw m3, m4 + pcmpeqw m5, m5 + pxor m4, m4 ; min possible value + pxor m3, m5 ; max possible value DEFINE_ARGS dst, stride, line, left - punpcklqdq m7, m7 mov lineq, -8 - mova m5, m7 - punpcklqdq m2, m2 - psllw m7, m8 - add leftq, 32 - psubw m7, m5 ; max possible value - pxor m8, m8 ; min possible value psubw m0, m2 psubw m1, m2 .loop: - movd m2, [leftq+lineq*4] - movd m3, [leftq+lineq*4+2] - pshuflw m2, m2, 0x0 - pshuflw m3, m3, 0x0 - punpcklqdq m2, m2 - punpcklqdq m3, m3 - paddw m4, m2, m0 - paddw m5, m3, m0 + movd m7, [leftq] + pshuflw m5, m7, 0x0 + pshuflw m2, m7, 0x55 + punpcklqdq m5, m5 ; l1 l1 l1 l1 l1 l1 l1 l1 + punpcklqdq m2, m2 ; l2 l2 l2 l2 l2 l2 l2 l2 + paddw m6, m5, m0 ; t1-tl+l1 to t4-tl+l1 + paddw m5, m1 ; t5-tl+l1 to t8-tl+l1 + pminsw m6, m3 + pminsw m5, m3 + pmaxsw m6, m4 ; Clamp to the bit-depth + pmaxsw m5, m4 + mova [dstq ], m6 + mova [dstq +16], m5 + paddw m6, m2, m0 paddw m2, m1 - paddw m3, m1 - ;Clamp to the bit-depth - pminsw m4, m7 - pminsw m5, m7 - pminsw m2, m7 - pminsw m3, m7 - pmaxsw m4, m8 - pmaxsw m5, m8 - pmaxsw m2, m8 - pmaxsw m3, m8 - ;Store the values - mova [dstq ], m4 - mova [dstq+strideq*2 ], m5 - mova [dstq +16], m2 - mova [dstq+strideq*2+16], m3 + pminsw m6, m3 + pminsw m2, m3 + pmaxsw m6, m4 + pmaxsw m2, m4 + mova [dstq+strideq*2 ], m6 + mova [dstq+strideq*2+16], m2 lea dstq, [dstq+strideq*4] inc lineq + lea leftq, [leftq+4] + jnz .loop REP_RET INIT_XMM sse2 -cglobal highbd_tm_predictor_32x32, 5, 6, 12, dst, stride, above, left, bps, one +cglobal highbd_tm_predictor_32x32, 5, 5, 8, dst, stride, above, left, bps movd m0, [aboveq-2] mova m1, [aboveq] mova m2, 
[aboveq+16] @@ -407,70 +394,60 @@ cglobal highbd_tm_predictor_32x32, 5, 6, 12, dst, stride, above, left, bps, one mova m4, [aboveq+48] pshuflw m0, m0, 0x0 ; Get the values to compute the maximum value at this bit depth - mov oned, 1 - pxor m10, m10 - pxor m11, m11 - pinsrw m10, oned, 0 - pinsrw m11, bpsd, 0 - pshuflw m10, m10, 0x0 + pcmpeqw m5, m5 + movd m6, bpsd + psllw m5, m6 + pcmpeqw m7, m7 + pxor m6, m6 ; min possible value + pxor m5, m7 ; max possible value + punpcklqdq m0, m0 DEFINE_ARGS dst, stride, line, left - punpcklqdq m10, m10 mov lineq, -16 - mova m5, m10 - punpcklqdq m0, m0 - psllw m10, m11 - add leftq, 64 - psubw m10, m5 ; max possible value - pxor m11, m11 ; min possible value psubw m1, m0 psubw m2, m0 psubw m3, m0 psubw m4, m0 .loop: - movd m5, [leftq+lineq*4] - movd m6, [leftq+lineq*4+2] - pshuflw m5, m5, 0x0 - pshuflw m6, m6, 0x0 - punpcklqdq m5, m5 - punpcklqdq m6, m6 - paddw m7, m5, m1 - paddw m8, m5, m2 - paddw m9, m5, m3 - paddw m5, m4 - ;Clamp these values to the bit-depth - pminsw m7, m10 - pminsw m8, m10 - pminsw m9, m10 - pminsw m5, m10 - pmaxsw m7, m11 - pmaxsw m8, m11 - pmaxsw m9, m11 - pmaxsw m5, m11 - ;Store these values - mova [dstq ], m7 - mova [dstq +16], m8 - mova [dstq +32], m9 - mova [dstq +48], m5 - paddw m7, m6, m1 - paddw m8, m6, m2 - paddw m9, m6, m3 - paddw m6, m4 - ;Clamp these values to the bit-depth - pminsw m7, m10 - pminsw m8, m10 - pminsw m9, m10 - pminsw m6, m10 - pmaxsw m7, m11 - pmaxsw m8, m11 - pmaxsw m9, m11 - pmaxsw m6, m11 - ;Store these values - mova [dstq+strideq*2 ], m7 - mova [dstq+strideq*2+16], m8 - mova [dstq+strideq*2+32], m9 - mova [dstq+strideq*2+48], m6 + movd m7, [leftq] + pshuflw m7, m7, 0x0 + punpcklqdq m7, m7 ; l1 l1 l1 l1 l1 l1 l1 l1 + paddw m0, m7, m1 + pminsw m0, m5 + pmaxsw m0, m6 + mova [dstq ], m0 + paddw m0, m7, m2 + pminsw m0, m5 + pmaxsw m0, m6 + mova [dstq +16], m0 + paddw m0, m7, m3 + pminsw m0, m5 + pmaxsw m0, m6 + mova [dstq +32], m0 + paddw m0, m7, m4 + pminsw m0, m5 + pmaxsw m0, 
m6 + mova [dstq +48], m0 + movd m7, [leftq+2] + pshuflw m7, m7, 0x0 + punpcklqdq m7, m7 ; l2 l2 l2 l2 l2 l2 l2 l2 + paddw m0, m7, m1 + pminsw m0, m5 + pmaxsw m0, m6 + mova [dstq+strideq*2 ], m0 + paddw m0, m7, m2 + pminsw m0, m5 + pmaxsw m0, m6 + mova [dstq+strideq*2+16], m0 + paddw m0, m7, m3 + pminsw m0, m5 + pmaxsw m0, m6 + mova [dstq+strideq*2+32], m0 + paddw m0, m7, m4 + pminsw m0, m5 + pmaxsw m0, m6 + mova [dstq+strideq*2+48], m0 lea dstq, [dstq+strideq*4] + lea leftq, [leftq+4] inc lineq jnz .loop REP_RET -%endif diff --git a/libvpx/vpx_dsp/x86/highbd_loopfilter_sse2.c b/libvpx/vpx_dsp/x86/highbd_loopfilter_sse2.c index c4fd5e1a0..72e42adc9 100644 --- a/libvpx/vpx_dsp/x86/highbd_loopfilter_sse2.c +++ b/libvpx/vpx_dsp/x86/highbd_loopfilter_sse2.c @@ -51,12 +51,10 @@ static INLINE __m128i signed_char_clamp_bd_sse2(__m128i value, int bd) { // TODO(debargha, peter): Break up large functions into smaller ones // in this file. -static void highbd_mb_lpf_horizontal_edge_w_sse2_8(uint16_t *s, - int p, - const uint8_t *_blimit, - const uint8_t *_limit, - const uint8_t *_thresh, - int bd) { +void vpx_highbd_lpf_horizontal_edge_8_sse2(uint16_t *s, int p, + const uint8_t *_blimit, + const uint8_t *_limit, + const uint8_t *_thresh, int bd) { const __m128i zero = _mm_set1_epi16(0); const __m128i one = _mm_set1_epi16(1); __m128i blimit, limit, thresh; @@ -496,34 +494,19 @@ static void highbd_mb_lpf_horizontal_edge_w_sse2_8(uint16_t *s, _mm_store_si128((__m128i *)(s - 0 * p), q0); } -static void highbd_mb_lpf_horizontal_edge_w_sse2_16(uint16_t *s, - int p, - const uint8_t *_blimit, - const uint8_t *_limit, - const uint8_t *_thresh, - int bd) { - highbd_mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh, bd); - highbd_mb_lpf_horizontal_edge_w_sse2_8(s + 8, p, _blimit, _limit, _thresh, - bd); -} - -// TODO(yunqingwang): remove count and call these 2 functions(8 or 16) directly. 
-void vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p, - const uint8_t *_blimit, - const uint8_t *_limit, - const uint8_t *_thresh, - int count, int bd) { - if (count == 1) - highbd_mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh, bd); - else - highbd_mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh, bd); +void vpx_highbd_lpf_horizontal_edge_16_sse2(uint16_t *s, int p, + const uint8_t *_blimit, + const uint8_t *_limit, + const uint8_t *_thresh, int bd) { + vpx_highbd_lpf_horizontal_edge_8_sse2(s, p, _blimit, _limit, _thresh, bd); + vpx_highbd_lpf_horizontal_edge_8_sse2(s + 8, p, _blimit, _limit, _thresh, bd); } void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p, const uint8_t *_blimit, const uint8_t *_limit, const uint8_t *_thresh, - int count, int bd) { + int bd) { DECLARE_ALIGNED(16, uint16_t, flat_op2[16]); DECLARE_ALIGNED(16, uint16_t, flat_op1[16]); DECLARE_ALIGNED(16, uint16_t, flat_op0[16]); @@ -556,8 +539,6 @@ void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p, __m128i work_a; __m128i filter1, filter2; - (void)count; - if (bd == 8) { blimit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_blimit), zero); limit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit), zero); @@ -764,16 +745,15 @@ void vpx_highbd_lpf_horizontal_8_dual_sse2(uint16_t *s, int p, const uint8_t *_limit1, const uint8_t *_thresh1, int bd) { - vpx_highbd_lpf_horizontal_8_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd); - vpx_highbd_lpf_horizontal_8_sse2(s + 8, p, _blimit1, _limit1, _thresh1, - 1, bd); + vpx_highbd_lpf_horizontal_8_sse2(s, p, _blimit0, _limit0, _thresh0, bd); + vpx_highbd_lpf_horizontal_8_sse2(s + 8, p, _blimit1, _limit1, _thresh1, bd); } void vpx_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p, const uint8_t *_blimit, const uint8_t *_limit, const uint8_t *_thresh, - int count, int bd) { + int bd) { const __m128i zero = _mm_set1_epi16(0); __m128i blimit, limit, thresh; __m128i mask, hev, flat; @@ -813,8 +793,6 @@ 
void vpx_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p, __m128i work_a; __m128i filter1, filter2; - (void)count; - if (bd == 8) { blimit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_blimit), zero); limit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit), zero); @@ -944,9 +922,8 @@ void vpx_highbd_lpf_horizontal_4_dual_sse2(uint16_t *s, int p, const uint8_t *_limit1, const uint8_t *_thresh1, int bd) { - vpx_highbd_lpf_horizontal_4_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd); - vpx_highbd_lpf_horizontal_4_sse2(s + 8, p, _blimit1, _limit1, _thresh1, 1, - bd); + vpx_highbd_lpf_horizontal_4_sse2(s, p, _blimit0, _limit0, _thresh0, bd); + vpx_highbd_lpf_horizontal_4_sse2(s + 8, p, _blimit1, _limit1, _thresh1, bd); } static INLINE void highbd_transpose(uint16_t *src[], int in_p, @@ -1058,11 +1035,10 @@ void vpx_highbd_lpf_vertical_4_sse2(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, - int count, int bd) { + int bd) { DECLARE_ALIGNED(16, uint16_t, t_dst[8 * 8]); uint16_t *src[1]; uint16_t *dst[1]; - (void)count; // Transpose 8x8 src[0] = s - 4; @@ -1071,8 +1047,7 @@ void vpx_highbd_lpf_vertical_4_sse2(uint16_t *s, int p, highbd_transpose(src, p, dst, 8, 1); // Loop filtering - vpx_highbd_lpf_horizontal_4_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1, - bd); + vpx_highbd_lpf_horizontal_4_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, bd); src[0] = t_dst; dst[0] = s - 4; @@ -1112,11 +1087,10 @@ void vpx_highbd_lpf_vertical_8_sse2(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, - int count, int bd) { + int bd) { DECLARE_ALIGNED(16, uint16_t, t_dst[8 * 8]); uint16_t *src[1]; uint16_t *dst[1]; - (void)count; // Transpose 8x8 src[0] = s - 4; @@ -1125,8 +1099,7 @@ void vpx_highbd_lpf_vertical_8_sse2(uint16_t *s, int p, highbd_transpose(src, p, dst, 8, 1); // Loop filtering - vpx_highbd_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1, - bd); + 
vpx_highbd_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, bd); src[0] = t_dst; dst[0] = s - 4; @@ -1181,8 +1154,8 @@ void vpx_highbd_lpf_vertical_16_sse2(uint16_t *s, int p, highbd_transpose(src, p, dst, 8, 2); // Loop filtering - highbd_mb_lpf_horizontal_edge_w_sse2_8(t_dst + 8 * 8, 8, blimit, limit, - thresh, bd); + vpx_highbd_lpf_horizontal_edge_8_sse2(t_dst + 8 * 8, 8, blimit, limit, + thresh, bd); src[0] = t_dst; src[1] = t_dst + 8 * 8; dst[0] = s - 8; @@ -1205,8 +1178,8 @@ void vpx_highbd_lpf_vertical_16_dual_sse2(uint16_t *s, highbd_transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16); // Loop filtering - highbd_mb_lpf_horizontal_edge_w_sse2_16(t_dst + 8 * 16, 16, blimit, limit, - thresh, bd); + vpx_highbd_lpf_horizontal_edge_16_sse2(t_dst + 8 * 16, 16, blimit, limit, + thresh, bd); // Transpose back highbd_transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, p); diff --git a/libvpx/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm b/libvpx/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm index 93df92a9e..30ee81b68 100644 --- a/libvpx/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm +++ b/libvpx/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm @@ -79,20 +79,13 @@ SECTION .text %macro INC_SRC_BY_SRC_STRIDE 0 %if ARCH_X86=1 && CONFIG_PIC=1 - lea srcq, [srcq + src_stridemp*2] + add srcq, src_stridemp + add srcq, src_stridemp %else lea srcq, [srcq + src_strideq*2] %endif %endmacro -%macro INC_SRC_BY_SRC_2STRIDE 0 -%if ARCH_X86=1 && CONFIG_PIC=1 - lea srcq, [srcq + src_stridemp*4] -%else - lea srcq, [srcq + src_strideq*4] -%endif -%endmacro - %macro SUBPEL_VARIANCE 1-2 0 ; W %define bilin_filter_m bilin_filter_m_sse2 %define filter_idx_shift 5 @@ -123,8 +116,10 @@ SECTION .text %define sec_str sec_stridemp ; Store bilin_filter and pw_8 location in stack - GET_GOT eax - add esp, 4 ; restore esp + %if GET_GOT_DEFINED == 1 + GET_GOT eax + add esp, 4 ; restore esp + %endif lea ecx, [GLOBAL(bilin_filter_m)] mov g_bilin_filterm, ecx @@ -140,8 +135,10 @@ SECTION 
.text %define block_height heightd ; Store bilin_filter and pw_8 location in stack - GET_GOT eax - add esp, 4 ; restore esp + %if GET_GOT_DEFINED == 1 + GET_GOT eax + add esp, 4 ; restore esp + %endif lea ecx, [GLOBAL(bilin_filter_m)] mov g_bilin_filterm, ecx @@ -980,8 +977,9 @@ SECTION .text .x_other_y_other_loop: movu m2, [srcq] movu m4, [srcq+2] - movu m3, [srcq+src_strideq*2] - movu m5, [srcq+src_strideq*2+2] + INC_SRC_BY_SRC_STRIDE + movu m3, [srcq] + movu m5, [srcq+2] pmullw m2, filter_x_a pmullw m4, filter_x_b paddw m2, filter_rnd @@ -1014,7 +1012,7 @@ SECTION .text SUM_SSE m0, m2, m4, m3, m6, m7 mova m0, m5 - INC_SRC_BY_SRC_2STRIDE + INC_SRC_BY_SRC_STRIDE lea dstq, [dstq + dst_strideq * 4] %if %2 == 1 ; avg add secq, sec_str diff --git a/libvpx/vpx_dsp/x86/highbd_variance_sse2.c b/libvpx/vpx_dsp/x86/highbd_variance_sse2.c index b45331caa..14d029c9a 100644 --- a/libvpx/vpx_dsp/x86/highbd_variance_sse2.c +++ b/libvpx/vpx_dsp/x86/highbd_variance_sse2.c @@ -243,20 +243,24 @@ unsigned int vpx_highbd_12_mse8x8_sse2(const uint8_t *src8, int src_stride, } #if CONFIG_USE_X86INC +// The 2 unused parameters are place holders for PIC enabled build. 
+// These definitions are for functions defined in +// highbd_subpel_variance_impl_sse2.asm #define DECL(w, opt) \ int vpx_highbd_sub_pixel_variance##w##xh_##opt(const uint16_t *src, \ ptrdiff_t src_stride, \ int x_offset, int y_offset, \ const uint16_t *dst, \ ptrdiff_t dst_stride, \ - int height, unsigned int *sse); -#define DECLS(opt1, opt2) \ - DECL(8, opt1); \ - DECL(16, opt1) - -DECLS(sse2, sse); -// TODO(johannkoenig): enable the ssse3 or delete -// DECLS(ssse3, ssse3); + int height, \ + unsigned int *sse, \ + void *unused0, void *unused); +#define DECLS(opt) \ + DECL(8, opt); \ + DECL(16, opt) + +DECLS(sse2); + #undef DECLS #undef DECL @@ -274,7 +278,7 @@ uint32_t vpx_highbd_8_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src8, \ int se = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src, src_stride, \ x_offset, y_offset, \ dst, dst_stride, h, \ - &sse); \ + &sse, NULL, NULL); \ if (w > wf) { \ unsigned int sse2; \ int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 16, \ @@ -282,19 +286,20 @@ uint32_t vpx_highbd_8_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src8, \ x_offset, y_offset, \ dst + 16, \ dst_stride, \ - h, &sse2); \ + h, &sse2, \ + NULL, NULL); \ se += se2; \ sse += sse2; \ if (w > wf * 2) { \ se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \ x_offset, y_offset, \ dst + 32, dst_stride, \ - h, &sse2); \ + h, &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ src + 48, src_stride, x_offset, y_offset, \ - dst + 48, dst_stride, h, &sse2); \ + dst + 48, dst_stride, h, &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ } \ @@ -312,7 +317,7 @@ uint32_t vpx_highbd_10_sub_pixel_variance##w##x##h##_##opt( \ int se = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src, src_stride, \ x_offset, y_offset, \ dst, dst_stride, \ - h, &sse); \ + h, &sse, NULL, NULL); \ if (w > wf) { \ uint32_t sse2; \ int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 16, \ @@ 
-320,20 +325,21 @@ uint32_t vpx_highbd_10_sub_pixel_variance##w##x##h##_##opt( \ x_offset, y_offset, \ dst + 16, \ dst_stride, \ - h, &sse2); \ + h, &sse2, \ + NULL, NULL); \ se += se2; \ sse += sse2; \ if (w > wf * 2) { \ se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \ x_offset, y_offset, \ dst + 32, dst_stride, \ - h, &sse2); \ + h, &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 48, src_stride, \ x_offset, y_offset, \ dst + 48, dst_stride, \ - h, &sse2); \ + h, &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ } \ @@ -359,27 +365,27 @@ uint32_t vpx_highbd_12_sub_pixel_variance##w##x##h##_##opt( \ int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ src + (start_row * src_stride), src_stride, \ x_offset, y_offset, dst + (start_row * dst_stride), \ - dst_stride, height, &sse2); \ + dst_stride, height, &sse2, NULL, NULL); \ se += se2; \ long_sse += sse2; \ if (w > wf) { \ se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ src + 16 + (start_row * src_stride), src_stride, \ x_offset, y_offset, dst + 16 + (start_row * dst_stride), \ - dst_stride, height, &sse2); \ + dst_stride, height, &sse2, NULL, NULL); \ se += se2; \ long_sse += sse2; \ if (w > wf * 2) { \ se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ src + 32 + (start_row * src_stride), src_stride, \ x_offset, y_offset, dst + 32 + (start_row * dst_stride), \ - dst_stride, height, &sse2); \ + dst_stride, height, &sse2, NULL, NULL); \ se += se2; \ long_sse += sse2; \ se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ src + 48 + (start_row * src_stride), src_stride, \ x_offset, y_offset, dst + 48 + (start_row * dst_stride), \ - dst_stride, height, &sse2); \ + dst_stride, height, &sse2, NULL, NULL); \ se += se2; \ long_sse += sse2; \ }\ @@ -391,25 +397,26 @@ uint32_t vpx_highbd_12_sub_pixel_variance##w##x##h##_##opt( \ return sse - ((cast se * se) >> (wlog2 + hlog2)); \ } -#define FNS(opt1, opt2) \ -FN(64, 64, 16, 6, 6, 
opt1, (int64_t)); \ -FN(64, 32, 16, 6, 5, opt1, (int64_t)); \ -FN(32, 64, 16, 5, 6, opt1, (int64_t)); \ -FN(32, 32, 16, 5, 5, opt1, (int64_t)); \ -FN(32, 16, 16, 5, 4, opt1, (int64_t)); \ -FN(16, 32, 16, 4, 5, opt1, (int64_t)); \ -FN(16, 16, 16, 4, 4, opt1, (int64_t)); \ -FN(16, 8, 16, 4, 3, opt1, (int64_t)); \ -FN(8, 16, 8, 3, 4, opt1, (int64_t)); \ -FN(8, 8, 8, 3, 3, opt1, (int64_t)); \ -FN(8, 4, 8, 3, 2, opt1, (int64_t)); +#define FNS(opt) \ +FN(64, 64, 16, 6, 6, opt, (int64_t)); \ +FN(64, 32, 16, 6, 5, opt, (int64_t)); \ +FN(32, 64, 16, 5, 6, opt, (int64_t)); \ +FN(32, 32, 16, 5, 5, opt, (int64_t)); \ +FN(32, 16, 16, 5, 4, opt, (int64_t)); \ +FN(16, 32, 16, 4, 5, opt, (int64_t)); \ +FN(16, 16, 16, 4, 4, opt, (int64_t)); \ +FN(16, 8, 16, 4, 3, opt, (int64_t)); \ +FN(8, 16, 8, 3, 4, opt, (int64_t)); \ +FN(8, 8, 8, 3, 3, opt, (int64_t)); \ +FN(8, 4, 8, 3, 2, opt, (int64_t)); -FNS(sse2, sse); +FNS(sse2); #undef FNS #undef FN +// The 2 unused parameters are place holders for PIC enabled build. 
#define DECL(w, opt) \ int vpx_highbd_sub_pixel_avg_variance##w##xh_##opt(const uint16_t *src, \ ptrdiff_t src_stride, \ @@ -419,7 +426,8 @@ int vpx_highbd_sub_pixel_avg_variance##w##xh_##opt(const uint16_t *src, \ const uint16_t *sec, \ ptrdiff_t sec_stride, \ int height, \ - unsigned int *sse); + unsigned int *sse, \ + void *unused0, void *unused); #define DECLS(opt1) \ DECL(16, opt1) \ DECL(8, opt1) @@ -439,23 +447,23 @@ uint32_t vpx_highbd_8_sub_pixel_avg_variance##w##x##h##_##opt( \ uint16_t *sec = CONVERT_TO_SHORTPTR(sec8); \ int se = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ src, src_stride, x_offset, \ - y_offset, dst, dst_stride, sec, w, h, &sse); \ + y_offset, dst, dst_stride, sec, w, h, &sse, NULL, NULL); \ if (w > wf) { \ uint32_t sse2; \ int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ src + 16, src_stride, x_offset, y_offset, \ - dst + 16, dst_stride, sec + 16, w, h, &sse2); \ + dst + 16, dst_stride, sec + 16, w, h, &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ if (w > wf * 2) { \ se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ src + 32, src_stride, x_offset, y_offset, \ - dst + 32, dst_stride, sec + 32, w, h, &sse2); \ + dst + 32, dst_stride, sec + 32, w, h, &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ src + 48, src_stride, x_offset, y_offset, \ - dst + 48, dst_stride, sec + 48, w, h, &sse2); \ + dst + 48, dst_stride, sec + 48, w, h, &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ } \ @@ -475,14 +483,15 @@ uint32_t vpx_highbd_10_sub_pixel_avg_variance##w##x##h##_##opt( \ int se = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ src, src_stride, x_offset, \ y_offset, dst, dst_stride, \ - sec, w, h, &sse); \ + sec, w, h, &sse, NULL, NULL); \ if (w > wf) { \ uint32_t sse2; \ int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ src + 16, src_stride, \ x_offset, y_offset, \ dst + 16, dst_stride, \ - sec + 16, w, h, &sse2); \ + sec + 16, w, h, &sse2, 
\ + NULL, NULL); \ se += se2; \ sse += sse2; \ if (w > wf * 2) { \ @@ -490,14 +499,16 @@ uint32_t vpx_highbd_10_sub_pixel_avg_variance##w##x##h##_##opt( \ src + 32, src_stride, \ x_offset, y_offset, \ dst + 32, dst_stride, \ - sec + 32, w, h, &sse2); \ + sec + 32, w, h, &sse2, \ + NULL, NULL); \ se += se2; \ sse += sse2; \ se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ src + 48, src_stride, \ x_offset, y_offset, \ dst + 48, dst_stride, \ - sec + 48, w, h, &sse2); \ + sec + 48, w, h, &sse2, \ + NULL, NULL); \ se += se2; \ sse += sse2; \ } \ @@ -525,7 +536,7 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \ int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ src + (start_row * src_stride), src_stride, x_offset, \ y_offset, dst + (start_row * dst_stride), dst_stride, \ - sec + (start_row * w), w, height, &sse2); \ + sec + (start_row * w), w, height, &sse2, NULL, NULL); \ se += se2; \ long_sse += sse2; \ if (w > wf) { \ @@ -533,7 +544,7 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \ src + 16 + (start_row * src_stride), src_stride, \ x_offset, y_offset, \ dst + 16 + (start_row * dst_stride), dst_stride, \ - sec + 16 + (start_row * w), w, height, &sse2); \ + sec + 16 + (start_row * w), w, height, &sse2, NULL, NULL); \ se += se2; \ long_sse += sse2; \ if (w > wf * 2) { \ @@ -541,14 +552,14 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \ src + 32 + (start_row * src_stride), src_stride, \ x_offset, y_offset, \ dst + 32 + (start_row * dst_stride), dst_stride, \ - sec + 32 + (start_row * w), w, height, &sse2); \ + sec + 32 + (start_row * w), w, height, &sse2, NULL, NULL); \ se += se2; \ long_sse += sse2; \ se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ src + 48 + (start_row * src_stride), src_stride, \ x_offset, y_offset, \ dst + 48 + (start_row * dst_stride), dst_stride, \ - sec + 48 + (start_row * w), w, height, &sse2); \ + sec + 48 + (start_row * w), w, height, &sse2, NULL, NULL); \ 
se += se2; \ long_sse += sse2; \ } \ diff --git a/libvpx/vpx_dsp/x86/intrapred_sse2.asm b/libvpx/vpx_dsp/x86/intrapred_sse2.asm index 22b573188..cd6a6ae98 100644 --- a/libvpx/vpx_dsp/x86/intrapred_sse2.asm +++ b/libvpx/vpx_dsp/x86/intrapred_sse2.asm @@ -11,6 +11,7 @@ %include "third_party/x86inc/x86inc.asm" SECTION_RODATA +pb_1: times 16 db 1 pw_4: times 8 dw 4 pw_8: times 8 dw 8 pw_16: times 8 dw 16 @@ -23,17 +24,127 @@ pw2_32: times 8 dw 16 SECTION .text -INIT_MMX sse -cglobal dc_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset +; ------------------------------------------ +; input: x, y, z, result +; +; trick from pascal +; (x+2y+z+2)>>2 can be calculated as: +; result = avg(x,z) +; result -= xor(x,z) & 1 +; result = avg(result,y) +; ------------------------------------------ +%macro X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 4 + pavgb %4, %1, %3 + pxor %3, %1 + pand %3, [GLOBAL(pb_1)] + psubb %4, %3 + pavgb %4, %2 +%endmacro + +INIT_XMM sse2 +cglobal d45_predictor_4x4, 3, 4, 4, dst, stride, above, goffset GET_GOT goffsetq - pxor m1, m1 + movq m0, [aboveq] + DEFINE_ARGS dst, stride, temp + psrldq m1, m0, 1 + psrldq m2, m0, 2 + X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3 + + ; store 4 lines + movd [dstq ], m3 + psrlq m3, 8 + movd [dstq+strideq ], m3 + lea dstq, [dstq+strideq*2] + psrlq m3, 8 + movd [dstq ], m3 + psrlq m3, 8 + movd [dstq+strideq ], m3 + psrlq m0, 56 + movd tempq, m0 + mov [dstq+strideq+3], tempb + + RESTORE_GOT + RET + +INIT_XMM sse2 +cglobal d45_predictor_8x8, 3, 4, 4, dst, stride, above, goffset + GET_GOT goffsetq + + movu m1, [aboveq] + pslldq m0, m1, 1 + psrldq m2, m1, 1 + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3 + punpckhbw m0, m0 ; 7 7 + punpcklwd m0, m0 ; 7 7 7 7 + punpckldq m0, m0 ; 7 7 7 7 7 7 7 7 + punpcklqdq m3, m0 ; -1 0 1 2 3 4 5 6 7 7 7 7 7 7 7 7 + + ; store 4 lines + psrldq m3, 1 + movq [dstq ], m3 + psrldq m3, 1 + movq [dstq+strideq ], m3 + psrldq m3, 1 + movq 
[dstq+strideq*2], m3 + psrldq m3, 1 + movq [dstq+stride3q ], m3 + lea dstq, [dstq+strideq*4] + + ; store next 4 lines + psrldq m3, 1 + movq [dstq ], m3 + psrldq m3, 1 + movq [dstq+strideq ], m3 + psrldq m3, 1 + movq [dstq+strideq*2], m3 + psrldq m3, 1 + movq [dstq+stride3q ], m3 + + RESTORE_GOT + RET + +INIT_XMM sse2 +cglobal d207_predictor_4x4, 4, 4, 5, dst, stride, unused, left, goffset + GET_GOT goffsetq + + movd m0, [leftq] ; abcd [byte] + punpcklbw m4, m0, m0 ; aabb ccdd + punpcklwd m4, m4 ; aaaa bbbb cccc dddd + psrldq m4, 12 ; dddd + punpckldq m0, m4 ; abcd dddd + psrldq m1, m0, 1 ; bcdd + psrldq m2, m0, 2 ; cddd + + X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3 ; a2bc b2cd c3d d + pavgb m1, m0 ; ab, bc, cd, d [byte] + + punpcklbw m1, m3 ; ab, a2bc, bc, b2cd, cd, c3d, d, d + movd [dstq ], m1 + psrlq m1, 16 ; bc, b2cd, cd, c3d, d, d + movd [dstq+strideq], m1 + + lea dstq, [dstq+strideq*2] + psrlq m1, 16 ; cd, c3d, d, d + movd [dstq ], m1 + movd [dstq+strideq], m4 ; d, d, d, d + RESTORE_GOT + RET + +INIT_XMM sse2 +cglobal dc_predictor_4x4, 4, 5, 3, dst, stride, above, left, goffset + GET_GOT goffsetq + + movd m2, [leftq] movd m0, [aboveq] - punpckldq m0, [leftq] + pxor m1, m1 + punpckldq m0, m2 psadbw m0, m1 paddw m0, [GLOBAL(pw_4)] psraw m0, 3 - pshufw m0, m0, 0x0 + pshuflw m0, m0, 0x0 packuswb m0, m0 movd [dstq ], m0 movd [dstq+strideq], m0 @@ -44,8 +155,9 @@ cglobal dc_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset RESTORE_GOT RET -INIT_MMX sse -cglobal dc_left_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset +INIT_XMM sse2 +cglobal dc_left_predictor_4x4, 2, 5, 2, dst, stride, above, left, goffset + movifnidn leftq, leftmp GET_GOT goffsetq pxor m1, m1 @@ -53,7 +165,7 @@ cglobal dc_left_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset psadbw m0, m1 paddw m0, [GLOBAL(pw2_4)] psraw m0, 2 - pshufw m0, m0, 0x0 + pshuflw m0, m0, 0x0 packuswb m0, m0 movd [dstq ], m0 movd [dstq+strideq], m0 @@ -64,8 +176,8 @@ cglobal 
dc_left_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset RESTORE_GOT RET -INIT_MMX sse -cglobal dc_top_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset +INIT_XMM sse2 +cglobal dc_top_predictor_4x4, 3, 5, 2, dst, stride, above, left, goffset GET_GOT goffsetq pxor m1, m1 @@ -73,7 +185,7 @@ cglobal dc_top_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset psadbw m0, m1 paddw m0, [GLOBAL(pw2_4)] psraw m0, 2 - pshufw m0, m0, 0x0 + pshuflw m0, m0, 0x0 packuswb m0, m0 movd [dstq ], m0 movd [dstq+strideq], m0 @@ -84,7 +196,7 @@ cglobal dc_top_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset RESTORE_GOT RET -INIT_MMX sse +INIT_XMM sse2 cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset GET_GOT goffsetq @@ -98,8 +210,8 @@ cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset paddw m0, m2 paddw m0, [GLOBAL(pw_8)] psraw m0, 4 - pshufw m0, m0, 0x0 - packuswb m0, m0 + punpcklbw m0, m0 + pshuflw m0, m0, 0x0 movq [dstq ], m0 movq [dstq+strideq ], m0 movq [dstq+strideq*2], m0 @@ -113,8 +225,8 @@ cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset RESTORE_GOT RET -INIT_MMX sse -cglobal dc_top_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset +INIT_XMM sse2 +cglobal dc_top_predictor_8x8, 3, 5, 2, dst, stride, above, left, goffset GET_GOT goffsetq pxor m1, m1 @@ -124,8 +236,8 @@ cglobal dc_top_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset psadbw m0, m1 paddw m0, [GLOBAL(pw2_8)] psraw m0, 3 - pshufw m0, m0, 0x0 - packuswb m0, m0 + punpcklbw m0, m0 + pshuflw m0, m0, 0x0 movq [dstq ], m0 movq [dstq+strideq ], m0 movq [dstq+strideq*2], m0 @@ -139,8 +251,9 @@ cglobal dc_top_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset RESTORE_GOT RET -INIT_MMX sse -cglobal dc_left_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset +INIT_XMM sse2 +cglobal dc_left_predictor_8x8, 2, 5, 2, dst, stride, above, left, goffset + movifnidn leftq, leftmp GET_GOT goffsetq pxor m1, m1 @@ -150,8 
+263,8 @@ cglobal dc_left_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset psadbw m0, m1 paddw m0, [GLOBAL(pw2_8)] psraw m0, 3 - pshufw m0, m0, 0x0 - packuswb m0, m0 + punpcklbw m0, m0 + pshuflw m0, m0, 0x0 movq [dstq ], m0 movq [dstq+strideq ], m0 movq [dstq+strideq*2], m0 @@ -165,8 +278,8 @@ cglobal dc_left_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset RESTORE_GOT RET -INIT_MMX sse -cglobal dc_128_predictor_4x4, 4, 5, 3, dst, stride, above, left, goffset +INIT_XMM sse2 +cglobal dc_128_predictor_4x4, 2, 5, 1, dst, stride, above, left, goffset GET_GOT goffsetq DEFINE_ARGS dst, stride, stride3 @@ -179,8 +292,8 @@ cglobal dc_128_predictor_4x4, 4, 5, 3, dst, stride, above, left, goffset RESTORE_GOT RET -INIT_MMX sse -cglobal dc_128_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset +INIT_XMM sse2 +cglobal dc_128_predictor_8x8, 2, 5, 1, dst, stride, above, left, goffset GET_GOT goffsetq DEFINE_ARGS dst, stride, stride3 @@ -236,14 +349,11 @@ cglobal dc_top_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset GET_GOT goffsetq pxor m1, m1 - pxor m2, m2 mova m0, [aboveq] DEFINE_ARGS dst, stride, stride3, lines4 lea stride3q, [strideq*3] mov lines4d, 4 psadbw m0, m1 - psadbw m2, m1 - paddw m0, m2 movhlps m2, m0 paddw m0, m2 paddw m0, [GLOBAL(pw2_16)] @@ -268,14 +378,11 @@ cglobal dc_left_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset GET_GOT goffsetq pxor m1, m1 - pxor m2, m2 mova m0, [leftq] DEFINE_ARGS dst, stride, stride3, lines4 lea stride3q, [strideq*3] mov lines4d, 4 psadbw m0, m1 - psadbw m2, m1 - paddw m0, m2 movhlps m2, m0 paddw m0, m2 paddw m0, [GLOBAL(pw2_16)] @@ -452,7 +559,7 @@ cglobal dc_128_predictor_32x32, 4, 5, 3, dst, stride, above, left, goffset RESTORE_GOT RET -INIT_MMX sse +INIT_XMM sse2 cglobal v_predictor_4x4, 3, 3, 1, dst, stride, above movd m0, [aboveq] movd [dstq ], m0 @@ -462,7 +569,7 @@ cglobal v_predictor_4x4, 3, 3, 1, dst, stride, above movd [dstq+strideq], m0 RET -INIT_MMX sse +INIT_XMM sse2 
cglobal v_predictor_8x8, 3, 3, 1, dst, stride, above movq m0, [aboveq] DEFINE_ARGS dst, stride, stride3 @@ -515,108 +622,196 @@ cglobal v_predictor_32x32, 3, 4, 2, dst, stride, above jnz .loop REP_RET -INIT_MMX sse -cglobal tm_predictor_4x4, 4, 4, 4, dst, stride, above, left - pxor m1, m1 - movd m2, [aboveq-1] - movd m0, [aboveq] - punpcklbw m2, m1 - punpcklbw m0, m1 - pshufw m2, m2, 0x0 - DEFINE_ARGS dst, stride, line, left +INIT_XMM sse2 +cglobal h_predictor_4x4, 2, 4, 4, dst, stride, line, left + movifnidn leftq, leftmp + movd m0, [leftq] + punpcklbw m0, m0 + punpcklbw m0, m0 + pshufd m1, m0, 0x1 + movd [dstq ], m0 + movd [dstq+strideq], m1 + pshufd m2, m0, 0x2 + lea dstq, [dstq+strideq*2] + pshufd m3, m0, 0x3 + movd [dstq ], m2 + movd [dstq+strideq], m3 + RET + +INIT_XMM sse2 +cglobal h_predictor_8x8, 2, 5, 3, dst, stride, line, left + movifnidn leftq, leftmp mov lineq, -2 - add leftq, 4 - psubw m0, m2 + DEFINE_ARGS dst, stride, line, left, stride3 + lea stride3q, [strideq*3] + movq m0, [leftq ] + punpcklbw m0, m0 ; l1 l1 l2 l2 ... 
l8 l8 .loop: - movd m2, [leftq+lineq*2] - movd m3, [leftq+lineq*2+1] + pshuflw m1, m0, 0x0 ; l1 l1 l1 l1 l1 l1 l1 l1 + pshuflw m2, m0, 0x55 ; l2 l2 l2 l2 l2 l2 l2 l2 + movq [dstq ], m1 + movq [dstq+strideq], m2 + pshuflw m1, m0, 0xaa + pshuflw m2, m0, 0xff + movq [dstq+strideq*2], m1 + movq [dstq+stride3q ], m2 + pshufd m0, m0, 0xe ; [63:0] l5 l5 l6 l6 l7 l7 l8 l8 + inc lineq + lea dstq, [dstq+strideq*4] + jnz .loop + REP_RET + +INIT_XMM sse2 +cglobal h_predictor_16x16, 2, 5, 3, dst, stride, line, left + movifnidn leftq, leftmp + mov lineq, -4 + DEFINE_ARGS dst, stride, line, left, stride3 + lea stride3q, [strideq*3] +.loop: + movd m0, [leftq] + punpcklbw m0, m0 + punpcklbw m0, m0 ; l1 to l4 each repeated 4 times + pshufd m1, m0, 0x0 ; l1 repeated 16 times + pshufd m2, m0, 0x55 ; l2 repeated 16 times + mova [dstq ], m1 + mova [dstq+strideq ], m2 + pshufd m1, m0, 0xaa + pshufd m2, m0, 0xff + mova [dstq+strideq*2], m1 + mova [dstq+stride3q ], m2 + inc lineq + lea leftq, [leftq+4 ] + lea dstq, [dstq+strideq*4] + jnz .loop + REP_RET + +INIT_XMM sse2 +cglobal h_predictor_32x32, 2, 5, 3, dst, stride, line, left + movifnidn leftq, leftmp + mov lineq, -8 + DEFINE_ARGS dst, stride, line, left, stride3 + lea stride3q, [strideq*3] +.loop: + movd m0, [leftq] + punpcklbw m0, m0 + punpcklbw m0, m0 ; l1 to l4 each repeated 4 times + pshufd m1, m0, 0x0 ; l1 repeated 16 times + pshufd m2, m0, 0x55 ; l2 repeated 16 times + mova [dstq ], m1 + mova [dstq+16 ], m1 + mova [dstq+strideq ], m2 + mova [dstq+strideq+16 ], m2 + pshufd m1, m0, 0xaa + pshufd m2, m0, 0xff + mova [dstq+strideq*2 ], m1 + mova [dstq+strideq*2+16], m1 + mova [dstq+stride3q ], m2 + mova [dstq+stride3q+16 ], m2 + inc lineq + lea leftq, [leftq+4 ] + lea dstq, [dstq+strideq*4] + jnz .loop + REP_RET + +INIT_XMM sse2 +cglobal tm_predictor_4x4, 4, 4, 5, dst, stride, above, left + pxor m1, m1 + movq m0, [aboveq-1]; [63:0] tl t1 t2 t3 t4 x x x + punpcklbw m0, m1 + pshuflw m2, m0, 0x0 ; [63:0] tl tl tl tl [word] + psrldq m0, 
2 + psubw m0, m2 ; [63:0] t1-tl t2-tl t3-tl t4-tl [word] + movd m2, [leftq] punpcklbw m2, m1 - punpcklbw m3, m1 - pshufw m2, m2, 0x0 - pshufw m3, m3, 0x0 - paddw m2, m0 + pshuflw m4, m2, 0x0 ; [63:0] l1 l1 l1 l1 [word] + pshuflw m3, m2, 0x55 ; [63:0] l2 l2 l2 l2 [word] + paddw m4, m0 paddw m3, m0 - packuswb m2, m2 + packuswb m4, m4 packuswb m3, m3 - movd [dstq ], m2 + movd [dstq ], m4 movd [dstq+strideq], m3 lea dstq, [dstq+strideq*2] - inc lineq - jnz .loop - REP_RET + pshuflw m4, m2, 0xaa + pshuflw m3, m2, 0xff + paddw m4, m0 + paddw m3, m0 + packuswb m4, m4 + packuswb m3, m3 + movd [dstq ], m4 + movd [dstq+strideq], m3 + RET INIT_XMM sse2 -cglobal tm_predictor_8x8, 4, 4, 4, dst, stride, above, left +cglobal tm_predictor_8x8, 4, 4, 5, dst, stride, above, left pxor m1, m1 movd m2, [aboveq-1] movq m0, [aboveq] punpcklbw m2, m1 - punpcklbw m0, m1 - pshuflw m2, m2, 0x0 + punpcklbw m0, m1 ; t1 t2 t3 t4 t5 t6 t7 t8 [word] + pshuflw m2, m2, 0x0 ; [63:0] tl tl tl tl [word] DEFINE_ARGS dst, stride, line, left mov lineq, -4 - punpcklqdq m2, m2 - add leftq, 8 - psubw m0, m2 -.loop: - movd m2, [leftq+lineq*2] - movd m3, [leftq+lineq*2+1] - punpcklbw m2, m1 - punpcklbw m3, m1 - pshuflw m2, m2, 0x0 - pshuflw m3, m3, 0x0 - punpcklqdq m2, m2 - punpcklqdq m3, m3 - paddw m2, m0 + punpcklqdq m2, m2 ; tl tl tl tl tl tl tl tl [word] + psubw m0, m2 ; t1-tl t2-tl ... 
t8-tl [word] + movq m2, [leftq] + punpcklbw m2, m1 ; l1 l2 l3 l4 l5 l6 l7 l8 [word] +.loop + pshuflw m4, m2, 0x0 ; [63:0] l1 l1 l1 l1 [word] + pshuflw m3, m2, 0x55 ; [63:0] l2 l2 l2 l2 [word] + punpcklqdq m4, m4 ; l1 l1 l1 l1 l1 l1 l1 l1 [word] + punpcklqdq m3, m3 ; l2 l2 l2 l2 l2 l2 l2 l2 [word] + paddw m4, m0 paddw m3, m0 - packuswb m2, m3 - movq [dstq ], m2 - movhps [dstq+strideq], m2 + packuswb m4, m3 + movq [dstq ], m4 + movhps [dstq+strideq], m4 lea dstq, [dstq+strideq*2] + psrldq m2, 4 inc lineq jnz .loop REP_RET INIT_XMM sse2 -cglobal tm_predictor_16x16, 4, 4, 7, dst, stride, above, left +cglobal tm_predictor_16x16, 4, 5, 8, dst, stride, above, left pxor m1, m1 - movd m2, [aboveq-1] - mova m0, [aboveq] - punpcklbw m2, m1 + mova m2, [aboveq-16]; + mova m0, [aboveq] ; t1 t2 ... t16 [byte] + punpckhbw m2, m1 ; [127:112] tl [word] punpckhbw m4, m0, m1 - punpcklbw m0, m1 - pshuflw m2, m2, 0x0 - DEFINE_ARGS dst, stride, line, left + punpcklbw m0, m1 ; m0:m4 t1 t2 ... t16 [word] + DEFINE_ARGS dst, stride, line, left, stride8 mov lineq, -8 - punpcklqdq m2, m2 - add leftq, 16 + pshufhw m2, m2, 0xff + mova m3, [leftq] ; l1 l2 ... l16 [byte] + punpckhqdq m2, m2 ; tl repeated 8 times [word] psubw m0, m2 - psubw m4, m2 + psubw m4, m2 ; m0:m4 t1-tl t2-tl ... t16-tl [word] + punpckhbw m5, m3, m1 + punpcklbw m3, m1 ; m3:m5 l1 l2 ... 
l16 [word] + lea stride8q, [strideq*8] .loop: - movd m2, [leftq+lineq*2] - movd m3, [leftq+lineq*2+1] - punpcklbw m2, m1 - punpcklbw m3, m1 - pshuflw m2, m2, 0x0 - pshuflw m3, m3, 0x0 - punpcklqdq m2, m2 - punpcklqdq m3, m3 - paddw m5, m2, m0 - paddw m6, m3, m0 - paddw m2, m4 - paddw m3, m4 - packuswb m5, m2 - packuswb m6, m3 - mova [dstq ], m5 - mova [dstq+strideq], m6 - lea dstq, [dstq+strideq*2] + pshuflw m6, m3, 0x0 + pshuflw m7, m5, 0x0 + punpcklqdq m6, m6 ; l1 repeated 8 times [word] + punpcklqdq m7, m7 ; l8 repeated 8 times [word] + paddw m1, m6, m0 + paddw m6, m4 ; m1:m6 ti-tl+l1 [i=1,15] [word] + psrldq m5, 2 + packuswb m1, m6 + mova [dstq ], m1 + paddw m1, m7, m0 + paddw m7, m4 ; m1:m7 ti-tl+l8 [i=1,15] [word] + psrldq m3, 2 + packuswb m1, m7 + mova [dstq+stride8q], m1 inc lineq + lea dstq, [dstq+strideq] jnz .loop REP_RET -%if ARCH_X86_64 INIT_XMM sse2 -cglobal tm_predictor_32x32, 4, 4, 10, dst, stride, above, left +cglobal tm_predictor_32x32, 4, 4, 8, dst, stride, above, left pxor m1, m1 movd m2, [aboveq-1] mova m0, [aboveq] @@ -637,31 +832,29 @@ cglobal tm_predictor_32x32, 4, 4, 10, dst, stride, above, left psubw m5, m2 .loop: movd m2, [leftq+lineq*2] - movd m6, [leftq+lineq*2+1] + pxor m1, m1 punpcklbw m2, m1 - punpcklbw m6, m1 + pshuflw m7, m2, 0x55 pshuflw m2, m2, 0x0 - pshuflw m6, m6, 0x0 punpcklqdq m2, m2 - punpcklqdq m6, m6 - paddw m7, m2, m0 - paddw m8, m2, m3 - paddw m9, m2, m4 - paddw m2, m5 - packuswb m7, m8 - packuswb m9, m2 - paddw m2, m6, m0 - paddw m8, m6, m3 - mova [dstq ], m7 - paddw m7, m6, m4 - paddw m6, m5 - mova [dstq +16], m9 - packuswb m2, m8 - packuswb m7, m6 - mova [dstq+strideq ], m2 - mova [dstq+strideq+16], m7 + punpcklqdq m7, m7 + paddw m6, m2, m3 + paddw m1, m2, m0 + packuswb m1, m6 + mova [dstq ], m1 + paddw m6, m2, m5 + paddw m1, m2, m4 + packuswb m1, m6 + mova [dstq+16 ], m1 + paddw m6, m7, m3 + paddw m1, m7, m0 + packuswb m1, m6 + mova [dstq+strideq ], m1 + paddw m6, m7, m5 + paddw m1, m7, m4 + packuswb m1, m6 + mova 
[dstq+strideq+16], m1 lea dstq, [dstq+strideq*2] inc lineq jnz .loop REP_RET -%endif diff --git a/libvpx/vpx_dsp/x86/intrapred_ssse3.asm b/libvpx/vpx_dsp/x86/intrapred_ssse3.asm index 88df9b2d1..5e0139fa8 100644 --- a/libvpx/vpx_dsp/x86/intrapred_ssse3.asm +++ b/libvpx/vpx_dsp/x86/intrapred_ssse3.asm @@ -13,7 +13,6 @@ SECTION_RODATA pb_1: times 16 db 1 -sh_b01234577: db 0, 1, 2, 3, 4, 5, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0 sh_b12345677: db 1, 2, 3, 4, 5, 6, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0 sh_b23456777: db 2, 3, 4, 5, 6, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0 sh_b0123456777777777: db 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7 @@ -28,151 +27,9 @@ sh_b65432108: db 6, 5, 4, 3, 2, 1, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0 sh_b54321089: db 5, 4, 3, 2, 1, 0, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0 sh_b89abcdef: db 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0 sh_bfedcba9876543210: db 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 -sh_b1233: db 1, 2, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -sh_b2333: db 2, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 SECTION .text -INIT_MMX ssse3 -cglobal h_predictor_4x4, 2, 4, 3, dst, stride, line, left - movifnidn leftq, leftmp - add leftq, 4 - mov lineq, -2 - pxor m0, m0 -.loop: - movd m1, [leftq+lineq*2 ] - movd m2, [leftq+lineq*2+1] - pshufb m1, m0 - pshufb m2, m0 - movd [dstq ], m1 - movd [dstq+strideq], m2 - lea dstq, [dstq+strideq*2] - inc lineq - jnz .loop - REP_RET - -INIT_MMX ssse3 -cglobal h_predictor_8x8, 2, 4, 3, dst, stride, line, left - movifnidn leftq, leftmp - add leftq, 8 - mov lineq, -4 - pxor m0, m0 -.loop: - movd m1, [leftq+lineq*2 ] - movd m2, [leftq+lineq*2+1] - pshufb m1, m0 - pshufb m2, m0 - movq [dstq ], m1 - movq [dstq+strideq], m2 - lea dstq, [dstq+strideq*2] - inc lineq - jnz .loop - REP_RET - -INIT_XMM ssse3 -cglobal h_predictor_16x16, 2, 4, 3, dst, stride, line, left - movifnidn leftq, leftmp - add leftq, 16 - mov lineq, -8 - pxor m0, m0 -.loop: - movd m1, [leftq+lineq*2 ] - movd m2, [leftq+lineq*2+1] - pshufb m1, m0 - pshufb 
m2, m0 - mova [dstq ], m1 - mova [dstq+strideq], m2 - lea dstq, [dstq+strideq*2] - inc lineq - jnz .loop - REP_RET - -INIT_XMM ssse3 -cglobal h_predictor_32x32, 2, 4, 3, dst, stride, line, left - movifnidn leftq, leftmp - add leftq, 32 - mov lineq, -16 - pxor m0, m0 -.loop: - movd m1, [leftq+lineq*2 ] - movd m2, [leftq+lineq*2+1] - pshufb m1, m0 - pshufb m2, m0 - mova [dstq ], m1 - mova [dstq +16], m1 - mova [dstq+strideq ], m2 - mova [dstq+strideq+16], m2 - lea dstq, [dstq+strideq*2] - inc lineq - jnz .loop - REP_RET - -INIT_MMX ssse3 -cglobal d45_predictor_4x4, 3, 4, 4, dst, stride, above, goffset - GET_GOT goffsetq - - movq m0, [aboveq] - pshufb m2, m0, [GLOBAL(sh_b23456777)] - pshufb m1, m0, [GLOBAL(sh_b01234577)] - pshufb m0, [GLOBAL(sh_b12345677)] - pavgb m3, m2, m1 - pxor m2, m1 - pand m2, [GLOBAL(pb_1)] - psubb m3, m2 - pavgb m0, m3 - - ; store 4 lines - movd [dstq ], m0 - psrlq m0, 8 - movd [dstq+strideq], m0 - lea dstq, [dstq+strideq*2] - psrlq m0, 8 - movd [dstq ], m0 - psrlq m0, 8 - movd [dstq+strideq], m0 - - RESTORE_GOT - RET - -INIT_MMX ssse3 -cglobal d45_predictor_8x8, 3, 4, 4, dst, stride, above, goffset - GET_GOT goffsetq - - movq m0, [aboveq] - mova m1, [GLOBAL(sh_b12345677)] - DEFINE_ARGS dst, stride, stride3 - lea stride3q, [strideq*3] - pshufb m2, m0, [GLOBAL(sh_b23456777)] - pavgb m3, m2, m0 - pxor m2, m0 - pshufb m0, m1 - pand m2, [GLOBAL(pb_1)] - psubb m3, m2 - pavgb m0, m3 - - ; store 4 lines - movq [dstq ], m0 - pshufb m0, m1 - movq [dstq+strideq ], m0 - pshufb m0, m1 - movq [dstq+strideq*2], m0 - pshufb m0, m1 - movq [dstq+stride3q ], m0 - pshufb m0, m1 - lea dstq, [dstq+strideq*4] - - ; store next 4 lines - movq [dstq ], m0 - pshufb m0, m1 - movq [dstq+strideq ], m0 - pshufb m0, m1 - movq [dstq+strideq*2], m0 - pshufb m0, m1 - movq [dstq+stride3q ], m0 - - RESTORE_GOT - RET - INIT_XMM ssse3 cglobal d45_predictor_16x16, 3, 6, 4, dst, stride, above, dst8, line, goffset GET_GOT goffsetq @@ -789,28 +646,6 @@ cglobal d153_predictor_32x32, 4, 
5, 8, dst, stride, above, left, goffset RESTORE_GOT RET -INIT_MMX ssse3 -cglobal d207_predictor_4x4, 4, 5, 4, dst, stride, unused, left, goffset - GET_GOT goffsetq - movd m0, [leftq] ; abcd [byte] - pshufb m1, m0, [GLOBAL(sh_b1233)] ; bcdd [byte] - pshufb m3, m0, [GLOBAL(sh_b2333)] ; cddd - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m3, m2 - pavgb m1, m0 ; ab, bc, cd, d [byte] - - punpcklbw m1, m2 ; ab, a2bc, bc, b2cd, cd, c3d, d, d - movd [dstq ], m1 - psrlq m1, 16 ; bc, b2cd, cd, c3d, d, d - movd [dstq+strideq], m1 - lea dstq, [dstq+strideq*2] - psrlq m1, 16 ; cd, c3d, d, d - movd [dstq ], m1 - pshufw m1, m1, q1111 ; d, d, d, d - movd [dstq+strideq], m1 - RESTORE_GOT - RET - INIT_XMM ssse3 cglobal d207_predictor_8x8, 4, 5, 4, dst, stride, stride3, left, goffset GET_GOT goffsetq diff --git a/libvpx/vpx_dsp/x86/inv_txfm_sse2.c b/libvpx/vpx_dsp/x86/inv_txfm_sse2.c index ae907fd0b..df5068c62 100644 --- a/libvpx/vpx_dsp/x86/inv_txfm_sse2.c +++ b/libvpx/vpx_dsp/x86/inv_txfm_sse2.c @@ -158,8 +158,8 @@ void vpx_idct4x4_1_add_sse2(const tran_low_t *input, uint8_t *dest, const __m128i zero = _mm_setzero_si128(); int a; - a = dct_const_round_shift(input[0] * cospi_16_64); - a = dct_const_round_shift(a * cospi_16_64); + a = (int)dct_const_round_shift(input[0] * cospi_16_64); + a = (int)dct_const_round_shift(a * cospi_16_64); a = ROUND_POWER_OF_TWO(a, 4); dc_value = _mm_set1_epi16(a); @@ -527,8 +527,8 @@ void vpx_idct8x8_1_add_sse2(const tran_low_t *input, uint8_t *dest, const __m128i zero = _mm_setzero_si128(); int a; - a = dct_const_round_shift(input[0] * cospi_16_64); - a = dct_const_round_shift(a * cospi_16_64); + a = (int)dct_const_round_shift(input[0] * cospi_16_64); + a = (int)dct_const_round_shift(a * cospi_16_64); a = ROUND_POWER_OF_TWO(a, 5); dc_value = _mm_set1_epi16(a); @@ -1305,30 +1305,16 @@ void vpx_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest, const __m128i zero = _mm_setzero_si128(); int a, i; - a = dct_const_round_shift(input[0] * cospi_16_64); - 
a = dct_const_round_shift(a * cospi_16_64); + a = (int)dct_const_round_shift(input[0] * cospi_16_64); + a = (int)dct_const_round_shift(a * cospi_16_64); a = ROUND_POWER_OF_TWO(a, 6); dc_value = _mm_set1_epi16(a); - for (i = 0; i < 2; ++i) { - RECON_AND_STORE(dest + 0 * stride, dc_value); - RECON_AND_STORE(dest + 1 * stride, dc_value); - RECON_AND_STORE(dest + 2 * stride, dc_value); - RECON_AND_STORE(dest + 3 * stride, dc_value); - RECON_AND_STORE(dest + 4 * stride, dc_value); - RECON_AND_STORE(dest + 5 * stride, dc_value); - RECON_AND_STORE(dest + 6 * stride, dc_value); - RECON_AND_STORE(dest + 7 * stride, dc_value); - RECON_AND_STORE(dest + 8 * stride, dc_value); - RECON_AND_STORE(dest + 9 * stride, dc_value); - RECON_AND_STORE(dest + 10 * stride, dc_value); - RECON_AND_STORE(dest + 11 * stride, dc_value); - RECON_AND_STORE(dest + 12 * stride, dc_value); - RECON_AND_STORE(dest + 13 * stride, dc_value); - RECON_AND_STORE(dest + 14 * stride, dc_value); - RECON_AND_STORE(dest + 15 * stride, dc_value); - dest += 8; + for (i = 0; i < 16; ++i) { + RECON_AND_STORE(dest + 0, dc_value); + RECON_AND_STORE(dest + 8, dc_value); + dest += stride; } } @@ -3476,8 +3462,8 @@ void vpx_idct32x32_1_add_sse2(const tran_low_t *input, uint8_t *dest, const __m128i zero = _mm_setzero_si128(); int a, j; - a = dct_const_round_shift(input[0] * cospi_16_64); - a = dct_const_round_shift(a * cospi_16_64); + a = (int)dct_const_round_shift(input[0] * cospi_16_64); + a = (int)dct_const_round_shift(a * cospi_16_64); a = ROUND_POWER_OF_TWO(a, 6); dc_value = _mm_set1_epi16(a); diff --git a/libvpx/vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm b/libvpx/vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm index 68e7fa40c..20baf820f 100644 --- a/libvpx/vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm +++ b/libvpx/vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm @@ -17,18 +17,70 @@ SECTION_RODATA pw_11585x2: times 8 dw 23170 + +pw_m2404x2: times 8 dw -2404*2 +pw_m4756x2: times 8 dw -4756*2 +pw_m5520x2: times 8 dw -5520*2 +pw_m8423x2: times 8 dw 
-8423*2 +pw_m9102x2: times 8 dw -9102*2 +pw_m10394x2: times 8 dw -10394*2 +pw_m11003x2: times 8 dw -11003*2 + +pw_16364x2: times 8 dw 16364*2 +pw_16305x2: times 8 dw 16305*2 +pw_16207x2: times 8 dw 16207*2 +pw_16069x2: times 8 dw 16069*2 +pw_15893x2: times 8 dw 15893*2 +pw_15679x2: times 8 dw 15679*2 +pw_15426x2: times 8 dw 15426*2 +pw_15137x2: times 8 dw 15137*2 +pw_14811x2: times 8 dw 14811*2 +pw_14449x2: times 8 dw 14449*2 +pw_14053x2: times 8 dw 14053*2 +pw_13623x2: times 8 dw 13623*2 +pw_13160x2: times 8 dw 13160*2 +pw_12665x2: times 8 dw 12665*2 +pw_12140x2: times 8 dw 12140*2 +pw__9760x2: times 8 dw 9760*2 +pw__7723x2: times 8 dw 7723*2 +pw__7005x2: times 8 dw 7005*2 +pw__6270x2: times 8 dw 6270*2 +pw__3981x2: times 8 dw 3981*2 +pw__3196x2: times 8 dw 3196*2 +pw__1606x2: times 8 dw 1606*2 +pw___804x2: times 8 dw 804*2 + pd_8192: times 4 dd 8192 +pw_32: times 8 dw 32 pw_16: times 8 dw 16 %macro TRANSFORM_COEFFS 2 pw_%1_%2: dw %1, %2, %1, %2, %1, %2, %1, %2 pw_m%2_%1: dw -%2, %1, -%2, %1, -%2, %1, -%2, %1 +pw_m%1_m%2: dw -%1, -%2, -%1, -%2, -%1, -%2, -%1, -%2 %endmacro TRANSFORM_COEFFS 6270, 15137 TRANSFORM_COEFFS 3196, 16069 TRANSFORM_COEFFS 13623, 9102 +; constants for 32x32_34 +TRANSFORM_COEFFS 804, 16364 +TRANSFORM_COEFFS 15426, 5520 +TRANSFORM_COEFFS 3981, 15893 +TRANSFORM_COEFFS 16207, 2404 +TRANSFORM_COEFFS 1606, 16305 +TRANSFORM_COEFFS 15679, 4756 +TRANSFORM_COEFFS 11585, 11585 + +; constants for 32x32_1024 +TRANSFORM_COEFFS 12140, 11003 +TRANSFORM_COEFFS 7005, 14811 +TRANSFORM_COEFFS 14053, 8423 +TRANSFORM_COEFFS 9760, 13160 +TRANSFORM_COEFFS 12665, 10394 +TRANSFORM_COEFFS 7723, 14449 + %macro PAIR_PP_COEFFS 2 dpw_%1_%2: dw %1, %1, %1, %1, %2, %2, %2, %2 %endmacro @@ -80,6 +132,15 @@ SECTION .text packssdw m%2, m%6 %endmacro +%macro BUTTERFLY_4Xmm 7 ; dst1, dst2, coef1, coef2, round, tmp1, tmp2 + punpckhwd m%6, m%2, m%1 + MUL_ADD_2X %7, %6, %6, %5, [pw_m%4_%3], [pw_m%3_m%4] + punpcklwd m%2, m%1 + MUL_ADD_2X %1, %2, %2, %5, [pw_m%4_%3], [pw_m%3_m%4] + 
packssdw m%1, m%7 + packssdw m%2, m%6 +%endmacro + ; matrix transpose %macro INTERLEAVE_2X 4 punpckh%1 m%4, m%2, m%3 @@ -159,7 +220,24 @@ cglobal idct8x8_64_add, 3, 5, 13, input, output, stride mova m12, [pw_11585x2] lea r3, [2 * strideq] - +%if CONFIG_VP9_HIGHBITDEPTH + mova m0, [inputq + 0] + packssdw m0, [inputq + 16] + mova m1, [inputq + 32] + packssdw m1, [inputq + 48] + mova m2, [inputq + 64] + packssdw m2, [inputq + 80] + mova m3, [inputq + 96] + packssdw m3, [inputq + 112] + mova m4, [inputq + 128] + packssdw m4, [inputq + 144] + mova m5, [inputq + 160] + packssdw m5, [inputq + 176] + mova m6, [inputq + 192] + packssdw m6, [inputq + 208] + mova m7, [inputq + 224] + packssdw m7, [inputq + 240] +%else mova m0, [inputq + 0] mova m1, [inputq + 16] mova m2, [inputq + 32] @@ -168,7 +246,7 @@ cglobal idct8x8_64_add, 3, 5, 13, input, output, stride mova m5, [inputq + 80] mova m6, [inputq + 96] mova m7, [inputq + 112] - +%endif TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 IDCT8_1D TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 @@ -193,10 +271,21 @@ cglobal idct8x8_12_add, 3, 5, 13, input, output, stride lea r3, [2 * strideq] +%if CONFIG_VP9_HIGHBITDEPTH + mova m0, [inputq + 0] + packssdw m0, [inputq + 16] + mova m1, [inputq + 32] + packssdw m1, [inputq + 48] + mova m2, [inputq + 64] + packssdw m2, [inputq + 80] + mova m3, [inputq + 96] + packssdw m3, [inputq + 112] +%else mova m0, [inputq + 0] mova m1, [inputq + 16] mova m2, [inputq + 32] mova m3, [inputq + 48] +%endif punpcklwd m0, m1 punpcklwd m2, m3 @@ -298,4 +387,1407 @@ cglobal idct8x8_12_add, 3, 5, 13, input, output, stride RET +%define idx0 16 * 0 +%define idx1 16 * 1 +%define idx2 16 * 2 +%define idx3 16 * 3 +%define idx4 16 * 4 +%define idx5 16 * 5 +%define idx6 16 * 6 +%define idx7 16 * 7 +%define idx8 16 * 0 +%define idx9 16 * 1 +%define idx10 16 * 2 +%define idx11 16 * 3 +%define idx12 16 * 4 +%define idx13 16 * 5 +%define idx14 16 * 6 +%define idx15 16 * 7 +%define idx16 16 * 0 +%define idx17 16 * 1 +%define idx18 16 
* 2 +%define idx19 16 * 3 +%define idx20 16 * 4 +%define idx21 16 * 5 +%define idx22 16 * 6 +%define idx23 16 * 7 +%define idx24 16 * 0 +%define idx25 16 * 1 +%define idx26 16 * 2 +%define idx27 16 * 3 +%define idx28 16 * 4 +%define idx29 16 * 5 +%define idx30 16 * 6 +%define idx31 16 * 7 + +; FROM idct32x32_add_neon.asm +; +; Instead of doing the transforms stage by stage, it is done by loading +; some input values and doing as many stages as possible to minimize the +; storing/loading of intermediate results. To fit within registers, the +; final coefficients are cut into four blocks: +; BLOCK A: 16-19,28-31 +; BLOCK B: 20-23,24-27 +; BLOCK C: 8-11,12-15 +; BLOCK D: 0-3,4-7 +; Blocks A and C are straight calculation through the various stages. In +; block B, further calculations are performed using the results from +; block A. In block D, further calculations are performed using the results +; from block C and then the final calculations are done using results from +; block A and B which have been combined at the end of block B. 
+; + +%macro IDCT32X32_34 4 + ; BLOCK A STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m11, m1 + pmulhrsw m1, [pw___804x2] ; stp1_16 + mova [r4 + 0], m0 + pmulhrsw m11, [pw_16364x2] ; stp2_31 + mova [r4 + 16 * 2], m2 + mova m12, m7 + pmulhrsw m7, [pw_15426x2] ; stp1_28 + mova [r4 + 16 * 4], m4 + pmulhrsw m12, [pw_m5520x2] ; stp2_19 + mova [r4 + 16 * 6], m6 + + ; BLOCK A STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m2, m1 ; stp1_16 + mova m0, m11 ; stp1_31 + mova m4, m7 ; stp1_28 + mova m15, m12 ; stp1_19 + + ; BLOCK A STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4X 0, 2, 3196, 16069, m8, 9, 10 ; stp1_17, stp1_30 + BUTTERFLY_4Xmm 4, 15, 3196, 16069, m8, 9, 10 ; stp1_29, stp1_18 + + ; BLOCK A STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 1, 12, 9 ; stp2_16, stp2_19 + SUM_SUB 0, 15, 9 ; stp2_17, stp2_18 + SUM_SUB 11, 7, 9 ; stp2_31, stp2_28 + SUM_SUB 2, 4, 9 ; stp2_30, stp2_29 + + ; BLOCK A STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4X 4, 15, 6270, 15137, m8, 9, 10 ; stp1_18, stp1_29 + BUTTERFLY_4X 7, 12, 6270, 15137, m8, 9, 10 ; stp1_19, stp1_28 + + ; BLOCK B STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m6, m5 + pmulhrsw m5, [pw__3981x2] ; stp1_20 + mova [stp + %4 + idx28], m12 + mova [stp + %4 + idx29], m15 + pmulhrsw m6, [pw_15893x2] ; stp2_27 + mova [stp + %4 + idx30], m2 + mova m2, m3 + pmulhrsw m3, [pw_m2404x2] ; stp1_23 + mova [stp + %4 + idx31], m11 + pmulhrsw m2, [pw_16207x2] ; stp2_24 + + ; BLOCK B STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m13, m5 ; stp1_20 + mova m14, m6 ; stp1_27 + mova m15, m3 ; stp1_23 + mova m11, m2 ; stp1_24 + + ; BLOCK B STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4X 14, 13, 13623, 9102, m8, 9, 10 ; stp1_21, stp1_26 + BUTTERFLY_4Xmm 11, 15, 13623, 9102, m8, 9, 10 ; stp1_25, 
stp1_22 + + ; BLOCK B STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 3, 5, 9 ; stp2_23, stp2_20 + SUM_SUB 15, 14, 9 ; stp2_22, stp2_21 + SUM_SUB 2, 6, 9 ; stp2_24, stp2_27 + SUM_SUB 11, 13, 9 ; stp2_25, stp2_26 + + ; BLOCK B STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4Xmm 6, 5, 6270, 15137, m8, 9, 10 ; stp1_27, stp1_20 + BUTTERFLY_4Xmm 13, 14, 6270, 15137, m8, 9, 10 ; stp1_26, stp1_21 + + ; BLOCK B STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 1, 3, 9 ; stp2_16, stp2_23 + SUM_SUB 0, 15, 9 ; stp2_17, stp2_22 + SUM_SUB 4, 14, 9 ; stp2_18, stp2_21 + SUM_SUB 7, 5, 9 ; stp2_19, stp2_20 + mova [stp + %3 + idx16], m1 + mova [stp + %3 + idx17], m0 + mova [stp + %3 + idx18], m4 + mova [stp + %3 + idx19], m7 + + mova m4, [stp + %4 + idx28] + mova m7, [stp + %4 + idx29] + mova m10, [stp + %4 + idx30] + mova m12, [stp + %4 + idx31] + SUM_SUB 4, 6, 9 ; stp2_28, stp2_27 + SUM_SUB 7, 13, 9 ; stp2_29, stp2_26 + SUM_SUB 10, 11, 9 ; stp2_30, stp2_25 + SUM_SUB 12, 2, 9 ; stp2_31, stp2_24 + mova [stp + %4 + idx28], m4 + mova [stp + %4 + idx29], m7 + mova [stp + %4 + idx30], m10 + mova [stp + %4 + idx31], m12 + + ; BLOCK B STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +%if 0 ; overflow occurs in SUM_SUB when using test streams + mova m10, [pw_11585x2] + SUM_SUB 6, 5, 9 + pmulhrsw m6, m10 ; stp1_27 + pmulhrsw m5, m10 ; stp1_20 + SUM_SUB 13, 14, 9 + pmulhrsw m13, m10 ; stp1_26 + pmulhrsw m14, m10 ; stp1_21 + SUM_SUB 11, 15, 9 + pmulhrsw m11, m10 ; stp1_25 + pmulhrsw m15, m10 ; stp1_22 + SUM_SUB 2, 3, 9 + pmulhrsw m2, m10 ; stp1_24 + pmulhrsw m3, m10 ; stp1_23 +%else + BUTTERFLY_4X 6, 5, 11585, 11585, m8, 9, 10 ; stp1_20, stp1_27 + SWAP 6, 5 + BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_21, stp1_26 + SWAP 13, 14 + BUTTERFLY_4X 11, 15, 11585, 11585, m8, 9, 10 ; stp1_22, stp1_25 + SWAP 11, 15 + BUTTERFLY_4X 2, 3, 11585, 11585, m8, 9, 10 ; stp1_23, stp1_24 + SWAP 
2, 3 +%endif + + mova [stp + %4 + idx24], m2 + mova [stp + %4 + idx25], m11 + mova [stp + %4 + idx26], m13 + mova [stp + %4 + idx27], m6 + + ; BLOCK C STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ; + ; BLOCK C STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m0, [rsp + transposed_in + 16 * 2] + mova m6, [rsp + transposed_in + 16 * 6] + + mova m1, m0 + pmulhrsw m0, [pw__1606x2] ; stp1_8 + mova [stp + %3 + idx20], m5 + mova [stp + %3 + idx21], m14 + pmulhrsw m1, [pw_16305x2] ; stp2_15 + mova [stp + %3 + idx22], m15 + mova m7, m6 + pmulhrsw m7, [pw_m4756x2] ; stp2_11 + mova [stp + %3 + idx23], m3 + pmulhrsw m6, [pw_15679x2] ; stp1_12 + + ; BLOCK C STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m3, m0 ; stp1_8 + mova m2, m1 ; stp1_15 + + ; BLOCK C STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10 ; stp1_9, stp1_14 + mova m4, m7 ; stp1_11 + mova m5, m6 ; stp1_12 + BUTTERFLY_4Xmm 5, 4, 6270, 15137, m8, 9, 10 ; stp1_13, stp1_10 + + ; BLOCK C STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 0, 7, 9 ; stp1_8, stp1_11 + SUM_SUB 2, 4, 9 ; stp1_9, stp1_10 + SUM_SUB 1, 6, 9 ; stp1_15, stp1_12 + SUM_SUB 3, 5, 9 ; stp1_14, stp1_13 + + ; BLOCK C STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +%if 0 ; overflow occurs in SUM_SUB when using test streams + mova m10, [pw_11585x2] + SUM_SUB 5, 4, 9 + pmulhrsw m5, m10 ; stp1_13 + pmulhrsw m4, m10 ; stp1_10 + SUM_SUB 6, 7, 9 + pmulhrsw m6, m10 ; stp1_12 + pmulhrsw m7, m10 ; stp1_11 +%else + BUTTERFLY_4X 5, 4, 11585, 11585, m8, 9, 10 ; stp1_10, stp1_13 + SWAP 5, 4 + BUTTERFLY_4X 6, 7, 11585, 11585, m8, 9, 10 ; stp1_11, stp1_12 + SWAP 6, 7 +%endif + + ; BLOCK C STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova [stp + %2 + idx8], m0 + mova [stp + %2 + idx9], m2 + mova [stp + %2 + idx10], m4 + mova [stp + %2 + idx11], m7 + + ; 
BLOCK D STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ; + ; BLOCK D STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ; + ; BLOCK D STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m11, [rsp + transposed_in + 16 * 4] + mova m12, m11 + pmulhrsw m11, [pw__3196x2] ; stp1_4 + pmulhrsw m12, [pw_16069x2] ; stp1_7 + + ; BLOCK D STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m0, [rsp + transposed_in + 16 * 0] + mova m10, [pw_11585x2] + pmulhrsw m0, m10 ; stp1_1 + + mova m14, m11 ; stp1_4 + mova m13, m12 ; stp1_7 + + ; BLOCK D STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +%if 0 ; overflow occurs in SUM_SUB when using test streams + SUM_SUB 13, 14, 9 + pmulhrsw m13, m10 ; stp1_6 + pmulhrsw m14, m10 ; stp1_5 +%else + BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_5, stp1_6 + SWAP 13, 14 +%endif + mova m7, m0 ; stp1_0 = stp1_1 + mova m4, m0 ; stp1_1 + mova m2, m7 ; stp1_0 + + ; BLOCK D STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 0, 12, 9 ; stp1_0, stp1_7 + SUM_SUB 7, 13, 9 ; stp1_1, stp1_6 + SUM_SUB 2, 14, 9 ; stp1_2, stp1_5 + SUM_SUB 4, 11, 9 ; stp1_3, stp1_4 + + ; BLOCK D STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 0, 1, 9 ; stp1_0, stp1_15 + SUM_SUB 7, 3, 9 ; stp1_1, stp1_14 + SUM_SUB 2, 5, 9 ; stp1_2, stp1_13 + SUM_SUB 4, 6, 9 ; stp1_3, stp1_12 + + ; 0-3, 28-31 final stage + mova m15, [stp + %4 + idx30] + mova m10, [stp + %4 + idx31] + SUM_SUB 0, 10, 9 ; stp1_0, stp1_31 + SUM_SUB 7, 15, 9 ; stp1_1, stp1_30 + mova [stp + %1 + idx0], m0 + mova [stp + %1 + idx1], m7 + mova [stp + %4 + idx30], m15 + mova [stp + %4 + idx31], m10 + mova m7, [stp + %4 + idx28] + mova m0, [stp + %4 + idx29] + SUM_SUB 2, 0, 9 ; stp1_2, stp1_29 + SUM_SUB 4, 7, 9 ; stp1_3, stp1_28 + mova [stp + %1 + idx2], m2 + mova [stp + %1 + idx3], m4 + mova [stp + %4 + idx28], m7 + mova [stp + %4 + idx29], m0 + + ; 12-15, 
16-19 final stage + mova m0, [stp + %3 + idx16] + mova m7, [stp + %3 + idx17] + mova m2, [stp + %3 + idx18] + mova m4, [stp + %3 + idx19] + SUM_SUB 1, 0, 9 ; stp1_15, stp1_16 + SUM_SUB 3, 7, 9 ; stp1_14, stp1_17 + SUM_SUB 5, 2, 9 ; stp1_13, stp1_18 + SUM_SUB 6, 4, 9 ; stp1_12, stp1_19 + mova [stp + %2 + idx12], m6 + mova [stp + %2 + idx13], m5 + mova [stp + %2 + idx14], m3 + mova [stp + %2 + idx15], m1 + mova [stp + %3 + idx16], m0 + mova [stp + %3 + idx17], m7 + mova [stp + %3 + idx18], m2 + mova [stp + %3 + idx19], m4 + + mova m4, [stp + %2 + idx8] + mova m5, [stp + %2 + idx9] + mova m6, [stp + %2 + idx10] + mova m7, [stp + %2 + idx11] + SUM_SUB 11, 7, 9 ; stp1_4, stp1_11 + SUM_SUB 14, 6, 9 ; stp1_5, stp1_10 + SUM_SUB 13, 5, 9 ; stp1_6, stp1_9 + SUM_SUB 12, 4, 9 ; stp1_7, stp1_8 + + ; 4-7, 24-27 final stage + mova m0, [stp + %4 + idx27] + mova m1, [stp + %4 + idx26] + mova m2, [stp + %4 + idx25] + mova m3, [stp + %4 + idx24] + SUM_SUB 11, 0, 9 ; stp1_4, stp1_27 + SUM_SUB 14, 1, 9 ; stp1_5, stp1_26 + SUM_SUB 13, 2, 9 ; stp1_6, stp1_25 + SUM_SUB 12, 3, 9 ; stp1_7, stp1_24 + mova [stp + %4 + idx27], m0 + mova [stp + %4 + idx26], m1 + mova [stp + %4 + idx25], m2 + mova [stp + %4 + idx24], m3 + mova [stp + %1 + idx4], m11 + mova [stp + %1 + idx5], m14 + mova [stp + %1 + idx6], m13 + mova [stp + %1 + idx7], m12 + + ; 8-11, 20-23 final stage + mova m0, [stp + %3 + idx20] + mova m1, [stp + %3 + idx21] + mova m2, [stp + %3 + idx22] + mova m3, [stp + %3 + idx23] + SUM_SUB 7, 0, 9 ; stp1_11, stp_20 + SUM_SUB 6, 1, 9 ; stp1_10, stp_21 + SUM_SUB 5, 2, 9 ; stp1_9, stp_22 + SUM_SUB 4, 3, 9 ; stp1_8, stp_23 + mova [stp + %2 + idx8], m4 + mova [stp + %2 + idx9], m5 + mova [stp + %2 + idx10], m6 + mova [stp + %2 + idx11], m7 + mova [stp + %3 + idx20], m0 + mova [stp + %3 + idx21], m1 + mova [stp + %3 + idx22], m2 + mova [stp + %3 + idx23], m3 +%endmacro + +%macro RECON_AND_STORE 1 + mova m11, [pw_32] + lea stp, [rsp + %1] + mov r6, 32 + pxor m8, m8 +%%recon_and_store: + mova m0, 
[stp + 16 * 32 * 0] + mova m1, [stp + 16 * 32 * 1] + mova m2, [stp + 16 * 32 * 2] + mova m3, [stp + 16 * 32 * 3] + add stp, 16 + + paddw m0, m11 + paddw m1, m11 + paddw m2, m11 + paddw m3, m11 + psraw m0, 6 + psraw m1, 6 + psraw m2, 6 + psraw m3, 6 + movh m4, [outputq + 0] + movh m5, [outputq + 8] + movh m6, [outputq + 16] + movh m7, [outputq + 24] + punpcklbw m4, m8 + punpcklbw m5, m8 + punpcklbw m6, m8 + punpcklbw m7, m8 + paddw m0, m4 + paddw m1, m5 + paddw m2, m6 + paddw m3, m7 + packuswb m0, m1 + packuswb m2, m3 + mova [outputq + 0], m0 + mova [outputq + 16], m2 + lea outputq, [outputq + strideq] + dec r6 + jnz %%recon_and_store +%endmacro + +%define i32x32_size 16*32*5 +%define pass_two_start 16*32*0 +%define transposed_in 16*32*4 +%define pass_one_start 16*32*0 +%define stp r8 + +INIT_XMM ssse3 +cglobal idct32x32_34_add, 3, 11, 16, i32x32_size, input, output, stride + mova m8, [pd_8192] + lea stp, [rsp + pass_one_start] + +idct32x32_34: + mov r3, inputq + lea r4, [rsp + transposed_in] + +idct32x32_34_transpose: +%if CONFIG_VP9_HIGHBITDEPTH + mova m0, [r3 + 0] + packssdw m0, [r3 + 16] + mova m1, [r3 + 32 * 4] + packssdw m1, [r3 + 32 * 4 + 16] + mova m2, [r3 + 32 * 8] + packssdw m2, [r3 + 32 * 8 + 16] + mova m3, [r3 + 32 * 12] + packssdw m3, [r3 + 32 * 12 + 16] + mova m4, [r3 + 32 * 16] + packssdw m4, [r3 + 32 * 16 + 16] + mova m5, [r3 + 32 * 20] + packssdw m5, [r3 + 32 * 20 + 16] + mova m6, [r3 + 32 * 24] + packssdw m6, [r3 + 32 * 24 + 16] + mova m7, [r3 + 32 * 28] + packssdw m7, [r3 + 32 * 28 + 16] +%else + mova m0, [r3 + 0] + mova m1, [r3 + 16 * 4] + mova m2, [r3 + 16 * 8] + mova m3, [r3 + 16 * 12] + mova m4, [r3 + 16 * 16] + mova m5, [r3 + 16 * 20] + mova m6, [r3 + 16 * 24] + mova m7, [r3 + 16 * 28] +%endif + + TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 + + IDCT32X32_34 16*0, 16*32, 16*64, 16*96 + lea stp, [stp + 16 * 8] + mov r6, 4 + lea stp, [rsp + pass_one_start] + lea r9, [rsp + pass_one_start] + +idct32x32_34_2: + lea r4, [rsp + transposed_in] + mov r3, 
r9 + +idct32x32_34_transpose_2: + mova m0, [r3 + 0] + mova m1, [r3 + 16 * 1] + mova m2, [r3 + 16 * 2] + mova m3, [r3 + 16 * 3] + mova m4, [r3 + 16 * 4] + mova m5, [r3 + 16 * 5] + mova m6, [r3 + 16 * 6] + mova m7, [r3 + 16 * 7] + + TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 + + IDCT32X32_34 16*0, 16*8, 16*16, 16*24 + + lea stp, [stp + 16 * 32] + add r9, 16 * 32 + dec r6 + jnz idct32x32_34_2 + + RECON_AND_STORE pass_two_start + + RET + +%macro IDCT32X32_135 4 + ; BLOCK A STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m1, [rsp + transposed_in + 16 * 1] + mova m11, m1 + pmulhrsw m1, [pw___804x2] ; stp1_16 + pmulhrsw m11, [pw_16364x2] ; stp2_31 + + mova m7, [rsp + transposed_in + 16 * 7] + mova m12, m7 + pmulhrsw m7, [pw_15426x2] ; stp1_28 + pmulhrsw m12, [pw_m5520x2] ; stp2_19 + + mova m3, [rsp + transposed_in + 16 * 9] + mova m4, m3 + pmulhrsw m3, [pw__7005x2] ; stp1_18 + pmulhrsw m4, [pw_14811x2] ; stp2_29 + + mova m0, [rsp + transposed_in + 16 * 15] + mova m2, m0 + pmulhrsw m0, [pw_12140x2] ; stp1_30 + pmulhrsw m2, [pw_m11003x2] ; stp2_17 + + ; BLOCK A STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 1, 2, 9 ; stp2_16, stp2_17 + SUM_SUB 12, 3, 9 ; stp2_19, stp2_18 + SUM_SUB 7, 4, 9 ; stp2_28, stp2_29 + SUM_SUB 11, 0, 9 ; stp2_31, stp2_30 + + ; BLOCK A STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4X 0, 2, 3196, 16069, m8, 9, 10 ; stp1_17, stp1_30 + BUTTERFLY_4Xmm 4, 3, 3196, 16069, m8, 9, 10 ; stp1_29, stp1_18 + + ; BLOCK A STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 1, 12, 9 ; stp2_16, stp2_19 + SUM_SUB 0, 3, 9 ; stp2_17, stp2_18 + SUM_SUB 11, 7, 9 ; stp2_31, stp2_28 + SUM_SUB 2, 4, 9 ; stp2_30, stp2_29 + + ; BLOCK A STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4X 4, 3, 6270, 15137, m8, 9, 10 ; stp1_18, stp1_29 + BUTTERFLY_4X 7, 12, 6270, 15137, m8, 9, 10 ; stp1_19, stp1_28 + + mova [stp + %3 + idx16], m1 + mova 
[stp + %3 + idx17], m0 + mova [stp + %3 + idx18], m4 + mova [stp + %3 + idx19], m7 + mova [stp + %4 + idx28], m12 + mova [stp + %4 + idx29], m3 + mova [stp + %4 + idx30], m2 + mova [stp + %4 + idx31], m11 + + ; BLOCK B STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m2, [rsp + transposed_in + 16 * 3] + mova m3, m2 + pmulhrsw m3, [pw_m2404x2] ; stp1_23 + pmulhrsw m2, [pw_16207x2] ; stp2_24 + + mova m5, [rsp + transposed_in + 16 * 5] + mova m6, m5 + pmulhrsw m5, [pw__3981x2] ; stp1_20 + pmulhrsw m6, [pw_15893x2] ; stp2_27 + + mova m14, [rsp + transposed_in + 16 * 11] + mova m13, m14 + pmulhrsw m13, [pw_m8423x2] ; stp1_21 + pmulhrsw m14, [pw_14053x2] ; stp2_26 + + mova m0, [rsp + transposed_in + 16 * 13] + mova m1, m0 + pmulhrsw m0, [pw__9760x2] ; stp1_22 + pmulhrsw m1, [pw_13160x2] ; stp2_25 + + ; BLOCK B STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 5, 13, 9 ; stp2_20, stp2_21 + SUM_SUB 3, 0, 9 ; stp2_23, stp2_22 + SUM_SUB 2, 1, 9 ; stp2_24, stp2_25 + SUM_SUB 6, 14, 9 ; stp2_27, stp2_26 + + ; BLOCK B STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4X 14, 13, 13623, 9102, m8, 9, 10 ; stp1_21, stp1_26 + BUTTERFLY_4Xmm 1, 0, 13623, 9102, m8, 9, 10 ; stp1_25, stp1_22 + + ; BLOCK B STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 3, 5, 9 ; stp2_23, stp2_20 + SUM_SUB 0, 14, 9 ; stp2_22, stp2_21 + SUM_SUB 2, 6, 9 ; stp2_24, stp2_27 + SUM_SUB 1, 13, 9 ; stp2_25, stp2_26 + + ; BLOCK B STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4Xmm 6, 5, 6270, 15137, m8, 9, 10 ; stp1_27, stp1_20 + BUTTERFLY_4Xmm 13, 14, 6270, 15137, m8, 9, 10 ; stp1_26, stp1_21 + + ; BLOCK B STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m4, [stp + %3 + idx16] + mova m7, [stp + %3 + idx17] + mova m11, [stp + %3 + idx18] + mova m12, [stp + %3 + idx19] + SUM_SUB 4, 3, 9 ; stp2_16, stp2_23 + SUM_SUB 7, 0, 9 ; stp2_17, stp2_22 + 
SUM_SUB 11, 14, 9 ; stp2_18, stp2_21 + SUM_SUB 12, 5, 9 ; stp2_19, stp2_20 + mova [stp + %3 + idx16], m4 + mova [stp + %3 + idx17], m7 + mova [stp + %3 + idx18], m11 + mova [stp + %3 + idx19], m12 + + mova m4, [stp + %4 + idx28] + mova m7, [stp + %4 + idx29] + mova m11, [stp + %4 + idx30] + mova m12, [stp + %4 + idx31] + SUM_SUB 4, 6, 9 ; stp2_28, stp2_27 + SUM_SUB 7, 13, 9 ; stp2_29, stp2_26 + SUM_SUB 11, 1, 9 ; stp2_30, stp2_25 + SUM_SUB 12, 2, 9 ; stp2_31, stp2_24 + mova [stp + %4 + idx28], m4 + mova [stp + %4 + idx29], m7 + mova [stp + %4 + idx30], m11 + mova [stp + %4 + idx31], m12 + + ; BLOCK B STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +%if 0 ; overflow occurs in SUM_SUB when using test streams + mova m10, [pw_11585x2] + SUM_SUB 6, 5, 9 + pmulhrsw m6, m10 ; stp1_27 + pmulhrsw m5, m10 ; stp1_20 + SUM_SUB 13, 14, 9 + pmulhrsw m13, m10 ; stp1_26 + pmulhrsw m14, m10 ; stp1_21 + SUM_SUB 1, 0, 9 + pmulhrsw m1, m10 ; stp1_25 + pmulhrsw m0, m10 ; stp1_22 + SUM_SUB 2, 3, 9 + pmulhrsw m2, m10 ; stp1_25 + pmulhrsw m3, m10 ; stp1_22 +%else + BUTTERFLY_4X 6, 5, 11585, 11585, m8, 9, 10 ; stp1_20, stp1_27 + SWAP 6, 5 + BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_21, stp1_26 + SWAP 13, 14 + BUTTERFLY_4X 1, 0, 11585, 11585, m8, 9, 10 ; stp1_22, stp1_25 + SWAP 1, 0 + BUTTERFLY_4X 2, 3, 11585, 11585, m8, 9, 10 ; stp1_23, stp1_24 + SWAP 2, 3 +%endif + mova [stp + %3 + idx20], m5 + mova [stp + %3 + idx21], m14 + mova [stp + %3 + idx22], m0 + mova [stp + %3 + idx23], m3 + mova [stp + %4 + idx24], m2 + mova [stp + %4 + idx25], m1 + mova [stp + %4 + idx26], m13 + mova [stp + %4 + idx27], m6 + + ; BLOCK C STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ; + ; BLOCK C STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m0, [rsp + transposed_in + 16 * 2] + mova m1, m0 + pmulhrsw m0, [pw__1606x2] ; stp1_8 + pmulhrsw m1, [pw_16305x2] ; stp2_15 + + mova m6, [rsp + transposed_in + 16 * 6] + mova m7, m6 + pmulhrsw 
m7, [pw_m4756x2] ; stp2_11 + pmulhrsw m6, [pw_15679x2] ; stp1_12 + + mova m4, [rsp + transposed_in + 16 * 10] + mova m5, m4 + pmulhrsw m4, [pw__7723x2] ; stp1_10 + pmulhrsw m5, [pw_14449x2] ; stp2_13 + + mova m2, [rsp + transposed_in + 16 * 14] + mova m3, m2 + pmulhrsw m3, [pw_m10394x2] ; stp1_9 + pmulhrsw m2, [pw_12665x2] ; stp2_14 + + ; BLOCK C STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 0, 3, 9 ; stp1_8, stp1_9 + SUM_SUB 7, 4, 9 ; stp1_11, stp1_10 + SUM_SUB 6, 5, 9 ; stp1_12, stp1_13 + SUM_SUB 1, 2, 9 ; stp1_15, stp1_14 + + ; BLOCK C STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10 ; stp1_9, stp1_14 + BUTTERFLY_4Xmm 5, 4, 6270, 15137, m8, 9, 10 ; stp1_13, stp1_10 + + ; BLOCK C STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 0, 7, 9 ; stp1_8, stp1_11 + SUM_SUB 2, 4, 9 ; stp1_9, stp1_10 + SUM_SUB 1, 6, 9 ; stp1_15, stp1_12 + SUM_SUB 3, 5, 9 ; stp1_14, stp1_13 + + ; BLOCK C STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +%if 0 ; overflow occurs in SUM_SUB when using test streams + mova m10, [pw_11585x2] + SUM_SUB 5, 4, 9 + pmulhrsw m5, m10 ; stp1_13 + pmulhrsw m4, m10 ; stp1_10 + SUM_SUB 6, 7, 9 + pmulhrsw m6, m10 ; stp1_12 + pmulhrsw m7, m10 ; stp1_11 +%else + BUTTERFLY_4X 5, 4, 11585, 11585, m8, 9, 10 ; stp1_10, stp1_13 + SWAP 5, 4 + BUTTERFLY_4X 6, 7, 11585, 11585, m8, 9, 10 ; stp1_11, stp1_12 + SWAP 6, 7 +%endif + ; BLOCK C STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova [stp + %2 + idx8], m0 + mova [stp + %2 + idx9], m2 + mova [stp + %2 + idx10], m4 + mova [stp + %2 + idx11], m7 + mova [stp + %2 + idx12], m6 + mova [stp + %2 + idx13], m5 + mova [stp + %2 + idx14], m3 + mova [stp + %2 + idx15], m1 + + ; BLOCK D STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ; + ; BLOCK D STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ; + ; BLOCK D STAGE 3 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m11, [rsp + transposed_in + 16 * 4] + mova m12, m11 + pmulhrsw m11, [pw__3196x2] ; stp1_4 + pmulhrsw m12, [pw_16069x2] ; stp1_7 + + mova m13, [rsp + transposed_in + 16 * 12] + mova m14, m13 + pmulhrsw m13, [pw_13623x2] ; stp1_6 + pmulhrsw m14, [pw_m9102x2] ; stp1_5 + + ; BLOCK D STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m0, [rsp + transposed_in + 16 * 0] + mova m2, [rsp + transposed_in + 16 * 8] + pmulhrsw m0, [pw_11585x2] ; stp1_1 + mova m3, m2 + pmulhrsw m2, [pw__6270x2] ; stp1_2 + pmulhrsw m3, [pw_15137x2] ; stp1_3 + + SUM_SUB 11, 14, 9 ; stp1_4, stp1_5 + SUM_SUB 12, 13, 9 ; stp1_7, stp1_6 + + ; BLOCK D STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +%if 0 ; overflow occurs in SUM_SUB when using test streams + mova m10, [pw_11585x2] + SUM_SUB 13, 14, 9 + pmulhrsw m13, m10 ; stp1_6 + pmulhrsw m14, m10 ; stp1_5 +%else + BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_5, stp1_6 + SWAP 13, 14 +%endif + mova m1, m0 ; stp1_0 = stp1_1 + SUM_SUB 0, 3, 9 ; stp1_0, stp1_3 + SUM_SUB 1, 2, 9 ; stp1_1, stp1_2 + + ; BLOCK D STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 0, 12, 9 ; stp1_0, stp1_7 + SUM_SUB 1, 13, 9 ; stp1_1, stp1_6 + SUM_SUB 2, 14, 9 ; stp1_2, stp1_5 + SUM_SUB 3, 11, 9 ; stp1_3, stp1_4 + + ; BLOCK D STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m4, [stp + %2 + idx12] + mova m5, [stp + %2 + idx13] + mova m6, [stp + %2 + idx14] + mova m7, [stp + %2 + idx15] + SUM_SUB 0, 7, 9 ; stp1_0, stp1_15 + SUM_SUB 1, 6, 9 ; stp1_1, stp1_14 + SUM_SUB 2, 5, 9 ; stp1_2, stp1_13 + SUM_SUB 3, 4, 9 ; stp1_3, stp1_12 + + ; 0-3, 28-31 final stage + mova m10, [stp + %4 + idx31] + mova m15, [stp + %4 + idx30] + SUM_SUB 0, 10, 9 ; stp1_0, stp1_31 + SUM_SUB 1, 15, 9 ; stp1_1, stp1_30 + mova [stp + %1 + idx0], m0 + mova [stp + %1 + idx1], m1 + mova [stp + %4 + idx31], m10 + mova [stp + %4 + idx30], m15 + 
mova m0, [stp + %4 + idx29] + mova m1, [stp + %4 + idx28] + SUM_SUB 2, 0, 9 ; stp1_2, stp1_29 + SUM_SUB 3, 1, 9 ; stp1_3, stp1_28 + mova [stp + %1 + idx2], m2 + mova [stp + %1 + idx3], m3 + mova [stp + %4 + idx29], m0 + mova [stp + %4 + idx28], m1 + + ; 12-15, 16-19 final stage + mova m0, [stp + %3 + idx16] + mova m1, [stp + %3 + idx17] + mova m2, [stp + %3 + idx18] + mova m3, [stp + %3 + idx19] + SUM_SUB 7, 0, 9 ; stp1_15, stp1_16 + SUM_SUB 6, 1, 9 ; stp1_14, stp1_17 + SUM_SUB 5, 2, 9 ; stp1_13, stp1_18 + SUM_SUB 4, 3, 9 ; stp1_12, stp1_19 + mova [stp + %2 + idx12], m4 + mova [stp + %2 + idx13], m5 + mova [stp + %2 + idx14], m6 + mova [stp + %2 + idx15], m7 + mova [stp + %3 + idx16], m0 + mova [stp + %3 + idx17], m1 + mova [stp + %3 + idx18], m2 + mova [stp + %3 + idx19], m3 + + mova m4, [stp + %2 + idx8] + mova m5, [stp + %2 + idx9] + mova m6, [stp + %2 + idx10] + mova m7, [stp + %2 + idx11] + SUM_SUB 11, 7, 9 ; stp1_4, stp1_11 + SUM_SUB 14, 6, 9 ; stp1_5, stp1_10 + SUM_SUB 13, 5, 9 ; stp1_6, stp1_9 + SUM_SUB 12, 4, 9 ; stp1_7, stp1_8 + + ; 4-7, 24-27 final stage + mova m3, [stp + %4 + idx24] + mova m2, [stp + %4 + idx25] + mova m1, [stp + %4 + idx26] + mova m0, [stp + %4 + idx27] + SUM_SUB 12, 3, 9 ; stp1_7, stp1_24 + SUM_SUB 13, 2, 9 ; stp1_6, stp1_25 + SUM_SUB 14, 1, 9 ; stp1_5, stp1_26 + SUM_SUB 11, 0, 9 ; stp1_4, stp1_27 + mova [stp + %4 + idx24], m3 + mova [stp + %4 + idx25], m2 + mova [stp + %4 + idx26], m1 + mova [stp + %4 + idx27], m0 + mova [stp + %1 + idx4], m11 + mova [stp + %1 + idx5], m14 + mova [stp + %1 + idx6], m13 + mova [stp + %1 + idx7], m12 + + ; 8-11, 20-23 final stage + mova m0, [stp + %3 + idx20] + mova m1, [stp + %3 + idx21] + mova m2, [stp + %3 + idx22] + mova m3, [stp + %3 + idx23] + SUM_SUB 7, 0, 9 ; stp1_11, stp_20 + SUM_SUB 6, 1, 9 ; stp1_10, stp_21 + SUM_SUB 5, 2, 9 ; stp1_9, stp_22 + SUM_SUB 4, 3, 9 ; stp1_8, stp_23 + mova [stp + %2 + idx8], m4 + mova [stp + %2 + idx9], m5 + mova [stp + %2 + idx10], m6 + mova [stp + %2 + idx11], m7 
+ mova [stp + %3 + idx20], m0 + mova [stp + %3 + idx21], m1 + mova [stp + %3 + idx22], m2 + mova [stp + %3 + idx23], m3 +%endmacro + +INIT_XMM ssse3 +cglobal idct32x32_135_add, 3, 11, 16, i32x32_size, input, output, stride + mova m8, [pd_8192] + mov r6, 2 + lea stp, [rsp + pass_one_start] + +idct32x32_135: + mov r3, inputq + lea r4, [rsp + transposed_in] + mov r7, 2 + +idct32x32_135_transpose: +%if CONFIG_VP9_HIGHBITDEPTH + mova m0, [r3 + 0] + packssdw m0, [r3 + 16] + mova m1, [r3 + 32 * 4] + packssdw m1, [r3 + 32 * 4 + 16] + mova m2, [r3 + 32 * 8] + packssdw m2, [r3 + 32 * 8 + 16] + mova m3, [r3 + 32 * 12] + packssdw m3, [r3 + 32 * 12 + 16] + mova m4, [r3 + 32 * 16] + packssdw m4, [r3 + 32 * 16 + 16] + mova m5, [r3 + 32 * 20] + packssdw m5, [r3 + 32 * 20 + 16] + mova m6, [r3 + 32 * 24] + packssdw m6, [r3 + 32 * 24 + 16] + mova m7, [r3 + 32 * 28] + packssdw m7, [r3 + 32 * 28 + 16] +%else + mova m0, [r3 + 0] + mova m1, [r3 + 16 * 4] + mova m2, [r3 + 16 * 8] + mova m3, [r3 + 16 * 12] + mova m4, [r3 + 16 * 16] + mova m5, [r3 + 16 * 20] + mova m6, [r3 + 16 * 24] + mova m7, [r3 + 16 * 28] +%endif + TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 + + mova [r4 + 0], m0 + mova [r4 + 16 * 1], m1 + mova [r4 + 16 * 2], m2 + mova [r4 + 16 * 3], m3 + mova [r4 + 16 * 4], m4 + mova [r4 + 16 * 5], m5 + mova [r4 + 16 * 6], m6 + mova [r4 + 16 * 7], m7 + +%if CONFIG_VP9_HIGHBITDEPTH + add r3, 32 +%else + add r3, 16 +%endif + add r4, 16 * 8 + dec r7 + jne idct32x32_135_transpose + + IDCT32X32_135 16*0, 16*32, 16*64, 16*96 + lea stp, [stp + 16 * 8] +%if CONFIG_VP9_HIGHBITDEPTH + lea inputq, [inputq + 32 * 32] +%else + lea inputq, [inputq + 16 * 32] +%endif + dec r6 + jnz idct32x32_135 + + mov r6, 4 + lea stp, [rsp + pass_one_start] + lea r9, [rsp + pass_one_start] + +idct32x32_135_2: + lea r4, [rsp + transposed_in] + mov r3, r9 + mov r7, 2 + +idct32x32_135_transpose_2: + mova m0, [r3 + 0] + mova m1, [r3 + 16 * 1] + mova m2, [r3 + 16 * 2] + mova m3, [r3 + 16 * 3] + mova m4, [r3 + 16 * 4] + mova 
m5, [r3 + 16 * 5] + mova m6, [r3 + 16 * 6] + mova m7, [r3 + 16 * 7] + + TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 + + mova [r4 + 0], m0 + mova [r4 + 16 * 1], m1 + mova [r4 + 16 * 2], m2 + mova [r4 + 16 * 3], m3 + mova [r4 + 16 * 4], m4 + mova [r4 + 16 * 5], m5 + mova [r4 + 16 * 6], m6 + mova [r4 + 16 * 7], m7 + + add r3, 16 * 8 + add r4, 16 * 8 + dec r7 + jne idct32x32_135_transpose_2 + + IDCT32X32_135 16*0, 16*8, 16*16, 16*24 + + lea stp, [stp + 16 * 32] + add r9, 16 * 32 + dec r6 + jnz idct32x32_135_2 + + RECON_AND_STORE pass_two_start + + RET + +%macro IDCT32X32_1024 4 + ; BLOCK A STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m1, [rsp + transposed_in + 16 * 1] + mova m11, [rsp + transposed_in + 16 * 31] + BUTTERFLY_4X 1, 11, 804, 16364, m8, 9, 10 ; stp1_16, stp1_31 + + mova m0, [rsp + transposed_in + 16 * 15] + mova m2, [rsp + transposed_in + 16 * 17] + BUTTERFLY_4X 2, 0, 12140, 11003, m8, 9, 10 ; stp1_17, stp1_30 + + mova m7, [rsp + transposed_in + 16 * 7] + mova m12, [rsp + transposed_in + 16 * 25] + BUTTERFLY_4X 12, 7, 15426, 5520, m8, 9, 10 ; stp1_19, stp1_28 + + mova m3, [rsp + transposed_in + 16 * 9] + mova m4, [rsp + transposed_in + 16 * 23] + BUTTERFLY_4X 3, 4, 7005, 14811, m8, 9, 10 ; stp1_18, stp1_29 + + ; BLOCK A STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 1, 2, 9 ; stp2_16, stp2_17 + SUM_SUB 12, 3, 9 ; stp2_19, stp2_18 + SUM_SUB 7, 4, 9 ; stp2_28, stp2_29 + SUM_SUB 11, 0, 9 ; stp2_31, stp2_30 + + ; BLOCK A STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4X 0, 2, 3196, 16069, m8, 9, 10 ; stp1_17, stp1_30 + BUTTERFLY_4Xmm 4, 3, 3196, 16069, m8, 9, 10 ; stp1_29, stp1_18 + + ; BLOCK A STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 1, 12, 9 ; stp2_16, stp2_19 + SUM_SUB 0, 3, 9 ; stp2_17, stp2_18 + SUM_SUB 11, 7, 9 ; stp2_31, stp2_28 + SUM_SUB 2, 4, 9 ; stp2_30, stp2_29 + + ; BLOCK A STAGE 5 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4X 4, 3, 6270, 15137, m8, 9, 10 ; stp1_18, stp1_29 + BUTTERFLY_4X 7, 12, 6270, 15137, m8, 9, 10 ; stp1_19, stp1_28 + + mova [stp + %3 + idx16], m1 + mova [stp + %3 + idx17], m0 + mova [stp + %3 + idx18], m4 + mova [stp + %3 + idx19], m7 + mova [stp + %4 + idx28], m12 + mova [stp + %4 + idx29], m3 + mova [stp + %4 + idx30], m2 + mova [stp + %4 + idx31], m11 + + ; BLOCK B STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m5, [rsp + transposed_in + 16 * 5] + mova m6, [rsp + transposed_in + 16 * 27] + BUTTERFLY_4X 5, 6, 3981, 15893, m8, 9, 10 ; stp1_20, stp1_27 + + mova m13, [rsp + transposed_in + 16 * 21] + mova m14, [rsp + transposed_in + 16 * 11] + BUTTERFLY_4X 13, 14, 14053, 8423, m8, 9, 10 ; stp1_21, stp1_26 + + mova m0, [rsp + transposed_in + 16 * 13] + mova m1, [rsp + transposed_in + 16 * 19] + BUTTERFLY_4X 0, 1, 9760, 13160, m8, 9, 10 ; stp1_22, stp1_25 + + mova m2, [rsp + transposed_in + 16 * 3] + mova m3, [rsp + transposed_in + 16 * 29] + BUTTERFLY_4X 3, 2, 16207, 2404, m8, 9, 10 ; stp1_23, stp1_24 + + ; BLOCK B STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 5, 13, 9 ; stp2_20, stp2_21 + SUM_SUB 3, 0, 9 ; stp2_23, stp2_22 + SUM_SUB 2, 1, 9 ; stp2_24, stp2_25 + SUM_SUB 6, 14, 9 ; stp2_27, stp2_26 + + ; BLOCK B STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4X 14, 13, 13623, 9102, m8, 9, 10 ; stp1_21, stp1_26 + BUTTERFLY_4Xmm 1, 0, 13623, 9102, m8, 9, 10 ; stp1_25, stp1_22 + + ; BLOCK B STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 3, 5, 9 ; stp2_23, stp2_20 + SUM_SUB 0, 14, 9 ; stp2_22, stp2_21 + SUM_SUB 2, 6, 9 ; stp2_24, stp2_27 + SUM_SUB 1, 13, 9 ; stp2_25, stp2_26 + + ; BLOCK B STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4Xmm 6, 5, 6270, 15137, m8, 9, 10 ; stp1_27, stp1_20 + BUTTERFLY_4Xmm 13, 14, 6270, 15137, m8, 9, 10 ; stp1_26, 
stp1_21 + + ; BLOCK B STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m4, [stp + %3 + idx16] + mova m7, [stp + %3 + idx17] + mova m11, [stp + %3 + idx18] + mova m12, [stp + %3 + idx19] + SUM_SUB 4, 3, 9 ; stp2_16, stp2_23 + SUM_SUB 7, 0, 9 ; stp2_17, stp2_22 + SUM_SUB 11, 14, 9 ; stp2_18, stp2_21 + SUM_SUB 12, 5, 9 ; stp2_19, stp2_20 + mova [stp + %3 + idx16], m4 + mova [stp + %3 + idx17], m7 + mova [stp + %3 + idx18], m11 + mova [stp + %3 + idx19], m12 + + mova m4, [stp + %4 + idx28] + mova m7, [stp + %4 + idx29] + mova m11, [stp + %4 + idx30] + mova m12, [stp + %4 + idx31] + SUM_SUB 4, 6, 9 ; stp2_28, stp2_27 + SUM_SUB 7, 13, 9 ; stp2_29, stp2_26 + SUM_SUB 11, 1, 9 ; stp2_30, stp2_25 + SUM_SUB 12, 2, 9 ; stp2_31, stp2_24 + mova [stp + %4 + idx28], m4 + mova [stp + %4 + idx29], m7 + mova [stp + %4 + idx30], m11 + mova [stp + %4 + idx31], m12 + + ; BLOCK B STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +%if 0 ; overflow occurs in SUM_SUB when using test streams + mova m10, [pw_11585x2] + SUM_SUB 6, 5, 9 + pmulhrsw m6, m10 ; stp1_27 + pmulhrsw m5, m10 ; stp1_20 + SUM_SUB 13, 14, 9 + pmulhrsw m13, m10 ; stp1_26 + pmulhrsw m14, m10 ; stp1_21 + SUM_SUB 1, 0, 9 + pmulhrsw m1, m10 ; stp1_25 + pmulhrsw m0, m10 ; stp1_22 + SUM_SUB 2, 3, 9 + pmulhrsw m2, m10 ; stp1_25 + pmulhrsw m3, m10 ; stp1_22 +%else + BUTTERFLY_4X 6, 5, 11585, 11585, m8, 9, 10 ; stp1_20, stp1_27 + SWAP 6, 5 + BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_21, stp1_26 + SWAP 13, 14 + BUTTERFLY_4X 1, 0, 11585, 11585, m8, 9, 10 ; stp1_22, stp1_25 + SWAP 1, 0 + BUTTERFLY_4X 2, 3, 11585, 11585, m8, 9, 10 ; stp1_23, stp1_24 + SWAP 2, 3 +%endif + mova [stp + %3 + idx20], m5 + mova [stp + %3 + idx21], m14 + mova [stp + %3 + idx22], m0 + mova [stp + %3 + idx23], m3 + mova [stp + %4 + idx24], m2 + mova [stp + %4 + idx25], m1 + mova [stp + %4 + idx26], m13 + mova [stp + %4 + idx27], m6 + + ; BLOCK C STAGE 1 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ; + ; BLOCK C STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m0, [rsp + transposed_in + 16 * 2] + mova m1, [rsp + transposed_in + 16 * 30] + BUTTERFLY_4X 0, 1, 1606, 16305, m8, 9, 10 ; stp1_8, stp1_15 + + mova m2, [rsp + transposed_in + 16 * 14] + mova m3, [rsp + transposed_in + 16 * 18] + BUTTERFLY_4X 3, 2, 12665, 10394, m8, 9, 10 ; stp1_9, stp1_14 + + mova m4, [rsp + transposed_in + 16 * 10] + mova m5, [rsp + transposed_in + 16 * 22] + BUTTERFLY_4X 4, 5, 7723, 14449, m8, 9, 10 ; stp1_10, stp1_13 + + mova m6, [rsp + transposed_in + 16 * 6] + mova m7, [rsp + transposed_in + 16 * 26] + BUTTERFLY_4X 7, 6, 15679, 4756, m8, 9, 10 ; stp1_11, stp1_12 + + ; BLOCK C STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 0, 3, 9 ; stp1_8, stp1_9 + SUM_SUB 7, 4, 9 ; stp1_11, stp1_10 + SUM_SUB 6, 5, 9 ; stp1_12, stp1_13 + SUM_SUB 1, 2, 9 ; stp1_15, stp1_14 + + ; BLOCK C STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10 ; stp1_9, stp1_14 + BUTTERFLY_4Xmm 5, 4, 6270, 15137, m8, 9, 10 ; stp1_13, stp1_10 + + ; BLOCK C STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 0, 7, 9 ; stp1_8, stp1_11 + SUM_SUB 2, 4, 9 ; stp1_9, stp1_10 + SUM_SUB 1, 6, 9 ; stp1_15, stp1_12 + SUM_SUB 3, 5, 9 ; stp1_14, stp1_13 + + ; BLOCK C STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +%if 0 ; overflow occurs in SUM_SUB when using test streams + mova m10, [pw_11585x2] + SUM_SUB 5, 4, 9 + pmulhrsw m5, m10 ; stp1_13 + pmulhrsw m4, m10 ; stp1_10 + SUM_SUB 6, 7, 9 + pmulhrsw m6, m10 ; stp1_12 + pmulhrsw m7, m10 ; stp1_11 +%else + BUTTERFLY_4X 5, 4, 11585, 11585, m8, 9, 10 ; stp1_10, stp1_13 + SWAP 5, 4 + BUTTERFLY_4X 6, 7, 11585, 11585, m8, 9, 10 ; stp1_11, stp1_12 + SWAP 6, 7 +%endif + ; BLOCK C STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova [stp + %2 + idx8], 
m0 + mova [stp + %2 + idx9], m2 + mova [stp + %2 + idx10], m4 + mova [stp + %2 + idx11], m7 + mova [stp + %2 + idx12], m6 + mova [stp + %2 + idx13], m5 + mova [stp + %2 + idx14], m3 + mova [stp + %2 + idx15], m1 + + ; BLOCK D STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ; + ; BLOCK D STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ; + ; BLOCK D STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m11, [rsp + transposed_in + 16 * 4] + mova m12, [rsp + transposed_in + 16 * 28] + BUTTERFLY_4X 11, 12, 3196, 16069, m8, 9, 10 ; stp1_4, stp1_7 + + mova m13, [rsp + transposed_in + 16 * 12] + mova m14, [rsp + transposed_in + 16 * 20] + BUTTERFLY_4X 14, 13, 13623, 9102, m8, 9, 10 ; stp1_5, stp1_6 + + ; BLOCK D STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m0, [rsp + transposed_in + 16 * 0] + mova m1, [rsp + transposed_in + 16 * 16] + +%if 0 ; overflow occurs in SUM_SUB when using test streams + mova m10, [pw_11585x2] + SUM_SUB 0, 1, 9 + pmulhrsw m0, m10 ; stp1_1 + pmulhrsw m1, m10 ; stp1_0 +%else + BUTTERFLY_4X 0, 1, 11585, 11585, m8, 9, 10 ; stp1_1, stp1_0 + SWAP 0, 1 +%endif + mova m2, [rsp + transposed_in + 16 * 8] + mova m3, [rsp + transposed_in + 16 * 24] + BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10 ; stp1_2, stp1_3 + + mova m10, [pw_11585x2] + SUM_SUB 11, 14, 9 ; stp1_4, stp1_5 + SUM_SUB 12, 13, 9 ; stp1_7, stp1_6 + + ; BLOCK D STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +%if 0 ; overflow occurs in SUM_SUB when using test streams + SUM_SUB 13, 14, 9 + pmulhrsw m13, m10 ; stp1_6 + pmulhrsw m14, m10 ; stp1_5 +%else + BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_5, stp1_6 + SWAP 13, 14 +%endif + SUM_SUB 0, 3, 9 ; stp1_0, stp1_3 + SUM_SUB 1, 2, 9 ; stp1_1, stp1_2 + + ; BLOCK D STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 0, 12, 9 ; stp1_0, stp1_7 + SUM_SUB 1, 13, 9 ; stp1_1, stp1_6 + SUM_SUB 2, 14, 9 ; stp1_2, 
stp1_5 + SUM_SUB 3, 11, 9 ; stp1_3, stp1_4 + + ; BLOCK D STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m4, [stp + %2 + idx12] + mova m5, [stp + %2 + idx13] + mova m6, [stp + %2 + idx14] + mova m7, [stp + %2 + idx15] + SUM_SUB 0, 7, 9 ; stp1_0, stp1_15 + SUM_SUB 1, 6, 9 ; stp1_1, stp1_14 + SUM_SUB 2, 5, 9 ; stp1_2, stp1_13 + SUM_SUB 3, 4, 9 ; stp1_3, stp1_12 + + ; 0-3, 28-31 final stage + mova m10, [stp + %4 + idx31] + mova m15, [stp + %4 + idx30] + SUM_SUB 0, 10, 9 ; stp1_0, stp1_31 + SUM_SUB 1, 15, 9 ; stp1_1, stp1_30 + mova [stp + %1 + idx0], m0 + mova [stp + %1 + idx1], m1 + mova [stp + %4 + idx31], m10 + mova [stp + %4 + idx30], m15 + mova m0, [stp + %4 + idx29] + mova m1, [stp + %4 + idx28] + SUM_SUB 2, 0, 9 ; stp1_2, stp1_29 + SUM_SUB 3, 1, 9 ; stp1_3, stp1_28 + mova [stp + %1 + idx2], m2 + mova [stp + %1 + idx3], m3 + mova [stp + %4 + idx29], m0 + mova [stp + %4 + idx28], m1 + + ; 12-15, 16-19 final stage + mova m0, [stp + %3 + idx16] + mova m1, [stp + %3 + idx17] + mova m2, [stp + %3 + idx18] + mova m3, [stp + %3 + idx19] + SUM_SUB 7, 0, 9 ; stp1_15, stp1_16 + SUM_SUB 6, 1, 9 ; stp1_14, stp1_17 + SUM_SUB 5, 2, 9 ; stp1_13, stp1_18 + SUM_SUB 4, 3, 9 ; stp1_12, stp1_19 + mova [stp + %2 + idx12], m4 + mova [stp + %2 + idx13], m5 + mova [stp + %2 + idx14], m6 + mova [stp + %2 + idx15], m7 + mova [stp + %3 + idx16], m0 + mova [stp + %3 + idx17], m1 + mova [stp + %3 + idx18], m2 + mova [stp + %3 + idx19], m3 + + mova m4, [stp + %2 + idx8] + mova m5, [stp + %2 + idx9] + mova m6, [stp + %2 + idx10] + mova m7, [stp + %2 + idx11] + SUM_SUB 11, 7, 9 ; stp1_4, stp1_11 + SUM_SUB 14, 6, 9 ; stp1_5, stp1_10 + SUM_SUB 13, 5, 9 ; stp1_6, stp1_9 + SUM_SUB 12, 4, 9 ; stp1_7, stp1_8 + + ; 4-7, 24-27 final stage + mova m3, [stp + %4 + idx24] + mova m2, [stp + %4 + idx25] + mova m1, [stp + %4 + idx26] + mova m0, [stp + %4 + idx27] + SUM_SUB 12, 3, 9 ; stp1_7, stp1_24 + SUM_SUB 13, 2, 9 ; stp1_6, stp1_25 + SUM_SUB 14, 1, 9 ; stp1_5, stp1_26 + SUM_SUB 
11, 0, 9 ; stp1_4, stp1_27 + mova [stp + %4 + idx24], m3 + mova [stp + %4 + idx25], m2 + mova [stp + %4 + idx26], m1 + mova [stp + %4 + idx27], m0 + mova [stp + %1 + idx4], m11 + mova [stp + %1 + idx5], m14 + mova [stp + %1 + idx6], m13 + mova [stp + %1 + idx7], m12 + + ; 8-11, 20-23 final stage + mova m0, [stp + %3 + idx20] + mova m1, [stp + %3 + idx21] + mova m2, [stp + %3 + idx22] + mova m3, [stp + %3 + idx23] + SUM_SUB 7, 0, 9 ; stp1_11, stp_20 + SUM_SUB 6, 1, 9 ; stp1_10, stp_21 + SUM_SUB 5, 2, 9 ; stp1_9, stp_22 + SUM_SUB 4, 3, 9 ; stp1_8, stp_23 + mova [stp + %2 + idx8], m4 + mova [stp + %2 + idx9], m5 + mova [stp + %2 + idx10], m6 + mova [stp + %2 + idx11], m7 + mova [stp + %3 + idx20], m0 + mova [stp + %3 + idx21], m1 + mova [stp + %3 + idx22], m2 + mova [stp + %3 + idx23], m3 +%endmacro + +INIT_XMM ssse3 +cglobal idct32x32_1024_add, 3, 11, 16, i32x32_size, input, output, stride + mova m8, [pd_8192] + mov r6, 4 + lea stp, [rsp + pass_one_start] + +idct32x32_1024: + mov r3, inputq + lea r4, [rsp + transposed_in] + mov r7, 4 + +idct32x32_1024_transpose: +%if CONFIG_VP9_HIGHBITDEPTH + mova m0, [r3 + 0] + packssdw m0, [r3 + 16] + mova m1, [r3 + 32 * 4] + packssdw m1, [r3 + 32 * 4 + 16] + mova m2, [r3 + 32 * 8] + packssdw m2, [r3 + 32 * 8 + 16] + mova m3, [r3 + 32 * 12] + packssdw m3, [r3 + 32 * 12 + 16] + mova m4, [r3 + 32 * 16] + packssdw m4, [r3 + 32 * 16 + 16] + mova m5, [r3 + 32 * 20] + packssdw m5, [r3 + 32 * 20 + 16] + mova m6, [r3 + 32 * 24] + packssdw m6, [r3 + 32 * 24 + 16] + mova m7, [r3 + 32 * 28] + packssdw m7, [r3 + 32 * 28 + 16] +%else + mova m0, [r3 + 0] + mova m1, [r3 + 16 * 4] + mova m2, [r3 + 16 * 8] + mova m3, [r3 + 16 * 12] + mova m4, [r3 + 16 * 16] + mova m5, [r3 + 16 * 20] + mova m6, [r3 + 16 * 24] + mova m7, [r3 + 16 * 28] +%endif + + TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 + + mova [r4 + 0], m0 + mova [r4 + 16 * 1], m1 + mova [r4 + 16 * 2], m2 + mova [r4 + 16 * 3], m3 + mova [r4 + 16 * 4], m4 + mova [r4 + 16 * 5], m5 + mova [r4 + 16 * 
6], m6 + mova [r4 + 16 * 7], m7 +%if CONFIG_VP9_HIGHBITDEPTH + add r3, 32 +%else + add r3, 16 +%endif + add r4, 16 * 8 + dec r7 + jne idct32x32_1024_transpose + + IDCT32X32_1024 16*0, 16*32, 16*64, 16*96 + + lea stp, [stp + 16 * 8] +%if CONFIG_VP9_HIGHBITDEPTH + lea inputq, [inputq + 32 * 32] +%else + lea inputq, [inputq + 16 * 32] +%endif + dec r6 + jnz idct32x32_1024 + + mov r6, 4 + lea stp, [rsp + pass_one_start] + lea r9, [rsp + pass_one_start] + +idct32x32_1024_2: + lea r4, [rsp + transposed_in] + mov r3, r9 + mov r7, 4 + +idct32x32_1024_transpose_2: + mova m0, [r3 + 0] + mova m1, [r3 + 16 * 1] + mova m2, [r3 + 16 * 2] + mova m3, [r3 + 16 * 3] + mova m4, [r3 + 16 * 4] + mova m5, [r3 + 16 * 5] + mova m6, [r3 + 16 * 6] + mova m7, [r3 + 16 * 7] + + TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 + + mova [r4 + 0], m0 + mova [r4 + 16 * 1], m1 + mova [r4 + 16 * 2], m2 + mova [r4 + 16 * 3], m3 + mova [r4 + 16 * 4], m4 + mova [r4 + 16 * 5], m5 + mova [r4 + 16 * 6], m6 + mova [r4 + 16 * 7], m7 + + add r3, 16 * 8 + add r4, 16 * 8 + dec r7 + jne idct32x32_1024_transpose_2 + + IDCT32X32_1024 16*0, 16*8, 16*16, 16*24 + + lea stp, [stp + 16 * 32] + add r9, 16 * 32 + dec r6 + jnz idct32x32_1024_2 + + RECON_AND_STORE pass_two_start + + RET %endif diff --git a/libvpx/vpx_dsp/x86/inv_wht_sse2.asm b/libvpx/vpx_dsp/x86/inv_wht_sse2.asm index df6f4692b..fbbcd76bd 100644 --- a/libvpx/vpx_dsp/x86/inv_wht_sse2.asm +++ b/libvpx/vpx_dsp/x86/inv_wht_sse2.asm @@ -82,9 +82,15 @@ SECTION .text INIT_XMM sse2 cglobal iwht4x4_16_add, 3, 3, 7, input, output, stride +%if CONFIG_VP9_HIGHBITDEPTH + mova m0, [inputq + 0] + packssdw m0, [inputq + 16] + mova m1, [inputq + 32] + packssdw m1, [inputq + 48] +%else mova m0, [inputq + 0] mova m1, [inputq + 16] - +%endif psraw m0, 2 psraw m1, 2 diff --git a/libvpx/vpx_dsp/x86/loopfilter_avx2.c b/libvpx/vpx_dsp/x86/loopfilter_avx2.c index 23a97dd05..be1087c1e 100644 --- a/libvpx/vpx_dsp/x86/loopfilter_avx2.c +++ b/libvpx/vpx_dsp/x86/loopfilter_avx2.c @@ -13,9 
+13,10 @@ #include "./vpx_dsp_rtcd.h" #include "vpx_ports/mem.h" -static void mb_lpf_horizontal_edge_w_avx2_8(unsigned char *s, int p, - const unsigned char *_blimit, const unsigned char *_limit, - const unsigned char *_thresh) { +void vpx_lpf_horizontal_edge_8_avx2(unsigned char *s, int p, + const unsigned char *_blimit, + const unsigned char *_limit, + const unsigned char *_thresh) { __m128i mask, hev, flat, flat2; const __m128i zero = _mm_set1_epi16(0); const __m128i one = _mm_set1_epi8(1); @@ -400,9 +401,10 @@ DECLARE_ALIGNED(32, static const uint8_t, filt_loopfilter_avx2[32]) = { 8, 128, 9, 128, 10, 128, 11, 128, 12, 128, 13, 128, 14, 128, 15, 128 }; -static void mb_lpf_horizontal_edge_w_avx2_16(unsigned char *s, int p, - const unsigned char *_blimit, const unsigned char *_limit, - const unsigned char *_thresh) { +void vpx_lpf_horizontal_edge_16_avx2(unsigned char *s, int p, + const unsigned char *_blimit, + const unsigned char *_limit, + const unsigned char *_thresh) { __m128i mask, hev, flat, flat2; const __m128i zero = _mm_set1_epi16(0); const __m128i one = _mm_set1_epi8(1); @@ -975,12 +977,3 @@ static void mb_lpf_horizontal_edge_w_avx2_16(unsigned char *s, int p, _mm_storeu_si128((__m128i *) (s + 6 * p), q6); } } - -void vpx_lpf_horizontal_16_avx2(unsigned char *s, int p, - const unsigned char *_blimit, const unsigned char *_limit, - const unsigned char *_thresh, int count) { - if (count == 1) - mb_lpf_horizontal_edge_w_avx2_8(s, p, _blimit, _limit, _thresh); - else - mb_lpf_horizontal_edge_w_avx2_16(s, p, _blimit, _limit, _thresh); -} diff --git a/libvpx/vpx_dsp/x86/loopfilter_mmx.asm b/libvpx/vpx_dsp/x86/loopfilter_mmx.asm deleted file mode 100644 index b9c18b680..000000000 --- a/libvpx/vpx_dsp/x86/loopfilter_mmx.asm +++ /dev/null @@ -1,611 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
-; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - - -;void vpx_lpf_horizontal_4_mmx -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -; int count -;) -global sym(vpx_lpf_horizontal_4_mmx) PRIVATE -sym(vpx_lpf_horizontal_4_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 32 ; reserve 32 bytes - %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8]; - %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8]; - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? 
- - movsxd rcx, dword ptr arg(5) ;count -.next8_h: - mov rdx, arg(3) ;limit - movq mm7, [rdx] - mov rdi, rsi ; rdi points to row +1 for indirect addressing - add rdi, rax - - ; calculate breakout conditions - movq mm2, [rdi+2*rax] ; q3 - movq mm1, [rsi+2*rax] ; q2 - movq mm6, mm1 ; q2 - psubusb mm1, mm2 ; q2-=q3 - psubusb mm2, mm6 ; q3-=q2 - por mm1, mm2 ; abs(q3-q2) - psubusb mm1, mm7 ; - - - movq mm4, [rsi+rax] ; q1 - movq mm3, mm4 ; q1 - psubusb mm4, mm6 ; q1-=q2 - psubusb mm6, mm3 ; q2-=q1 - por mm4, mm6 ; abs(q2-q1) - - psubusb mm4, mm7 - por mm1, mm4 - - movq mm4, [rsi] ; q0 - movq mm0, mm4 ; q0 - psubusb mm4, mm3 ; q0-=q1 - psubusb mm3, mm0 ; q1-=q0 - por mm4, mm3 ; abs(q0-q1) - movq t0, mm4 ; save to t0 - psubusb mm4, mm7 - por mm1, mm4 - - - neg rax ; negate pitch to deal with above border - - movq mm2, [rsi+4*rax] ; p3 - movq mm4, [rdi+4*rax] ; p2 - movq mm5, mm4 ; p2 - psubusb mm4, mm2 ; p2-=p3 - psubusb mm2, mm5 ; p3-=p2 - por mm4, mm2 ; abs(p3 - p2) - psubusb mm4, mm7 - por mm1, mm4 - - - movq mm4, [rsi+2*rax] ; p1 - movq mm3, mm4 ; p1 - psubusb mm4, mm5 ; p1-=p2 - psubusb mm5, mm3 ; p2-=p1 - por mm4, mm5 ; abs(p2 - p1) - psubusb mm4, mm7 - por mm1, mm4 - - movq mm2, mm3 ; p1 - - movq mm4, [rsi+rax] ; p0 - movq mm5, mm4 ; p0 - psubusb mm4, mm3 ; p0-=p1 - psubusb mm3, mm5 ; p1-=p0 - por mm4, mm3 ; abs(p1 - p0) - movq t1, mm4 ; save to t1 - psubusb mm4, mm7 - por mm1, mm4 - - movq mm3, [rdi] ; q1 - movq mm4, mm3 ; q1 - psubusb mm3, mm2 ; q1-=p1 - psubusb mm2, mm4 ; p1-=q1 - por mm2, mm3 ; abs(p1-q1) - pand mm2, [GLOBAL(tfe)] ; set lsb of each byte to zero - psrlw mm2, 1 ; abs(p1-q1)/2 - - movq mm6, mm5 ; p0 - movq mm3, [rsi] ; q0 - psubusb mm5, mm3 ; p0-=q0 - psubusb mm3, mm6 ; q0-=p0 - por mm5, mm3 ; abs(p0 - q0) - paddusb mm5, mm5 ; abs(p0-q0)*2 - paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - - mov rdx, arg(2) ;blimit ; get blimit - movq mm7, [rdx] ; blimit - - psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit - por mm1, mm5 - pxor 
mm5, mm5 - pcmpeqb mm1, mm5 ; mask mm1 - - ; calculate high edge variance - mov rdx, arg(4) ;thresh ; get thresh - movq mm7, [rdx] ; - movq mm4, t0 ; get abs (q1 - q0) - psubusb mm4, mm7 - movq mm3, t1 ; get abs (p1 - p0) - psubusb mm3, mm7 - paddb mm4, mm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh - - pcmpeqb mm4, mm5 - - pcmpeqb mm5, mm5 - pxor mm4, mm5 - - - ; start work on filters - movq mm2, [rsi+2*rax] ; p1 - movq mm7, [rdi] ; q1 - pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values - pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values - psubsb mm2, mm7 ; p1 - q1 - pand mm2, mm4 ; high var mask (hvm)(p1 - q1) - pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values - pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values - movq mm3, mm0 ; q0 - psubsb mm0, mm6 ; q0 - p0 - paddsb mm2, mm0 ; 1 * (q0 - p0) + hvm(p1 - q1) - paddsb mm2, mm0 ; 2 * (q0 - p0) + hvm(p1 - q1) - paddsb mm2, mm0 ; 3 * (q0 - p0) + hvm(p1 - q1) - pand mm1, mm2 ; mask filter values we don't care about - movq mm2, mm1 - paddsb mm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4 - paddsb mm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3 - - pxor mm0, mm0 ; - pxor mm5, mm5 - punpcklbw mm0, mm2 ; - punpckhbw mm5, mm2 ; - psraw mm0, 11 ; - psraw mm5, 11 - packsswb mm0, mm5 - movq mm2, mm0 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3; - - pxor mm0, mm0 ; 0 - movq mm5, mm1 ; abcdefgh - punpcklbw mm0, mm1 ; e0f0g0h0 - psraw mm0, 11 ; sign extended shift right by 3 - pxor mm1, mm1 ; 0 - punpckhbw mm1, mm5 ; a0b0c0d0 - psraw mm1, 11 ; sign extended shift right by 3 - movq mm5, mm0 ; save results - - packsswb mm0, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3 - paddsw mm5, [GLOBAL(ones)] - paddsw mm1, [GLOBAL(ones)] - psraw mm5, 1 ; partial shifted one more time for 2nd tap - psraw mm1, 1 ; partial shifted one more time for 2nd tap - packsswb mm5, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4 - pandn mm4, mm5 ; high edge variance additive - - paddsb mm6, mm2 ; p0+= 
p0 add - pxor mm6, [GLOBAL(t80)] ; unoffset - movq [rsi+rax], mm6 ; write back - - movq mm6, [rsi+2*rax] ; p1 - pxor mm6, [GLOBAL(t80)] ; reoffset - paddsb mm6, mm4 ; p1+= p1 add - pxor mm6, [GLOBAL(t80)] ; unoffset - movq [rsi+2*rax], mm6 ; write back - - psubsb mm3, mm0 ; q0-= q0 add - pxor mm3, [GLOBAL(t80)] ; unoffset - movq [rsi], mm3 ; write back - - psubsb mm7, mm4 ; q1-= q1 add - pxor mm7, [GLOBAL(t80)] ; unoffset - movq [rdi], mm7 ; write back - - add rsi,8 - neg rax - dec rcx - jnz .next8_h - - add rsp, 32 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void vpx_lpf_vertical_4_mmx -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -; int count -;) -global sym(vpx_lpf_vertical_4_mmx) PRIVATE -sym(vpx_lpf_vertical_4_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 64 ; reserve 64 bytes - %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8]; - %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8]; - %define srct [rsp + 32] ;__declspec(align(16)) char srct[32]; - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? 
- - lea rsi, [rsi + rax*4 - 4] - - movsxd rcx, dword ptr arg(5) ;count -.next8_v: - mov rdi, rsi ; rdi points to row +1 for indirect addressing - add rdi, rax - - - ;transpose - movq mm6, [rsi+2*rax] ; 67 66 65 64 63 62 61 60 - movq mm7, mm6 ; 77 76 75 74 73 72 71 70 - - punpckhbw mm7, [rdi+2*rax] ; 77 67 76 66 75 65 74 64 - punpcklbw mm6, [rdi+2*rax] ; 73 63 72 62 71 61 70 60 - - movq mm4, [rsi] ; 47 46 45 44 43 42 41 40 - movq mm5, mm4 ; 47 46 45 44 43 42 41 40 - - punpckhbw mm5, [rsi+rax] ; 57 47 56 46 55 45 54 44 - punpcklbw mm4, [rsi+rax] ; 53 43 52 42 51 41 50 40 - - movq mm3, mm5 ; 57 47 56 46 55 45 54 44 - punpckhwd mm5, mm7 ; 77 67 57 47 76 66 56 46 - - punpcklwd mm3, mm7 ; 75 65 55 45 74 64 54 44 - movq mm2, mm4 ; 53 43 52 42 51 41 50 40 - - punpckhwd mm4, mm6 ; 73 63 53 43 72 62 52 42 - punpcklwd mm2, mm6 ; 71 61 51 41 70 60 50 40 - - neg rax - movq mm6, [rsi+rax*2] ; 27 26 25 24 23 22 21 20 - - movq mm1, mm6 ; 27 26 25 24 23 22 21 20 - punpckhbw mm6, [rsi+rax] ; 37 27 36 36 35 25 34 24 - - punpcklbw mm1, [rsi+rax] ; 33 23 32 22 31 21 30 20 - movq mm7, [rsi+rax*4]; ; 07 06 05 04 03 02 01 00 - - punpckhbw mm7, [rdi+rax*4] ; 17 07 16 06 15 05 14 04 - movq mm0, mm7 ; 17 07 16 06 15 05 14 04 - - punpckhwd mm7, mm6 ; 37 27 17 07 36 26 16 06 - punpcklwd mm0, mm6 ; 35 25 15 05 34 24 14 04 - - movq mm6, mm7 ; 37 27 17 07 36 26 16 06 - punpckhdq mm7, mm5 ; 77 67 57 47 37 27 17 07 = q3 - - punpckldq mm6, mm5 ; 76 66 56 46 36 26 16 06 = q2 - - movq mm5, mm6 ; 76 66 56 46 36 26 16 06 - psubusb mm5, mm7 ; q2-q3 - - psubusb mm7, mm6 ; q3-q2 - por mm7, mm5; ; mm7=abs (q3-q2) - - movq mm5, mm0 ; 35 25 15 05 34 24 14 04 - punpckhdq mm5, mm3 ; 75 65 55 45 35 25 15 05 = q1 - - punpckldq mm0, mm3 ; 74 64 54 44 34 24 15 04 = q0 - movq mm3, mm5 ; 75 65 55 45 35 25 15 05 = q1 - - psubusb mm3, mm6 ; q1-q2 - psubusb mm6, mm5 ; q2-q1 - - por mm6, mm3 ; mm6=abs(q2-q1) - lea rdx, srct - - movq [rdx+24], mm5 ; save q1 - movq [rdx+16], mm0 ; save q0 - - movq mm3, [rsi+rax*4] ; 07 06 
05 04 03 02 01 00 - punpcklbw mm3, [rdi+rax*4] ; 13 03 12 02 11 01 10 00 - - movq mm0, mm3 ; 13 03 12 02 11 01 10 00 - punpcklwd mm0, mm1 ; 31 21 11 01 30 20 10 00 - - punpckhwd mm3, mm1 ; 33 23 13 03 32 22 12 02 - movq mm1, mm0 ; 31 21 11 01 30 20 10 00 - - punpckldq mm0, mm2 ; 70 60 50 40 30 20 10 00 =p3 - punpckhdq mm1, mm2 ; 71 61 51 41 31 21 11 01 =p2 - - movq mm2, mm1 ; 71 61 51 41 31 21 11 01 =p2 - psubusb mm2, mm0 ; p2-p3 - - psubusb mm0, mm1 ; p3-p2 - por mm0, mm2 ; mm0=abs(p3-p2) - - movq mm2, mm3 ; 33 23 13 03 32 22 12 02 - punpckldq mm2, mm4 ; 72 62 52 42 32 22 12 02 = p1 - - punpckhdq mm3, mm4 ; 73 63 53 43 33 23 13 03 = p0 - movq [rdx+8], mm3 ; save p0 - - movq [rdx], mm2 ; save p1 - movq mm5, mm2 ; mm5 = p1 - - psubusb mm2, mm1 ; p1-p2 - psubusb mm1, mm5 ; p2-p1 - - por mm1, mm2 ; mm1=abs(p2-p1) - mov rdx, arg(3) ;limit - - movq mm4, [rdx] ; mm4 = limit - psubusb mm7, mm4 - - psubusb mm0, mm4 - psubusb mm1, mm4 - - psubusb mm6, mm4 - por mm7, mm6 - - por mm0, mm1 - por mm0, mm7 ; abs(q3-q2) > limit || abs(p3-p2) > limit ||abs(p2-p1) > limit || abs(q2-q1) > limit - - movq mm1, mm5 ; p1 - - movq mm7, mm3 ; mm3=mm7=p0 - psubusb mm7, mm5 ; p0 - p1 - - psubusb mm5, mm3 ; p1 - p0 - por mm5, mm7 ; abs(p1-p0) - - movq t0, mm5 ; save abs(p1-p0) - lea rdx, srct - - psubusb mm5, mm4 - por mm0, mm5 ; mm0=mask - - movq mm5, [rdx+16] ; mm5=q0 - movq mm7, [rdx+24] ; mm7=q1 - - movq mm6, mm5 ; mm6=q0 - movq mm2, mm7 ; q1 - psubusb mm5, mm7 ; q0-q1 - - psubusb mm7, mm6 ; q1-q0 - por mm7, mm5 ; abs(q1-q0) - - movq t1, mm7 ; save abs(q1-q0) - psubusb mm7, mm4 - - por mm0, mm7 ; mask - - movq mm5, mm2 ; q1 - psubusb mm5, mm1 ; q1-=p1 - psubusb mm1, mm2 ; p1-=q1 - por mm5, mm1 ; abs(p1-q1) - pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero - psrlw mm5, 1 ; abs(p1-q1)/2 - - mov rdx, arg(2) ;blimit ; - - movq mm4, [rdx] ;blimit - movq mm1, mm3 ; mm1=mm3=p0 - - movq mm7, mm6 ; mm7=mm6=q0 - psubusb mm1, mm7 ; p0-q0 - - psubusb mm7, mm3 ; q0-p0 - por mm1, mm7 ; 
abs(q0-p0) - paddusb mm1, mm1 ; abs(q0-p0)*2 - paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - - psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit - por mm1, mm0; ; mask - - pxor mm0, mm0 - pcmpeqb mm1, mm0 - - ; calculate high edge variance - mov rdx, arg(4) ;thresh ; get thresh - movq mm7, [rdx] - ; - movq mm4, t0 ; get abs (q1 - q0) - psubusb mm4, mm7 - - movq mm3, t1 ; get abs (p1 - p0) - psubusb mm3, mm7 - - por mm4, mm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh - pcmpeqb mm4, mm0 - - pcmpeqb mm0, mm0 - pxor mm4, mm0 - - - - ; start work on filters - lea rdx, srct - - movq mm2, [rdx] ; p1 - movq mm7, [rdx+24] ; q1 - - movq mm6, [rdx+8] ; p0 - movq mm0, [rdx+16] ; q0 - - pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values - pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values - - psubsb mm2, mm7 ; p1 - q1 - pand mm2, mm4 ; high var mask (hvm)(p1 - q1) - - pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values - pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values - - movq mm3, mm0 ; q0 - psubsb mm0, mm6 ; q0 - p0 - - paddsb mm2, mm0 ; 1 * (q0 - p0) + hvm(p1 - q1) - paddsb mm2, mm0 ; 2 * (q0 - p0) + hvm(p1 - q1) - - paddsb mm2, mm0 ; 3 * (q0 - p0) + hvm(p1 - q1) - pand mm1, mm2 ; mask filter values we don't care about - - movq mm2, mm1 - paddsb mm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4 - - paddsb mm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3 - pxor mm0, mm0 ; - - pxor mm5, mm5 - punpcklbw mm0, mm2 ; - - punpckhbw mm5, mm2 ; - psraw mm0, 11 ; - - psraw mm5, 11 - packsswb mm0, mm5 - - movq mm2, mm0 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3; - - pxor mm0, mm0 ; 0 - movq mm5, mm1 ; abcdefgh - - punpcklbw mm0, mm1 ; e0f0g0h0 - psraw mm0, 11 ; sign extended shift right by 3 - - pxor mm1, mm1 ; 0 - punpckhbw mm1, mm5 ; a0b0c0d0 - - psraw mm1, 11 ; sign extended shift right by 3 - movq mm5, mm0 ; save results - - packsswb mm0, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3 - paddsw mm5, 
[GLOBAL(ones)] - - paddsw mm1, [GLOBAL(ones)] - psraw mm5, 1 ; partial shifted one more time for 2nd tap - - psraw mm1, 1 ; partial shifted one more time for 2nd tap - packsswb mm5, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4 - - pandn mm4, mm5 ; high edge variance additive - - paddsb mm6, mm2 ; p0+= p0 add - pxor mm6, [GLOBAL(t80)] ; unoffset - - ; mm6=p0 ; - movq mm1, [rdx] ; p1 - pxor mm1, [GLOBAL(t80)] ; reoffset - - paddsb mm1, mm4 ; p1+= p1 add - pxor mm1, [GLOBAL(t80)] ; unoffset - ; mm6 = p0 mm1 = p1 - - psubsb mm3, mm0 ; q0-= q0 add - pxor mm3, [GLOBAL(t80)] ; unoffset - - ; mm3 = q0 - psubsb mm7, mm4 ; q1-= q1 add - pxor mm7, [GLOBAL(t80)] ; unoffset - ; mm7 = q1 - - ; transpose and write back - ; mm1 = 72 62 52 42 32 22 12 02 - ; mm6 = 73 63 53 43 33 23 13 03 - ; mm3 = 74 64 54 44 34 24 14 04 - ; mm7 = 75 65 55 45 35 25 15 05 - - movq mm2, mm1 ; 72 62 52 42 32 22 12 02 - punpcklbw mm2, mm6 ; 33 32 23 22 13 12 03 02 - - movq mm4, mm3 ; 74 64 54 44 34 24 14 04 - punpckhbw mm1, mm6 ; 73 72 63 62 53 52 43 42 - - punpcklbw mm4, mm7 ; 35 34 25 24 15 14 05 04 - punpckhbw mm3, mm7 ; 75 74 65 64 55 54 45 44 - - movq mm6, mm2 ; 33 32 23 22 13 12 03 02 - punpcklwd mm2, mm4 ; 15 14 13 12 05 04 03 02 - - punpckhwd mm6, mm4 ; 35 34 33 32 25 24 23 22 - movq mm5, mm1 ; 73 72 63 62 53 52 43 42 - - punpcklwd mm1, mm3 ; 55 54 53 52 45 44 43 42 - punpckhwd mm5, mm3 ; 75 74 73 72 65 64 63 62 - - - ; mm2 = 15 14 13 12 05 04 03 02 - ; mm6 = 35 34 33 32 25 24 23 22 - ; mm5 = 55 54 53 52 45 44 43 42 - ; mm1 = 75 74 73 72 65 64 63 62 - - - - movd [rsi+rax*4+2], mm2 - psrlq mm2, 32 - - movd [rdi+rax*4+2], mm2 - movd [rsi+rax*2+2], mm6 - - psrlq mm6, 32 - movd [rsi+rax+2],mm6 - - movd [rsi+2], mm1 - psrlq mm1, 32 - - movd [rdi+2], mm1 - neg rax - - movd [rdi+rax+2],mm5 - psrlq mm5, 32 - - movd [rdi+rax*2+2], mm5 - - lea rsi, [rsi+rax*8] - dec rcx - jnz .next8_v - - add rsp, 64 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - 
-SECTION_RODATA -align 16 -tfe: - times 8 db 0xfe -align 16 -t80: - times 8 db 0x80 -align 16 -t3: - times 8 db 0x03 -align 16 -t4: - times 8 db 0x04 -align 16 -ones: - times 4 dw 0x0001 diff --git a/libvpx/vpx_dsp/x86/loopfilter_sse2.c b/libvpx/vpx_dsp/x86/loopfilter_sse2.c index ed1012736..739adf31d 100644 --- a/libvpx/vpx_dsp/x86/loopfilter_sse2.c +++ b/libvpx/vpx_dsp/x86/loopfilter_sse2.c @@ -18,11 +18,216 @@ static INLINE __m128i abs_diff(__m128i a, __m128i b) { return _mm_or_si128(_mm_subs_epu8(a, b), _mm_subs_epu8(b, a)); } -static void mb_lpf_horizontal_edge_w_sse2_8(unsigned char *s, - int p, - const unsigned char *_blimit, - const unsigned char *_limit, - const unsigned char *_thresh) { +// filter_mask and hev_mask +#define FILTER_HEV_MASK do { \ + /* (abs(q1 - q0), abs(p1 - p0) */ \ + __m128i flat = abs_diff(q1p1, q0p0); \ + /* abs(p1 - q1), abs(p0 - q0) */ \ + const __m128i abs_p1q1p0q0 = abs_diff(p1p0, q1q0); \ + __m128i abs_p0q0, abs_p1q1, work; \ + \ + /* const uint8_t hev = hev_mask(thresh, *op1, *op0, *oq0, *oq1); */ \ + hev = _mm_unpacklo_epi8(_mm_max_epu8(flat, _mm_srli_si128(flat, 8)), zero); \ + hev = _mm_cmpgt_epi16(hev, thresh); \ + hev = _mm_packs_epi16(hev, hev); \ + \ + /* const int8_t mask = filter_mask(*limit, *blimit, */ \ + /* p3, p2, p1, p0, q0, q1, q2, q3); */ \ + abs_p0q0 = _mm_adds_epu8(abs_p1q1p0q0, abs_p1q1p0q0); /* abs(p0 - q0) * 2 */\ + abs_p1q1 = _mm_unpackhi_epi8(abs_p1q1p0q0, abs_p1q1p0q0); /* abs(p1 - q1) */\ + abs_p1q1 = _mm_srli_epi16(abs_p1q1, 9); \ + abs_p1q1 = _mm_packs_epi16(abs_p1q1, abs_p1q1); /* abs(p1 - q1) / 2 */ \ + /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 */ \ + mask = _mm_adds_epu8(abs_p0q0, abs_p1q1); \ + /* abs(p3 - p2), abs(p2 - p1) */ \ + work = abs_diff(p3p2, p2p1); \ + flat = _mm_max_epu8(work, flat); \ + /* abs(q3 - q2), abs(q2 - q1) */ \ + work = abs_diff(q3q2, q2q1); \ + flat = _mm_max_epu8(work, flat); \ + flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8)); \ + mask = _mm_unpacklo_epi64(mask, flat); \ 
+ mask = _mm_subs_epu8(mask, limit); \ + mask = _mm_cmpeq_epi8(mask, zero); \ + mask = _mm_and_si128(mask, _mm_srli_si128(mask, 8)); \ +} while (0) + +#define FILTER4 do { \ + const __m128i t3t4 = _mm_set_epi8(3, 3, 3, 3, 3, 3, 3, 3, \ + 4, 4, 4, 4, 4, 4, 4, 4); \ + const __m128i t80 = _mm_set1_epi8(0x80); \ + __m128i filter, filter2filter1, work; \ + \ + ps1ps0 = _mm_xor_si128(p1p0, t80); /* ^ 0x80 */ \ + qs1qs0 = _mm_xor_si128(q1q0, t80); \ + \ + /* int8_t filter = signed_char_clamp(ps1 - qs1) & hev; */ \ + work = _mm_subs_epi8(ps1ps0, qs1qs0); \ + filter = _mm_and_si128(_mm_srli_si128(work, 8), hev); \ + /* filter = signed_char_clamp(filter + 3 * (qs0 - ps0)) & mask; */ \ + filter = _mm_subs_epi8(filter, work); \ + filter = _mm_subs_epi8(filter, work); \ + filter = _mm_subs_epi8(filter, work); /* + 3 * (qs0 - ps0) */ \ + filter = _mm_and_si128(filter, mask); /* & mask */ \ + filter = _mm_unpacklo_epi64(filter, filter); \ + \ + /* filter1 = signed_char_clamp(filter + 4) >> 3; */ \ + /* filter2 = signed_char_clamp(filter + 3) >> 3; */ \ + filter2filter1 = _mm_adds_epi8(filter, t3t4); /* signed_char_clamp */ \ + filter = _mm_unpackhi_epi8(filter2filter1, filter2filter1); \ + filter2filter1 = _mm_unpacklo_epi8(filter2filter1, filter2filter1); \ + filter2filter1 = _mm_srai_epi16(filter2filter1, 11); /* >> 3 */ \ + filter = _mm_srai_epi16(filter, 11); /* >> 3 */ \ + filter2filter1 = _mm_packs_epi16(filter2filter1, filter); \ + \ + /* filter = ROUND_POWER_OF_TWO(filter1, 1) & ~hev; */ \ + filter = _mm_subs_epi8(filter2filter1, ff); /* + 1 */ \ + filter = _mm_unpacklo_epi8(filter, filter); \ + filter = _mm_srai_epi16(filter, 9); /* round */ \ + filter = _mm_packs_epi16(filter, filter); \ + filter = _mm_andnot_si128(hev, filter); \ + \ + hev = _mm_unpackhi_epi64(filter2filter1, filter); \ + filter2filter1 = _mm_unpacklo_epi64(filter2filter1, filter); \ + \ + /* signed_char_clamp(qs1 - filter), signed_char_clamp(qs0 - filter1) */ \ + qs1qs0 = _mm_subs_epi8(qs1qs0, 
filter2filter1); \ + /* signed_char_clamp(ps1 + filter), signed_char_clamp(ps0 + filter2) */ \ + ps1ps0 = _mm_adds_epi8(ps1ps0, hev); \ + qs1qs0 = _mm_xor_si128(qs1qs0, t80); /* ^ 0x80 */ \ + ps1ps0 = _mm_xor_si128(ps1ps0, t80); /* ^ 0x80 */ \ +} while (0) + +void vpx_lpf_horizontal_4_sse2(uint8_t *s, int p /* pitch */, + const uint8_t *_blimit, const uint8_t *_limit, + const uint8_t *_thresh) { + const __m128i zero = _mm_set1_epi16(0); + const __m128i limit = + _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)_blimit), + _mm_loadl_epi64((const __m128i *)_limit)); + const __m128i thresh = + _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)_thresh), zero); + const __m128i ff = _mm_cmpeq_epi8(zero, zero); + __m128i q1p1, q0p0, p3p2, p2p1, p1p0, q3q2, q2q1, q1q0, ps1ps0, qs1qs0; + __m128i mask, hev; + + p3p2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 3 * p)), + _mm_loadl_epi64((__m128i *)(s - 4 * p))); + q1p1 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 2 * p)), + _mm_loadl_epi64((__m128i *)(s + 1 * p))); + q0p0 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 1 * p)), + _mm_loadl_epi64((__m128i *)(s + 0 * p))); + q3q2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s + 2 * p)), + _mm_loadl_epi64((__m128i *)(s + 3 * p))); + p1p0 = _mm_unpacklo_epi64(q0p0, q1p1); + p2p1 = _mm_unpacklo_epi64(q1p1, p3p2); + q1q0 = _mm_unpackhi_epi64(q0p0, q1p1); + q2q1 = _mm_unpacklo_epi64(_mm_srli_si128(q1p1, 8), q3q2); + + FILTER_HEV_MASK; + FILTER4; + + _mm_storeh_pi((__m64 *)(s - 2 * p), _mm_castsi128_ps(ps1ps0)); // *op1 + _mm_storel_epi64((__m128i *)(s - 1 * p), ps1ps0); // *op0 + _mm_storel_epi64((__m128i *)(s + 0 * p), qs1qs0); // *oq0 + _mm_storeh_pi((__m64 *)(s + 1 * p), _mm_castsi128_ps(qs1qs0)); // *oq1 +} + +void vpx_lpf_vertical_4_sse2(uint8_t *s, int p /* pitch */, + const uint8_t *_blimit, const uint8_t *_limit, + const uint8_t *_thresh) { + const __m128i zero = _mm_set1_epi16(0); + const __m128i limit = + 
_mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)_blimit), + _mm_loadl_epi64((const __m128i *)_limit)); + const __m128i thresh = + _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)_thresh), zero); + const __m128i ff = _mm_cmpeq_epi8(zero, zero); + __m128i x0, x1, x2, x3; + __m128i q1p1, q0p0, p3p2, p2p1, p1p0, q3q2, q2q1, q1q0, ps1ps0, qs1qs0; + __m128i mask, hev; + + // 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17 + q1q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 0 * p - 4)), + _mm_loadl_epi64((__m128i *)(s + 1 * p - 4))); + + // 20 30 21 31 22 32 23 33 24 34 25 35 26 36 27 37 + x1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 2 * p - 4)), + _mm_loadl_epi64((__m128i *)(s + 3 * p - 4))); + + // 40 50 41 51 42 52 43 53 44 54 45 55 46 56 47 57 + x2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 4 * p - 4)), + _mm_loadl_epi64((__m128i *)(s + 5 * p - 4))); + + // 60 70 61 71 62 72 63 73 64 74 65 75 66 76 67 77 + x3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 6 * p - 4)), + _mm_loadl_epi64((__m128i *)(s + 7 * p - 4))); + + // Transpose 8x8 + // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 + p1p0 = _mm_unpacklo_epi16(q1q0, x1); + // 40 50 60 70 41 51 61 71 42 52 62 72 43 53 63 73 + x0 = _mm_unpacklo_epi16(x2, x3); + // 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71 + p3p2 = _mm_unpacklo_epi32(p1p0, x0); + // 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73 + p1p0 = _mm_unpackhi_epi32(p1p0, x0); + p3p2 = _mm_unpackhi_epi64(p3p2, _mm_slli_si128(p3p2, 8)); // swap lo and high + p1p0 = _mm_unpackhi_epi64(p1p0, _mm_slli_si128(p1p0, 8)); // swap lo and high + + // 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37 + q1q0 = _mm_unpackhi_epi16(q1q0, x1); + // 44 54 64 74 45 55 65 75 46 56 66 76 47 57 67 77 + x2 = _mm_unpackhi_epi16(x2, x3); + // 06 16 26 36 46 56 66 76 07 17 27 37 47 57 67 77 + q3q2 = _mm_unpackhi_epi32(q1q0, x2); + // 04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75 + q1q0 = _mm_unpacklo_epi32(q1q0, x2); + + q0p0 = 
_mm_unpacklo_epi64(p1p0, q1q0); + q1p1 = _mm_unpackhi_epi64(p1p0, q1q0); + p1p0 = _mm_unpacklo_epi64(q0p0, q1p1); + p2p1 = _mm_unpacklo_epi64(q1p1, p3p2); + q2q1 = _mm_unpacklo_epi64(_mm_srli_si128(q1p1, 8), q3q2); + + FILTER_HEV_MASK; + FILTER4; + + // Transpose 8x4 to 4x8 + // qs1qs0: 20 21 22 23 24 25 26 27 30 31 32 33 34 34 36 37 + // ps1ps0: 10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07 + // 00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17 + ps1ps0 = _mm_unpackhi_epi64(ps1ps0, _mm_slli_si128(ps1ps0, 8)); + // 10 30 11 31 12 32 13 33 14 34 15 35 16 36 17 37 + x0 = _mm_unpackhi_epi8(ps1ps0, qs1qs0); + // 00 20 01 21 02 22 03 23 04 24 05 25 06 26 07 27 + ps1ps0 = _mm_unpacklo_epi8(ps1ps0, qs1qs0); + // 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37 + qs1qs0 = _mm_unpackhi_epi8(ps1ps0, x0); + // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 + ps1ps0 = _mm_unpacklo_epi8(ps1ps0, x0); + + *(int *)(s + 0 * p - 2) = _mm_cvtsi128_si32(ps1ps0); + ps1ps0 = _mm_srli_si128(ps1ps0, 4); + *(int *)(s + 1 * p - 2) = _mm_cvtsi128_si32(ps1ps0); + ps1ps0 = _mm_srli_si128(ps1ps0, 4); + *(int *)(s + 2 * p - 2) = _mm_cvtsi128_si32(ps1ps0); + ps1ps0 = _mm_srli_si128(ps1ps0, 4); + *(int *)(s + 3 * p - 2) = _mm_cvtsi128_si32(ps1ps0); + + *(int *)(s + 4 * p - 2) = _mm_cvtsi128_si32(qs1qs0); + qs1qs0 = _mm_srli_si128(qs1qs0, 4); + *(int *)(s + 5 * p - 2) = _mm_cvtsi128_si32(qs1qs0); + qs1qs0 = _mm_srli_si128(qs1qs0, 4); + *(int *)(s + 6 * p - 2) = _mm_cvtsi128_si32(qs1qs0); + qs1qs0 = _mm_srli_si128(qs1qs0, 4); + *(int *)(s + 7 * p - 2) = _mm_cvtsi128_si32(qs1qs0); +} + +void vpx_lpf_horizontal_edge_8_sse2(unsigned char *s, int p, + const unsigned char *_blimit, + const unsigned char *_limit, + const unsigned char *_thresh) { const __m128i zero = _mm_set1_epi16(0); const __m128i one = _mm_set1_epi8(1); const __m128i blimit = _mm_load_si128((const __m128i *)_blimit); @@ -383,11 +588,10 @@ static INLINE __m128i filter16_mask(const __m128i *const flat, return 
_mm_or_si128(_mm_andnot_si128(*flat, *other_filt), result); } -static void mb_lpf_horizontal_edge_w_sse2_16(unsigned char *s, - int p, - const unsigned char *_blimit, - const unsigned char *_limit, - const unsigned char *_thresh) { +void vpx_lpf_horizontal_edge_16_sse2(unsigned char *s, int p, + const unsigned char *_blimit, + const unsigned char *_limit, + const unsigned char *_thresh) { const __m128i zero = _mm_set1_epi16(0); const __m128i one = _mm_set1_epi8(1); const __m128i blimit = _mm_load_si128((const __m128i *)_blimit); @@ -716,21 +920,10 @@ static void mb_lpf_horizontal_edge_w_sse2_16(unsigned char *s, } } -// TODO(yunqingwang): remove count and call these 2 functions(8 or 16) directly. -void vpx_lpf_horizontal_16_sse2(unsigned char *s, int p, - const unsigned char *_blimit, - const unsigned char *_limit, - const unsigned char *_thresh, int count) { - if (count == 1) - mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh); - else - mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh); -} - void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p, const unsigned char *_blimit, const unsigned char *_limit, - const unsigned char *_thresh, int count) { + const unsigned char *_thresh) { DECLARE_ALIGNED(16, unsigned char, flat_op2[16]); DECLARE_ALIGNED(16, unsigned char, flat_op1[16]); DECLARE_ALIGNED(16, unsigned char, flat_op0[16]); @@ -745,8 +938,6 @@ void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p, __m128i p3, p2, p1, p0, q0, q1, q2, q3; __m128i q3p3, q2p2, q1p1, q0p0, p1q1, p0q0; - (void)count; - q3p3 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 4 * p)), _mm_loadl_epi64((__m128i *)(s + 3 * p))); q2p2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 3 * p)), @@ -1492,11 +1683,10 @@ void vpx_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0, void vpx_lpf_vertical_8_sse2(unsigned char *s, int p, const unsigned char *blimit, const unsigned char *limit, - const unsigned char *thresh, int count) { + const 
unsigned char *thresh) { DECLARE_ALIGNED(8, unsigned char, t_dst[8 * 8]); unsigned char *src[1]; unsigned char *dst[1]; - (void)count; // Transpose 8x8 src[0] = s - 4; @@ -1505,7 +1695,7 @@ void vpx_lpf_vertical_8_sse2(unsigned char *s, int p, transpose(src, p, dst, 8, 1); // Loop filtering - vpx_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1); + vpx_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh); src[0] = t_dst; dst[0] = s - 4; @@ -1557,7 +1747,7 @@ void vpx_lpf_vertical_16_sse2(unsigned char *s, int p, transpose(src, p, dst, 8, 2); // Loop filtering - mb_lpf_horizontal_edge_w_sse2_8(t_dst + 8 * 8, 8, blimit, limit, thresh); + vpx_lpf_horizontal_edge_8_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh); src[0] = t_dst; src[1] = t_dst + 8 * 8; @@ -1578,8 +1768,7 @@ void vpx_lpf_vertical_16_dual_sse2(unsigned char *s, int p, transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16); // Loop filtering - mb_lpf_horizontal_edge_w_sse2_16(t_dst + 8 * 16, 16, blimit, limit, - thresh); + vpx_lpf_horizontal_edge_16_sse2(t_dst + 8 * 16, 16, blimit, limit, thresh); // Transpose back transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, p); diff --git a/libvpx/vpx_dsp/x86/sad4d_sse2.asm b/libvpx/vpx_dsp/x86/sad4d_sse2.asm index a2f0ae79e..3f6e55ce9 100644 --- a/libvpx/vpx_dsp/x86/sad4d_sse2.asm +++ b/libvpx/vpx_dsp/x86/sad4d_sse2.asm @@ -20,33 +20,41 @@ SECTION .text movd m4, [ref2q+%3] movd m7, [ref3q+%3] movd m5, [ref4q+%3] - punpckldq m0, [srcq +%4] - punpckldq m6, [ref1q+%5] - punpckldq m4, [ref2q+%5] - punpckldq m7, [ref3q+%5] - punpckldq m5, [ref4q+%5] + movd m1, [srcq +%4] + movd m2, [ref1q+%5] + punpckldq m0, m1 + punpckldq m6, m2 + movd m1, [ref2q+%5] + movd m2, [ref3q+%5] + movd m3, [ref4q+%5] + punpckldq m4, m1 + punpckldq m7, m2 + punpckldq m5, m3 + movlhps m0, m0 + movlhps m6, m4 + movlhps m7, m5 psadbw m6, m0 - psadbw m4, m0 psadbw m7, m0 - psadbw m5, m0 - punpckldq m6, m4 - punpckldq m7, m5 %else movd m1, [ref1q+%3] + movd m5, [ref1q+%5] movd 
m2, [ref2q+%3] + movd m4, [ref2q+%5] + punpckldq m1, m5 + punpckldq m2, m4 movd m3, [ref3q+%3] + movd m5, [ref3q+%5] + punpckldq m3, m5 movd m4, [ref4q+%3] - punpckldq m0, [srcq +%4] - punpckldq m1, [ref1q+%5] - punpckldq m2, [ref2q+%5] - punpckldq m3, [ref3q+%5] - punpckldq m4, [ref4q+%5] + movd m5, [ref4q+%5] + punpckldq m4, m5 + movd m5, [srcq +%4] + punpckldq m0, m5 + movlhps m0, m0 + movlhps m1, m2 + movlhps m3, m4 psadbw m1, m0 - psadbw m2, m0 psadbw m3, m0 - psadbw m4, m0 - punpckldq m1, m2 - punpckldq m3, m4 paddd m6, m1 paddd m7, m3 %endif @@ -170,7 +178,7 @@ SECTION .text ; void vpx_sadNxNx4d_sse2(uint8_t *src, int src_stride, ; uint8_t *ref[4], int ref_stride, ; uint32_t res[4]); -; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16 or 8x8 +; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16, 8x8, 8x4, 4x8 and 4x4 %macro SADNXN4D 2 %if UNIX64 cglobal sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \ @@ -192,7 +200,7 @@ cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \ %endrep PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 0 -%if mmsize == 16 +%if %1 > 4 pslldq m5, 4 pslldq m7, 4 por m4, m5 @@ -207,8 +215,10 @@ cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \ RET %else movifnidn r4, r4mp - movq [r4+0], m6 - movq [r4+8], m7 + pshufd m6, m6, 0x08 + pshufd m7, m7, 0x08 + movq [r4+0], m6 + movq [r4+8], m7 RET %endif %endmacro @@ -225,7 +235,5 @@ SADNXN4D 16, 8 SADNXN4D 8, 16 SADNXN4D 8, 8 SADNXN4D 8, 4 - -INIT_MMX sse SADNXN4D 4, 8 SADNXN4D 4, 4 diff --git a/libvpx/vpx_dsp/x86/sad_mmx.asm b/libvpx/vpx_dsp/x86/sad_mmx.asm deleted file mode 100644 index 9968992bd..000000000 --- a/libvpx/vpx_dsp/x86/sad_mmx.asm +++ /dev/null @@ -1,427 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. 
An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -global sym(vpx_sad16x16_mmx) PRIVATE -global sym(vpx_sad8x16_mmx) PRIVATE -global sym(vpx_sad8x8_mmx) PRIVATE -global sym(vpx_sad4x4_mmx) PRIVATE -global sym(vpx_sad16x8_mmx) PRIVATE - -;unsigned int vpx_sad16x16_mmx( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -sym(vpx_sad16x16_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - lea rcx, [rsi+rax*8] - - lea rcx, [rcx+rax*8] - pxor mm7, mm7 - - pxor mm6, mm6 - -.x16x16sad_mmx_loop: - - movq mm0, QWORD PTR [rsi] - movq mm2, QWORD PTR [rsi+8] - - movq mm1, QWORD PTR [rdi] - movq mm3, QWORD PTR [rdi+8] - - movq mm4, mm0 - movq mm5, mm2 - - psubusb mm0, mm1 - psubusb mm1, mm4 - - psubusb mm2, mm3 - psubusb mm3, mm5 - - por mm0, mm1 - por mm2, mm3 - - movq mm1, mm0 - movq mm3, mm2 - - punpcklbw mm0, mm6 - punpcklbw mm2, mm6 - - punpckhbw mm1, mm6 - punpckhbw mm3, mm6 - - paddw mm0, mm2 - paddw mm1, mm3 - - - lea rsi, [rsi+rax] - add rdi, rdx - - paddw mm7, mm0 - paddw mm7, mm1 - - cmp rsi, rcx - jne .x16x16sad_mmx_loop - - - movq mm0, mm7 - - punpcklwd mm0, mm6 - punpckhwd mm7, mm6 - - paddw mm0, mm7 - movq mm7, mm0 - - - psrlq mm0, 32 - paddw mm7, mm0 - - movq rax, mm7 - - pop rdi - pop rsi - mov rsp, rbp - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - - -;unsigned int vpx_sad8x16_mmx( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -sym(vpx_sad8x16_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - 
movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - lea rcx, [rsi+rax*8] - - lea rcx, [rcx+rax*8] - pxor mm7, mm7 - - pxor mm6, mm6 - -.x8x16sad_mmx_loop: - - movq mm0, QWORD PTR [rsi] - movq mm1, QWORD PTR [rdi] - - movq mm2, mm0 - psubusb mm0, mm1 - - psubusb mm1, mm2 - por mm0, mm1 - - movq mm2, mm0 - punpcklbw mm0, mm6 - - punpckhbw mm2, mm6 - lea rsi, [rsi+rax] - - add rdi, rdx - paddw mm7, mm0 - - paddw mm7, mm2 - cmp rsi, rcx - - jne .x8x16sad_mmx_loop - - movq mm0, mm7 - punpcklwd mm0, mm6 - - punpckhwd mm7, mm6 - paddw mm0, mm7 - - movq mm7, mm0 - psrlq mm0, 32 - - paddw mm7, mm0 - movq rax, mm7 - - pop rdi - pop rsi - mov rsp, rbp - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - - -;unsigned int vpx_sad8x8_mmx( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -sym(vpx_sad8x8_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - lea rcx, [rsi+rax*8] - pxor mm7, mm7 - - pxor mm6, mm6 - -.x8x8sad_mmx_loop: - - movq mm0, QWORD PTR [rsi] - movq mm1, QWORD PTR [rdi] - - movq mm2, mm0 - psubusb mm0, mm1 - - psubusb mm1, mm2 - por mm0, mm1 - - movq mm2, mm0 - punpcklbw mm0, mm6 - - punpckhbw mm2, mm6 - paddw mm0, mm2 - - lea rsi, [rsi+rax] - add rdi, rdx - - paddw mm7, mm0 - cmp rsi, rcx - - jne .x8x8sad_mmx_loop - - movq mm0, mm7 - punpcklwd mm0, mm6 - - punpckhwd mm7, mm6 - paddw mm0, mm7 - - movq mm7, mm0 - psrlq mm0, 32 - - paddw mm7, mm0 - movq rax, mm7 - - pop rdi - pop rsi - mov rsp, rbp - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - - -;unsigned int vpx_sad4x4_mmx( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -sym(vpx_sad4x4_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) 
;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - movd mm0, DWORD PTR [rsi] - movd mm1, DWORD PTR [rdi] - - movd mm2, DWORD PTR [rsi+rax] - movd mm3, DWORD PTR [rdi+rdx] - - punpcklbw mm0, mm2 - punpcklbw mm1, mm3 - - movq mm2, mm0 - psubusb mm0, mm1 - - psubusb mm1, mm2 - por mm0, mm1 - - movq mm2, mm0 - pxor mm3, mm3 - - punpcklbw mm0, mm3 - punpckhbw mm2, mm3 - - paddw mm0, mm2 - - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rdx*2] - - movd mm4, DWORD PTR [rsi] - movd mm5, DWORD PTR [rdi] - - movd mm6, DWORD PTR [rsi+rax] - movd mm7, DWORD PTR [rdi+rdx] - - punpcklbw mm4, mm6 - punpcklbw mm5, mm7 - - movq mm6, mm4 - psubusb mm4, mm5 - - psubusb mm5, mm6 - por mm4, mm5 - - movq mm5, mm4 - punpcklbw mm4, mm3 - - punpckhbw mm5, mm3 - paddw mm4, mm5 - - paddw mm0, mm4 - movq mm1, mm0 - - punpcklwd mm0, mm3 - punpckhwd mm1, mm3 - - paddw mm0, mm1 - movq mm1, mm0 - - psrlq mm0, 32 - paddw mm0, mm1 - - movq rax, mm0 - - pop rdi - pop rsi - mov rsp, rbp - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - - -;unsigned int vpx_sad16x8_mmx( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -sym(vpx_sad16x8_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - lea rcx, [rsi+rax*8] - pxor mm7, mm7 - - pxor mm6, mm6 - -.x16x8sad_mmx_loop: - - movq mm0, [rsi] - movq mm1, [rdi] - - movq mm2, [rsi+8] - movq mm3, [rdi+8] - - movq mm4, mm0 - movq mm5, mm2 - - psubusb mm0, mm1 - psubusb mm1, mm4 - - psubusb mm2, mm3 - psubusb mm3, mm5 - - por mm0, mm1 - por mm2, mm3 - - movq mm1, mm0 - movq mm3, mm2 - - punpcklbw mm0, mm6 - punpckhbw mm1, mm6 - - punpcklbw mm2, mm6 - punpckhbw mm3, mm6 - - - paddw mm0, mm2 - paddw mm1, mm3 - - paddw mm0, mm1 - lea rsi, [rsi+rax] - - add rdi, rdx - paddw 
mm7, mm0 - - cmp rsi, rcx - jne .x16x8sad_mmx_loop - - movq mm0, mm7 - punpcklwd mm0, mm6 - - punpckhwd mm7, mm6 - paddw mm0, mm7 - - movq mm7, mm0 - psrlq mm0, 32 - - paddw mm7, mm0 - movq rax, mm7 - - pop rdi - pop rsi - mov rsp, rbp - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret diff --git a/libvpx/vpx_dsp/x86/sad_sse2.asm b/libvpx/vpx_dsp/x86/sad_sse2.asm index 0defe1b6d..1ec906c23 100644 --- a/libvpx/vpx_dsp/x86/sad_sse2.asm +++ b/libvpx/vpx_dsp/x86/sad_sse2.asm @@ -17,7 +17,7 @@ SECTION .text %if %3 == 5 cglobal sad%1x%2, 4, %3, 5, src, src_stride, ref, ref_stride, n_rows %else ; %3 == 7 -cglobal sad%1x%2, 4, %3, 5, src, src_stride, ref, ref_stride, \ +cglobal sad%1x%2, 4, %3, 6, src, src_stride, ref, ref_stride, \ src_stride3, ref_stride3, n_rows %endif ; %3 == 5/7 %else ; avg @@ -25,7 +25,7 @@ cglobal sad%1x%2, 4, %3, 5, src, src_stride, ref, ref_stride, \ cglobal sad%1x%2_avg, 5, 1 + %3, 5, src, src_stride, ref, ref_stride, \ second_pred, n_rows %else ; %3 == 7 -cglobal sad%1x%2_avg, 5, ARCH_X86_64 + %3, 5, src, src_stride, \ +cglobal sad%1x%2_avg, 5, ARCH_X86_64 + %3, 6, src, src_stride, \ ref, ref_stride, \ second_pred, \ src_stride3, ref_stride3 @@ -222,8 +222,8 @@ SAD8XN 16, 1 ; sad8x16_avg_sse2 SAD8XN 8, 1 ; sad8x8_avg_sse2 SAD8XN 4, 1 ; sad8x4_avg_sse2 -; unsigned int vpx_sad4x{4, 8}_sse(uint8_t *src, int src_stride, -; uint8_t *ref, int ref_stride); +; unsigned int vpx_sad4x{4, 8}_sse2(uint8_t *src, int src_stride, +; uint8_t *ref, int ref_stride); %macro SAD4XN 1-2 0 SAD_FN 4, %1, 7, %2 mov n_rowsd, %1/4 @@ -236,31 +236,32 @@ SAD8XN 4, 1 ; sad8x4_avg_sse2 movd m4, [refq+ref_stride3q] punpckldq m1, m2 punpckldq m3, m4 + movlhps m1, m3 %if %2 == 1 pavgb m1, [second_predq+mmsize*0] - pavgb m3, [second_predq+mmsize*1] - lea second_predq, [second_predq+mmsize*2] + lea second_predq, [second_predq+mmsize*1] %endif movd m2, [srcq] movd m5, [srcq+src_strideq] movd m4, [srcq+src_strideq*2] - movd m6, [srcq+src_stride3q] + movd m3, [srcq+src_stride3q] 
punpckldq m2, m5 - punpckldq m4, m6 + punpckldq m4, m3 + movlhps m2, m4 psadbw m1, m2 - psadbw m3, m4 lea refq, [refq+ref_strideq*4] paddd m0, m1 lea srcq, [srcq+src_strideq*4] - paddd m0, m3 dec n_rowsd jg .loop + movhlps m1, m0 + paddd m0, m1 movd eax, m0 RET %endmacro -INIT_MMX sse +INIT_XMM sse2 SAD4XN 8 ; sad4x8_sse SAD4XN 4 ; sad4x4_sse SAD4XN 8, 1 ; sad4x8_avg_sse diff --git a/libvpx/vpx_dsp/x86/subpel_variance_sse2.asm b/libvpx/vpx_dsp/x86/subpel_variance_sse2.asm index 05dcff75e..cee4468c1 100644 --- a/libvpx/vpx_dsp/x86/subpel_variance_sse2.asm +++ b/libvpx/vpx_dsp/x86/subpel_variance_sse2.asm @@ -57,8 +57,8 @@ SECTION .text paddd %6, %1 %endmacro -%macro STORE_AND_RET 0 -%if mmsize == 16 +%macro STORE_AND_RET 1 +%if %1 > 4 ; if H=64 and W=16, we have 8 words of each 2(1bit)x64(6bit)x9bit=16bit ; in m6, i.e. it _exactly_ fits in a signed word per word in the xmm reg. ; We have to sign-extend it before adding the words within the register @@ -78,16 +78,16 @@ SECTION .text movd [r1], m7 ; store sse paddd m6, m4 movd raxd, m6 ; store sum as return value -%else ; mmsize == 8 - pshufw m4, m6, 0xe - pshufw m3, m7, 0xe +%else ; 4xh + pshuflw m4, m6, 0xe + pshuflw m3, m7, 0xe paddw m6, m4 paddd m7, m3 pcmpgtw m5, m6 ; mask for 0 > x mov r1, ssem ; r1 = unsigned int *sse punpcklwd m6, m5 ; sign-extend m6 word->dword movd [r1], m7 ; store sse - pshufw m4, m6, 0xe + pshuflw m4, m6, 0xe paddd m6, m4 movd raxd, m6 ; store sum as return value %endif @@ -139,8 +139,10 @@ SECTION .text %define sec_str sec_stridemp ;Store bilin_filter and pw_8 location in stack - GET_GOT eax - add esp, 4 ; restore esp + %if GET_GOT_DEFINED == 1 + GET_GOT eax + add esp, 4 ; restore esp + %endif lea ecx, [GLOBAL(bilin_filter_m)] mov g_bilin_filterm, ecx @@ -156,8 +158,10 @@ SECTION .text %define block_height heightd ;Store bilin_filter and pw_8 location in stack - GET_GOT eax - add esp, 4 ; restore esp + %if GET_GOT_DEFINED == 1 + GET_GOT eax + add esp, 4 ; restore esp + %endif lea ecx, 
[GLOBAL(bilin_filter_m)] mov g_bilin_filterm, ecx @@ -192,6 +196,12 @@ SECTION .text %endif %endif +%if %1 == 4 + %define movx movd +%else + %define movx movh +%endif + ASSERT %1 <= 16 ; m6 overflows if w > 16 pxor m6, m6 ; sum pxor m7, m7 ; sse @@ -224,6 +234,7 @@ SECTION .text %endif punpckhbw m2, m0, m5 punpcklbw m0, m5 + %if %2 == 0 ; !avg punpckhbw m3, m1, m5 punpcklbw m1, m5 @@ -233,24 +244,37 @@ SECTION .text add srcq, src_strideq add dstq, dst_strideq %else ; %1 < 16 - movh m0, [srcq] + movx m0, [srcq] %if %2 == 1 ; avg -%if mmsize == 16 +%if %1 > 4 movhps m0, [srcq+src_strideq] -%else ; mmsize == 8 - punpckldq m0, [srcq+src_strideq] +%else ; 4xh + movx m1, [srcq+src_strideq] + punpckldq m0, m1 %endif %else ; !avg - movh m2, [srcq+src_strideq] + movx m2, [srcq+src_strideq] %endif - movh m1, [dstq] - movh m3, [dstq+dst_strideq] + + movx m1, [dstq] + movx m3, [dstq+dst_strideq] + %if %2 == 1 ; avg +%if %1 > 4 pavgb m0, [secq] +%else + movh m2, [secq] + pavgb m0, m2 +%endif punpcklbw m3, m5 punpcklbw m1, m5 +%if %1 > 4 punpckhbw m2, m0, m5 punpcklbw m0, m5 +%else ; 4xh + punpcklbw m0, m5 + movhlps m2, m0 +%endif %else ; !avg punpcklbw m0, m5 punpcklbw m2, m5 @@ -267,10 +291,10 @@ SECTION .text %endif dec block_height jg .x_zero_y_zero_loop - STORE_AND_RET + STORE_AND_RET %1 .x_zero_y_nonzero: - cmp y_offsetd, 8 + cmp y_offsetd, 4 jne .x_zero_y_nonhalf ; x_offset == 0 && y_offset == 0.5 @@ -292,37 +316,41 @@ SECTION .text add srcq, src_strideq add dstq, dst_strideq %else ; %1 < 16 - movh m0, [srcq] - movh m2, [srcq+src_strideq] + movx m0, [srcq] + movx m2, [srcq+src_strideq] %if %2 == 1 ; avg -%if mmsize == 16 +%if %1 > 4 movhps m2, [srcq+src_strideq*2] -%else ; mmsize == 8 -%if %1 == 4 - movh m1, [srcq+src_strideq*2] +%else ; 4xh + movx m1, [srcq+src_strideq*2] punpckldq m2, m1 -%else - punpckldq m2, [srcq+src_strideq*2] -%endif %endif - movh m1, [dstq] -%if mmsize == 16 + movx m1, [dstq] +%if %1 > 4 movlhps m0, m2 -%else ; mmsize == 8 +%else ; 4xh punpckldq 
m0, m2 %endif - movh m3, [dstq+dst_strideq] + movx m3, [dstq+dst_strideq] pavgb m0, m2 punpcklbw m1, m5 +%if %1 > 4 pavgb m0, [secq] punpcklbw m3, m5 punpckhbw m2, m0, m5 punpcklbw m0, m5 +%else ; 4xh + movh m4, [secq] + pavgb m0, m4 + punpcklbw m3, m5 + punpcklbw m0, m5 + movhlps m2, m0 +%endif %else ; !avg - movh m4, [srcq+src_strideq*2] - movh m1, [dstq] + movx m4, [srcq+src_strideq*2] + movx m1, [dstq] pavgb m0, m2 - movh m3, [dstq+dst_strideq] + movx m3, [dstq+dst_strideq] pavgb m2, m4 punpcklbw m0, m5 punpcklbw m2, m5 @@ -339,7 +367,7 @@ SECTION .text %endif dec block_height jg .x_zero_y_half_loop - STORE_AND_RET + STORE_AND_RET %1 .x_zero_y_nonhalf: ; x_offset == 0 && y_offset == bilin interpolation @@ -347,7 +375,7 @@ SECTION .text lea bilin_filter, [bilin_filter_m] %endif shl y_offsetd, filter_idx_shift -%if ARCH_X86_64 && mmsize == 16 +%if ARCH_X86_64 && %1 > 4 mova m8, [bilin_filter+y_offsetq] %if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64 mova m9, [bilin_filter+y_offsetq+16] @@ -420,12 +448,12 @@ SECTION .text add srcq, src_strideq add dstq, dst_strideq %else ; %1 < 16 - movh m0, [srcq] - movh m2, [srcq+src_strideq] - movh m4, [srcq+src_strideq*2] - movh m3, [dstq+dst_strideq] + movx m0, [srcq] + movx m2, [srcq+src_strideq] + movx m4, [srcq+src_strideq*2] + movx m3, [dstq+dst_strideq] %if cpuflag(ssse3) - movh m1, [dstq] + movx m1, [dstq] punpcklbw m0, m2 punpcklbw m2, m4 pmaddubsw m0, filter_y_a @@ -445,17 +473,27 @@ SECTION .text pmullw m4, filter_y_b paddw m0, m1 paddw m2, filter_rnd - movh m1, [dstq] + movx m1, [dstq] paddw m2, m4 %endif psraw m0, 4 psraw m2, 4 %if %2 == 1 ; avg ; FIXME(rbultje) pipeline +%if %1 == 4 + movlhps m0, m2 +%endif packuswb m0, m2 +%if %1 > 4 pavgb m0, [secq] punpckhbw m2, m0, m5 punpcklbw m0, m5 +%else ; 4xh + movh m2, [secq] + pavgb m0, m2 + punpcklbw m0, m5 + movhlps m2, m0 +%endif %endif punpcklbw m1, m5 SUM_SSE m0, m1, m2, m3, m6, m7 @@ -471,10 +509,10 @@ SECTION .text %undef filter_y_a 
%undef filter_y_b %undef filter_rnd - STORE_AND_RET + STORE_AND_RET %1 .x_nonzero: - cmp x_offsetd, 8 + cmp x_offsetd, 4 jne .x_nonhalf ; x_offset == 0.5 test y_offsetd, y_offsetd @@ -499,30 +537,40 @@ SECTION .text add srcq, src_strideq add dstq, dst_strideq %else ; %1 < 16 - movh m0, [srcq] - movh m4, [srcq+1] + movx m0, [srcq] + movx m4, [srcq+1] %if %2 == 1 ; avg -%if mmsize == 16 +%if %1 > 4 movhps m0, [srcq+src_strideq] movhps m4, [srcq+src_strideq+1] -%else ; mmsize == 8 - punpckldq m0, [srcq+src_strideq] - punpckldq m4, [srcq+src_strideq+1] -%endif - movh m1, [dstq] - movh m3, [dstq+dst_strideq] +%else ; 4xh + movx m1, [srcq+src_strideq] + punpckldq m0, m1 + movx m2, [srcq+src_strideq+1] + punpckldq m4, m2 +%endif + movx m1, [dstq] + movx m3, [dstq+dst_strideq] pavgb m0, m4 punpcklbw m3, m5 +%if %1 > 4 pavgb m0, [secq] punpcklbw m1, m5 punpckhbw m2, m0, m5 punpcklbw m0, m5 +%else ; 4xh + movh m2, [secq] + pavgb m0, m2 + punpcklbw m1, m5 + punpcklbw m0, m5 + movhlps m2, m0 +%endif %else ; !avg - movh m2, [srcq+src_strideq] - movh m1, [dstq] + movx m2, [srcq+src_strideq] + movx m1, [dstq] pavgb m0, m4 - movh m4, [srcq+src_strideq+1] - movh m3, [dstq+dst_strideq] + movx m4, [srcq+src_strideq+1] + movx m3, [dstq+dst_strideq] pavgb m2, m4 punpcklbw m0, m5 punpcklbw m2, m5 @@ -539,10 +587,10 @@ SECTION .text %endif dec block_height jg .x_half_y_zero_loop - STORE_AND_RET + STORE_AND_RET %1 .x_half_y_nonzero: - cmp y_offsetd, 8 + cmp y_offsetd, 4 jne .x_half_y_nonhalf ; x_offset == 0.5 && y_offset == 0.5 @@ -574,53 +622,58 @@ SECTION .text add srcq, src_strideq add dstq, dst_strideq %else ; %1 < 16 - movh m0, [srcq] - movh m3, [srcq+1] + movx m0, [srcq] + movx m3, [srcq+1] add srcq, src_strideq pavgb m0, m3 .x_half_y_half_loop: - movh m2, [srcq] - movh m3, [srcq+1] + movx m2, [srcq] + movx m3, [srcq+1] %if %2 == 1 ; avg -%if mmsize == 16 +%if %1 > 4 movhps m2, [srcq+src_strideq] movhps m3, [srcq+src_strideq+1] %else -%if %1 == 4 - movh m1, [srcq+src_strideq] + movx 
m1, [srcq+src_strideq] punpckldq m2, m1 - movh m1, [srcq+src_strideq+1] + movx m1, [srcq+src_strideq+1] punpckldq m3, m1 -%else - punpckldq m2, [srcq+src_strideq] - punpckldq m3, [srcq+src_strideq+1] -%endif %endif pavgb m2, m3 -%if mmsize == 16 +%if %1 > 4 movlhps m0, m2 movhlps m4, m2 -%else ; mmsize == 8 +%else ; 4xh punpckldq m0, m2 - pshufw m4, m2, 0xe + pshuflw m4, m2, 0xe %endif - movh m1, [dstq] + movx m1, [dstq] pavgb m0, m2 - movh m3, [dstq+dst_strideq] + movx m3, [dstq+dst_strideq] +%if %1 > 4 pavgb m0, [secq] +%else + movh m2, [secq] + pavgb m0, m2 +%endif punpcklbw m3, m5 punpcklbw m1, m5 +%if %1 > 4 punpckhbw m2, m0, m5 punpcklbw m0, m5 +%else + punpcklbw m0, m5 + movhlps m2, m0 +%endif %else ; !avg - movh m4, [srcq+src_strideq] - movh m1, [srcq+src_strideq+1] + movx m4, [srcq+src_strideq] + movx m1, [srcq+src_strideq+1] pavgb m2, m3 pavgb m4, m1 pavgb m0, m2 pavgb m2, m4 - movh m1, [dstq] - movh m3, [dstq+dst_strideq] + movx m1, [dstq] + movx m3, [dstq+dst_strideq] punpcklbw m0, m5 punpcklbw m2, m5 punpcklbw m3, m5 @@ -637,7 +690,7 @@ SECTION .text %endif dec block_height jg .x_half_y_half_loop - STORE_AND_RET + STORE_AND_RET %1 .x_half_y_nonhalf: ; x_offset == 0.5 && y_offset == bilin interpolation @@ -645,7 +698,7 @@ SECTION .text lea bilin_filter, [bilin_filter_m] %endif shl y_offsetd, filter_idx_shift -%if ARCH_X86_64 && mmsize == 16 +%if ARCH_X86_64 && %1 > 4 mova m8, [bilin_filter+y_offsetq] %if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64 mova m9, [bilin_filter+y_offsetq+16] @@ -720,23 +773,23 @@ SECTION .text add srcq, src_strideq add dstq, dst_strideq %else ; %1 < 16 - movh m0, [srcq] - movh m3, [srcq+1] + movx m0, [srcq] + movx m3, [srcq+1] add srcq, src_strideq pavgb m0, m3 %if notcpuflag(ssse3) punpcklbw m0, m5 %endif .x_half_y_other_loop: - movh m2, [srcq] - movh m1, [srcq+1] - movh m4, [srcq+src_strideq] - movh m3, [srcq+src_strideq+1] + movx m2, [srcq] + movx m1, [srcq+1] + movx m4, [srcq+src_strideq] + movx m3, 
[srcq+src_strideq+1] pavgb m2, m1 pavgb m4, m3 - movh m3, [dstq+dst_strideq] + movx m3, [dstq+dst_strideq] %if cpuflag(ssse3) - movh m1, [dstq] + movx m1, [dstq] punpcklbw m0, m2 punpcklbw m2, m4 pmaddubsw m0, filter_y_a @@ -756,16 +809,26 @@ SECTION .text pmullw m1, m4, filter_y_b paddw m2, filter_rnd paddw m2, m1 - movh m1, [dstq] + movx m1, [dstq] %endif psraw m0, 4 psraw m2, 4 %if %2 == 1 ; avg ; FIXME(rbultje) pipeline +%if %1 == 4 + movlhps m0, m2 +%endif packuswb m0, m2 +%if %1 > 4 pavgb m0, [secq] punpckhbw m2, m0, m5 punpcklbw m0, m5 +%else + movh m2, [secq] + pavgb m0, m2 + punpcklbw m0, m5 + movhlps m2, m0 +%endif %endif punpcklbw m1, m5 SUM_SSE m0, m1, m2, m3, m6, m7 @@ -782,7 +845,7 @@ SECTION .text %undef filter_y_a %undef filter_y_b %undef filter_rnd - STORE_AND_RET + STORE_AND_RET %1 .x_nonhalf: test y_offsetd, y_offsetd @@ -793,7 +856,7 @@ SECTION .text lea bilin_filter, [bilin_filter_m] %endif shl x_offsetd, filter_idx_shift -%if ARCH_X86_64 && mmsize == 16 +%if ARCH_X86_64 && %1 > 4 mova m8, [bilin_filter+x_offsetq] %if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64 mova m9, [bilin_filter+x_offsetq+16] @@ -861,14 +924,14 @@ SECTION .text add srcq, src_strideq add dstq, dst_strideq %else ; %1 < 16 - movh m0, [srcq] - movh m1, [srcq+1] - movh m2, [srcq+src_strideq] - movh m4, [srcq+src_strideq+1] - movh m3, [dstq+dst_strideq] + movx m0, [srcq] + movx m1, [srcq+1] + movx m2, [srcq+src_strideq] + movx m4, [srcq+src_strideq+1] + movx m3, [dstq+dst_strideq] %if cpuflag(ssse3) punpcklbw m0, m1 - movh m1, [dstq] + movx m1, [dstq] punpcklbw m2, m4 pmaddubsw m0, filter_x_a pmaddubsw m2, filter_x_a @@ -888,17 +951,27 @@ SECTION .text pmullw m4, filter_x_b paddw m0, m1 paddw m2, filter_rnd - movh m1, [dstq] + movx m1, [dstq] paddw m2, m4 %endif psraw m0, 4 psraw m2, 4 %if %2 == 1 ; avg ; FIXME(rbultje) pipeline +%if %1 == 4 + movlhps m0, m2 +%endif packuswb m0, m2 +%if %1 > 4 pavgb m0, [secq] punpckhbw m2, m0, m5 punpcklbw m0, m5 +%else 
+ movh m2, [secq] + pavgb m0, m2 + punpcklbw m0, m5 + movhlps m2, m0 +%endif %endif punpcklbw m1, m5 SUM_SSE m0, m1, m2, m3, m6, m7 @@ -914,10 +987,10 @@ SECTION .text %undef filter_x_a %undef filter_x_b %undef filter_rnd - STORE_AND_RET + STORE_AND_RET %1 .x_nonhalf_y_nonzero: - cmp y_offsetd, 8 + cmp y_offsetd, 4 jne .x_nonhalf_y_nonhalf ; x_offset == bilin interpolation && y_offset == 0.5 @@ -925,7 +998,7 @@ SECTION .text lea bilin_filter, [bilin_filter_m] %endif shl x_offsetd, filter_idx_shift -%if ARCH_X86_64 && mmsize == 16 +%if ARCH_X86_64 && %1 > 4 mova m8, [bilin_filter+x_offsetq] %if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64 mova m9, [bilin_filter+x_offsetq+16] @@ -1033,8 +1106,8 @@ SECTION .text add srcq, src_strideq add dstq, dst_strideq %else ; %1 < 16 - movh m0, [srcq] - movh m1, [srcq+1] + movx m0, [srcq] + movx m1, [srcq+1] %if cpuflag(ssse3) punpcklbw m0, m1 pmaddubsw m0, filter_x_a @@ -1050,17 +1123,17 @@ SECTION .text add srcq, src_strideq psraw m0, 4 .x_other_y_half_loop: - movh m2, [srcq] - movh m1, [srcq+1] - movh m4, [srcq+src_strideq] - movh m3, [srcq+src_strideq+1] + movx m2, [srcq] + movx m1, [srcq+1] + movx m4, [srcq+src_strideq] + movx m3, [srcq+src_strideq+1] %if cpuflag(ssse3) punpcklbw m2, m1 punpcklbw m4, m3 pmaddubsw m2, filter_x_a pmaddubsw m4, filter_x_a - movh m1, [dstq] - movh m3, [dstq+dst_strideq] + movx m1, [dstq] + movx m3, [dstq+dst_strideq] paddw m2, filter_rnd paddw m4, filter_rnd %else @@ -1075,9 +1148,9 @@ SECTION .text pmullw m3, filter_x_b paddw m4, filter_rnd paddw m2, m1 - movh m1, [dstq] + movx m1, [dstq] paddw m4, m3 - movh m3, [dstq+dst_strideq] + movx m3, [dstq+dst_strideq] %endif psraw m2, 4 psraw m4, 4 @@ -1085,10 +1158,20 @@ SECTION .text pavgw m2, m4 %if %2 == 1 ; avg ; FIXME(rbultje) pipeline - also consider going to bytes here +%if %1 == 4 + movlhps m0, m2 +%endif packuswb m0, m2 +%if %1 > 4 pavgb m0, [secq] punpckhbw m2, m0, m5 punpcklbw m0, m5 +%else + movh m2, [secq] + pavgb 
m0, m2 + punpcklbw m0, m5 + movhlps m2, m0 +%endif %endif punpcklbw m3, m5 punpcklbw m1, m5 @@ -1106,7 +1189,7 @@ SECTION .text %undef filter_x_a %undef filter_x_b %undef filter_rnd - STORE_AND_RET + STORE_AND_RET %1 .x_nonhalf_y_nonhalf: %ifdef PIC @@ -1114,7 +1197,7 @@ SECTION .text %endif shl x_offsetd, filter_idx_shift shl y_offsetd, filter_idx_shift -%if ARCH_X86_64 && mmsize == 16 +%if ARCH_X86_64 && %1 > 4 mova m8, [bilin_filter+x_offsetq] %if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64 mova m9, [bilin_filter+x_offsetq+16] @@ -1257,8 +1340,8 @@ SECTION .text INC_SRC_BY_SRC_STRIDE add dstq, dst_strideq %else ; %1 < 16 - movh m0, [srcq] - movh m1, [srcq+1] + movx m0, [srcq] + movx m1, [srcq+1] %if cpuflag(ssse3) punpcklbw m0, m1 pmaddubsw m0, filter_x_a @@ -1279,20 +1362,20 @@ SECTION .text INC_SRC_BY_SRC_STRIDE .x_other_y_other_loop: - movh m2, [srcq] - movh m1, [srcq+1] + movx m2, [srcq] + movx m1, [srcq+1] INC_SRC_BY_SRC_STRIDE - movh m4, [srcq] - movh m3, [srcq+1] + movx m4, [srcq] + movx m3, [srcq+1] %if cpuflag(ssse3) punpcklbw m2, m1 punpcklbw m4, m3 pmaddubsw m2, filter_x_a pmaddubsw m4, filter_x_a - movh m3, [dstq+dst_strideq] - movh m1, [dstq] + movx m3, [dstq+dst_strideq] + movx m1, [dstq] paddw m2, filter_rnd paddw m4, filter_rnd psraw m2, 4 @@ -1331,9 +1414,9 @@ SECTION .text pmullw m1, m4, filter_y_b paddw m2, filter_rnd paddw m0, m3 - movh m3, [dstq+dst_strideq] + movx m3, [dstq+dst_strideq] paddw m2, m1 - movh m1, [dstq] + movx m1, [dstq] psraw m0, 4 psraw m2, 4 punpcklbw m3, m5 @@ -1341,10 +1424,20 @@ SECTION .text %endif %if %2 == 1 ; avg ; FIXME(rbultje) pipeline +%if %1 == 4 + movlhps m0, m2 +%endif packuswb m0, m2 +%if %1 > 4 pavgb m0, [secq] punpckhbw m2, m0, m5 punpcklbw m0, m5 +%else + movh m2, [secq] + pavgb m0, m2 + punpcklbw m0, m5 + movhlps m2, m0 +%endif %endif SUM_SSE m0, m1, m2, m3, m6, m7 mova m0, m4 @@ -1362,7 +1455,8 @@ SECTION .text %undef filter_y_a %undef filter_y_b %undef filter_rnd - STORE_AND_RET 
+%undef movx + STORE_AND_RET %1 %endmacro ; FIXME(rbultje) the non-bilinear versions (i.e. x=0,8&&y=0,8) are identical @@ -1371,26 +1465,22 @@ SECTION .text ; location in the sse/2 version, rather than duplicating that code in the ; binary. -INIT_MMX sse -SUBPEL_VARIANCE 4 INIT_XMM sse2 +SUBPEL_VARIANCE 4 SUBPEL_VARIANCE 8 SUBPEL_VARIANCE 16 -INIT_MMX ssse3 -SUBPEL_VARIANCE 4 INIT_XMM ssse3 +SUBPEL_VARIANCE 4 SUBPEL_VARIANCE 8 SUBPEL_VARIANCE 16 -INIT_MMX sse -SUBPEL_VARIANCE 4, 1 INIT_XMM sse2 +SUBPEL_VARIANCE 4, 1 SUBPEL_VARIANCE 8, 1 SUBPEL_VARIANCE 16, 1 -INIT_MMX ssse3 -SUBPEL_VARIANCE 4, 1 INIT_XMM ssse3 +SUBPEL_VARIANCE 4, 1 SUBPEL_VARIANCE 8, 1 SUBPEL_VARIANCE 16, 1 diff --git a/libvpx/vpx_dsp/x86/variance_avx2.c b/libvpx/vpx_dsp/x86/variance_avx2.c index 7851a98b1..f8c97117d 100644 --- a/libvpx/vpx_dsp/x86/variance_avx2.c +++ b/libvpx/vpx_dsp/x86/variance_avx2.c @@ -45,7 +45,7 @@ unsigned int vpx_variance16x16_avx2(const uint8_t *src, int src_stride, int sum; variance_avx2(src, src_stride, ref, ref_stride, 16, 16, sse, &sum, vpx_get16x16var_avx2, 16); - return *sse - (((unsigned int)sum * sum) >> 8); + return *sse - (((uint32_t)((int64_t)sum * sum)) >> 8); } unsigned int vpx_mse16x16_avx2(const uint8_t *src, int src_stride, diff --git a/libvpx/vpx_dsp/x86/variance_impl_mmx.asm b/libvpx/vpx_dsp/x86/variance_impl_mmx.asm deleted file mode 100644 index b8ba79b65..000000000 --- a/libvpx/vpx_dsp/x86/variance_impl_mmx.asm +++ /dev/null @@ -1,744 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - -%include "vpx_ports/x86_abi_support.asm" - -%define mmx_filter_shift 7 - -;unsigned int vpx_get_mb_ss_mmx( short *src_ptr ) -global sym(vpx_get_mb_ss_mmx) PRIVATE -sym(vpx_get_mb_ss_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - GET_GOT rbx - push rsi - push rdi - sub rsp, 8 - ; end prolog - - mov rax, arg(0) ;src_ptr - mov rcx, 16 - pxor mm4, mm4 - -.NEXTROW: - movq mm0, [rax] - movq mm1, [rax+8] - movq mm2, [rax+16] - movq mm3, [rax+24] - pmaddwd mm0, mm0 - pmaddwd mm1, mm1 - pmaddwd mm2, mm2 - pmaddwd mm3, mm3 - - paddd mm4, mm0 - paddd mm4, mm1 - paddd mm4, mm2 - paddd mm4, mm3 - - add rax, 32 - dec rcx - ja .NEXTROW - movq QWORD PTR [rsp], mm4 - - ;return sum[0]+sum[1]; - movsxd rax, dword ptr [rsp] - movsxd rcx, dword ptr [rsp+4] - add rax, rcx - - ; begin epilog - add rsp, 8 - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - -;void vpx_get8x8var_mmx -;( -; unsigned char *src_ptr, -; int source_stride, -; unsigned char *ref_ptr, -; int recon_stride, -; unsigned int *SSE, -; int *Sum -;) -global sym(vpx_get8x8var_mmx) PRIVATE -sym(vpx_get8x8var_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - push rbx - sub rsp, 16 - ; end prolog - - pxor mm5, mm5 ; Blank mmx6 - pxor mm6, mm6 ; Blank mmx7 - pxor mm7, mm7 ; Blank mmx7 - - mov rax, arg(0) ;[src_ptr] ; Load base addresses - mov rbx, arg(2) ;[ref_ptr] - movsxd rcx, dword ptr arg(1) ;[source_stride] - movsxd rdx, dword ptr arg(3) ;[recon_stride] - - ; Row 1 - movq mm0, [rax] ; Copy eight bytes to mm0 - movq mm1, [rbx] ; Copy eight bytes to mm1 - movq mm2, mm0 ; Take copies - movq mm3, mm1 ; Take copies - - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - punpckhbw mm2, mm6 ; unpack to higher prrcision - punpckhbw mm3, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - psubsw mm2, mm3 ; A-B (high order) to MM2 - - paddw mm5, mm0 ; accumulate differences in mm5 - paddw mm5, mm2 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 
; square and accumulate - pmaddwd mm2, mm2 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movq mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - paddd mm7, mm2 ; accumulate in mm7 - - ; Row 2 - movq mm0, [rax] ; Copy eight bytes to mm0 - movq mm2, mm0 ; Take copies - movq mm3, mm1 ; Take copies - - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - punpckhbw mm2, mm6 ; unpack to higher prrcision - punpckhbw mm3, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - psubsw mm2, mm3 ; A-B (high order) to MM2 - - paddw mm5, mm0 ; accumulate differences in mm5 - paddw mm5, mm2 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - pmaddwd mm2, mm2 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movq mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - paddd mm7, mm2 ; accumulate in mm7 - - ; Row 3 - movq mm0, [rax] ; Copy eight bytes to mm0 - movq mm2, mm0 ; Take copies - movq mm3, mm1 ; Take copies - - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - punpckhbw mm2, mm6 ; unpack to higher prrcision - punpckhbw mm3, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - psubsw mm2, mm3 ; A-B (high order) to MM2 - - paddw mm5, mm0 ; accumulate differences in mm5 - paddw mm5, mm2 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - pmaddwd mm2, mm2 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movq mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - paddd mm7, mm2 ; accumulate in mm7 - - ; Row 4 - movq mm0, [rax] ; Copy eight bytes to mm0 - movq mm2, mm0 ; Take copies - movq mm3, mm1 ; Take copies - - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - punpckhbw mm2, mm6 ; unpack to higher prrcision - 
punpckhbw mm3, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - psubsw mm2, mm3 ; A-B (high order) to MM2 - - paddw mm5, mm0 ; accumulate differences in mm5 - paddw mm5, mm2 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - pmaddwd mm2, mm2 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movq mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - paddd mm7, mm2 ; accumulate in mm7 - - ; Row 5 - movq mm0, [rax] ; Copy eight bytes to mm0 - movq mm2, mm0 ; Take copies - movq mm3, mm1 ; Take copies - - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - punpckhbw mm2, mm6 ; unpack to higher prrcision - punpckhbw mm3, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - psubsw mm2, mm3 ; A-B (high order) to MM2 - - paddw mm5, mm0 ; accumulate differences in mm5 - paddw mm5, mm2 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - pmaddwd mm2, mm2 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movq mm1, [rbx] ; Copy eight bytes to mm1 - ; movq mm4, [rbx + rdx] - paddd mm7, mm0 ; accumulate in mm7 - paddd mm7, mm2 ; accumulate in mm7 - - ; Row 6 - movq mm0, [rax] ; Copy eight bytes to mm0 - movq mm2, mm0 ; Take copies - movq mm3, mm1 ; Take copies - - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - punpckhbw mm2, mm6 ; unpack to higher prrcision - punpckhbw mm3, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - psubsw mm2, mm3 ; A-B (high order) to MM2 - - paddw mm5, mm0 ; accumulate differences in mm5 - paddw mm5, mm2 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - pmaddwd mm2, mm2 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movq mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - paddd mm7, mm2 ; 
accumulate in mm7 - - ; Row 7 - movq mm0, [rax] ; Copy eight bytes to mm0 - movq mm2, mm0 ; Take copies - movq mm3, mm1 ; Take copies - - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - punpckhbw mm2, mm6 ; unpack to higher prrcision - punpckhbw mm3, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - psubsw mm2, mm3 ; A-B (high order) to MM2 - - paddw mm5, mm0 ; accumulate differences in mm5 - paddw mm5, mm2 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - pmaddwd mm2, mm2 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movq mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - paddd mm7, mm2 ; accumulate in mm7 - - ; Row 8 - movq mm0, [rax] ; Copy eight bytes to mm0 - movq mm2, mm0 ; Take copies - movq mm3, mm1 ; Take copies - - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - punpckhbw mm2, mm6 ; unpack to higher prrcision - punpckhbw mm3, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - psubsw mm2, mm3 ; A-B (high order) to MM2 - - paddw mm5, mm0 ; accumulate differences in mm5 - paddw mm5, mm2 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - pmaddwd mm2, mm2 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - paddd mm7, mm0 ; accumulate in mm7 - paddd mm7, mm2 ; accumulate in mm7 - - ; Now accumulate the final results. 
- movq QWORD PTR [rsp+8], mm5 ; copy back accumulated results into normal memory - movq QWORD PTR [rsp], mm7 ; copy back accumulated results into normal memory - movsx rdx, WORD PTR [rsp+8] - movsx rcx, WORD PTR [rsp+10] - movsx rbx, WORD PTR [rsp+12] - movsx rax, WORD PTR [rsp+14] - add rdx, rcx - add rbx, rax - add rdx, rbx ;XSum - movsxd rax, DWORD PTR [rsp] - movsxd rcx, DWORD PTR [rsp+4] - add rax, rcx ;XXSum - mov rsi, arg(4) ;SSE - mov rdi, arg(5) ;Sum - mov dword ptr [rsi], eax - mov dword ptr [rdi], edx - xor rax, rax ; return 0 - - ; begin epilog - add rsp, 16 - pop rbx - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -;void -;vpx_get4x4var_mmx -;( -; unsigned char *src_ptr, -; int source_stride, -; unsigned char *ref_ptr, -; int recon_stride, -; unsigned int *SSE, -; int *Sum -;) -global sym(vpx_get4x4var_mmx) PRIVATE -sym(vpx_get4x4var_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - push rbx - sub rsp, 16 - ; end prolog - - pxor mm5, mm5 ; Blank mmx6 - pxor mm6, mm6 ; Blank mmx7 - pxor mm7, mm7 ; Blank mmx7 - - mov rax, arg(0) ;[src_ptr] ; Load base addresses - mov rbx, arg(2) ;[ref_ptr] - movsxd rcx, dword ptr arg(1) ;[source_stride] - movsxd rdx, dword ptr arg(3) ;[recon_stride] - - ; Row 1 - movd mm0, [rax] ; Copy four bytes to mm0 - movd mm1, [rbx] ; Copy four bytes to mm1 - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - paddw mm5, mm0 ; accumulate differences in mm5 - pmaddwd mm0, mm0 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movd mm1, [rbx] ; Copy four bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - - ; Row 2 - movd mm0, [rax] ; Copy four bytes to mm0 - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - paddw mm5, mm0 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - add rbx,rdx 
; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movd mm1, [rbx] ; Copy four bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - - ; Row 3 - movd mm0, [rax] ; Copy four bytes to mm0 - punpcklbw mm0, mm6 ; unpack to higher precision - punpcklbw mm1, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - paddw mm5, mm0 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movd mm1, [rbx] ; Copy four bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - - ; Row 4 - movd mm0, [rax] ; Copy four bytes to mm0 - - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - - paddw mm5, mm0 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - paddd mm7, mm0 ; accumulate in mm7 - - ; Now accumulate the final results. - movq QWORD PTR [rsp+8], mm5 ; copy back accumulated results into normal memory - movq QWORD PTR [rsp], mm7 ; copy back accumulated results into normal memory - movsx rdx, WORD PTR [rsp+8] - movsx rcx, WORD PTR [rsp+10] - movsx rbx, WORD PTR [rsp+12] - movsx rax, WORD PTR [rsp+14] - add rdx, rcx - add rbx, rax - add rdx, rbx ;XSum - movsxd rax, DWORD PTR [rsp] - movsxd rcx, DWORD PTR [rsp+4] - add rax, rcx ;XXSum - mov rsi, arg(4) ;SSE - mov rdi, arg(5) ;Sum - mov dword ptr [rsi], eax - mov dword ptr [rdi], edx - xor rax, rax ; return 0 - - ; begin epilog - add rsp, 16 - pop rbx - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -;void vpx_filter_block2d_bil4x4_var_mmx -;( -; unsigned char *ref_ptr, -; int ref_pixels_per_line, -; unsigned char *src_ptr, -; int src_pixels_per_line, -; unsigned short *HFilter, -; unsigned short *VFilter, -; int *sum, -; unsigned int *sumsquared -;) -global sym(vpx_filter_block2d_bil4x4_var_mmx) PRIVATE -sym(vpx_filter_block2d_bil4x4_var_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 8 - GET_GOT rbx - push rsi - 
push rdi - sub rsp, 16 - ; end prolog - - pxor mm6, mm6 ; - pxor mm7, mm7 ; - - mov rax, arg(4) ;HFilter ; - mov rdx, arg(5) ;VFilter ; - - mov rsi, arg(0) ;ref_ptr ; - mov rdi, arg(2) ;src_ptr ; - - mov rcx, 4 ; - pxor mm0, mm0 ; - - movd mm1, [rsi] ; - movd mm3, [rsi+1] ; - - punpcklbw mm1, mm0 ; - pmullw mm1, [rax] ; - - punpcklbw mm3, mm0 ; - pmullw mm3, [rax+8] ; - - paddw mm1, mm3 ; - paddw mm1, [GLOBAL(mmx_bi_rd)] ; - - psraw mm1, mmx_filter_shift ; - movq mm5, mm1 - -%if ABI_IS_32BIT - add rsi, dword ptr arg(1) ;ref_pixels_per_line ; -%else - movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ; - add rsi, r8 -%endif - -.filter_block2d_bil4x4_var_mmx_loop: - - movd mm1, [rsi] ; - movd mm3, [rsi+1] ; - - punpcklbw mm1, mm0 ; - pmullw mm1, [rax] ; - - punpcklbw mm3, mm0 ; - pmullw mm3, [rax+8] ; - - paddw mm1, mm3 ; - paddw mm1, [GLOBAL(mmx_bi_rd)] ; - - psraw mm1, mmx_filter_shift ; - movq mm3, mm5 ; - - movq mm5, mm1 ; - pmullw mm3, [rdx] ; - - pmullw mm1, [rdx+8] ; - paddw mm1, mm3 ; - - paddw mm1, [GLOBAL(mmx_bi_rd)] ; - psraw mm1, mmx_filter_shift ; - - movd mm3, [rdi] ; - punpcklbw mm3, mm0 ; - - psubw mm1, mm3 ; - paddw mm6, mm1 ; - - pmaddwd mm1, mm1 ; - paddd mm7, mm1 ; - -%if ABI_IS_32BIT - add rsi, dword ptr arg(1) ;ref_pixels_per_line ; - add rdi, dword ptr arg(3) ;src_pixels_per_line ; -%else - movsxd r8, dword ptr arg(1) ;ref_pixels_per_line - movsxd r9, dword ptr arg(3) ;src_pixels_per_line - add rsi, r8 - add rdi, r9 -%endif - sub rcx, 1 ; - jnz .filter_block2d_bil4x4_var_mmx_loop ; - - pxor mm3, mm3 ; - pxor mm2, mm2 ; - - punpcklwd mm2, mm6 ; - punpckhwd mm3, mm6 ; - - paddd mm2, mm3 ; - movq mm6, mm2 ; - - psrlq mm6, 32 ; - paddd mm2, mm6 ; - - psrad mm2, 16 ; - movq mm4, mm7 ; - - psrlq mm4, 32 ; - paddd mm4, mm7 ; - - mov rdi, arg(6) ;sum - mov rsi, arg(7) ;sumsquared - - movd dword ptr [rdi], mm2 ; - movd dword ptr [rsi], mm4 ; - - ; begin epilog - add rsp, 16 - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - -;void 
vpx_filter_block2d_bil_var_mmx -;( -; unsigned char *ref_ptr, -; int ref_pixels_per_line, -; unsigned char *src_ptr, -; int src_pixels_per_line, -; unsigned int Height, -; unsigned short *HFilter, -; unsigned short *VFilter, -; int *sum, -; unsigned int *sumsquared -;) -global sym(vpx_filter_block2d_bil_var_mmx) PRIVATE -sym(vpx_filter_block2d_bil_var_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 9 - GET_GOT rbx - push rsi - push rdi - sub rsp, 16 - ; end prolog - - pxor mm6, mm6 ; - pxor mm7, mm7 ; - mov rax, arg(5) ;HFilter ; - - mov rdx, arg(6) ;VFilter ; - mov rsi, arg(0) ;ref_ptr ; - - mov rdi, arg(2) ;src_ptr ; - movsxd rcx, dword ptr arg(4) ;Height ; - - pxor mm0, mm0 ; - movq mm1, [rsi] ; - - movq mm3, [rsi+1] ; - movq mm2, mm1 ; - - movq mm4, mm3 ; - punpcklbw mm1, mm0 ; - - punpckhbw mm2, mm0 ; - pmullw mm1, [rax] ; - - pmullw mm2, [rax] ; - punpcklbw mm3, mm0 ; - - punpckhbw mm4, mm0 ; - pmullw mm3, [rax+8] ; - - pmullw mm4, [rax+8] ; - paddw mm1, mm3 ; - - paddw mm2, mm4 ; - paddw mm1, [GLOBAL(mmx_bi_rd)] ; - - psraw mm1, mmx_filter_shift ; - paddw mm2, [GLOBAL(mmx_bi_rd)] ; - - psraw mm2, mmx_filter_shift ; - movq mm5, mm1 - - packuswb mm5, mm2 ; -%if ABI_IS_32BIT - add rsi, dword ptr arg(1) ;ref_pixels_per_line -%else - movsxd r8, dword ptr arg(1) ;ref_pixels_per_line - add rsi, r8 -%endif - -.filter_block2d_bil_var_mmx_loop: - - movq mm1, [rsi] ; - movq mm3, [rsi+1] ; - - movq mm2, mm1 ; - movq mm4, mm3 ; - - punpcklbw mm1, mm0 ; - punpckhbw mm2, mm0 ; - - pmullw mm1, [rax] ; - pmullw mm2, [rax] ; - - punpcklbw mm3, mm0 ; - punpckhbw mm4, mm0 ; - - pmullw mm3, [rax+8] ; - pmullw mm4, [rax+8] ; - - paddw mm1, mm3 ; - paddw mm2, mm4 ; - - paddw mm1, [GLOBAL(mmx_bi_rd)] ; - psraw mm1, mmx_filter_shift ; - - paddw mm2, [GLOBAL(mmx_bi_rd)] ; - psraw mm2, mmx_filter_shift ; - - movq mm3, mm5 ; - movq mm4, mm5 ; - - punpcklbw mm3, mm0 ; - punpckhbw mm4, mm0 ; - - movq mm5, mm1 ; - packuswb mm5, mm2 ; - - pmullw mm3, [rdx] ; - pmullw mm4, [rdx] ; - 
- pmullw mm1, [rdx+8] ; - pmullw mm2, [rdx+8] ; - - paddw mm1, mm3 ; - paddw mm2, mm4 ; - - paddw mm1, [GLOBAL(mmx_bi_rd)] ; - paddw mm2, [GLOBAL(mmx_bi_rd)] ; - - psraw mm1, mmx_filter_shift ; - psraw mm2, mmx_filter_shift ; - - movq mm3, [rdi] ; - movq mm4, mm3 ; - - punpcklbw mm3, mm0 ; - punpckhbw mm4, mm0 ; - - psubw mm1, mm3 ; - psubw mm2, mm4 ; - - paddw mm6, mm1 ; - pmaddwd mm1, mm1 ; - - paddw mm6, mm2 ; - pmaddwd mm2, mm2 ; - - paddd mm7, mm1 ; - paddd mm7, mm2 ; - -%if ABI_IS_32BIT - add rsi, dword ptr arg(1) ;ref_pixels_per_line ; - add rdi, dword ptr arg(3) ;src_pixels_per_line ; -%else - movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ; - movsxd r9, dword ptr arg(3) ;src_pixels_per_line ; - add rsi, r8 - add rdi, r9 -%endif - sub rcx, 1 ; - jnz .filter_block2d_bil_var_mmx_loop ; - - pxor mm3, mm3 ; - pxor mm2, mm2 ; - - punpcklwd mm2, mm6 ; - punpckhwd mm3, mm6 ; - - paddd mm2, mm3 ; - movq mm6, mm2 ; - - psrlq mm6, 32 ; - paddd mm2, mm6 ; - - psrad mm2, 16 ; - movq mm4, mm7 ; - - psrlq mm4, 32 ; - paddd mm4, mm7 ; - - mov rdi, arg(7) ;sum - mov rsi, arg(8) ;sumsquared - - movd dword ptr [rdi], mm2 ; - movd dword ptr [rsi], mm4 ; - - ; begin epilog - add rsp, 16 - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - -SECTION_RODATA -;short mmx_bi_rd[4] = { 64, 64, 64, 64}; -align 16 -mmx_bi_rd: - times 4 dw 64 diff --git a/libvpx/vpx_dsp/x86/variance_mmx.c b/libvpx/vpx_dsp/x86/variance_mmx.c deleted file mode 100644 index f04f4e2c8..000000000 --- a/libvpx/vpx_dsp/x86/variance_mmx.c +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "./vpx_dsp_rtcd.h" - -#include "vpx_ports/mem.h" - -DECLARE_ALIGNED(16, static const int16_t, bilinear_filters_mmx[8][8]) = { - { 128, 128, 128, 128, 0, 0, 0, 0 }, - { 112, 112, 112, 112, 16, 16, 16, 16 }, - { 96, 96, 96, 96, 32, 32, 32, 32 }, - { 80, 80, 80, 80, 48, 48, 48, 48 }, - { 64, 64, 64, 64, 64, 64, 64, 64 }, - { 48, 48, 48, 48, 80, 80, 80, 80 }, - { 32, 32, 32, 32, 96, 96, 96, 96 }, - { 16, 16, 16, 16, 112, 112, 112, 112 } -}; - -extern void vpx_get4x4var_mmx(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - unsigned int *sse, int *sum); - -extern void vpx_filter_block2d_bil4x4_var_mmx(const unsigned char *ref_ptr, - int ref_pixels_per_line, - const unsigned char *src_ptr, - int src_pixels_per_line, - const int16_t *HFilter, - const int16_t *VFilter, - int *sum, - unsigned int *sumsquared); - -extern void vpx_filter_block2d_bil_var_mmx(const unsigned char *ref_ptr, - int ref_pixels_per_line, - const unsigned char *src_ptr, - int src_pixels_per_line, - unsigned int Height, - const int16_t *HFilter, - const int16_t *VFilter, - int *sum, - unsigned int *sumsquared); - - -unsigned int vpx_variance4x4_mmx(const unsigned char *a, int a_stride, - const unsigned char *b, int b_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - vpx_get4x4var_mmx(a, a_stride, b, b_stride, &var, &avg); - *sse = var; - return (var - (((unsigned int)avg * avg) >> 4)); -} - -unsigned int vpx_variance8x8_mmx(const unsigned char *a, int a_stride, - const unsigned char *b, int b_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - vpx_get8x8var_mmx(a, a_stride, b, b_stride, &var, &avg); - *sse = var; - - return (var - (((unsigned int)avg * avg) >> 6)); -} - -unsigned int vpx_mse16x16_mmx(const unsigned char *a, int a_stride, - const unsigned char *b, int b_stride, - unsigned int *sse) { - unsigned int sse0, sse1, sse2, sse3, var; - int sum0, sum1, sum2, sum3; - - vpx_get8x8var_mmx(a, a_stride, b, b_stride, &sse0, &sum0); - 
vpx_get8x8var_mmx(a + 8, a_stride, b + 8, b_stride, &sse1, &sum1); - vpx_get8x8var_mmx(a + 8 * a_stride, a_stride, - b + 8 * b_stride, b_stride, &sse2, &sum2); - vpx_get8x8var_mmx(a + 8 * a_stride + 8, a_stride, - b + 8 * b_stride + 8, b_stride, &sse3, &sum3); - - var = sse0 + sse1 + sse2 + sse3; - *sse = var; - return var; -} - -unsigned int vpx_variance16x16_mmx(const unsigned char *a, int a_stride, - const unsigned char *b, int b_stride, - unsigned int *sse) { - unsigned int sse0, sse1, sse2, sse3, var; - int sum0, sum1, sum2, sum3, avg; - - vpx_get8x8var_mmx(a, a_stride, b, b_stride, &sse0, &sum0); - vpx_get8x8var_mmx(a + 8, a_stride, b + 8, b_stride, &sse1, &sum1); - vpx_get8x8var_mmx(a + 8 * a_stride, a_stride, - b + 8 * b_stride, b_stride, &sse2, &sum2); - vpx_get8x8var_mmx(a + 8 * a_stride + 8, a_stride, - b + 8 * b_stride + 8, b_stride, &sse3, &sum3); - - var = sse0 + sse1 + sse2 + sse3; - avg = sum0 + sum1 + sum2 + sum3; - *sse = var; - return (var - (((unsigned int)avg * avg) >> 8)); -} - -unsigned int vpx_variance16x8_mmx(const unsigned char *a, int a_stride, - const unsigned char *b, int b_stride, - unsigned int *sse) { - unsigned int sse0, sse1, var; - int sum0, sum1, avg; - - vpx_get8x8var_mmx(a, a_stride, b, b_stride, &sse0, &sum0); - vpx_get8x8var_mmx(a + 8, a_stride, b + 8, b_stride, &sse1, &sum1); - - var = sse0 + sse1; - avg = sum0 + sum1; - *sse = var; - return (var - (((unsigned int)avg * avg) >> 7)); -} - -unsigned int vpx_variance8x16_mmx(const unsigned char *a, int a_stride, - const unsigned char *b, int b_stride, - unsigned int *sse) { - unsigned int sse0, sse1, var; - int sum0, sum1, avg; - - vpx_get8x8var_mmx(a, a_stride, b, b_stride, &sse0, &sum0); - vpx_get8x8var_mmx(a + 8 * a_stride, a_stride, - b + 8 * b_stride, b_stride, &sse1, &sum1); - - var = sse0 + sse1; - avg = sum0 + sum1; - *sse = var; - - return (var - (((unsigned int)avg * avg) >> 7)); -} - -uint32_t vpx_sub_pixel_variance4x4_mmx(const uint8_t *a, int a_stride, - int 
xoffset, int yoffset, - const uint8_t *b, int b_stride, - uint32_t *sse) { - int xsum; - unsigned int xxsum; - vpx_filter_block2d_bil4x4_var_mmx(a, a_stride, b, b_stride, - bilinear_filters_mmx[xoffset], - bilinear_filters_mmx[yoffset], - &xsum, &xxsum); - *sse = xxsum; - return (xxsum - (((unsigned int)xsum * xsum) >> 4)); -} - - -uint32_t vpx_sub_pixel_variance8x8_mmx(const uint8_t *a, int a_stride, - int xoffset, int yoffset, - const uint8_t *b, int b_stride, - uint32_t *sse) { - int xsum; - uint32_t xxsum; - vpx_filter_block2d_bil_var_mmx(a, a_stride, b, b_stride, 8, - bilinear_filters_mmx[xoffset], - bilinear_filters_mmx[yoffset], - &xsum, &xxsum); - *sse = xxsum; - return (xxsum - (((uint32_t)xsum * xsum) >> 6)); -} - -uint32_t vpx_sub_pixel_variance16x16_mmx(const uint8_t *a, int a_stride, - int xoffset, int yoffset, - const uint8_t *b, int b_stride, - uint32_t *sse) { - int xsum0, xsum1; - unsigned int xxsum0, xxsum1; - - vpx_filter_block2d_bil_var_mmx(a, a_stride, b, b_stride, 16, - bilinear_filters_mmx[xoffset], - bilinear_filters_mmx[yoffset], - &xsum0, &xxsum0); - - vpx_filter_block2d_bil_var_mmx(a + 8, a_stride, b + 8, b_stride, 16, - bilinear_filters_mmx[xoffset], - bilinear_filters_mmx[yoffset], - &xsum1, &xxsum1); - - xsum0 += xsum1; - xxsum0 += xxsum1; - - *sse = xxsum0; - return (xxsum0 - (((uint32_t)xsum0 * xsum0) >> 8)); -} - -uint32_t vpx_sub_pixel_variance16x8_mmx(const uint8_t *a, int a_stride, - int xoffset, int yoffset, - const uint8_t *b, int b_stride, - uint32_t *sse) { - int xsum0, xsum1; - unsigned int xxsum0, xxsum1; - - vpx_filter_block2d_bil_var_mmx(a, a_stride, b, b_stride, 8, - bilinear_filters_mmx[xoffset], - bilinear_filters_mmx[yoffset], - &xsum0, &xxsum0); - - vpx_filter_block2d_bil_var_mmx(a + 8, a_stride, b + 8, b_stride, 8, - bilinear_filters_mmx[xoffset], - bilinear_filters_mmx[yoffset], - &xsum1, &xxsum1); - - xsum0 += xsum1; - xxsum0 += xxsum1; - - *sse = xxsum0; - return (xxsum0 - (((uint32_t)xsum0 * xsum0) >> 7)); -} - 
-uint32_t vpx_sub_pixel_variance8x16_mmx(const uint8_t *a, int a_stride, - int xoffset, int yoffset, - const uint8_t *b, int b_stride, - uint32_t *sse) { - int xsum; - unsigned int xxsum; - vpx_filter_block2d_bil_var_mmx(a, a_stride, b, b_stride, 16, - bilinear_filters_mmx[xoffset], - bilinear_filters_mmx[yoffset], - &xsum, &xxsum); - *sse = xxsum; - return (xxsum - (((uint32_t)xsum * xsum) >> 7)); -} - -uint32_t vpx_variance_halfpixvar16x16_h_mmx(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - uint32_t *sse) { - return vpx_sub_pixel_variance16x16_mmx(a, a_stride, 4, 0, b, b_stride, sse); -} - -uint32_t vpx_variance_halfpixvar16x16_v_mmx(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - uint32_t *sse) { - return vpx_sub_pixel_variance16x16_mmx(a, a_stride, 0, 4, b, b_stride, sse); -} - -uint32_t vpx_variance_halfpixvar16x16_hv_mmx(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - uint32_t *sse) { - return vpx_sub_pixel_variance16x16_mmx(a, a_stride, 4, 4, b, b_stride, sse); -} diff --git a/libvpx/vpx_dsp/x86/variance_sse2.c b/libvpx/vpx_dsp/x86/variance_sse2.c index e6c9365ab..6987c2e24 100644 --- a/libvpx/vpx_dsp/x86/variance_sse2.c +++ b/libvpx/vpx_dsp/x86/variance_sse2.c @@ -171,7 +171,7 @@ unsigned int vpx_variance4x4_sse2(const unsigned char *src, int src_stride, unsigned int *sse) { int sum; get4x4var_sse2(src, src_stride, ref, ref_stride, sse, &sum); - return *sse - (((unsigned int)sum * sum) >> 4); + return *sse - ((sum * sum) >> 4); } unsigned int vpx_variance8x4_sse2(const uint8_t *src, int src_stride, @@ -180,7 +180,7 @@ unsigned int vpx_variance8x4_sse2(const uint8_t *src, int src_stride, int sum; variance_sse2(src, src_stride, ref, ref_stride, 8, 4, sse, &sum, get4x4var_sse2, 4); - return *sse - (((unsigned int)sum * sum) >> 5); + return *sse - ((sum * sum) >> 5); } unsigned int vpx_variance4x8_sse2(const uint8_t *src, int src_stride, @@ -189,7 +189,7 @@ unsigned int vpx_variance4x8_sse2(const 
uint8_t *src, int src_stride, int sum; variance_sse2(src, src_stride, ref, ref_stride, 4, 8, sse, &sum, get4x4var_sse2, 4); - return *sse - (((unsigned int)sum * sum) >> 5); + return *sse - ((sum * sum) >> 5); } unsigned int vpx_variance8x8_sse2(const unsigned char *src, int src_stride, @@ -197,7 +197,7 @@ unsigned int vpx_variance8x8_sse2(const unsigned char *src, int src_stride, unsigned int *sse) { int sum; vpx_get8x8var_sse2(src, src_stride, ref, ref_stride, sse, &sum); - return *sse - (((unsigned int)sum * sum) >> 6); + return *sse - ((sum * sum) >> 6); } unsigned int vpx_variance16x8_sse2(const unsigned char *src, int src_stride, @@ -206,7 +206,7 @@ unsigned int vpx_variance16x8_sse2(const unsigned char *src, int src_stride, int sum; variance_sse2(src, src_stride, ref, ref_stride, 16, 8, sse, &sum, vpx_get8x8var_sse2, 8); - return *sse - (((unsigned int)sum * sum) >> 7); + return *sse - ((sum * sum) >> 7); } unsigned int vpx_variance8x16_sse2(const unsigned char *src, int src_stride, @@ -215,7 +215,7 @@ unsigned int vpx_variance8x16_sse2(const unsigned char *src, int src_stride, int sum; variance_sse2(src, src_stride, ref, ref_stride, 8, 16, sse, &sum, vpx_get8x8var_sse2, 8); - return *sse - (((unsigned int)sum * sum) >> 7); + return *sse - ((sum * sum) >> 7); } unsigned int vpx_variance16x16_sse2(const unsigned char *src, int src_stride, @@ -223,7 +223,7 @@ unsigned int vpx_variance16x16_sse2(const unsigned char *src, int src_stride, unsigned int *sse) { int sum; vpx_get16x16var_sse2(src, src_stride, ref, ref_stride, sse, &sum); - return *sse - (((unsigned int)sum * sum) >> 8); + return *sse - (((uint32_t)((int64_t)sum * sum)) >> 8); } unsigned int vpx_variance32x32_sse2(const uint8_t *src, int src_stride, @@ -320,16 +320,16 @@ unsigned int vpx_mse16x16_sse2(const uint8_t *src, int src_stride, int height, unsigned int *sse, \ void *unused0, void *unused) #define DECLS(opt1, opt2) \ - DECL(4, opt2); \ + DECL(4, opt1); \ DECL(8, opt1); \ DECL(16, opt1) 
-DECLS(sse2, sse); +DECLS(sse2, sse2); DECLS(ssse3, ssse3); #undef DECLS #undef DECL -#define FN(w, h, wf, wlog2, hlog2, opt, cast) \ +#define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast) \ unsigned int vpx_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src, \ int src_stride, \ int x_offset, \ @@ -365,25 +365,25 @@ unsigned int vpx_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src, \ } \ } \ *sse_ptr = sse; \ - return sse - ((cast se * se) >> (wlog2 + hlog2)); \ + return sse - (cast_prod (cast se * se) >> (wlog2 + hlog2)); \ } #define FNS(opt1, opt2) \ -FN(64, 64, 16, 6, 6, opt1, (int64_t)); \ -FN(64, 32, 16, 6, 5, opt1, (int64_t)); \ -FN(32, 64, 16, 5, 6, opt1, (int64_t)); \ -FN(32, 32, 16, 5, 5, opt1, (int64_t)); \ -FN(32, 16, 16, 5, 4, opt1, (int64_t)); \ -FN(16, 32, 16, 4, 5, opt1, (int64_t)); \ -FN(16, 16, 16, 4, 4, opt1, (uint32_t)); \ -FN(16, 8, 16, 4, 3, opt1, (uint32_t)); \ -FN(8, 16, 8, 3, 4, opt1, (uint32_t)); \ -FN(8, 8, 8, 3, 3, opt1, (uint32_t)); \ -FN(8, 4, 8, 3, 2, opt1, (uint32_t)); \ -FN(4, 8, 4, 2, 3, opt2, (uint32_t)); \ -FN(4, 4, 4, 2, 2, opt2, (uint32_t)) - -FNS(sse2, sse); +FN(64, 64, 16, 6, 6, opt1, (int64_t), (int64_t)); \ +FN(64, 32, 16, 6, 5, opt1, (int64_t), (int64_t)); \ +FN(32, 64, 16, 5, 6, opt1, (int64_t), (int64_t)); \ +FN(32, 32, 16, 5, 5, opt1, (int64_t), (int64_t)); \ +FN(32, 16, 16, 5, 4, opt1, (int64_t), (int64_t)); \ +FN(16, 32, 16, 4, 5, opt1, (int64_t), (int64_t)); \ +FN(16, 16, 16, 4, 4, opt1, (uint32_t), (int64_t)); \ +FN(16, 8, 16, 4, 3, opt1, (int32_t), (int32_t)); \ +FN(8, 16, 8, 3, 4, opt1, (int32_t), (int32_t)); \ +FN(8, 8, 8, 3, 3, opt1, (int32_t), (int32_t)); \ +FN(8, 4, 8, 3, 2, opt1, (int32_t), (int32_t)); \ +FN(4, 8, 4, 2, 3, opt1, (int32_t), (int32_t)); \ +FN(4, 4, 4, 2, 2, opt1, (int32_t), (int32_t)) + +FNS(sse2, sse2); FNS(ssse3, ssse3); #undef FNS @@ -401,16 +401,16 @@ int vpx_sub_pixel_avg_variance##w##xh_##opt(const uint8_t *src, \ int height, unsigned int *sse, \ void *unused0, void *unused) 
#define DECLS(opt1, opt2) \ -DECL(4, opt2); \ +DECL(4, opt1); \ DECL(8, opt1); \ DECL(16, opt1) -DECLS(sse2, sse); +DECLS(sse2, sse2); DECLS(ssse3, ssse3); #undef DECL #undef DECLS -#define FN(w, h, wf, wlog2, hlog2, opt, cast) \ +#define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast) \ unsigned int vpx_sub_pixel_avg_variance##w##x##h##_##opt(const uint8_t *src, \ int src_stride, \ int x_offset, \ @@ -451,23 +451,23 @@ unsigned int vpx_sub_pixel_avg_variance##w##x##h##_##opt(const uint8_t *src, \ } \ } \ *sseptr = sse; \ - return sse - ((cast se * se) >> (wlog2 + hlog2)); \ + return sse - (cast_prod (cast se * se) >> (wlog2 + hlog2)); \ } #define FNS(opt1, opt2) \ -FN(64, 64, 16, 6, 6, opt1, (int64_t)); \ -FN(64, 32, 16, 6, 5, opt1, (int64_t)); \ -FN(32, 64, 16, 5, 6, opt1, (int64_t)); \ -FN(32, 32, 16, 5, 5, opt1, (int64_t)); \ -FN(32, 16, 16, 5, 4, opt1, (int64_t)); \ -FN(16, 32, 16, 4, 5, opt1, (int64_t)); \ -FN(16, 16, 16, 4, 4, opt1, (uint32_t)); \ -FN(16, 8, 16, 4, 3, opt1, (uint32_t)); \ -FN(8, 16, 8, 3, 4, opt1, (uint32_t)); \ -FN(8, 8, 8, 3, 3, opt1, (uint32_t)); \ -FN(8, 4, 8, 3, 2, opt1, (uint32_t)); \ -FN(4, 8, 4, 2, 3, opt2, (uint32_t)); \ -FN(4, 4, 4, 2, 2, opt2, (uint32_t)) +FN(64, 64, 16, 6, 6, opt1, (int64_t), (int64_t)); \ +FN(64, 32, 16, 6, 5, opt1, (int64_t), (int64_t)); \ +FN(32, 64, 16, 5, 6, opt1, (int64_t), (int64_t)); \ +FN(32, 32, 16, 5, 5, opt1, (int64_t), (int64_t)); \ +FN(32, 16, 16, 5, 4, opt1, (int64_t), (int64_t)); \ +FN(16, 32, 16, 4, 5, opt1, (int64_t), (int64_t)); \ +FN(16, 16, 16, 4, 4, opt1, (uint32_t), (int64_t)); \ +FN(16, 8, 16, 4, 3, opt1, (uint32_t), (int32_t)); \ +FN(8, 16, 8, 3, 4, opt1, (uint32_t), (int32_t)); \ +FN(8, 8, 8, 3, 3, opt1, (uint32_t), (int32_t)); \ +FN(8, 4, 8, 3, 2, opt1, (uint32_t), (int32_t)); \ +FN(4, 8, 4, 2, 3, opt1, (uint32_t), (int32_t)); \ +FN(4, 4, 4, 2, 2, opt1, (uint32_t), (int32_t)) FNS(sse2, sse); FNS(ssse3, ssse3); diff --git a/libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm 
b/libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm index 9c5b414b4..abc027065 100644 --- a/libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm +++ b/libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm @@ -13,15 +13,21 @@ SECTION .text %macro convolve_fn 1-2 -INIT_XMM sse2 +%ifidn %1, avg +%define AUX_XMM_REGS 4 +%else +%define AUX_XMM_REGS 0 +%endif %ifidn %2, highbd %define pavg pavgw -cglobal %2_convolve_%1, 4, 7, 4, src, src_stride, dst, dst_stride, \ - fx, fxs, fy, fys, w, h, bd +cglobal %2_convolve_%1, 4, 7, 4+AUX_XMM_REGS, src, src_stride, \ + dst, dst_stride, \ + fx, fxs, fy, fys, w, h, bd %else %define pavg pavgb -cglobal convolve_%1, 4, 7, 4, src, src_stride, dst, dst_stride, \ - fx, fxs, fy, fys, w, h +cglobal convolve_%1, 4, 7, 4+AUX_XMM_REGS, src, src_stride, \ + dst, dst_stride, \ + fx, fxs, fy, fys, w, h %endif mov r4d, dword wm %ifidn %2, highbd @@ -152,27 +158,30 @@ cglobal convolve_%1, 4, 7, 4, src, src_stride, dst, dst_stride, \ jnz .loop16 RET -INIT_MMX sse .w8: mov r4d, dword hm lea r5q, [src_strideq*3] lea r6q, [dst_strideq*3] .loop8: - movu m0, [srcq] - movu m1, [srcq+src_strideq] - movu m2, [srcq+src_strideq*2] - movu m3, [srcq+r5q] + movh m0, [srcq] + movh m1, [srcq+src_strideq] + movh m2, [srcq+src_strideq*2] + movh m3, [srcq+r5q] lea srcq, [srcq+src_strideq*4] %ifidn %1, avg - pavg m0, [dstq] - pavg m1, [dstq+dst_strideq] - pavg m2, [dstq+dst_strideq*2] - pavg m3, [dstq+r6q] + movh m4, [dstq] + movh m5, [dstq+dst_strideq] + movh m6, [dstq+dst_strideq*2] + movh m7, [dstq+r6q] + pavg m0, m4 + pavg m1, m5 + pavg m2, m6 + pavg m3, m7 %endif - mova [dstq ], m0 - mova [dstq+dst_strideq ], m1 - mova [dstq+dst_strideq*2], m2 - mova [dstq+r6q ], m3 + movh [dstq ], m0 + movh [dstq+dst_strideq ], m1 + movh [dstq+dst_strideq*2], m2 + movh [dstq+r6q ], m3 lea dstq, [dstq+dst_strideq*4] sub r4d, 4 jnz .loop8 @@ -184,25 +193,25 @@ INIT_MMX sse lea r5q, [src_strideq*3] lea r6q, [dst_strideq*3] .loop4: - movh m0, [srcq] - movh m1, [srcq+src_strideq] - movh m2, 
[srcq+src_strideq*2] - movh m3, [srcq+r5q] + movd m0, [srcq] + movd m1, [srcq+src_strideq] + movd m2, [srcq+src_strideq*2] + movd m3, [srcq+r5q] lea srcq, [srcq+src_strideq*4] %ifidn %1, avg - movh m4, [dstq] - movh m5, [dstq+dst_strideq] - movh m6, [dstq+dst_strideq*2] - movh m7, [dstq+r6q] + movd m4, [dstq] + movd m5, [dstq+dst_strideq] + movd m6, [dstq+dst_strideq*2] + movd m7, [dstq+r6q] pavg m0, m4 pavg m1, m5 pavg m2, m6 pavg m3, m7 %endif - movh [dstq ], m0 - movh [dstq+dst_strideq ], m1 - movh [dstq+dst_strideq*2], m2 - movh [dstq+r6q ], m3 + movd [dstq ], m0 + movd [dstq+dst_strideq ], m1 + movd [dstq+dst_strideq*2], m2 + movd [dstq+r6q ], m3 lea dstq, [dstq+dst_strideq*4] sub r4d, 4 jnz .loop4 @@ -210,6 +219,7 @@ INIT_MMX sse %endif %endmacro +INIT_XMM sse2 convolve_fn copy convolve_fn avg %if CONFIG_VP9_HIGHBITDEPTH diff --git a/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm b/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm index 3fbaa274c..d2cb8ea29 100644 --- a/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm +++ b/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm @@ -16,6 +16,11 @@ pw_64: times 8 dw 64 ; %define USE_PMULHRSW ; NOTE: pmulhrsw has a latency of 5 cycles. Tests showed a performance loss ; when using this instruction. +; +; The add order below (based on ffvp9) must be followed to prevent outranges. 
+; x = k0k1 + k4k5 +; y = k2k3 + k6k7 +; z = signed SAT(x + y) SECTION .text %if ARCH_X86_64 @@ -77,17 +82,12 @@ SECTION .text pmaddubsw %2, k0k1k4k5 pmaddubsw m3, k2k3k6k7 - - mova m4, %2 - mova m5, m3 - psrldq %2, 8 - psrldq m3, 8 - mova m6, m5 - - paddsw m4, m3 - pmaxsw m5, %2 - pminsw %2, m6 + mova m4, %2 ;k0k1 + mova m5, m3 ;k2k3 + psrldq %2, 8 ;k4k5 + psrldq m3, 8 ;k6k7 paddsw %2, m4 + paddsw m5, m3 paddsw %2, m5 paddsw %2, krd psraw %2, 7 @@ -157,27 +157,20 @@ cglobal filter_block1d4_%1, 6, 6+(ARCH_X86_64*2), 11, LOCAL_VARS_SIZE, \ pmaddubsw m7, k0k1k4k5 palignr m3, m2, 5 pmaddubsw m3, k2k3k6k7 - mova m0, m4 - mova m5, m1 - mova m2, m7 - psrldq m4, 8 - psrldq m1, 8 - mova m6, m5 - paddsw m0, m1 - mova m1, m3 - psrldq m7, 8 - psrldq m3, 8 - paddsw m2, m3 - mova m3, m1 - pmaxsw m5, m4 - pminsw m4, m6 + mova m0, m4 ;k0k1 + mova m5, m1 ;k2k3 + mova m2, m7 ;k0k1 upper + psrldq m4, 8 ;k4k5 + psrldq m1, 8 ;k6k7 paddsw m4, m0 - paddsw m4, m5 - pmaxsw m1, m7 - pminsw m7, m3 + paddsw m5, m1 + mova m1, m3 ;k2k3 upper + psrldq m7, 8 ;k4k5 upper + psrldq m3, 8 ;k6k7 upper paddsw m7, m2 + paddsw m4, m5 + paddsw m1, m3 paddsw m7, m1 - paddsw m4, krd psraw m4, 7 packuswb m4, m4 @@ -240,16 +233,13 @@ cglobal filter_block1d4_%1, 6, 6+(ARCH_X86_64*2), 11, LOCAL_VARS_SIZE, \ pmaddubsw %3, k2k3 pmaddubsw %4, k4k5 pmaddubsw %5, k6k7 - + paddsw %2, %4 + paddsw %5, %3 paddsw %2, %5 - mova %1, %3 - pminsw %3, %4 - pmaxsw %1, %4 - paddsw %2, %3 - paddsw %1, %2 - paddsw %1, krd - psraw %1, 7 - packuswb %1, %1 + paddsw %2, krd + psraw %2, 7 + packuswb %2, %2 + SWAP %1, %2 %endm ;------------------------------------------------------------------------------- @@ -293,39 +283,33 @@ cglobal filter_block1d8_%1, 6, 6+(ARCH_X86_64*1), 14, LOCAL_VARS_SIZE, \ pmaddubsw m3, k4k5 palignr m7, m4, 13 - paddsw m1, m5 - mova m5, m6 - mova m0, m2 - palignr m5, m4, 5 - pminsw m2, m3 + mova m0, m6 + palignr m0, m4, 5 pmaddubsw m7, k6k7 - pmaxsw m3, m0 + paddsw m1, m3 + paddsw m2, m5 paddsw m1, m2 - mova 
m0, m6 + mova m5, m6 palignr m6, m4, 1 - pmaddubsw m5, k2k3 - paddsw m1, m3 + pmaddubsw m0, k2k3 pmaddubsw m6, k0k1 - palignr m0, m4, 9 + palignr m5, m4, 9 paddsw m1, krd - pmaddubsw m0, k4k5 - mova m4, m5 + pmaddubsw m5, k4k5 psraw m1, 7 - pminsw m5, m0 - paddsw m6, m7 + paddsw m0, m7 +%ifidn %1, h8_avg + movh m7, [dstq] + movh m2, [dstq + dstrideq] +%endif packuswb m1, m1 - paddsw m6, m5 - pmaxsw m0, m4 paddsw m6, m0 paddsw m6, krd psraw m6, 7 packuswb m6, m6 - %ifidn %1, h8_avg - movh m0, [dstq] - movh m2, [dstq + dstrideq] - pavgb m1, m0 + pavgb m1, m7 pavgb m6, m2 %endif movh [dstq], m1 @@ -388,7 +372,7 @@ cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*0), 14, LOCAL_VARS_SIZE, \ pmaddubsw m1, k2k3 palignr m2, m7, 9 pmaddubsw m2, k4k5 - paddsw m0, m3 + paddsw m1, m3 mova m3, m4 punpckhbw m4, m4 mova m5, m4 @@ -403,17 +387,13 @@ cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*0), 14, LOCAL_VARS_SIZE, \ pmaddubsw m6, k4k5 palignr m7, m3, 13 pmaddubsw m7, k6k7 - - mova m3, m1 - pmaxsw m1, m2 - pminsw m2, m3 paddsw m0, m2 paddsw m0, m1 - paddsw m4, m7 - mova m7, m5 - pmaxsw m5, m6 - pminsw m6, m7 +%ifidn %1, h8_avg + mova m1, [dstq] +%endif paddsw m4, m6 + paddsw m5, m7 paddsw m4, m5 paddsw m0, krd paddsw m4, krd @@ -421,7 +401,6 @@ cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*0), 14, LOCAL_VARS_SIZE, \ psraw m4, 7 packuswb m0, m4 %ifidn %1, h8_avg - mova m1, [dstq] pavgb m0, m1 %endif lea srcq, [srcq + sstrideq] @@ -488,27 +467,21 @@ cglobal filter_block1d%2_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \ movx m7, [src1q + sstride6q ] ;H punpcklbw m6, m7 ;G H pmaddubsw m6, k6k7 - mova tmp, m2 pmaddubsw m3, k2k3 pmaddubsw m1, k0k1 - pmaxsw m2, m4 - paddsw m0, m6 + paddsw m0, m4 + paddsw m2, m6 movx m6, [srcq + sstrideq * 8 ] ;H next iter punpcklbw m7, m6 pmaddubsw m7, k6k7 - pminsw m4, tmp - paddsw m0, m4 - mova m4, m3 paddsw m0, m2 - pminsw m3, m5 - pmaxsw m5, m4 paddsw m0, krd psraw m0, 7 - paddsw m1, m7 + paddsw m1, m5 packuswb m0, m0 + paddsw m3, m7 paddsw 
m1, m3 - paddsw m1, m5 paddsw m1, krd psraw m1, 7 lea srcq, [srcq + sstrideq * 2 ] @@ -538,11 +511,11 @@ cglobal filter_block1d%2_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \ movx m1, [srcq + sstrideq ] ;B movx m6, [srcq + sstride6q ] ;G punpcklbw m0, m1 ;A B - movx m7, [rax + sstride6q ] ;H + movx m7, [src1q + sstride6q ] ;H pmaddubsw m0, k0k1 movx m2, [srcq + sstrideq * 2 ] ;C punpcklbw m6, m7 ;G H - movx m3, [rax + sstrideq * 2 ] ;D + movx m3, [src1q + sstrideq * 2] ;D pmaddubsw m6, k6k7 movx m4, [srcq + sstrideq * 4 ] ;E punpcklbw m2, m3 ;C D @@ -550,10 +523,7 @@ cglobal filter_block1d%2_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \ punpcklbw m4, m5 ;E F pmaddubsw m2, k2k3 pmaddubsw m4, k4k5 - paddsw m0, m6 - mova m1, m2 - pmaxsw m2, m4 - pminsw m4, m1 + paddsw m2, m6 paddsw m0, m4 paddsw m0, m2 paddsw m0, krd @@ -572,7 +542,6 @@ cglobal filter_block1d%2_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \ %macro SUBPIX_VFILTER16 1 cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \ src, sstride, dst, dstride, height, filter - mova m4, [filterq] SETUP_LOCAL_VARS %if ARCH_X86_64 @@ -611,12 +580,9 @@ cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \ punpcklbw m3, m5 ;A B movh m7, [srcq + sstrideq * 2 + 8] ;C pmaddubsw m6, k6k7 - mova m1, m2 movh m5, [src1q + sstrideq * 2 + 8] ;D - pmaxsw m2, m4 punpcklbw m7, m5 ;C D - pminsw m4, m1 - paddsw m0, m6 + paddsw m2, m6 pmaddubsw m3, k0k1 movh m1, [srcq + sstrideq * 4 + 8] ;E paddsw m0, m4 @@ -630,30 +596,24 @@ cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \ movh m5, [src1q + sstride6q + 8] ;H psraw m0, 7 punpcklbw m2, m5 ;G H - packuswb m0, m0 pmaddubsw m2, k6k7 %ifidn %1, v8_avg - movh m4, [dstq] - pavgb m0, m4 + mova m4, [dstq] %endif movh [dstq], m0 - mova m6, m7 - pmaxsw m7, m1 - pminsw m1, m6 - paddsw m3, m2 + paddsw m7, m2 paddsw m3, m1 paddsw m3, m7 paddsw m3, krd psraw m3, 7 - packuswb m3, m3 + packuswb m0, m3 add srcq, sstrideq add 
src1q, sstrideq %ifidn %1, v8_avg - movh m1, [dstq + 8] - pavgb m3, m1 + pavgb m0, m4 %endif - movh [dstq + 8], m3 + mova [dstq], m0 add dstq, dst_stride dec heightd jnz .loop diff --git a/libvpx/vpx_mem/vpx_mem.c b/libvpx/vpx_mem/vpx_mem.c index b98fe83c0..b261fc0da 100644 --- a/libvpx/vpx_mem/vpx_mem.c +++ b/libvpx/vpx_mem/vpx_mem.c @@ -9,8 +9,6 @@ */ -#define __VPX_MEM_C__ - #include "vpx_mem.h" #include <stdio.h> #include <stdlib.h> diff --git a/libvpx/vpx_ports/mem_ops.h b/libvpx/vpx_ports/mem_ops.h index d4a3d773f..620df31b2 100644 --- a/libvpx/vpx_ports/mem_ops.h +++ b/libvpx/vpx_ports/mem_ops.h @@ -89,7 +89,7 @@ static unsigned MEM_VALUE_T mem_get_be32(const void *vmem) { unsigned MEM_VALUE_T val; const MAU_T *mem = (const MAU_T *)vmem; - val = mem[0] << 24; + val = ((unsigned MEM_VALUE_T)mem[0]) << 24; val |= mem[1] << 16; val |= mem[2] << 8; val |= mem[3]; @@ -125,7 +125,7 @@ static unsigned MEM_VALUE_T mem_get_le32(const void *vmem) { unsigned MEM_VALUE_T val; const MAU_T *mem = (const MAU_T *)vmem; - val = mem[3] << 24; + val = ((unsigned MEM_VALUE_T)mem[3]) << 24; val |= mem[2] << 16; val |= mem[1] << 8; val |= mem[0]; @@ -168,8 +168,8 @@ mem_get_s_generic(le, 32) static VPX_INLINE void mem_put_be16(void *vmem, MEM_VALUE_T val) { MAU_T *mem = (MAU_T *)vmem; - mem[0] = (val >> 8) & 0xff; - mem[1] = (val >> 0) & 0xff; + mem[0] = (MAU_T)((val >> 8) & 0xff); + mem[1] = (MAU_T)((val >> 0) & 0xff); } #undef mem_put_be24 @@ -177,9 +177,9 @@ static VPX_INLINE void mem_put_be16(void *vmem, MEM_VALUE_T val) { static VPX_INLINE void mem_put_be24(void *vmem, MEM_VALUE_T val) { MAU_T *mem = (MAU_T *)vmem; - mem[0] = (val >> 16) & 0xff; - mem[1] = (val >> 8) & 0xff; - mem[2] = (val >> 0) & 0xff; + mem[0] = (MAU_T)((val >> 16) & 0xff); + mem[1] = (MAU_T)((val >> 8) & 0xff); + mem[2] = (MAU_T)((val >> 0) & 0xff); } #undef mem_put_be32 @@ -187,10 +187,10 @@ static VPX_INLINE void mem_put_be24(void *vmem, MEM_VALUE_T val) { static VPX_INLINE void mem_put_be32(void 
*vmem, MEM_VALUE_T val) { MAU_T *mem = (MAU_T *)vmem; - mem[0] = (val >> 24) & 0xff; - mem[1] = (val >> 16) & 0xff; - mem[2] = (val >> 8) & 0xff; - mem[3] = (val >> 0) & 0xff; + mem[0] = (MAU_T)((val >> 24) & 0xff); + mem[1] = (MAU_T)((val >> 16) & 0xff); + mem[2] = (MAU_T)((val >> 8) & 0xff); + mem[3] = (MAU_T)((val >> 0) & 0xff); } #undef mem_put_le16 @@ -198,8 +198,8 @@ static VPX_INLINE void mem_put_be32(void *vmem, MEM_VALUE_T val) { static VPX_INLINE void mem_put_le16(void *vmem, MEM_VALUE_T val) { MAU_T *mem = (MAU_T *)vmem; - mem[0] = (val >> 0) & 0xff; - mem[1] = (val >> 8) & 0xff; + mem[0] = (MAU_T)((val >> 0) & 0xff); + mem[1] = (MAU_T)((val >> 8) & 0xff); } #undef mem_put_le24 @@ -207,9 +207,9 @@ static VPX_INLINE void mem_put_le16(void *vmem, MEM_VALUE_T val) { static VPX_INLINE void mem_put_le24(void *vmem, MEM_VALUE_T val) { MAU_T *mem = (MAU_T *)vmem; - mem[0] = (val >> 0) & 0xff; - mem[1] = (val >> 8) & 0xff; - mem[2] = (val >> 16) & 0xff; + mem[0] = (MAU_T)((val >> 0) & 0xff); + mem[1] = (MAU_T)((val >> 8) & 0xff); + mem[2] = (MAU_T)((val >> 16) & 0xff); } #undef mem_put_le32 @@ -217,10 +217,10 @@ static VPX_INLINE void mem_put_le24(void *vmem, MEM_VALUE_T val) { static VPX_INLINE void mem_put_le32(void *vmem, MEM_VALUE_T val) { MAU_T *mem = (MAU_T *)vmem; - mem[0] = (val >> 0) & 0xff; - mem[1] = (val >> 8) & 0xff; - mem[2] = (val >> 16) & 0xff; - mem[3] = (val >> 24) & 0xff; + mem[0] = (MAU_T)((val >> 0) & 0xff); + mem[1] = (MAU_T)((val >> 8) & 0xff); + mem[2] = (MAU_T)((val >> 16) & 0xff); + mem[3] = (MAU_T)((val >> 24) & 0xff); } #endif // VPX_PORTS_MEM_OPS_H_ diff --git a/libvpx/vpx_ports/mem_ops_aligned.h b/libvpx/vpx_ports/mem_ops_aligned.h index c16111fec..46f61738b 100644 --- a/libvpx/vpx_ports/mem_ops_aligned.h +++ b/libvpx/vpx_ports/mem_ops_aligned.h @@ -28,8 +28,8 @@ * could redefine these macros. 
*/ #define swap_endian_16(val,raw) do {\ - val = ((raw>>8) & 0x00ff) \ - | ((raw<<8) & 0xff00);\ + val = (uint16_t)(((raw>>8) & 0x00ff) \ + | ((raw<<8) & 0xff00));\ } while(0) #define swap_endian_32(val,raw) do {\ val = ((raw>>24) & 0x000000ff) \ diff --git a/libvpx/vpx_ports/vpx_once.h b/libvpx/vpx_ports/vpx_once.h index f1df39434..da04db459 100644 --- a/libvpx/vpx_ports/vpx_once.h +++ b/libvpx/vpx_ports/vpx_once.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebM project authors. All Rights Reserved. + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -13,63 +13,83 @@ #include "vpx_config.h" +/* Implement a function wrapper to guarantee initialization + * thread-safety for library singletons. + * + * NOTE: These functions use static locks, and can only be + * used with one common argument per compilation unit. So + * + * file1.c: + * vpx_once(foo); + * ... + * vpx_once(foo); + * + * file2.c: + * vpx_once(bar); + * + * will ensure foo() and bar() are each called only once, but in + * + * file1.c: + * vpx_once(foo); + * vpx_once(bar): + * + * bar() will never be called because the lock is used up + * by the call to foo(). + */ + #if CONFIG_MULTITHREAD && defined(_WIN32) #include <windows.h> #include <stdlib.h> +/* Declare a per-compilation-unit state variable to track the progress + * of calling func() only once. This must be at global scope because + * local initializers are not thread-safe in MSVC prior to Visual + * Studio 2015. + * + * As a static, once_state will be zero-initialized as program start. + */ +static LONG once_state; static void once(void (*func)(void)) { - static CRITICAL_SECTION *lock; - static LONG waiters; - static int done; - void *lock_ptr = &lock; - - /* If the initialization is complete, return early. 
This isn't just an - * optimization, it prevents races on the destruction of the global - * lock. + /* Try to advance once_state from its initial value of 0 to 1. + * Only one thread can succeed in doing so. */ - if(done) + if (InterlockedCompareExchange(&once_state, 1, 0) == 0) { + /* We're the winning thread, having set once_state to 1. + * Call our function. */ + func(); + /* Now advance once_state to 2, unblocking any other threads. */ + InterlockedIncrement(&once_state); return; - - InterlockedIncrement(&waiters); - - /* Get a lock. We create one and try to make it the one-true-lock, - * throwing it away if we lost the race. - */ - - { - /* Scope to protect access to new_lock */ - CRITICAL_SECTION *new_lock = malloc(sizeof(CRITICAL_SECTION)); - InitializeCriticalSection(new_lock); - if (InterlockedCompareExchangePointer(lock_ptr, new_lock, NULL) != NULL) - { - DeleteCriticalSection(new_lock); - free(new_lock); - } } - /* At this point, we have a lock that can be synchronized on. We don't - * care which thread actually performed the allocation. + /* We weren't the winning thread, but we want to block on + * the state variable so we don't return before func() + * has finished executing elsewhere. + * + * Try to advance once_state from 2 to 2, which is only possible + * after the winning thead advances it from 1 to 2. */ - - EnterCriticalSection(lock); - - if (!done) - { - func(); - done = 1; + while (InterlockedCompareExchange(&once_state, 2, 2) != 2) { + /* State isn't yet 2. Try again. + * + * We are used for singleton initialization functions, + * which should complete quickly. Contention will likewise + * be rare, so it's worthwhile to use a simple but cpu- + * intensive busy-wait instead of successive backoff, + * waiting on a kernel object, or another heavier-weight scheme. + * + * We can at least yield our timeslice. + */ + Sleep(0); } - LeaveCriticalSection(lock); - - /* Last one out should free resources. 
The destructed objects are - * protected by checking if(done) above. + /* We've seen once_state advance to 2, so we know func() + * has been called. And we've left once_state as we found it, + * so other threads will have the same experience. + * + * It's safe to return now. */ - if(!InterlockedDecrement(&waiters)) - { - DeleteCriticalSection(lock); - free(lock); - lock = NULL; - } + return; } diff --git a/libvpx/vpx_ports/x86.h b/libvpx/vpx_ports/x86.h index 5da346e58..bae25ac34 100644 --- a/libvpx/vpx_ports/x86.h +++ b/libvpx/vpx_ports/x86.h @@ -12,6 +12,11 @@ #ifndef VPX_PORTS_X86_H_ #define VPX_PORTS_X86_H_ #include <stdlib.h> + +#if defined(_MSC_VER) +#include <intrin.h> /* For __cpuidex, __rdtsc */ +#endif + #include "vpx_config.h" #include "vpx/vpx_integer.h" @@ -77,16 +82,12 @@ typedef enum { #else /* end __SUNPRO__ */ #if ARCH_X86_64 #if defined(_MSC_VER) && _MSC_VER > 1500 -void __cpuidex(int CPUInfo[4], int info_type, int ecxvalue); -#pragma intrinsic(__cpuidex) #define cpuid(func, func2, a, b, c, d) do {\ int regs[4];\ __cpuidex(regs, func, func2); \ a = regs[0]; b = regs[1]; c = regs[2]; d = regs[3];\ } while(0) #else -void __cpuid(int CPUInfo[4], int info_type); -#pragma intrinsic(__cpuid) #define cpuid(func, func2, a, b, c, d) do {\ int regs[4];\ __cpuid(regs, func); \ @@ -172,7 +173,7 @@ x86_simd_caps(void) { env = getenv("VPX_SIMD_CAPS_MASK"); if (env && *env) - mask = strtol(env, NULL, 0); + mask = (unsigned int)strtoul(env, NULL, 0); /* Ensure that the CPUID instruction supports extended features */ cpuid(0, 0, max_cpuid_val, reg_ebx, reg_ecx, reg_edx); @@ -212,10 +213,11 @@ x86_simd_caps(void) { return flags & mask; } -#if ARCH_X86_64 && defined(_MSC_VER) -unsigned __int64 __rdtsc(void); -#pragma intrinsic(__rdtsc) -#endif +// Note: +// 32-bit CPU cycle counter is light-weighted for most function performance +// measurement. For large function (CPU time > a couple of seconds), 64-bit +// counter should be used. 
+// 32-bit CPU cycle counter static INLINE unsigned int x86_readtsc(void) { #if defined(__GNUC__) && __GNUC__ @@ -234,7 +236,25 @@ x86_readtsc(void) { #endif #endif } - +// 64-bit CPU cycle counter +static INLINE uint64_t +x86_readtsc64(void) { +#if defined(__GNUC__) && __GNUC__ + uint32_t hi, lo; + __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi)); + return ((uint64_t)hi << 32) | lo; +#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) + uint_t hi, lo; + asm volatile("rdtsc\n\t" : "=a"(lo), "=d"(hi)); + return ((uint64_t)hi << 32) | lo; +#else +#if ARCH_X86_64 + return (uint64_t)__rdtsc(); +#else + __asm rdtsc; +#endif +#endif +} #if defined(__GNUC__) && __GNUC__ #define x86_pause_hint()\ diff --git a/libvpx/vpx_ports/x86_abi_support.asm b/libvpx/vpx_ports/x86_abi_support.asm index c94b76a06..708fa101c 100644 --- a/libvpx/vpx_ports/x86_abi_support.asm +++ b/libvpx/vpx_ports/x86_abi_support.asm @@ -189,7 +189,6 @@ %if ABI_IS_32BIT %if CONFIG_PIC=1 %ifidn __OUTPUT_FORMAT__,elf32 - %define GET_GOT_SAVE_ARG 1 %define WRT_PLT wrt ..plt %macro GET_GOT 1 extern _GLOBAL_OFFSET_TABLE_ @@ -208,7 +207,6 @@ %define RESTORE_GOT pop %1 %endmacro %elifidn __OUTPUT_FORMAT__,macho32 - %define GET_GOT_SAVE_ARG 1 %macro GET_GOT 1 push %1 call %%get_got diff --git a/libvpx/vpx_scale/generic/yv12config.c b/libvpx/vpx_scale/generic/yv12config.c index 773921813..6bbb6d8d4 100644 --- a/libvpx/vpx_scale/generic/yv12config.c +++ b/libvpx/vpx_scale/generic/yv12config.c @@ -114,7 +114,7 @@ int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, return -2; } -#if CONFIG_VP9 || CONFIG_VP10 +#if CONFIG_VP9 // TODO(jkoleszar): Maybe replace this with struct vpx_image int vpx_free_frame_buffer(YV12_BUFFER_CONFIG *ybf) { @@ -160,29 +160,12 @@ int vpx_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, const uint64_t uvplane_size = (uv_height + 2 * uv_border_h) * (uint64_t)uv_stride + byte_alignment; -#if CONFIG_ALPHA - const int alpha_width = aligned_width; - const int alpha_height = aligned_height; - 
const int alpha_stride = y_stride; - const int alpha_border_w = border; - const int alpha_border_h = border; - const uint64_t alpha_plane_size = (alpha_height + 2 * alpha_border_h) * - (uint64_t)alpha_stride + byte_alignment; -#if CONFIG_VP9_HIGHBITDEPTH - const uint64_t frame_size = (1 + use_highbitdepth) * - (yplane_size + 2 * uvplane_size + alpha_plane_size); -#else - const uint64_t frame_size = yplane_size + 2 * uvplane_size + - alpha_plane_size; -#endif // CONFIG_VP9_HIGHBITDEPTH -#else #if CONFIG_VP9_HIGHBITDEPTH const uint64_t frame_size = (1 + use_highbitdepth) * (yplane_size + 2 * uvplane_size); #else const uint64_t frame_size = yplane_size + 2 * uvplane_size; #endif // CONFIG_VP9_HIGHBITDEPTH -#endif // CONFIG_ALPHA uint8_t *buf = NULL; @@ -203,6 +186,15 @@ int vpx_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, return -1; ybf->buffer_alloc = (uint8_t *)yv12_align_addr(fb->data, 32); + +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) + // This memset is needed for fixing the issue of using uninitialized + // value in msan test. It will cause a perf loss, so only do this for + // msan test. + memset(ybf->buffer_alloc, 0, (int)frame_size); +#endif +#endif } else if (frame_size > (size_t)ybf->buffer_alloc_sz) { // Allocation to hold larger frame, or first allocation. 
vpx_free(ybf->buffer_alloc); @@ -268,14 +260,6 @@ int vpx_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, buf + yplane_size + uvplane_size + (uv_border_h * uv_stride) + uv_border_w, vp9_byte_align); -#if CONFIG_ALPHA - ybf->alpha_width = alpha_width; - ybf->alpha_height = alpha_height; - ybf->alpha_stride = alpha_stride; - ybf->alpha_buffer = (uint8_t *)yv12_align_addr( - buf + yplane_size + 2 * uvplane_size + - (alpha_border_h * alpha_stride) + alpha_border_w, vp9_byte_align); -#endif ybf->corrupted = 0; /* assume not corrupted by errors */ return 0; } diff --git a/libvpx/vpx_scale/generic/yv12extend.c b/libvpx/vpx_scale/generic/yv12extend.c index 670144bc1..52f0aff1f 100644 --- a/libvpx/vpx_scale/generic/yv12extend.c +++ b/libvpx/vpx_scale/generic/yv12extend.c @@ -157,7 +157,7 @@ void vp8_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) { uv_border + ybf->uv_width - ybf->uv_crop_width); } -#if CONFIG_VP9 || CONFIG_VP10 +#if CONFIG_VP9 static void extend_frame(YV12_BUFFER_CONFIG *const ybf, int ext_size) { const int c_w = ybf->uv_crop_width; const int c_h = ybf->uv_crop_height; @@ -211,13 +211,13 @@ void vpx_extend_frame_inner_borders_c(YV12_BUFFER_CONFIG *ybf) { } #if CONFIG_VP9_HIGHBITDEPTH -void memcpy_short_addr(uint8_t *dst8, const uint8_t *src8, int num) { +static void memcpy_short_addr(uint8_t *dst8, const uint8_t *src8, int num) { uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); uint16_t *src = CONVERT_TO_SHORTPTR(src8); memcpy(dst, src, num * sizeof(uint16_t)); } #endif // CONFIG_VP9_HIGHBITDEPTH -#endif // CONFIG_VP9 || CONFIG_VP10 +#endif // CONFIG_VP9 // Copies the source image into the destination image and updates the // destination's UMV borders. 
diff --git a/libvpx/vpx_scale/vpx_scale_rtcd.pl b/libvpx/vpx_scale/vpx_scale_rtcd.pl index 56b952ba3..44b115c7e 100644 --- a/libvpx/vpx_scale/vpx_scale_rtcd.pl +++ b/libvpx/vpx_scale/vpx_scale_rtcd.pl @@ -22,7 +22,7 @@ add_proto qw/void vp8_yv12_copy_frame/, "const struct yv12_buffer_config *src_yb add_proto qw/void vpx_yv12_copy_y/, "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"; -if ((vpx_config("CONFIG_VP9") eq "yes") || (vpx_config("CONFIG_VP10") eq "yes")) { +if (vpx_config("CONFIG_VP9") eq "yes") { add_proto qw/void vpx_extend_frame_borders/, "struct yv12_buffer_config *ybf"; specialize qw/vpx_extend_frame_borders dspr2/; diff --git a/libvpx/vpx_util/vpx_thread.h b/libvpx/vpx_util/vpx_thread.h index de63c4da0..2062abd75 100644 --- a/libvpx/vpx_util/vpx_thread.h +++ b/libvpx/vpx_util/vpx_thread.h @@ -147,6 +147,152 @@ static INLINE int pthread_cond_wait(pthread_cond_t *const condition, pthread_mutex_lock(mutex); return !ok; } +#elif defined(__OS2__) +#define INCL_DOS +#include <os2.h> // NOLINT + +#include <errno.h> // NOLINT +#include <stdlib.h> // NOLINT +#include <sys/builtin.h> // NOLINT + +#define pthread_t TID +#define pthread_mutex_t HMTX + +typedef struct { + HEV event_sem_; + HEV ack_sem_; + volatile unsigned wait_count_; +} pthread_cond_t; + +//------------------------------------------------------------------------------ +// simplistic pthread emulation layer + +#define THREADFN void * +#define THREAD_RETURN(val) (val) + +typedef struct { + void* (*start_)(void*); + void* arg_; +} thread_arg; + +static void thread_start(void* arg) { + thread_arg targ = *(thread_arg *)arg; + free(arg); + + targ.start_(targ.arg_); +} + +static INLINE int pthread_create(pthread_t* const thread, const void* attr, + void* (*start)(void*), + void* arg) { + int tid; + thread_arg *targ = (thread_arg *)malloc(sizeof(*targ)); + if (targ == NULL) return 1; + + (void)attr; + + targ->start_ = start; + targ->arg_ = arg; + tid = 
(pthread_t)_beginthread(thread_start, NULL, 1024 * 1024, targ); + if (tid == -1) { + free(targ); + return 1; + } + + *thread = tid; + return 0; +} + +static INLINE int pthread_join(pthread_t thread, void** value_ptr) { + (void)value_ptr; + return DosWaitThread(&thread, DCWW_WAIT) != 0; +} + +// Mutex +static INLINE int pthread_mutex_init(pthread_mutex_t *const mutex, + void* mutexattr) { + (void)mutexattr; + return DosCreateMutexSem(NULL, mutex, 0, FALSE) != 0; +} + +static INLINE int pthread_mutex_trylock(pthread_mutex_t *const mutex) { + return DosRequestMutexSem(*mutex, SEM_IMMEDIATE_RETURN) == 0 ? 0 : EBUSY; +} + +static INLINE int pthread_mutex_lock(pthread_mutex_t *const mutex) { + return DosRequestMutexSem(*mutex, SEM_INDEFINITE_WAIT) != 0; +} + +static INLINE int pthread_mutex_unlock(pthread_mutex_t *const mutex) { + return DosReleaseMutexSem(*mutex) != 0; +} + +static INLINE int pthread_mutex_destroy(pthread_mutex_t *const mutex) { + return DosCloseMutexSem(*mutex) != 0; +} + +// Condition +static INLINE int pthread_cond_destroy(pthread_cond_t *const condition) { + int ok = 1; + ok &= DosCloseEventSem(condition->event_sem_) == 0; + ok &= DosCloseEventSem(condition->ack_sem_) == 0; + return !ok; +} + +static INLINE int pthread_cond_init(pthread_cond_t *const condition, + void* cond_attr) { + int ok = 1; + (void)cond_attr; + + ok &= DosCreateEventSem(NULL, &condition->event_sem_, DCE_POSTONE, FALSE) + == 0; + ok &= DosCreateEventSem(NULL, &condition->ack_sem_, DCE_POSTONE, FALSE) == 0; + if (!ok) { + pthread_cond_destroy(condition); + return 1; + } + condition->wait_count_ = 0; + return 0; +} + +static INLINE int pthread_cond_signal(pthread_cond_t *const condition) { + int ok = 1; + + if (!__atomic_cmpxchg32(&condition->wait_count_, 0, 0)) { + ok &= DosPostEventSem(condition->event_sem_) == 0; + ok &= DosWaitEventSem(condition->ack_sem_, SEM_INDEFINITE_WAIT) == 0; + } + + return !ok; +} + +static INLINE int pthread_cond_broadcast(pthread_cond_t *const 
condition) { + int ok = 1; + + while (!__atomic_cmpxchg32(&condition->wait_count_, 0, 0)) + ok &= pthread_cond_signal(condition) == 0; + + return !ok; +} + +static INLINE int pthread_cond_wait(pthread_cond_t *const condition, + pthread_mutex_t *const mutex) { + int ok = 1; + + __atomic_increment(&condition->wait_count_); + + ok &= pthread_mutex_unlock(mutex) == 0; + + ok &= DosWaitEventSem(condition->event_sem_, SEM_INDEFINITE_WAIT) == 0; + + __atomic_decrement(&condition->wait_count_); + + ok &= DosPostEventSem(condition->ack_sem_) == 0; + + pthread_mutex_lock(mutex); + + return !ok; +} #else // _WIN32 #include <pthread.h> // NOLINT # define THREADFN void* diff --git a/libvpx/vpxdec.c b/libvpx/vpxdec.c index 285d58e1e..dbe64aa94 100644 --- a/libvpx/vpxdec.c +++ b/libvpx/vpxdec.c @@ -28,7 +28,7 @@ #include "vpx_ports/mem_ops.h" #include "vpx_ports/vpx_timer.h" -#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER || CONFIG_VP10_DECODER +#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER #include "vpx/vp8dx.h" #endif @@ -257,8 +257,7 @@ static int read_frame(struct VpxDecInputContext *input, uint8_t **buf, switch (input->vpx_input_ctx->file_type) { #if CONFIG_WEBM_IO case FILE_TYPE_WEBM: - return webm_read_frame(input->webm_ctx, - buf, bytes_in_buffer, buffer_size); + return webm_read_frame(input->webm_ctx, buf, bytes_in_buffer); #endif case FILE_TYPE_RAW: return raw_read_frame(input->vpx_input_ctx->file, @@ -642,7 +641,7 @@ static int main_loop(int argc, const char **argv_) { summary = 1; else if (arg_match(&arg, &threadsarg, argi)) cfg.threads = arg_parse_uint(&arg); -#if CONFIG_VP9_DECODER || CONFIG_VP10_DECODER +#if CONFIG_VP9_DECODER else if (arg_match(&arg, &frameparallelarg, argi)) frame_parallel = 1; #endif diff --git a/libvpx/vpxenc.c b/libvpx/vpxenc.c index cb78226b3..efcf06495 100644 --- a/libvpx/vpxenc.c +++ b/libvpx/vpxenc.c @@ -32,10 +32,10 @@ #include "./ivfenc.h" #include "./tools_common.h" -#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER +#if 
CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER #include "vpx/vp8cx.h" #endif -#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER || CONFIG_VP10_DECODER +#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER #include "vpx/vp8dx.h" #endif @@ -374,21 +374,22 @@ static const int vp8_arg_ctrl_map[] = { }; #endif -#if CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER +#if CONFIG_VP9_ENCODER static const arg_def_t cpu_used_vp9 = ARG_DEF( NULL, "cpu-used", 1, "CPU Used (-8..8)"); static const arg_def_t tile_cols = ARG_DEF( NULL, "tile-columns", 1, "Number of tile columns to use, log2"); static const arg_def_t tile_rows = ARG_DEF( - NULL, "tile-rows", 1, "Number of tile rows to use, log2"); + NULL, "tile-rows", 1, + "Number of tile rows to use, log2 (set to 0 while threads > 1)"); static const arg_def_t lossless = ARG_DEF( - NULL, "lossless", 1, "Lossless mode"); + NULL, "lossless", 1, "Lossless mode (0: false (default), 1: true)"); static const arg_def_t frame_parallel_decoding = ARG_DEF( NULL, "frame-parallel", 1, "Enable frame parallel decodability features"); static const arg_def_t aq_mode = ARG_DEF( NULL, "aq-mode", 1, "Adaptive quantization mode (0: off (default), 1: variance 2: complexity, " - "3: cyclic refresh)"); + "3: cyclic refresh, 4: equator360)"); static const arg_def_t frame_periodic_boost = ARG_DEF( NULL, "frame-boost", 1, "Enable frame periodic boost (0: off (default), 1: on)"); @@ -443,6 +444,11 @@ static const struct arg_enum_list tune_content_enum[] = { static const arg_def_t tune_content = ARG_DEF_ENUM( NULL, "tune-content", 1, "Tune content type", tune_content_enum); + +static const arg_def_t target_level = ARG_DEF( + NULL, "target-level", 1, + "Target level (255: off (default); 0: only keep level stats; 10: level 1.0;" + " 11: level 1.1; ... 
62: level 6.2)"); #endif #if CONFIG_VP9_ENCODER @@ -453,7 +459,10 @@ static const arg_def_t *vp9_args[] = { &gf_cbr_boost_pct, &lossless, &frame_parallel_decoding, &aq_mode, &frame_periodic_boost, &noise_sens, &tune_content, &input_color_space, - &min_gf_interval, &max_gf_interval, + &min_gf_interval, &max_gf_interval, &target_level, +#if CONFIG_VP9_HIGHBITDEPTH + &bitdeptharg, &inbitdeptharg, +#endif // CONFIG_VP9_HIGHBITDEPTH NULL }; static const int vp9_arg_ctrl_map[] = { @@ -466,33 +475,7 @@ static const int vp9_arg_ctrl_map[] = { VP9E_SET_LOSSLESS, VP9E_SET_FRAME_PARALLEL_DECODING, VP9E_SET_AQ_MODE, VP9E_SET_FRAME_PERIODIC_BOOST, VP9E_SET_NOISE_SENSITIVITY, VP9E_SET_TUNE_CONTENT, VP9E_SET_COLOR_SPACE, - VP9E_SET_MIN_GF_INTERVAL, VP9E_SET_MAX_GF_INTERVAL, - 0 -}; -#endif - -#if CONFIG_VP10_ENCODER -static const arg_def_t *vp10_args[] = { - &cpu_used_vp9, &auto_altref, &sharpness, &static_thresh, - &tile_cols, &tile_rows, &arnr_maxframes, &arnr_strength, &arnr_type, - &tune_ssim, &cq_level, &max_intra_rate_pct, &max_inter_rate_pct, - &gf_cbr_boost_pct, &lossless, - &frame_parallel_decoding, &aq_mode, &frame_periodic_boost, - &noise_sens, &tune_content, &input_color_space, - &min_gf_interval, &max_gf_interval, - NULL -}; -static const int vp10_arg_ctrl_map[] = { - VP8E_SET_CPUUSED, VP8E_SET_ENABLEAUTOALTREF, - VP8E_SET_SHARPNESS, VP8E_SET_STATIC_THRESHOLD, - VP9E_SET_TILE_COLUMNS, VP9E_SET_TILE_ROWS, - VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH, VP8E_SET_ARNR_TYPE, - VP8E_SET_TUNING, VP8E_SET_CQ_LEVEL, VP8E_SET_MAX_INTRA_BITRATE_PCT, - VP9E_SET_MAX_INTER_BITRATE_PCT, VP9E_SET_GF_CBR_BOOST_PCT, - VP9E_SET_LOSSLESS, VP9E_SET_FRAME_PARALLEL_DECODING, VP9E_SET_AQ_MODE, - VP9E_SET_FRAME_PERIODIC_BOOST, VP9E_SET_NOISE_SENSITIVITY, - VP9E_SET_TUNE_CONTENT, VP9E_SET_COLOR_SPACE, - VP9E_SET_MIN_GF_INTERVAL, VP9E_SET_MAX_GF_INTERVAL, + VP9E_SET_MIN_GF_INTERVAL, VP9E_SET_MAX_GF_INTERVAL, VP9E_SET_TARGET_LEVEL, 0 }; #endif @@ -524,10 +507,6 @@ void usage_exit(void) { 
fprintf(stderr, "\nVP9 Specific Options:\n"); arg_show_usage(stderr, vp9_args); #endif -#if CONFIG_VP10_ENCODER - fprintf(stderr, "\nVP10 Specific Options:\n"); - arg_show_usage(stderr, vp10_args); -#endif fprintf(stderr, "\nStream timebase (--timebase):\n" " The desired precision of timestamps in the output, expressed\n" " in fractional seconds. Default is 1/1000.\n"); @@ -773,9 +752,7 @@ static int compare_img(const vpx_image_t *const img1, #define NELEMENTS(x) (sizeof(x)/sizeof(x[0])) -#if CONFIG_VP10_ENCODER -#define ARG_CTRL_CNT_MAX NELEMENTS(vp10_arg_ctrl_map) -#elif CONFIG_VP9_ENCODER +#if CONFIG_VP9_ENCODER #define ARG_CTRL_CNT_MAX NELEMENTS(vp9_arg_ctrl_map) #else #define ARG_CTRL_CNT_MAX NELEMENTS(vp8_arg_ctrl_map) @@ -783,7 +760,7 @@ static int compare_img(const vpx_image_t *const img1, #if !CONFIG_WEBM_IO typedef int stereo_format_t; -struct EbmlGlobal { int debug; }; +struct WebmOutputContext { int debug; }; #endif /* Per-stream configuration */ @@ -798,7 +775,6 @@ struct stream_config { int arg_ctrls[ARG_CTRL_CNT_MAX][2]; int arg_ctrl_cnt; int write_webm; - int have_kf_max_dist; #if CONFIG_VP9_HIGHBITDEPTH // whether to use 16bit internal buffers int use_16bit_internal; @@ -812,7 +788,7 @@ struct stream_state { struct stream_config config; FILE *file; struct rate_hist *rate_hist; - struct EbmlGlobal ebml; + struct WebmOutputContext webm_ctx; uint64_t psnr_sse_total; uint64_t psnr_samples_total; double psnr_totals[4]; @@ -943,7 +919,7 @@ static void parse_global_config(struct VpxEncoderConfig *global, char **argv) { } /* Validate global config */ if (global->passes == 0) { -#if CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER +#if CONFIG_VP9_ENCODER // Make default VP9 passes = 2 until there is a better quality 1-pass // encoder if (global->codec != NULL && global->codec->name != NULL) @@ -1055,13 +1031,13 @@ static struct stream_state *new_stream(struct VpxEncoderConfig *global, stream->config.write_webm = 1; #if CONFIG_WEBM_IO stream->config.stereo_fmt = 
STEREO_FORMAT_MONO; - stream->ebml.last_pts_ns = -1; - stream->ebml.writer = NULL; - stream->ebml.segment = NULL; + stream->webm_ctx.last_pts_ns = -1; + stream->webm_ctx.writer = NULL; + stream->webm_ctx.segment = NULL; #endif /* Allows removal of the application version from the EBML tags */ - stream->ebml.debug = global->debug; + stream->webm_ctx.debug = global->debug; /* Default lag_in_frames is 0 in realtime mode */ if (global->deadline == VPX_DL_REALTIME) @@ -1101,13 +1077,6 @@ static int parse_stream_params(struct VpxEncoderConfig *global, ctrl_args = vp9_args; ctrl_args_map = vp9_arg_ctrl_map; #endif -#if CONFIG_VP10_ENCODER - } else if (strcmp(global->codec->name, "vp10") == 0) { - // TODO(jingning): Reuse VP9 specific encoder configuration parameters. - // Consider to expand this set for VP10 encoder control. - ctrl_args = vp10_args; - ctrl_args_map = vp10_arg_ctrl_map; -#endif } for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) { @@ -1218,13 +1187,11 @@ static int parse_stream_params(struct VpxEncoderConfig *global, config->cfg.kf_min_dist = arg_parse_uint(&arg); } else if (arg_match(&arg, &kf_max_dist, argi)) { config->cfg.kf_max_dist = arg_parse_uint(&arg); - config->have_kf_max_dist = 1; } else if (arg_match(&arg, &kf_disabled, argi)) { config->cfg.kf_mode = VPX_KF_DISABLED; #if CONFIG_VP9_HIGHBITDEPTH } else if (arg_match(&arg, &test16bitinternalarg, argi)) { - if (strcmp(global->codec->name, "vp9") == 0 || - strcmp(global->codec->name, "vp10") == 0) { + if (strcmp(global->codec->name, "vp9") == 0) { test_16bit_internal = 1; } #endif @@ -1258,8 +1225,7 @@ static int parse_stream_params(struct VpxEncoderConfig *global, } } #if CONFIG_VP9_HIGHBITDEPTH - if (strcmp(global->codec->name, "vp9") == 0 || - strcmp(global->codec->name, "vp10") == 0) { + if (strcmp(global->codec->name, "vp9") == 0) { config->use_16bit_internal = test_16bit_internal | (config->cfg.g_profile > 1); } @@ -1346,19 +1312,6 @@ static void set_stream_dimensions(struct 
stream_state *stream, } } - -static void set_default_kf_interval(struct stream_state *stream, - struct VpxEncoderConfig *global) { - /* Use a max keyframe interval of 5 seconds, if none was - * specified on the command line. - */ - if (!stream->config.have_kf_max_dist) { - double framerate = (double)global->framerate.num / global->framerate.den; - if (framerate > 0.0) - stream->config.cfg.kf_max_dist = (unsigned int)(5.0 * framerate); - } -} - static const char* file_type_to_string(enum VideoFileType t) { switch (t) { case FILE_TYPE_RAW: return "RAW"; @@ -1457,13 +1410,15 @@ static void open_output_file(struct stream_state *stream, #if CONFIG_WEBM_IO if (stream->config.write_webm) { - stream->ebml.stream = stream->file; - write_webm_file_header(&stream->ebml, cfg, + stream->webm_ctx.stream = stream->file; + write_webm_file_header(&stream->webm_ctx, cfg, &global->framerate, stream->config.stereo_fmt, global->codec->fourcc, pixel_aspect_ratio); } +#else + (void)pixel_aspect_ratio; #endif if (!stream->config.write_webm) { @@ -1481,7 +1436,7 @@ static void close_output_file(struct stream_state *stream, #if CONFIG_WEBM_IO if (stream->config.write_webm) { - write_webm_file_footer(&stream->ebml); + write_webm_file_footer(&stream->webm_ctx); } #endif @@ -1708,7 +1663,7 @@ static void get_cx_data(struct stream_state *stream, update_rate_histogram(stream->rate_hist, cfg, pkt); #if CONFIG_WEBM_IO if (stream->config.write_webm) { - write_webm_block(&stream->ebml, cfg, pkt); + write_webm_block(&stream->webm_ctx, cfg, pkt); } #endif if (!stream->config.write_webm) { @@ -1996,7 +1951,7 @@ int main(int argc, const char **argv_) { usage_exit(); /* Decide if other chroma subsamplings than 4:2:0 are supported */ - if (global.codec->fourcc == VP9_FOURCC || global.codec->fourcc == VP10_FOURCC) + if (global.codec->fourcc == VP9_FOURCC) input.only_i420 = 0; for (pass = global.pass ? 
global.pass - 1 : 0; pass < global.passes; pass++) { @@ -2060,9 +2015,11 @@ int main(int argc, const char **argv_) { #if !CONFIG_WEBM_IO FOREACH_STREAM({ - stream->config.write_webm = 0; - warn("vpxenc was compiled without WebM container support." - "Producing IVF output"); + if (stream->config.write_webm) { + stream->config.write_webm = 0; + warn("vpxenc was compiled without WebM container support." + "Producing IVF output"); + } }); #endif @@ -2072,10 +2029,10 @@ int main(int argc, const char **argv_) { if (!global.have_framerate) { global.framerate.num = input.framerate.numerator; global.framerate.den = input.framerate.denominator; + FOREACH_STREAM(stream->config.cfg.g_timebase.den = global.framerate.num; + stream->config.cfg.g_timebase.num = global.framerate.den); } - FOREACH_STREAM(set_default_kf_interval(stream, &global)); - /* Show configuration */ if (global.verbose && pass == 0) FOREACH_STREAM(show_stream_config(stream, &global, &input)); @@ -2100,8 +2057,7 @@ int main(int argc, const char **argv_) { FOREACH_STREAM(initialize_encoder(stream, &global)); #if CONFIG_VP9_HIGHBITDEPTH - if (strcmp(global.codec->name, "vp9") == 0 || - strcmp(global.codec->name, "vp10") == 0) { + if (strcmp(global.codec->name, "vp9") == 0) { // Check to see if at least one stream uses 16 bit internal. // Currently assume that the bit_depths for all streams using // highbitdepth are the same. 
diff --git a/libvpx/vpxstats.c b/libvpx/vpxstats.c index 172d8937c..16728ce09 100644 --- a/libvpx/vpxstats.c +++ b/libvpx/vpxstats.c @@ -26,17 +26,6 @@ int stats_open_file(stats_io_t *stats, const char *fpf, int pass) { stats->buf.buf = NULL; res = (stats->file != NULL); } else { -#if USE_POSIX_MMAP - struct stat stat_buf; - int fd; - - fd = open(fpf, O_RDONLY); - stats->file = fdopen(fd, "rb"); - fstat(fd, &stat_buf); - stats->buf.sz = stat_buf.st_size; - stats->buf.buf = mmap(NULL, stats->buf.sz, PROT_READ, MAP_PRIVATE, fd, 0); - res = (stats->buf.buf != NULL); -#else size_t nbytes; stats->file = fopen(fpf, "rb"); @@ -58,7 +47,6 @@ int stats_open_file(stats_io_t *stats, const char *fpf, int pass) { nbytes = fread(stats->buf.buf, 1, stats->buf.sz, stats->file); res = (nbytes == stats->buf.sz); -#endif /* USE_POSIX_MMAP */ } return res; @@ -82,11 +70,7 @@ int stats_open_mem(stats_io_t *stats, int pass) { void stats_close(stats_io_t *stats, int last_pass) { if (stats->file) { if (stats->pass == last_pass) { -#if USE_POSIX_MMAP - munmap(stats->buf.buf, stats->buf.sz); -#else free(stats->buf.buf); -#endif /* USE_POSIX_MMAP */ } fclose(stats->file); diff --git a/libvpx/webmdec.cc b/libvpx/webmdec.cc index f541cfecc..36dbd92bf 100644 --- a/libvpx/webmdec.cc +++ b/libvpx/webmdec.cc @@ -13,8 +13,8 @@ #include <cstring> #include <cstdio> -#include "third_party/libwebm/mkvparser.hpp" -#include "third_party/libwebm/mkvreader.hpp" +#include "third_party/libwebm/mkvparser/mkvparser.h" +#include "third_party/libwebm/mkvparser/mkvreader.h" namespace { @@ -103,8 +103,6 @@ int file_is_webm(struct WebmInputContext *webm_ctx, vpx_ctx->fourcc = VP8_FOURCC; } else if (!strncmp(video_track->GetCodecId(), "V_VP9", 5)) { vpx_ctx->fourcc = VP9_FOURCC; - } else if (!strncmp(video_track->GetCodecId(), "V_VP10", 6)) { - vpx_ctx->fourcc = VP10_FOURCC; } else { rewind_and_reset(webm_ctx, vpx_ctx); return 0; @@ -122,7 +120,6 @@ int file_is_webm(struct WebmInputContext *webm_ctx, int 
webm_read_frame(struct WebmInputContext *webm_ctx, uint8_t **buffer, - size_t *bytes_in_buffer, size_t *buffer_size) { // This check is needed for frame parallel decoding, in which case this // function could be called even after it has reached end of input stream. @@ -147,7 +144,7 @@ int webm_read_frame(struct WebmInputContext *webm_ctx, } else if (block_entry_eos || block_entry->EOS()) { cluster = segment->GetNext(cluster); if (cluster == NULL || cluster->EOS()) { - *bytes_in_buffer = 0; + *buffer_size = 0; webm_ctx->reached_eos = 1; return 1; } @@ -164,7 +161,7 @@ int webm_read_frame(struct WebmInputContext *webm_ctx, } get_new_block = true; } - if (status) { + if (status || block_entry == NULL) { return -1; } if (get_new_block) { @@ -187,10 +184,9 @@ int webm_read_frame(struct WebmInputContext *webm_ctx, if (*buffer == NULL) { return -1; } - *buffer_size = frame.len; webm_ctx->buffer = *buffer; } - *bytes_in_buffer = frame.len; + *buffer_size = frame.len; webm_ctx->timestamp_ns = block->GetTime(cluster); webm_ctx->is_key_frame = block->IsKey(); @@ -203,10 +199,9 @@ int webm_guess_framerate(struct WebmInputContext *webm_ctx, struct VpxInputContext *vpx_ctx) { uint32_t i = 0; uint8_t *buffer = NULL; - size_t bytes_in_buffer = 0; size_t buffer_size = 0; while (webm_ctx->timestamp_ns < 1000000000 && i < 50) { - if (webm_read_frame(webm_ctx, &buffer, &bytes_in_buffer, &buffer_size)) { + if (webm_read_frame(webm_ctx, &buffer, &buffer_size)) { break; } ++i; diff --git a/libvpx/webmdec.h b/libvpx/webmdec.h index 7d1638035..aa371f321 100644 --- a/libvpx/webmdec.h +++ b/libvpx/webmdec.h @@ -42,22 +42,18 @@ int file_is_webm(struct WebmInputContext *webm_ctx, // Reads a WebM Video Frame. Memory for the buffer is created, owned and managed // by this function. For the first call, |buffer| should be NULL and -// |*bytes_in_buffer| should be 0. Once all the frames are read and used, +// |*buffer_size| should be 0. 
Once all the frames are read and used, // webm_free() should be called, otherwise there will be a leak. // Parameters: // webm_ctx - WebmInputContext object // buffer - pointer where the frame data will be filled. -// bytes_in_buffer - pointer to buffer size. -// buffer_size - unused TODO(vigneshv): remove this +// buffer_size - pointer to buffer size. // Return values: // 0 - Success // 1 - End of Stream // -1 - Error -// TODO(vigneshv): Make the return values consistent across all functions in -// this file. int webm_read_frame(struct WebmInputContext *webm_ctx, uint8_t **buffer, - size_t *bytes_in_buffer, size_t *buffer_size); // Guesses the frame rate of the input file based on the container timestamps. diff --git a/libvpx/webmenc.cc b/libvpx/webmenc.cc index d41e70044..9929969a0 100644 --- a/libvpx/webmenc.cc +++ b/libvpx/webmenc.cc @@ -11,22 +11,22 @@ #include <string> -#include "third_party/libwebm/mkvmuxer.hpp" -#include "third_party/libwebm/mkvmuxerutil.hpp" -#include "third_party/libwebm/mkvwriter.hpp" +#include "third_party/libwebm/mkvmuxer/mkvmuxer.h" +#include "third_party/libwebm/mkvmuxer/mkvmuxerutil.h" +#include "third_party/libwebm/mkvmuxer/mkvwriter.h" namespace { const uint64_t kDebugTrackUid = 0xDEADBEEF; const int kVideoTrackNumber = 1; } // namespace -void write_webm_file_header(struct EbmlGlobal *glob, +void write_webm_file_header(struct WebmOutputContext *webm_ctx, const vpx_codec_enc_cfg_t *cfg, const struct vpx_rational *fps, stereo_format_t stereo_fmt, unsigned int fourcc, const struct VpxRational *par) { - mkvmuxer::MkvWriter *const writer = new mkvmuxer::MkvWriter(glob->stream); + mkvmuxer::MkvWriter *const writer = new mkvmuxer::MkvWriter(webm_ctx->stream); mkvmuxer::Segment *const segment = new mkvmuxer::Segment(); segment->Init(writer); segment->set_mode(mkvmuxer::Segment::kFile); @@ -36,7 +36,7 @@ void write_webm_file_header(struct EbmlGlobal *glob, const uint64_t kTimecodeScale = 1000000; info->set_timecode_scale(kTimecodeScale); 
std::string version = "vpxenc"; - if (!glob->debug) { + if (!webm_ctx->debug) { version.append(std::string(" ") + vpx_codec_version_str()); } info->set_writing_app(version.c_str()); @@ -55,13 +55,8 @@ void write_webm_file_header(struct EbmlGlobal *glob, codec_id = "V_VP8"; break; case VP9_FOURCC: - codec_id = "V_VP9"; - break; - case VP10_FOURCC: - codec_id = "V_VP10"; - break; default: - codec_id = "V_VP10"; + codec_id = "V_VP9"; break; } video_track->set_codec_id(codec_id); @@ -74,23 +69,23 @@ void write_webm_file_header(struct EbmlGlobal *glob, video_track->set_display_width(display_width); video_track->set_display_height(cfg->g_h); } - if (glob->debug) { + if (webm_ctx->debug) { video_track->set_uid(kDebugTrackUid); } - glob->writer = writer; - glob->segment = segment; + webm_ctx->writer = writer; + webm_ctx->segment = segment; } -void write_webm_block(struct EbmlGlobal *glob, +void write_webm_block(struct WebmOutputContext *webm_ctx, const vpx_codec_enc_cfg_t *cfg, const vpx_codec_cx_pkt_t *pkt) { mkvmuxer::Segment *const segment = - reinterpret_cast<mkvmuxer::Segment*>(glob->segment); + reinterpret_cast<mkvmuxer::Segment*>(webm_ctx->segment); int64_t pts_ns = pkt->data.frame.pts * 1000000000ll * cfg->g_timebase.num / cfg->g_timebase.den; - if (pts_ns <= glob->last_pts_ns) - pts_ns = glob->last_pts_ns + 1000000; - glob->last_pts_ns = pts_ns; + if (pts_ns <= webm_ctx->last_pts_ns) + pts_ns = webm_ctx->last_pts_ns + 1000000; + webm_ctx->last_pts_ns = pts_ns; segment->AddFrame(static_cast<uint8_t*>(pkt->data.frame.buf), pkt->data.frame.sz, @@ -99,14 +94,14 @@ void write_webm_block(struct EbmlGlobal *glob, pkt->data.frame.flags & VPX_FRAME_IS_KEY); } -void write_webm_file_footer(struct EbmlGlobal *glob) { +void write_webm_file_footer(struct WebmOutputContext *webm_ctx) { mkvmuxer::MkvWriter *const writer = - reinterpret_cast<mkvmuxer::MkvWriter*>(glob->writer); + reinterpret_cast<mkvmuxer::MkvWriter*>(webm_ctx->writer); mkvmuxer::Segment *const segment = - 
reinterpret_cast<mkvmuxer::Segment*>(glob->segment); + reinterpret_cast<mkvmuxer::Segment*>(webm_ctx->segment); segment->Finalize(); delete segment; delete writer; - glob->writer = NULL; - glob->segment = NULL; + webm_ctx->writer = NULL; + webm_ctx->segment = NULL; } diff --git a/libvpx/webmenc.h b/libvpx/webmenc.h index c255d3de6..ad30664e3 100644 --- a/libvpx/webmenc.h +++ b/libvpx/webmenc.h @@ -20,8 +20,7 @@ extern "C" { #endif -/* TODO(vigneshv): Rename this struct */ -struct EbmlGlobal { +struct WebmOutputContext { int debug; FILE *stream; int64_t last_pts_ns; @@ -38,18 +37,18 @@ typedef enum stereo_format { STEREO_FORMAT_RIGHT_LEFT = 11 } stereo_format_t; -void write_webm_file_header(struct EbmlGlobal *glob, +void write_webm_file_header(struct WebmOutputContext *webm_ctx, const vpx_codec_enc_cfg_t *cfg, const struct vpx_rational *fps, stereo_format_t stereo_fmt, unsigned int fourcc, const struct VpxRational *par); -void write_webm_block(struct EbmlGlobal *glob, +void write_webm_block(struct WebmOutputContext *webm_ctx, const vpx_codec_enc_cfg_t *cfg, const vpx_codec_cx_pkt_t *pkt); -void write_webm_file_footer(struct EbmlGlobal *glob); +void write_webm_file_footer(struct WebmOutputContext *webm_ctx); #ifdef __cplusplus } // extern "C" |