diff options
author | Torne (Richard Coles) <torne@google.com> | 2014-06-24 11:04:27 +0100 |
---|---|---|
committer | Torne (Richard Coles) <torne@google.com> | 2014-06-24 11:04:27 +0100 |
commit | c1633d58a5bb3344df388ccd1c12445a6dfd3098 (patch) | |
tree | 2ae99634b3c1b3d9d69eb6e531455175a81dda6c | |
parent | d77dc4514a925c51ea9a72901526e45e361f55c8 (diff) | |
parent | db9ac6c76553d95d7eb35e2bcf84c16a7901c3c3 (diff) | |
download | libvpx-c1633d58a5bb3344df388ccd1c12445a6dfd3098.tar.gz |
Merge third_party/libvpx from https://chromium.googlesource.com/chromium/deps/libvpx.git at db9ac6c76553d95d7eb35e2bcf84c16a7901c3c3
This commit was generated by merge_from_chromium.py.
Change-Id: Idb617399bc04dbbe59bbf8e499a42d67308f92c3
165 files changed, 3963 insertions, 2637 deletions
diff --git a/README.chromium b/README.chromium index 8f30d03..569b4b9 100644 --- a/README.chromium +++ b/README.chromium @@ -5,9 +5,9 @@ License: BSD License File: source/libvpx/LICENSE Security Critical: yes -Date: Thursday May 8 2014 +Date: Friday May 16 2014 Branch: master -Commit: 91344f0a36f83d73af1f5325be792235eb021802 +Commit: ed83c2a94c8664a6d2e54b21771c0560b2bb90ac Description: Contains the sources used to compile libvpx binaries used by Google Chrome and diff --git a/generate_gypi.sh b/generate_gypi.sh index 6ca1b49..0a58086 100755 --- a/generate_gypi.sh +++ b/generate_gypi.sh @@ -227,9 +227,13 @@ function make_clean { # Lint a pair of vpx_config.h and vpx_config.asm to make sure they match. # $1 - Header file directory. function lint_config { - $BASE_DIR/lint_config.sh \ - -h $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vpx_config.h \ - -a $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vpx_config.asm + # mips does not contain any assembly so the header does not need to be + # compared to the asm. + if [[ "$1" != *mipsel ]]; then + $BASE_DIR/lint_config.sh \ + -h $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vpx_config.h \ + -a $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vpx_config.asm + fi } # Print the configuration. 
@@ -330,9 +334,10 @@ echo "Generate Config Files" all_platforms="--enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --disable-avx2" gen_config_files linux/ia32 "--target=x86-linux-gcc --disable-ccache --enable-pic --enable-realtime-only ${all_platforms}" gen_config_files linux/x64 "--target=x86_64-linux-gcc --disable-ccache --enable-pic --enable-realtime-only ${all_platforms}" -gen_config_files linux/arm "--target=armv6-linux-gcc --enable-pic --enable-realtime-only --disable-install-bins --disable-install-libs ${all_platforms}" -gen_config_files linux/arm-neon "--target=armv7-linux-gcc --enable-pic --enable-realtime-only ${all_platforms}" -gen_config_files linux/arm-neon-cpu-detect "--target=armv7-linux-gcc --enable-pic --enable-realtime-only --enable-runtime-cpu-detect ${all_platforms}" +gen_config_files linux/arm "--target=armv6-linux-gcc --enable-pic --enable-realtime-only --disable-install-bins --disable-install-libs --disable-edsp ${all_platforms}" +gen_config_files linux/arm-neon "--target=armv7-linux-gcc --enable-pic --enable-realtime-only --disable-edsp ${all_platforms}" +gen_config_files linux/arm-neon-cpu-detect "--target=armv7-linux-gcc --enable-pic --enable-realtime-only --enable-runtime-cpu-detect --disable-edsp ${all_platforms}" +gen_config_files linux/arm64 "--force-target=armv8-linux-gcc --enable-pic --enable-realtime-only --disable-edsp ${all_platforms}" gen_config_files linux/mipsel "--target=mips32-linux-gcc --disable-fast-unaligned ${all_platforms}" gen_config_files linux/generic "--target=generic-gnu --enable-pic --enable-realtime-only ${all_platforms}" gen_config_files win/ia32 "--target=x86-win32-vs12 --enable-realtime-only ${all_platforms}" @@ -351,6 +356,8 @@ lint_config linux/x64 lint_config linux/arm lint_config linux/arm-neon lint_config linux/arm-neon-cpu-detect +lint_config linux/arm64 +lint_config 
linux/mipsel lint_config linux/generic lint_config win/ia32 lint_config win/x64 @@ -369,6 +376,7 @@ gen_rtcd_header linux/x64 x86_64 gen_rtcd_header linux/arm armv6 gen_rtcd_header linux/arm-neon armv7 gen_rtcd_header linux/arm-neon-cpu-detect armv7 +gen_rtcd_header linux/arm64 armv8 gen_rtcd_header linux/mipsel mipsel gen_rtcd_header linux/generic generic gen_rtcd_header win/ia32 x86 @@ -414,6 +422,12 @@ make_clean make libvpx_srcs.txt target=libs $config > /dev/null convert_srcs_to_gypi libvpx_srcs.txt libvpx_srcs_arm_neon_cpu_detect +echo "Generate ARM64 source list." +config=$(print_config linux/arm64) +make_clean +make libvpx_srcs.txt target=libs $config > /dev/null +convert_srcs_to_gypi libvpx_srcs.txt libvpx_srcs_arm64 + echo "Generate MIPS source list." config=$(print_config_basic linux/mipsel) make_clean @@ -29,7 +29,7 @@ ], }], ['target_arch=="arm64"', { - 'target_arch_full': 'generic', + 'target_arch_full': 'arm64', }], ], }], @@ -53,7 +53,7 @@ 'variables': { 'conditions': [ ['OS=="win" and buildtype=="Official"', { - # Do not set to 'size', as it results in an error on win64. + # Do not set to 'size', as it results in an error on win64. 'optimize' :'speed', }], ], @@ -132,7 +132,7 @@ ], }], ['target_arch=="arm64"', { - 'includes': [ 'libvpx_srcs_generic.gypi', ], + 'includes': [ 'libvpx_srcs_arm64.gypi', ], }], ['target_arch=="x64"', { 'conditions': [ @@ -280,14 +280,6 @@ 'ads2gas_script_path': '<(libvpx_source)/build/make/<(ads2gas_script)', 'ads2gas_script_include': '<(libvpx_source)/build/make/thumb.pm', }, - # We need to explicitly tell the assembler to look for - # .include directive files from the place where they're - # generated to. 
- 'cflags': [ - '-Wa,-I,<!(pwd)/source/config/<(OS_CATEGORY)/<(target_arch_full)', - '-Wa,-I,<!(pwd)/source/config', - '-Wa,-I,<(shared_generated_dir)', - ], 'xcode_settings': { 'OTHER_CFLAGS': [ '-I<!(pwd)/source/config/<(OS_CATEGORY)/<(target_arch_full)', @@ -305,7 +297,21 @@ '<(libvpx_source)', ], }, + # We need to explicitly tell the assembler to look for + # .include directive files from the place where they're + # generated to. + 'cflags': [ + '-Wa,-I,<(shared_generated_dir)', + ], 'conditions': [ + # For Android WebView, the following pathc are not required and not + # allowed, because they generate the absolute path. + ['android_webview_build!=1', { + 'cflags': [ + '-Wa,-I,<!(pwd)/source/config/<(OS_CATEGORY)/<(target_arch_full)', + '-Wa,-I,<!(pwd)/source/config', + ], + }], # Libvpx optimizations for ARMv6 or ARMv7 without NEON. ['arm_neon==0', { 'conditions': [ @@ -457,7 +463,7 @@ ['android_webview_build==1', { # pass the empty string for 3rd and 4th arguments of # intermediates-dir-for macro. - 'lib_intermediate_name' : '$(realpath $(call intermediates-dir-for, STATIC_LIBRARIES, libvpx_asm_offsets_vp8,,, $(GYP_VAR_PREFIX)))/libvpx_asm_offsets_vp8.a', + 'lib_intermediate_name' : '$(abspath $(call intermediates-dir-for,STATIC_LIBRARIES,libvpx_asm_offsets_vp8,,,$(gyp_var_prefix)))/libvpx_asm_offsets_vp8.a', }], ['(target_arch=="arm" or target_arch=="armv7")', { 'output_format': 'gas', @@ -530,7 +536,7 @@ ['android_webview_build==1', { # pass the empty string for 3rd and 4th arguments of # intermediates-dir-for macro. 
- 'lib_intermediate_name' : '<(android_src)/$(call intermediates-dir-for, STATIC_LIBRARIES, libvpx_asm_offsets_vpx_scale,,, $(GYP_VAR_PREFIX))/libvpx_asm_offsets_vpx_scale.a', + 'lib_intermediate_name' : '$(abspath $(call intermediates-dir-for,STATIC_LIBRARIES,libvpx_asm_offsets_vpx_scale,,,$(gyp_var_prefix)))/libvpx_asm_offsets_vpx_scale.a', }], ['(target_arch=="arm" or target_arch=="armv7")', { 'output_format': 'gas', diff --git a/libvpx_srcs_arm.gypi b/libvpx_srcs_arm.gypi index 904b9ed..2e6dd9b 100644 --- a/libvpx_srcs_arm.gypi +++ b/libvpx_srcs_arm.gypi @@ -33,7 +33,6 @@ '<(libvpx_source)/vp8/common/arm/dequantize_arm.c', '<(libvpx_source)/vp8/common/arm/filter_arm.c', '<(libvpx_source)/vp8/common/arm/loopfilter_arm.c', - '<(libvpx_source)/vp8/common/arm/reconintra_arm.c', '<(libvpx_source)/vp8/common/arm/variance_arm.c', '<(libvpx_source)/vp8/common/blockd.c', '<(libvpx_source)/vp8/common/blockd.h', @@ -106,21 +105,17 @@ '<(libvpx_source)/vp8/decoder/onyxd_int.h', '<(libvpx_source)/vp8/decoder/threading.c', '<(libvpx_source)/vp8/decoder/treereader.h', - '<(libvpx_source)/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm', - '<(libvpx_source)/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm', - '<(libvpx_source)/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm', - '<(libvpx_source)/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm', '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm', '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm', '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm', '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm', '<(libvpx_source)/vp8/encoder/arm/armv6/walsh_v6.asm', - '<(libvpx_source)/vp8/encoder/arm/boolhuff_arm.c', '<(libvpx_source)/vp8/encoder/arm/dct_arm.c', '<(libvpx_source)/vp8/encoder/arm/quantize_arm.c', '<(libvpx_source)/vp8/encoder/bitstream.c', '<(libvpx_source)/vp8/encoder/bitstream.h', '<(libvpx_source)/vp8/encoder/block.h', + 
'<(libvpx_source)/vp8/encoder/boolhuff.c', '<(libvpx_source)/vp8/encoder/boolhuff.h', '<(libvpx_source)/vp8/encoder/dct.c', '<(libvpx_source)/vp8/encoder/dct_value_cost.h', diff --git a/libvpx_srcs_arm64.gypi b/libvpx_srcs_arm64.gypi new file mode 100644 index 0000000..46aeedb --- /dev/null +++ b/libvpx_srcs_arm64.gypi @@ -0,0 +1,325 @@ +# This file is generated. Do not edit. +# Copyright (c) 2013 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +{ + 'sources': [ + '<(libvpx_source)/vp8/common/alloccommon.c', + '<(libvpx_source)/vp8/common/alloccommon.h', + '<(libvpx_source)/vp8/common/arm/dequantize_arm.c', + '<(libvpx_source)/vp8/common/arm/filter_arm.c', + '<(libvpx_source)/vp8/common/arm/loopfilter_arm.c', + '<(libvpx_source)/vp8/common/arm/neon/bilinearpredict_neon.c', + '<(libvpx_source)/vp8/common/arm/neon/copymem_neon.c', + '<(libvpx_source)/vp8/common/arm/neon/dc_only_idct_add_neon.c', + '<(libvpx_source)/vp8/common/arm/neon/dequant_idct_neon.c', + '<(libvpx_source)/vp8/common/arm/neon/dequantizeb_neon.c', + '<(libvpx_source)/vp8/common/arm/neon/iwalsh_neon.c', + '<(libvpx_source)/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c', + '<(libvpx_source)/vp8/common/arm/neon/mbloopfilter_neon.c', + '<(libvpx_source)/vp8/common/arm/neon/sad_neon.c', + '<(libvpx_source)/vp8/common/arm/neon/shortidct4x4llm_neon.c', + '<(libvpx_source)/vp8/common/arm/neon/sixtappredict_neon.c', + '<(libvpx_source)/vp8/common/arm/neon/variance_neon.c', + '<(libvpx_source)/vp8/common/arm/variance_arm.c', + '<(libvpx_source)/vp8/common/blockd.c', + '<(libvpx_source)/vp8/common/blockd.h', + '<(libvpx_source)/vp8/common/coefupdateprobs.h', + '<(libvpx_source)/vp8/common/common.h', + '<(libvpx_source)/vp8/common/debugmodes.c', + '<(libvpx_source)/vp8/common/default_coef_probs.h', + '<(libvpx_source)/vp8/common/dequantize.c', + '<(libvpx_source)/vp8/common/entropy.c', + 
'<(libvpx_source)/vp8/common/entropy.h', + '<(libvpx_source)/vp8/common/entropymode.c', + '<(libvpx_source)/vp8/common/entropymode.h', + '<(libvpx_source)/vp8/common/entropymv.c', + '<(libvpx_source)/vp8/common/entropymv.h', + '<(libvpx_source)/vp8/common/extend.c', + '<(libvpx_source)/vp8/common/extend.h', + '<(libvpx_source)/vp8/common/filter.c', + '<(libvpx_source)/vp8/common/filter.h', + '<(libvpx_source)/vp8/common/findnearmv.c', + '<(libvpx_source)/vp8/common/findnearmv.h', + '<(libvpx_source)/vp8/common/generic/systemdependent.c', + '<(libvpx_source)/vp8/common/header.h', + '<(libvpx_source)/vp8/common/idct_blk.c', + '<(libvpx_source)/vp8/common/idctllm.c', + '<(libvpx_source)/vp8/common/invtrans.h', + '<(libvpx_source)/vp8/common/loopfilter.c', + '<(libvpx_source)/vp8/common/loopfilter.h', + '<(libvpx_source)/vp8/common/loopfilter_filters.c', + '<(libvpx_source)/vp8/common/mbpitch.c', + '<(libvpx_source)/vp8/common/mfqe.c', + '<(libvpx_source)/vp8/common/modecont.c', + '<(libvpx_source)/vp8/common/modecont.h', + '<(libvpx_source)/vp8/common/mv.h', + '<(libvpx_source)/vp8/common/onyx.h', + '<(libvpx_source)/vp8/common/onyxc_int.h', + '<(libvpx_source)/vp8/common/onyxd.h', + '<(libvpx_source)/vp8/common/postproc.c', + '<(libvpx_source)/vp8/common/postproc.h', + '<(libvpx_source)/vp8/common/ppflags.h', + '<(libvpx_source)/vp8/common/pragmas.h', + '<(libvpx_source)/vp8/common/quant_common.c', + '<(libvpx_source)/vp8/common/quant_common.h', + '<(libvpx_source)/vp8/common/reconinter.c', + '<(libvpx_source)/vp8/common/reconinter.h', + '<(libvpx_source)/vp8/common/reconintra.c', + '<(libvpx_source)/vp8/common/reconintra4x4.c', + '<(libvpx_source)/vp8/common/reconintra4x4.h', + '<(libvpx_source)/vp8/common/rtcd.c', + '<(libvpx_source)/vp8/common/sad_c.c', + '<(libvpx_source)/vp8/common/setupintrarecon.c', + '<(libvpx_source)/vp8/common/setupintrarecon.h', + '<(libvpx_source)/vp8/common/swapyv12buffer.c', + '<(libvpx_source)/vp8/common/swapyv12buffer.h', + 
'<(libvpx_source)/vp8/common/systemdependent.h', + '<(libvpx_source)/vp8/common/threading.h', + '<(libvpx_source)/vp8/common/treecoder.c', + '<(libvpx_source)/vp8/common/treecoder.h', + '<(libvpx_source)/vp8/common/variance.h', + '<(libvpx_source)/vp8/common/variance_c.c', + '<(libvpx_source)/vp8/common/vp8_entropymodedata.h', + '<(libvpx_source)/vp8/decoder/dboolhuff.c', + '<(libvpx_source)/vp8/decoder/dboolhuff.h', + '<(libvpx_source)/vp8/decoder/decodeframe.c', + '<(libvpx_source)/vp8/decoder/decodemv.c', + '<(libvpx_source)/vp8/decoder/decodemv.h', + '<(libvpx_source)/vp8/decoder/decoderthreading.h', + '<(libvpx_source)/vp8/decoder/detokenize.c', + '<(libvpx_source)/vp8/decoder/detokenize.h', + '<(libvpx_source)/vp8/decoder/onyxd_if.c', + '<(libvpx_source)/vp8/decoder/onyxd_int.h', + '<(libvpx_source)/vp8/decoder/threading.c', + '<(libvpx_source)/vp8/decoder/treereader.h', + '<(libvpx_source)/vp8/encoder/arm/dct_arm.c', + '<(libvpx_source)/vp8/encoder/arm/neon/denoising_neon.c', + '<(libvpx_source)/vp8/encoder/arm/quantize_arm.c', + '<(libvpx_source)/vp8/encoder/bitstream.c', + '<(libvpx_source)/vp8/encoder/bitstream.h', + '<(libvpx_source)/vp8/encoder/block.h', + '<(libvpx_source)/vp8/encoder/boolhuff.c', + '<(libvpx_source)/vp8/encoder/boolhuff.h', + '<(libvpx_source)/vp8/encoder/dct.c', + '<(libvpx_source)/vp8/encoder/dct_value_cost.h', + '<(libvpx_source)/vp8/encoder/dct_value_tokens.h', + '<(libvpx_source)/vp8/encoder/defaultcoefcounts.h', + '<(libvpx_source)/vp8/encoder/denoising.c', + '<(libvpx_source)/vp8/encoder/denoising.h', + '<(libvpx_source)/vp8/encoder/encodeframe.c', + '<(libvpx_source)/vp8/encoder/encodeframe.h', + '<(libvpx_source)/vp8/encoder/encodeintra.c', + '<(libvpx_source)/vp8/encoder/encodeintra.h', + '<(libvpx_source)/vp8/encoder/encodemb.c', + '<(libvpx_source)/vp8/encoder/encodemb.h', + '<(libvpx_source)/vp8/encoder/encodemv.c', + '<(libvpx_source)/vp8/encoder/encodemv.h', + '<(libvpx_source)/vp8/encoder/ethreading.c', + 
'<(libvpx_source)/vp8/encoder/firstpass.h', + '<(libvpx_source)/vp8/encoder/lookahead.c', + '<(libvpx_source)/vp8/encoder/lookahead.h', + '<(libvpx_source)/vp8/encoder/mcomp.c', + '<(libvpx_source)/vp8/encoder/mcomp.h', + '<(libvpx_source)/vp8/encoder/modecosts.c', + '<(libvpx_source)/vp8/encoder/modecosts.h', + '<(libvpx_source)/vp8/encoder/mr_dissim.c', + '<(libvpx_source)/vp8/encoder/mr_dissim.h', + '<(libvpx_source)/vp8/encoder/onyx_if.c', + '<(libvpx_source)/vp8/encoder/onyx_int.h', + '<(libvpx_source)/vp8/encoder/pickinter.c', + '<(libvpx_source)/vp8/encoder/pickinter.h', + '<(libvpx_source)/vp8/encoder/picklpf.c', + '<(libvpx_source)/vp8/encoder/quantize.c', + '<(libvpx_source)/vp8/encoder/quantize.h', + '<(libvpx_source)/vp8/encoder/ratectrl.c', + '<(libvpx_source)/vp8/encoder/ratectrl.h', + '<(libvpx_source)/vp8/encoder/rdopt.c', + '<(libvpx_source)/vp8/encoder/rdopt.h', + '<(libvpx_source)/vp8/encoder/segmentation.c', + '<(libvpx_source)/vp8/encoder/segmentation.h', + '<(libvpx_source)/vp8/encoder/tokenize.c', + '<(libvpx_source)/vp8/encoder/tokenize.h', + '<(libvpx_source)/vp8/encoder/treewriter.c', + '<(libvpx_source)/vp8/encoder/treewriter.h', + '<(libvpx_source)/vp8/vp8_cx_iface.c', + '<(libvpx_source)/vp8/vp8_dx_iface.c', + '<(libvpx_source)/vp9/common/vp9_alloccommon.c', + '<(libvpx_source)/vp9/common/vp9_alloccommon.h', + '<(libvpx_source)/vp9/common/vp9_blockd.c', + '<(libvpx_source)/vp9/common/vp9_blockd.h', + '<(libvpx_source)/vp9/common/vp9_common.h', + '<(libvpx_source)/vp9/common/vp9_common_data.c', + '<(libvpx_source)/vp9/common/vp9_common_data.h', + '<(libvpx_source)/vp9/common/vp9_convolve.c', + '<(libvpx_source)/vp9/common/vp9_convolve.h', + '<(libvpx_source)/vp9/common/vp9_debugmodes.c', + '<(libvpx_source)/vp9/common/vp9_entropy.c', + '<(libvpx_source)/vp9/common/vp9_entropy.h', + '<(libvpx_source)/vp9/common/vp9_entropymode.c', + '<(libvpx_source)/vp9/common/vp9_entropymode.h', + '<(libvpx_source)/vp9/common/vp9_entropymv.c', + 
'<(libvpx_source)/vp9/common/vp9_entropymv.h', + '<(libvpx_source)/vp9/common/vp9_enums.h', + '<(libvpx_source)/vp9/common/vp9_filter.c', + '<(libvpx_source)/vp9/common/vp9_filter.h', + '<(libvpx_source)/vp9/common/vp9_frame_buffers.c', + '<(libvpx_source)/vp9/common/vp9_frame_buffers.h', + '<(libvpx_source)/vp9/common/vp9_idct.c', + '<(libvpx_source)/vp9/common/vp9_idct.h', + '<(libvpx_source)/vp9/common/vp9_loopfilter.c', + '<(libvpx_source)/vp9/common/vp9_loopfilter.h', + '<(libvpx_source)/vp9/common/vp9_loopfilter_filters.c', + '<(libvpx_source)/vp9/common/vp9_mv.h', + '<(libvpx_source)/vp9/common/vp9_mvref_common.c', + '<(libvpx_source)/vp9/common/vp9_mvref_common.h', + '<(libvpx_source)/vp9/common/vp9_onyxc_int.h', + '<(libvpx_source)/vp9/common/vp9_ppflags.h', + '<(libvpx_source)/vp9/common/vp9_pragmas.h', + '<(libvpx_source)/vp9/common/vp9_pred_common.c', + '<(libvpx_source)/vp9/common/vp9_pred_common.h', + '<(libvpx_source)/vp9/common/vp9_prob.c', + '<(libvpx_source)/vp9/common/vp9_prob.h', + '<(libvpx_source)/vp9/common/vp9_quant_common.c', + '<(libvpx_source)/vp9/common/vp9_quant_common.h', + '<(libvpx_source)/vp9/common/vp9_reconinter.c', + '<(libvpx_source)/vp9/common/vp9_reconinter.h', + '<(libvpx_source)/vp9/common/vp9_reconintra.c', + '<(libvpx_source)/vp9/common/vp9_reconintra.h', + '<(libvpx_source)/vp9/common/vp9_rtcd.c', + '<(libvpx_source)/vp9/common/vp9_scale.c', + '<(libvpx_source)/vp9/common/vp9_scale.h', + '<(libvpx_source)/vp9/common/vp9_scan.c', + '<(libvpx_source)/vp9/common/vp9_scan.h', + '<(libvpx_source)/vp9/common/vp9_seg_common.c', + '<(libvpx_source)/vp9/common/vp9_seg_common.h', + '<(libvpx_source)/vp9/common/vp9_systemdependent.h', + '<(libvpx_source)/vp9/common/vp9_textblit.h', + '<(libvpx_source)/vp9/common/vp9_tile_common.c', + '<(libvpx_source)/vp9/common/vp9_tile_common.h', + '<(libvpx_source)/vp9/decoder/vp9_decodeframe.c', + '<(libvpx_source)/vp9/decoder/vp9_decodeframe.h', + '<(libvpx_source)/vp9/decoder/vp9_decodemv.c', 
+ '<(libvpx_source)/vp9/decoder/vp9_decodemv.h', + '<(libvpx_source)/vp9/decoder/vp9_decoder.c', + '<(libvpx_source)/vp9/decoder/vp9_decoder.h', + '<(libvpx_source)/vp9/decoder/vp9_detokenize.c', + '<(libvpx_source)/vp9/decoder/vp9_detokenize.h', + '<(libvpx_source)/vp9/decoder/vp9_dsubexp.c', + '<(libvpx_source)/vp9/decoder/vp9_dsubexp.h', + '<(libvpx_source)/vp9/decoder/vp9_dthread.c', + '<(libvpx_source)/vp9/decoder/vp9_dthread.h', + '<(libvpx_source)/vp9/decoder/vp9_read_bit_buffer.c', + '<(libvpx_source)/vp9/decoder/vp9_read_bit_buffer.h', + '<(libvpx_source)/vp9/decoder/vp9_reader.c', + '<(libvpx_source)/vp9/decoder/vp9_reader.h', + '<(libvpx_source)/vp9/decoder/vp9_thread.c', + '<(libvpx_source)/vp9/decoder/vp9_thread.h', + '<(libvpx_source)/vp9/encoder/vp9_aq_complexity.c', + '<(libvpx_source)/vp9/encoder/vp9_aq_complexity.h', + '<(libvpx_source)/vp9/encoder/vp9_aq_cyclicrefresh.c', + '<(libvpx_source)/vp9/encoder/vp9_aq_cyclicrefresh.h', + '<(libvpx_source)/vp9/encoder/vp9_aq_variance.c', + '<(libvpx_source)/vp9/encoder/vp9_aq_variance.h', + '<(libvpx_source)/vp9/encoder/vp9_bitstream.c', + '<(libvpx_source)/vp9/encoder/vp9_bitstream.h', + '<(libvpx_source)/vp9/encoder/vp9_block.h', + '<(libvpx_source)/vp9/encoder/vp9_context_tree.c', + '<(libvpx_source)/vp9/encoder/vp9_context_tree.h', + '<(libvpx_source)/vp9/encoder/vp9_cost.c', + '<(libvpx_source)/vp9/encoder/vp9_cost.h', + '<(libvpx_source)/vp9/encoder/vp9_dct.c', + '<(libvpx_source)/vp9/encoder/vp9_encodeframe.c', + '<(libvpx_source)/vp9/encoder/vp9_encodeframe.h', + '<(libvpx_source)/vp9/encoder/vp9_encodemb.c', + '<(libvpx_source)/vp9/encoder/vp9_encodemb.h', + '<(libvpx_source)/vp9/encoder/vp9_encodemv.c', + '<(libvpx_source)/vp9/encoder/vp9_encodemv.h', + '<(libvpx_source)/vp9/encoder/vp9_encoder.c', + '<(libvpx_source)/vp9/encoder/vp9_encoder.h', + '<(libvpx_source)/vp9/encoder/vp9_extend.c', + '<(libvpx_source)/vp9/encoder/vp9_extend.h', + '<(libvpx_source)/vp9/encoder/vp9_firstpass.c', + 
'<(libvpx_source)/vp9/encoder/vp9_firstpass.h', + '<(libvpx_source)/vp9/encoder/vp9_lookahead.c', + '<(libvpx_source)/vp9/encoder/vp9_lookahead.h', + '<(libvpx_source)/vp9/encoder/vp9_mbgraph.c', + '<(libvpx_source)/vp9/encoder/vp9_mbgraph.h', + '<(libvpx_source)/vp9/encoder/vp9_mcomp.c', + '<(libvpx_source)/vp9/encoder/vp9_mcomp.h', + '<(libvpx_source)/vp9/encoder/vp9_picklpf.c', + '<(libvpx_source)/vp9/encoder/vp9_picklpf.h', + '<(libvpx_source)/vp9/encoder/vp9_pickmode.c', + '<(libvpx_source)/vp9/encoder/vp9_pickmode.h', + '<(libvpx_source)/vp9/encoder/vp9_quantize.c', + '<(libvpx_source)/vp9/encoder/vp9_quantize.h', + '<(libvpx_source)/vp9/encoder/vp9_ratectrl.c', + '<(libvpx_source)/vp9/encoder/vp9_ratectrl.h', + '<(libvpx_source)/vp9/encoder/vp9_rdopt.c', + '<(libvpx_source)/vp9/encoder/vp9_rdopt.h', + '<(libvpx_source)/vp9/encoder/vp9_resize.c', + '<(libvpx_source)/vp9/encoder/vp9_resize.h', + '<(libvpx_source)/vp9/encoder/vp9_sad.c', + '<(libvpx_source)/vp9/encoder/vp9_segmentation.c', + '<(libvpx_source)/vp9/encoder/vp9_segmentation.h', + '<(libvpx_source)/vp9/encoder/vp9_speed_features.c', + '<(libvpx_source)/vp9/encoder/vp9_speed_features.h', + '<(libvpx_source)/vp9/encoder/vp9_subexp.c', + '<(libvpx_source)/vp9/encoder/vp9_subexp.h', + '<(libvpx_source)/vp9/encoder/vp9_svc_layercontext.c', + '<(libvpx_source)/vp9/encoder/vp9_svc_layercontext.h', + '<(libvpx_source)/vp9/encoder/vp9_temporal_filter.c', + '<(libvpx_source)/vp9/encoder/vp9_temporal_filter.h', + '<(libvpx_source)/vp9/encoder/vp9_tokenize.c', + '<(libvpx_source)/vp9/encoder/vp9_tokenize.h', + '<(libvpx_source)/vp9/encoder/vp9_treewriter.c', + '<(libvpx_source)/vp9/encoder/vp9_treewriter.h', + '<(libvpx_source)/vp9/encoder/vp9_variance.c', + '<(libvpx_source)/vp9/encoder/vp9_variance.h', + '<(libvpx_source)/vp9/encoder/vp9_write_bit_buffer.c', + '<(libvpx_source)/vp9/encoder/vp9_write_bit_buffer.h', + '<(libvpx_source)/vp9/encoder/vp9_writer.c', + '<(libvpx_source)/vp9/encoder/vp9_writer.h', + 
'<(libvpx_source)/vp9/vp9_cx_iface.c', + '<(libvpx_source)/vp9/vp9_dx_iface.c', + '<(libvpx_source)/vp9/vp9_iface_common.h', + '<(libvpx_source)/vpx/internal/vpx_codec_internal.h', + '<(libvpx_source)/vpx/internal/vpx_psnr.h', + '<(libvpx_source)/vpx/src/svc_encodeframe.c', + '<(libvpx_source)/vpx/src/vpx_codec.c', + '<(libvpx_source)/vpx/src/vpx_decoder.c', + '<(libvpx_source)/vpx/src/vpx_encoder.c', + '<(libvpx_source)/vpx/src/vpx_image.c', + '<(libvpx_source)/vpx/src/vpx_psnr.c', + '<(libvpx_source)/vpx/svc_context.h', + '<(libvpx_source)/vpx/vp8.h', + '<(libvpx_source)/vpx/vp8cx.h', + '<(libvpx_source)/vpx/vp8dx.h', + '<(libvpx_source)/vpx/vpx_codec.h', + '<(libvpx_source)/vpx/vpx_decoder.h', + '<(libvpx_source)/vpx/vpx_encoder.h', + '<(libvpx_source)/vpx/vpx_frame_buffer.h', + '<(libvpx_source)/vpx/vpx_image.h', + '<(libvpx_source)/vpx/vpx_integer.h', + '<(libvpx_source)/vpx_mem/include/vpx_mem_intrnl.h', + '<(libvpx_source)/vpx_mem/vpx_mem.c', + '<(libvpx_source)/vpx_mem/vpx_mem.h', + '<(libvpx_source)/vpx_ports/arm.h', + '<(libvpx_source)/vpx_ports/arm_cpudetect.c', + '<(libvpx_source)/vpx_ports/asm_offsets.h', + '<(libvpx_source)/vpx_ports/emmintrin_compat.h', + '<(libvpx_source)/vpx_ports/mem.h', + '<(libvpx_source)/vpx_ports/mem_ops.h', + '<(libvpx_source)/vpx_ports/mem_ops_aligned.h', + '<(libvpx_source)/vpx_ports/vpx_once.h', + '<(libvpx_source)/vpx_ports/vpx_timer.h', + '<(libvpx_source)/vpx_scale/generic/gen_scalers.c', + '<(libvpx_source)/vpx_scale/generic/vpx_scale.c', + '<(libvpx_source)/vpx_scale/generic/yv12config.c', + '<(libvpx_source)/vpx_scale/generic/yv12extend.c', + '<(libvpx_source)/vpx_scale/vpx_scale.h', + '<(libvpx_source)/vpx_scale/vpx_scale_rtcd.c', + '<(libvpx_source)/vpx_scale/yv12config.h', + ], +} diff --git a/libvpx_srcs_arm_neon.gypi b/libvpx_srcs_arm_neon.gypi index 13735d3..01e9bf9 100644 --- a/libvpx_srcs_arm_neon.gypi +++ b/libvpx_srcs_arm_neon.gypi @@ -34,14 +34,13 @@ '<(libvpx_source)/vp8/common/arm/filter_arm.c', 
'<(libvpx_source)/vp8/common/arm/loopfilter_arm.c', '<(libvpx_source)/vp8/common/arm/neon/bilinearpredict_neon.c', - '<(libvpx_source)/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm', '<(libvpx_source)/vp8/common/arm/neon/copymem_neon.c', '<(libvpx_source)/vp8/common/arm/neon/dc_only_idct_add_neon.c', '<(libvpx_source)/vp8/common/arm/neon/dequant_idct_neon.c', '<(libvpx_source)/vp8/common/arm/neon/dequantizeb_neon.c', '<(libvpx_source)/vp8/common/arm/neon/idct_blk_neon.c', - '<(libvpx_source)/vp8/common/arm/neon/idct_dequant_0_2x_neon.c', - '<(libvpx_source)/vp8/common/arm/neon/idct_dequant_full_2x_neon.c', + '<(libvpx_source)/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm', + '<(libvpx_source)/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm', '<(libvpx_source)/vp8/common/arm/neon/iwalsh_neon.c', '<(libvpx_source)/vp8/common/arm/neon/loopfilter_neon.asm', '<(libvpx_source)/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c', @@ -54,7 +53,6 @@ '<(libvpx_source)/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm', '<(libvpx_source)/vp8/common/arm/neon/vp8_subpixelvariance16x16s_neon.asm', '<(libvpx_source)/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm', - '<(libvpx_source)/vp8/common/arm/reconintra_arm.c', '<(libvpx_source)/vp8/common/arm/variance_arm.c', '<(libvpx_source)/vp8/common/blockd.c', '<(libvpx_source)/vp8/common/blockd.h', @@ -127,16 +125,11 @@ '<(libvpx_source)/vp8/decoder/onyxd_int.h', '<(libvpx_source)/vp8/decoder/threading.c', '<(libvpx_source)/vp8/decoder/treereader.h', - '<(libvpx_source)/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm', - '<(libvpx_source)/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm', - '<(libvpx_source)/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm', - '<(libvpx_source)/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm', '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm', '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm', 
'<(libvpx_source)/vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm', '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm', '<(libvpx_source)/vp8/encoder/arm/armv6/walsh_v6.asm', - '<(libvpx_source)/vp8/encoder/arm/boolhuff_arm.c', '<(libvpx_source)/vp8/encoder/arm/dct_arm.c', '<(libvpx_source)/vp8/encoder/arm/neon/denoising_neon.c', '<(libvpx_source)/vp8/encoder/arm/neon/fastquantizeb_neon.asm', @@ -150,6 +143,7 @@ '<(libvpx_source)/vp8/encoder/bitstream.c', '<(libvpx_source)/vp8/encoder/bitstream.h', '<(libvpx_source)/vp8/encoder/block.h', + '<(libvpx_source)/vp8/encoder/boolhuff.c', '<(libvpx_source)/vp8/encoder/boolhuff.h', '<(libvpx_source)/vp8/encoder/dct.c', '<(libvpx_source)/vp8/encoder/dct_value_cost.h', diff --git a/libvpx_srcs_arm_neon_cpu_detect.gypi b/libvpx_srcs_arm_neon_cpu_detect.gypi index a7945f6..3a43d66 100644 --- a/libvpx_srcs_arm_neon_cpu_detect.gypi +++ b/libvpx_srcs_arm_neon_cpu_detect.gypi @@ -33,13 +33,13 @@ '<(libvpx_source)/vp8/common/arm/dequantize_arm.c', '<(libvpx_source)/vp8/common/arm/filter_arm.c', '<(libvpx_source)/vp8/common/arm/loopfilter_arm.c', - '<(libvpx_source)/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm', + '<(libvpx_source)/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm', + '<(libvpx_source)/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm', '<(libvpx_source)/vp8/common/arm/neon/loopfilter_neon.asm', '<(libvpx_source)/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm', '<(libvpx_source)/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm', '<(libvpx_source)/vp8/common/arm/neon/vp8_subpixelvariance16x16s_neon.asm', '<(libvpx_source)/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm', - '<(libvpx_source)/vp8/common/arm/reconintra_arm.c', '<(libvpx_source)/vp8/common/arm/variance_arm.c', '<(libvpx_source)/vp8/common/blockd.c', '<(libvpx_source)/vp8/common/blockd.h', @@ -112,16 +112,11 @@ '<(libvpx_source)/vp8/decoder/onyxd_int.h', '<(libvpx_source)/vp8/decoder/threading.c', 
'<(libvpx_source)/vp8/decoder/treereader.h', - '<(libvpx_source)/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm', - '<(libvpx_source)/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm', - '<(libvpx_source)/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm', - '<(libvpx_source)/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm', '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm', '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm', '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm', '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm', '<(libvpx_source)/vp8/encoder/arm/armv6/walsh_v6.asm', - '<(libvpx_source)/vp8/encoder/arm/boolhuff_arm.c', '<(libvpx_source)/vp8/encoder/arm/dct_arm.c', '<(libvpx_source)/vp8/encoder/arm/neon/fastquantizeb_neon.asm', '<(libvpx_source)/vp8/encoder/arm/neon/picklpf_arm.c', @@ -134,6 +129,7 @@ '<(libvpx_source)/vp8/encoder/bitstream.c', '<(libvpx_source)/vp8/encoder/bitstream.h', '<(libvpx_source)/vp8/encoder/block.h', + '<(libvpx_source)/vp8/encoder/boolhuff.c', '<(libvpx_source)/vp8/encoder/boolhuff.h', '<(libvpx_source)/vp8/encoder/dct.c', '<(libvpx_source)/vp8/encoder/dct_value_cost.h', diff --git a/libvpx_srcs_arm_neon_cpu_detect_intrinsics.gypi b/libvpx_srcs_arm_neon_cpu_detect_intrinsics.gypi index 07c2c32..2359023 100644 --- a/libvpx_srcs_arm_neon_cpu_detect_intrinsics.gypi +++ b/libvpx_srcs_arm_neon_cpu_detect_intrinsics.gypi @@ -19,8 +19,6 @@ '<(libvpx_source)/vp8/common/arm/neon/dequant_idct_neon.c', '<(libvpx_source)/vp8/common/arm/neon/dequantizeb_neon.c', '<(libvpx_source)/vp8/common/arm/neon/idct_blk_neon.c', - '<(libvpx_source)/vp8/common/arm/neon/idct_dequant_0_2x_neon.c', - '<(libvpx_source)/vp8/common/arm/neon/idct_dequant_full_2x_neon.c', '<(libvpx_source)/vp8/common/arm/neon/iwalsh_neon.c', '<(libvpx_source)/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c', '<(libvpx_source)/vp8/common/arm/neon/mbloopfilter_neon.c', diff --git 
a/libvpx_srcs_x86.gypi b/libvpx_srcs_x86.gypi index b274e8a..06b411a 100644 --- a/libvpx_srcs_x86.gypi +++ b/libvpx_srcs_x86.gypi @@ -309,6 +309,7 @@ '<(libvpx_source)/vp9/encoder/vp9_write_bit_buffer.h', '<(libvpx_source)/vp9/encoder/vp9_writer.c', '<(libvpx_source)/vp9/encoder/vp9_writer.h', + '<(libvpx_source)/vp9/encoder/x86/vp9_dct_mmx.asm', '<(libvpx_source)/vp9/encoder/x86/vp9_error_sse2.asm', '<(libvpx_source)/vp9/encoder/x86/vp9_sad4d_sse2.asm', '<(libvpx_source)/vp9/encoder/x86/vp9_sad_mmx.asm', diff --git a/libvpx_srcs_x86_64.gypi b/libvpx_srcs_x86_64.gypi index 2633756..f048f2c 100644 --- a/libvpx_srcs_x86_64.gypi +++ b/libvpx_srcs_x86_64.gypi @@ -312,6 +312,7 @@ '<(libvpx_source)/vp9/encoder/vp9_write_bit_buffer.h', '<(libvpx_source)/vp9/encoder/vp9_writer.c', '<(libvpx_source)/vp9/encoder/vp9_writer.h', + '<(libvpx_source)/vp9/encoder/x86/vp9_dct_mmx.asm', '<(libvpx_source)/vp9/encoder/x86/vp9_dct_ssse3.asm', '<(libvpx_source)/vp9/encoder/x86/vp9_error_sse2.asm', '<(libvpx_source)/vp9/encoder/x86/vp9_quantize_ssse3.asm', diff --git a/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h b/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h index 77cc0b7..4ab0f6b 100644 --- a/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h +++ b/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h @@ -87,9 +87,9 @@ void vp8_dc_only_idct_add_v6(short input, unsigned char *pred, int pred_stride, void vp8_dc_only_idct_add_neon(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); RTCD_EXTERN void (*vp8_dc_only_idct_add)(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); -int vp8_denoiser_filter_c(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset); -int vp8_denoiser_filter_neon(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, 
unsigned int motion_magnitude2, int y_offset, int uv_offset); -RTCD_EXTERN int (*vp8_denoiser_filter)(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset); +int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising); +int vp8_denoiser_filter_neon(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising); +RTCD_EXTERN int (*vp8_denoiser_filter)(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising); void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride); void vp8_dequant_idct_add_v6(short *input, short *dq, unsigned char *output, int stride); diff --git a/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h b/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h index e15c480..6f28ce3 100644 --- a/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h +++ b/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h @@ -279,9 +279,9 @@ void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride); void vp9_idct4x4_1_add_neon(const int16_t *input, uint8_t *dest, int dest_stride); RTCD_EXTERN void (*vp9_idct4x4_1_add)(const int16_t *input, uint8_t *dest, int dest_stride); -void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int dest_stride); -void vp9_idct8x8_10_add_neon(const int16_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vp9_idct8x8_10_add)(const int16_t *input, uint8_t *dest, int dest_stride); +void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int 
dest_stride); +void vp9_idct8x8_12_add_neon(const int16_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vp9_idct8x8_12_add)(const int16_t *input, uint8_t *dest, int dest_stride); void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride); void vp9_idct8x8_1_add_neon(const int16_t *input, uint8_t *dest, int dest_stride); @@ -745,8 +745,8 @@ static void setup_rtcd_internal(void) if (flags & HAS_NEON) vp9_idct4x4_16_add = vp9_idct4x4_16_add_neon; vp9_idct4x4_1_add = vp9_idct4x4_1_add_c; if (flags & HAS_NEON) vp9_idct4x4_1_add = vp9_idct4x4_1_add_neon; - vp9_idct8x8_10_add = vp9_idct8x8_10_add_c; - if (flags & HAS_NEON) vp9_idct8x8_10_add = vp9_idct8x8_10_add_neon; + vp9_idct8x8_12_add = vp9_idct8x8_12_add_c; + if (flags & HAS_NEON) vp9_idct8x8_12_add = vp9_idct8x8_12_add_neon; vp9_idct8x8_1_add = vp9_idct8x8_1_add_c; if (flags & HAS_NEON) vp9_idct8x8_1_add = vp9_idct8x8_1_add_neon; vp9_idct8x8_64_add = vp9_idct8x8_64_add_c; diff --git a/source/config/linux/arm-neon-cpu-detect/vpx_config.asm b/source/config/linux/arm-neon-cpu-detect/vpx_config.asm index b15c213..3fa8266 100644 --- a/source/config/linux/arm-neon-cpu-detect/vpx_config.asm +++ b/source/config/linux/arm-neon-cpu-detect/vpx_config.asm @@ -7,9 +7,10 @@ .equ ARCH_X86_64 , 0 .equ ARCH_PPC32 , 0 .equ ARCH_PPC64 , 0 -.equ HAVE_EDSP , 1 +.equ HAVE_EDSP , 0 .equ HAVE_MEDIA , 1 .equ HAVE_NEON , 1 +.equ HAVE_NEON_ASM , 1 .equ HAVE_MIPS32 , 0 .equ HAVE_DSPR2 , 0 .equ HAVE_MMX , 0 diff --git a/source/config/linux/arm-neon-cpu-detect/vpx_config.c b/source/config/linux/arm-neon-cpu-detect/vpx_config.c index 95a39b3..3467569 100644 --- a/source/config/linux/arm-neon-cpu-detect/vpx_config.c +++ b/source/config/linux/arm-neon-cpu-detect/vpx_config.c @@ -5,5 +5,5 @@ /* tree. An additional intellectual property rights grant can be found */ /* in the file PATENTS. All contributing project authors may */ /* be found in the AUTHORS file in the root of the source tree. 
*/ -static const char* const cfg = "--target=armv7-linux-gcc --enable-pic --enable-realtime-only --enable-runtime-cpu-detect --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --disable-avx2"; +static const char* const cfg = "--target=armv7-linux-gcc --enable-pic --enable-realtime-only --enable-runtime-cpu-detect --disable-edsp --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --disable-avx2"; const char *vpx_codec_build_config(void) {return cfg;} diff --git a/source/config/linux/arm-neon-cpu-detect/vpx_config.h b/source/config/linux/arm-neon-cpu-detect/vpx_config.h index 89d030b..b3d8bf0 100644 --- a/source/config/linux/arm-neon-cpu-detect/vpx_config.h +++ b/source/config/linux/arm-neon-cpu-detect/vpx_config.h @@ -16,9 +16,10 @@ #define ARCH_X86_64 0 #define ARCH_PPC32 0 #define ARCH_PPC64 0 -#define HAVE_EDSP 1 +#define HAVE_EDSP 0 #define HAVE_MEDIA 1 #define HAVE_NEON 1 +#define HAVE_NEON_ASM 1 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 #define HAVE_MMX 0 diff --git a/source/config/linux/arm-neon/vp8_rtcd.h b/source/config/linux/arm-neon/vp8_rtcd.h index a52d575..184b486 100644 --- a/source/config/linux/arm-neon/vp8_rtcd.h +++ b/source/config/linux/arm-neon/vp8_rtcd.h @@ -87,8 +87,8 @@ void vp8_dc_only_idct_add_v6(short input, unsigned char *pred, int pred_stride, void vp8_dc_only_idct_add_neon(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); #define vp8_dc_only_idct_add vp8_dc_only_idct_add_neon -int vp8_denoiser_filter_c(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset); -int vp8_denoiser_filter_neon(struct yv12_buffer_config* 
mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset); +int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising); +int vp8_denoiser_filter_neon(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising); #define vp8_denoiser_filter vp8_denoiser_filter_neon void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride); diff --git a/source/config/linux/arm-neon/vp9_rtcd.h b/source/config/linux/arm-neon/vp9_rtcd.h index a94d300..9e401dd 100644 --- a/source/config/linux/arm-neon/vp9_rtcd.h +++ b/source/config/linux/arm-neon/vp9_rtcd.h @@ -279,9 +279,9 @@ void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride); void vp9_idct4x4_1_add_neon(const int16_t *input, uint8_t *dest, int dest_stride); #define vp9_idct4x4_1_add vp9_idct4x4_1_add_neon -void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int dest_stride); -void vp9_idct8x8_10_add_neon(const int16_t *input, uint8_t *dest, int dest_stride); -#define vp9_idct8x8_10_add vp9_idct8x8_10_add_neon +void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int dest_stride); +void vp9_idct8x8_12_add_neon(const int16_t *input, uint8_t *dest, int dest_stride); +#define vp9_idct8x8_12_add vp9_idct8x8_12_add_neon void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride); void vp9_idct8x8_1_add_neon(const int16_t *input, uint8_t *dest, int dest_stride); diff --git a/source/config/linux/arm-neon/vpx_config.asm b/source/config/linux/arm-neon/vpx_config.asm index 228a6ae..190cceb 100644 --- a/source/config/linux/arm-neon/vpx_config.asm +++ 
b/source/config/linux/arm-neon/vpx_config.asm @@ -7,9 +7,10 @@ .equ ARCH_X86_64 , 0 .equ ARCH_PPC32 , 0 .equ ARCH_PPC64 , 0 -.equ HAVE_EDSP , 1 +.equ HAVE_EDSP , 0 .equ HAVE_MEDIA , 1 .equ HAVE_NEON , 1 +.equ HAVE_NEON_ASM , 1 .equ HAVE_MIPS32 , 0 .equ HAVE_DSPR2 , 0 .equ HAVE_MMX , 0 diff --git a/source/config/linux/arm-neon/vpx_config.c b/source/config/linux/arm-neon/vpx_config.c index 793f12a..c61708d 100644 --- a/source/config/linux/arm-neon/vpx_config.c +++ b/source/config/linux/arm-neon/vpx_config.c @@ -5,5 +5,5 @@ /* tree. An additional intellectual property rights grant can be found */ /* in the file PATENTS. All contributing project authors may */ /* be found in the AUTHORS file in the root of the source tree. */ -static const char* const cfg = "--target=armv7-linux-gcc --enable-pic --enable-realtime-only --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --disable-avx2"; +static const char* const cfg = "--target=armv7-linux-gcc --enable-pic --enable-realtime-only --disable-edsp --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --disable-avx2"; const char *vpx_codec_build_config(void) {return cfg;} diff --git a/source/config/linux/arm-neon/vpx_config.h b/source/config/linux/arm-neon/vpx_config.h index 2f5ddb1..97710cf 100644 --- a/source/config/linux/arm-neon/vpx_config.h +++ b/source/config/linux/arm-neon/vpx_config.h @@ -16,9 +16,10 @@ #define ARCH_X86_64 0 #define ARCH_PPC32 0 #define ARCH_PPC64 0 -#define HAVE_EDSP 1 +#define HAVE_EDSP 0 #define HAVE_MEDIA 1 #define HAVE_NEON 1 +#define HAVE_NEON_ASM 1 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 #define HAVE_MMX 0 diff --git a/source/config/linux/arm/vp8_rtcd.h b/source/config/linux/arm/vp8_rtcd.h index ab5fa41..20208cc 100644 --- 
a/source/config/linux/arm/vp8_rtcd.h +++ b/source/config/linux/arm/vp8_rtcd.h @@ -79,7 +79,7 @@ void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, u void vp8_dc_only_idct_add_v6(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); #define vp8_dc_only_idct_add vp8_dc_only_idct_add_v6 -int vp8_denoiser_filter_c(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset); +int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising); #define vp8_denoiser_filter vp8_denoiser_filter_c void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride); diff --git a/source/config/linux/arm/vp9_rtcd.h b/source/config/linux/arm/vp9_rtcd.h index 1c0f5f2..79faee9 100644 --- a/source/config/linux/arm/vp9_rtcd.h +++ b/source/config/linux/arm/vp9_rtcd.h @@ -259,8 +259,8 @@ void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int dest_stride); void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride); #define vp9_idct4x4_1_add vp9_idct4x4_1_add_c -void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int dest_stride); -#define vp9_idct8x8_10_add vp9_idct8x8_10_add_c +void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int dest_stride); +#define vp9_idct8x8_12_add vp9_idct8x8_12_add_c void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride); #define vp9_idct8x8_1_add vp9_idct8x8_1_add_c diff --git a/source/config/linux/arm/vpx_config.asm b/source/config/linux/arm/vpx_config.asm index c99dea6..27b450b 100644 --- a/source/config/linux/arm/vpx_config.asm +++ b/source/config/linux/arm/vpx_config.asm @@ -7,9 +7,10 @@ .equ ARCH_X86_64 , 0 .equ ARCH_PPC32 , 0 .equ 
ARCH_PPC64 , 0 -.equ HAVE_EDSP , 1 +.equ HAVE_EDSP , 0 .equ HAVE_MEDIA , 1 .equ HAVE_NEON , 0 +.equ HAVE_NEON_ASM , 0 .equ HAVE_MIPS32 , 0 .equ HAVE_DSPR2 , 0 .equ HAVE_MMX , 0 diff --git a/source/config/linux/arm/vpx_config.c b/source/config/linux/arm/vpx_config.c index be1757a..703e1d3 100644 --- a/source/config/linux/arm/vpx_config.c +++ b/source/config/linux/arm/vpx_config.c @@ -5,5 +5,5 @@ /* tree. An additional intellectual property rights grant can be found */ /* in the file PATENTS. All contributing project authors may */ /* be found in the AUTHORS file in the root of the source tree. */ -static const char* const cfg = "--target=armv6-linux-gcc --enable-pic --enable-realtime-only --disable-install-bins --disable-install-libs --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --disable-avx2"; +static const char* const cfg = "--target=armv6-linux-gcc --enable-pic --enable-realtime-only --disable-install-bins --disable-install-libs --disable-edsp --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --disable-avx2"; const char *vpx_codec_build_config(void) {return cfg;} diff --git a/source/config/linux/arm/vpx_config.h b/source/config/linux/arm/vpx_config.h index 50e0ea7..85aca7d 100644 --- a/source/config/linux/arm/vpx_config.h +++ b/source/config/linux/arm/vpx_config.h @@ -16,9 +16,10 @@ #define ARCH_X86_64 0 #define ARCH_PPC32 0 #define ARCH_PPC64 0 -#define HAVE_EDSP 1 +#define HAVE_EDSP 0 #define HAVE_MEDIA 1 #define HAVE_NEON 0 +#define HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 #define HAVE_MMX 0 diff --git a/source/config/linux/arm64/vp8_rtcd.h b/source/config/linux/arm64/vp8_rtcd.h new file mode 100644 index 0000000..9601515 --- /dev/null +++ 
b/source/config/linux/arm64/vp8_rtcd.h @@ -0,0 +1,378 @@ +#ifndef VP8_RTCD_H_ +#define VP8_RTCD_H_ + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * VP8 + */ + +struct blockd; +struct macroblockd; +struct loop_filter_info; + +/* Encoder forward decls */ +struct block; +struct macroblock; +struct variance_vtable; +union int_mv; +struct yv12_buffer_config; + +void vp8_bilinear_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_bilinear_predict16x16_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +#define vp8_bilinear_predict16x16 vp8_bilinear_predict16x16_neon + +void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_bilinear_predict4x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +#define vp8_bilinear_predict4x4 vp8_bilinear_predict4x4_neon + +void vp8_bilinear_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_bilinear_predict8x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +#define vp8_bilinear_predict8x4 vp8_bilinear_predict8x4_neon + +void vp8_bilinear_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_bilinear_predict8x8_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +#define vp8_bilinear_predict8x8 vp8_bilinear_predict8x8_neon + +void vp8_blend_b_c(unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride); +#define vp8_blend_b vp8_blend_b_c + +void vp8_blend_mb_inner_c(unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int 
stride); +#define vp8_blend_mb_inner vp8_blend_mb_inner_c + +void vp8_blend_mb_outer_c(unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride); +#define vp8_blend_mb_outer vp8_blend_mb_outer_c + +int vp8_block_error_c(short *coeff, short *dqcoeff); +#define vp8_block_error vp8_block_error_c + +void vp8_build_intra_predictors_mbuv_s_c(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride); +#define vp8_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_c + +void vp8_build_intra_predictors_mby_s_c(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride); +#define vp8_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_c + +void vp8_clear_system_state_c(); +#define vp8_clear_system_state vp8_clear_system_state_c + +void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); +void vp8_copy_mem16x16_neon(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); +#define vp8_copy_mem16x16 vp8_copy_mem16x16_neon + +void vp8_copy_mem8x4_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); +void vp8_copy_mem8x4_neon(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); +#define vp8_copy_mem8x4 vp8_copy_mem8x4_neon + +void vp8_copy_mem8x8_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); +void vp8_copy_mem8x8_neon(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); +#define vp8_copy_mem8x8 vp8_copy_mem8x8_neon + +void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); +void vp8_dc_only_idct_add_neon(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); +#define 
vp8_dc_only_idct_add vp8_dc_only_idct_add_neon + +int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising); +int vp8_denoiser_filter_neon(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising); +#define vp8_denoiser_filter vp8_denoiser_filter_neon + +void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride); +void vp8_dequant_idct_add_neon(short *input, short *dq, unsigned char *output, int stride); +#define vp8_dequant_idct_add vp8_dequant_idct_add_neon + +void vp8_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs); +#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_c + +void vp8_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *dst, int stride, char *eobs); +#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_c + +void vp8_dequantize_b_c(struct blockd*, short *dqc); +void vp8_dequantize_b_neon(struct blockd*, short *dqc); +#define vp8_dequantize_b vp8_dequantize_b_neon + +int vp8_diamond_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); +#define vp8_diamond_search_sad vp8_diamond_search_sad_c + +void vp8_fast_quantize_b_c(struct block *, struct blockd *); +#define vp8_fast_quantize_b vp8_fast_quantize_b_c + +void vp8_fast_quantize_b_pair_c(struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2); +#define vp8_fast_quantize_b_pair vp8_fast_quantize_b_pair_c + +void vp8_filter_by_weight16x16_c(unsigned char *src, int src_stride, 
unsigned char *dst, int dst_stride, int src_weight); +#define vp8_filter_by_weight16x16 vp8_filter_by_weight16x16_c + +void vp8_filter_by_weight4x4_c(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight); +#define vp8_filter_by_weight4x4 vp8_filter_by_weight4x4_c + +void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight); +#define vp8_filter_by_weight8x8 vp8_filter_by_weight8x8_c + +int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); +#define vp8_full_search_sad vp8_full_search_sad_c + +unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +#define vp8_get4x4sse_cs vp8_get4x4sse_cs_c + +unsigned int vp8_get_mb_ss_c(const short *); +#define vp8_get_mb_ss vp8_get_mb_ss_c + +void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); +#define vp8_intra4x4_predict vp8_intra4x4_predict_c + +void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +#define vp8_loop_filter_bh vp8_loop_filter_bh_c + +void vp8_loop_filter_bv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +#define vp8_loop_filter_bv vp8_loop_filter_bv_c + +void vp8_loop_filter_mbh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +void vp8_loop_filter_mbh_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +#define vp8_loop_filter_mbh vp8_loop_filter_mbh_neon + +void vp8_loop_filter_mbv_c(unsigned char *y, 
unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +void vp8_loop_filter_mbv_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +#define vp8_loop_filter_mbv vp8_loop_filter_mbv_neon + +void vp8_loop_filter_bhs_c(unsigned char *y, int ystride, const unsigned char *blimit); +void vp8_loop_filter_bhs_neon(unsigned char *y, int ystride, const unsigned char *blimit); +#define vp8_loop_filter_simple_bh vp8_loop_filter_bhs_neon + +void vp8_loop_filter_bvs_c(unsigned char *y, int ystride, const unsigned char *blimit); +#define vp8_loop_filter_simple_bv vp8_loop_filter_bvs_c + +void vp8_loop_filter_simple_horizontal_edge_c(unsigned char *y, int ystride, const unsigned char *blimit); +void vp8_loop_filter_mbhs_neon(unsigned char *y, int ystride, const unsigned char *blimit); +#define vp8_loop_filter_simple_mbh vp8_loop_filter_mbhs_neon + +void vp8_loop_filter_simple_vertical_edge_c(unsigned char *y, int ystride, const unsigned char *blimit); +#define vp8_loop_filter_simple_mbv vp8_loop_filter_simple_vertical_edge_c + +int vp8_mbblock_error_c(struct macroblock *mb, int dc); +#define vp8_mbblock_error vp8_mbblock_error_c + +void vp8_mbpost_proc_across_ip_c(unsigned char *dst, int pitch, int rows, int cols,int flimit); +#define vp8_mbpost_proc_across_ip vp8_mbpost_proc_across_ip_c + +void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols,int flimit); +#define vp8_mbpost_proc_down vp8_mbpost_proc_down_c + +int vp8_mbuverror_c(struct macroblock *mb); +#define vp8_mbuverror vp8_mbuverror_c + +unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); +#define vp8_mse16x16 vp8_mse16x16_c + +void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); +#define 
vp8_plane_add_noise vp8_plane_add_noise_c + +void vp8_post_proc_down_and_across_mb_row_c(unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size); +#define vp8_post_proc_down_and_across_mb_row vp8_post_proc_down_and_across_mb_row_c + +void vp8_quantize_mb_c(struct macroblock *); +void vp8_quantize_mb_neon(struct macroblock *); +#define vp8_quantize_mb vp8_quantize_mb_neon + +void vp8_quantize_mbuv_c(struct macroblock *); +void vp8_quantize_mbuv_neon(struct macroblock *); +#define vp8_quantize_mbuv vp8_quantize_mbuv_neon + +void vp8_quantize_mby_c(struct macroblock *); +void vp8_quantize_mby_neon(struct macroblock *); +#define vp8_quantize_mby vp8_quantize_mby_neon + +int vp8_refining_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); +#define vp8_refining_search_sad vp8_refining_search_sad_c + +void vp8_regular_quantize_b_c(struct block *, struct blockd *); +#define vp8_regular_quantize_b vp8_regular_quantize_b_c + +void vp8_regular_quantize_b_pair_c(struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2); +#define vp8_regular_quantize_b_pair vp8_regular_quantize_b_pair_c + +unsigned int vp8_sad16x16_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); +unsigned int vp8_sad16x16_neon(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); +#define vp8_sad16x16 vp8_sad16x16_neon + +void vp8_sad16x16x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); +#define vp8_sad16x16x3 vp8_sad16x16x3_c + +void vp8_sad16x16x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); +#define 
vp8_sad16x16x4d vp8_sad16x16x4d_c + +void vp8_sad16x16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); +#define vp8_sad16x16x8 vp8_sad16x16x8_c + +unsigned int vp8_sad16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); +unsigned int vp8_sad16x8_neon(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); +#define vp8_sad16x8 vp8_sad16x8_neon + +void vp8_sad16x8x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); +#define vp8_sad16x8x3 vp8_sad16x8x3_c + +void vp8_sad16x8x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); +#define vp8_sad16x8x4d vp8_sad16x8x4d_c + +void vp8_sad16x8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); +#define vp8_sad16x8x8 vp8_sad16x8x8_c + +unsigned int vp8_sad4x4_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); +unsigned int vp8_sad4x4_neon(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); +#define vp8_sad4x4 vp8_sad4x4_neon + +void vp8_sad4x4x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); +#define vp8_sad4x4x3 vp8_sad4x4x3_c + +void vp8_sad4x4x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); +#define vp8_sad4x4x4d vp8_sad4x4x4d_c + +void vp8_sad4x4x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); +#define vp8_sad4x4x8 vp8_sad4x4x8_c + +unsigned int 
vp8_sad8x16_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); +unsigned int vp8_sad8x16_neon(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); +#define vp8_sad8x16 vp8_sad8x16_neon + +void vp8_sad8x16x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); +#define vp8_sad8x16x3 vp8_sad8x16x3_c + +void vp8_sad8x16x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); +#define vp8_sad8x16x4d vp8_sad8x16x4d_c + +void vp8_sad8x16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); +#define vp8_sad8x16x8 vp8_sad8x16x8_c + +unsigned int vp8_sad8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); +unsigned int vp8_sad8x8_neon(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad); +#define vp8_sad8x8 vp8_sad8x8_neon + +void vp8_sad8x8x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array); +#define vp8_sad8x8x3 vp8_sad8x8x3_c + +void vp8_sad8x8x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array); +#define vp8_sad8x8x4d vp8_sad8x8x4d_c + +void vp8_sad8x8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array); +#define vp8_sad8x8x8 vp8_sad8x8x8_c + +void vp8_short_fdct4x4_c(short *input, short *output, int pitch); +#define vp8_short_fdct4x4 vp8_short_fdct4x4_c + +void vp8_short_fdct8x4_c(short *input, short *output, int pitch); +#define vp8_short_fdct8x4 vp8_short_fdct8x4_c + +void 
vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride); +void vp8_short_idct4x4llm_neon(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride); +#define vp8_short_idct4x4llm vp8_short_idct4x4llm_neon + +void vp8_short_inv_walsh4x4_c(short *input, short *output); +void vp8_short_inv_walsh4x4_neon(short *input, short *output); +#define vp8_short_inv_walsh4x4 vp8_short_inv_walsh4x4_neon + +void vp8_short_inv_walsh4x4_1_c(short *input, short *output); +#define vp8_short_inv_walsh4x4_1 vp8_short_inv_walsh4x4_1_c + +void vp8_short_walsh4x4_c(short *input, short *output, int pitch); +#define vp8_short_walsh4x4 vp8_short_walsh4x4_c + +void vp8_sixtap_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_sixtap_predict16x16_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +#define vp8_sixtap_predict16x16 vp8_sixtap_predict16x16_neon + +void vp8_sixtap_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_sixtap_predict4x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +#define vp8_sixtap_predict4x4 vp8_sixtap_predict4x4_neon + +void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_sixtap_predict8x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +#define vp8_sixtap_predict8x4 vp8_sixtap_predict8x4_neon + +void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_sixtap_predict8x8_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +#define vp8_sixtap_predict8x8 vp8_sixtap_predict8x8_neon + +unsigned int vp8_sub_pixel_mse16x16_c(const 
unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); +#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_c + +unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); +#define vp8_sub_pixel_variance16x16 vp8_sub_pixel_variance16x16_c + +unsigned int vp8_sub_pixel_variance16x8_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); +#define vp8_sub_pixel_variance16x8 vp8_sub_pixel_variance16x8_c + +unsigned int vp8_sub_pixel_variance4x4_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); +#define vp8_sub_pixel_variance4x4 vp8_sub_pixel_variance4x4_c + +unsigned int vp8_sub_pixel_variance8x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); +#define vp8_sub_pixel_variance8x16 vp8_sub_pixel_variance8x16_c + +unsigned int vp8_sub_pixel_variance8x8_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); +#define vp8_sub_pixel_variance8x8 vp8_sub_pixel_variance8x8_c + +void vp8_subtract_b_c(struct block *be, struct blockd *bd, int pitch); +#define vp8_subtract_b vp8_subtract_b_c + +void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride); +#define vp8_subtract_mbuv vp8_subtract_mbuv_c + +void vp8_subtract_mby_c(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride); +#define vp8_subtract_mby vp8_subtract_mby_c + +unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int 
source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vp8_variance16x16_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); +#define vp8_variance16x16 vp8_variance16x16_neon + +unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vp8_variance16x8_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); +#define vp8_variance16x8 vp8_variance16x8_neon + +unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); +#define vp8_variance4x4 vp8_variance4x4_c + +unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vp8_variance8x16_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); +#define vp8_variance8x16 vp8_variance8x16_neon + +unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vp8_variance8x8_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); +#define vp8_variance8x8 vp8_variance8x8_neon + +unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); +#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_c + +unsigned int vp8_variance_halfpixvar16x16_hv_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); +#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_c + 
+unsigned int vp8_variance_halfpixvar16x16_v_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); +#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_c + +void vp8_yv12_copy_partial_frame_c(struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc); +#define vp8_yv12_copy_partial_frame vp8_yv12_copy_partial_frame_c + +void vp8_rtcd(void); + +#include "vpx_config.h" + +#ifdef RTCD_C +#include "vpx_ports/arm.h" +static void setup_rtcd_internal(void) +{ + int flags = arm_cpu_caps(); + + (void)flags; + +} +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/source/config/linux/arm64/vp9_rtcd.h b/source/config/linux/arm64/vp9_rtcd.h new file mode 100644 index 0000000..79faee9 --- /dev/null +++ b/source/config/linux/arm64/vp9_rtcd.h @@ -0,0 +1,671 @@ +#ifndef VP9_RTCD_H_ +#define VP9_RTCD_H_ + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * VP9 + */ + +#include "vpx/vpx_integer.h" +#include "vp9/common/vp9_enums.h" + +struct macroblockd; + +/* Encoder forward decls */ +struct macroblock; +struct vp9_variance_vtable; +struct search_site_config; +struct mv; +union int_mv; +struct yv12_buffer_config; + +void vp9_blend_b_c(uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride); +#define vp9_blend_b vp9_blend_b_c + +void vp9_blend_mb_inner_c(uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride); +#define vp9_blend_mb_inner vp9_blend_mb_inner_c + +void vp9_blend_mb_outer_c(uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride); +#define vp9_blend_mb_outer vp9_blend_mb_outer_c + +int64_t vp9_block_error_c(const int16_t *coeff, const int16_t *dqcoeff, intptr_t block_size, int64_t *ssz); +#define vp9_block_error vp9_block_error_c + +void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, 
uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vp9_convolve8 vp9_convolve8_c + +void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vp9_convolve8_avg vp9_convolve8_avg_c + +void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vp9_convolve8_avg_horiz vp9_convolve8_avg_horiz_c + +void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vp9_convolve8_avg_vert vp9_convolve8_avg_vert_c + +void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vp9_convolve8_horiz vp9_convolve8_horiz_c + +void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vp9_convolve8_vert vp9_convolve8_vert_c + +void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vp9_convolve_avg vp9_convolve_avg_c + +void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vp9_convolve_copy vp9_convolve_copy_c + +void vp9_d117_predictor_16x16_c(uint8_t *dst, 
ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d117_predictor_16x16 vp9_d117_predictor_16x16_c + +void vp9_d117_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d117_predictor_32x32 vp9_d117_predictor_32x32_c + +void vp9_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d117_predictor_4x4 vp9_d117_predictor_4x4_c + +void vp9_d117_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d117_predictor_8x8 vp9_d117_predictor_8x8_c + +void vp9_d135_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d135_predictor_16x16 vp9_d135_predictor_16x16_c + +void vp9_d135_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d135_predictor_32x32 vp9_d135_predictor_32x32_c + +void vp9_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d135_predictor_4x4 vp9_d135_predictor_4x4_c + +void vp9_d135_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d135_predictor_8x8 vp9_d135_predictor_8x8_c + +void vp9_d153_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d153_predictor_16x16 vp9_d153_predictor_16x16_c + +void vp9_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d153_predictor_32x32 vp9_d153_predictor_32x32_c + +void vp9_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d153_predictor_4x4 vp9_d153_predictor_4x4_c + +void vp9_d153_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d153_predictor_8x8 
vp9_d153_predictor_8x8_c + +void vp9_d207_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d207_predictor_16x16 vp9_d207_predictor_16x16_c + +void vp9_d207_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d207_predictor_32x32 vp9_d207_predictor_32x32_c + +void vp9_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d207_predictor_4x4 vp9_d207_predictor_4x4_c + +void vp9_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d207_predictor_8x8 vp9_d207_predictor_8x8_c + +void vp9_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d45_predictor_16x16 vp9_d45_predictor_16x16_c + +void vp9_d45_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d45_predictor_32x32 vp9_d45_predictor_32x32_c + +void vp9_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d45_predictor_4x4 vp9_d45_predictor_4x4_c + +void vp9_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d45_predictor_8x8 vp9_d45_predictor_8x8_c + +void vp9_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d63_predictor_16x16 vp9_d63_predictor_16x16_c + +void vp9_d63_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d63_predictor_32x32 vp9_d63_predictor_32x32_c + +void vp9_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d63_predictor_4x4 vp9_d63_predictor_4x4_c + +void vp9_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); 
+#define vp9_d63_predictor_8x8 vp9_d63_predictor_8x8_c + +void vp9_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_128_predictor_16x16 vp9_dc_128_predictor_16x16_c + +void vp9_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_128_predictor_32x32 vp9_dc_128_predictor_32x32_c + +void vp9_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_128_predictor_4x4 vp9_dc_128_predictor_4x4_c + +void vp9_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_128_predictor_8x8 vp9_dc_128_predictor_8x8_c + +void vp9_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_left_predictor_16x16 vp9_dc_left_predictor_16x16_c + +void vp9_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_left_predictor_32x32 vp9_dc_left_predictor_32x32_c + +void vp9_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_left_predictor_4x4 vp9_dc_left_predictor_4x4_c + +void vp9_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_left_predictor_8x8 vp9_dc_left_predictor_8x8_c + +void vp9_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_predictor_16x16 vp9_dc_predictor_16x16_c + +void vp9_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_predictor_32x32 vp9_dc_predictor_32x32_c + +void vp9_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_predictor_4x4 vp9_dc_predictor_4x4_c + +void 
vp9_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_predictor_8x8 vp9_dc_predictor_8x8_c + +void vp9_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_top_predictor_16x16 vp9_dc_top_predictor_16x16_c + +void vp9_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_top_predictor_32x32 vp9_dc_top_predictor_32x32_c + +void vp9_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_top_predictor_4x4 vp9_dc_top_predictor_4x4_c + +void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c + +int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv); +#define vp9_diamond_search_sad vp9_diamond_search_sad_c + +void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride); +#define vp9_fdct16x16 vp9_fdct16x16_c + +void vp9_fdct32x32_c(const int16_t *input, int16_t *output, int stride); +#define vp9_fdct32x32 vp9_fdct32x32_c + +void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *output, int stride); +#define vp9_fdct32x32_rd vp9_fdct32x32_rd_c + +void vp9_fdct4x4_c(const int16_t *input, int16_t *output, int stride); +#define vp9_fdct4x4 vp9_fdct4x4_c + +void vp9_fdct8x8_c(const int16_t *input, int16_t *output, int stride); +#define vp9_fdct8x8 vp9_fdct8x8_c + +void vp9_fht16x16_c(const int16_t *input, int16_t *output, int stride, int tx_type); +#define vp9_fht16x16 vp9_fht16x16_c + +void vp9_fht4x4_c(const int16_t *input, int16_t *output, int stride, int tx_type); +#define vp9_fht4x4 vp9_fht4x4_c + +void 
vp9_fht8x8_c(const int16_t *input, int16_t *output, int stride, int tx_type); +#define vp9_fht8x8 vp9_fht8x8_c + +int vp9_full_range_search_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv); +#define vp9_full_range_search vp9_full_range_search_c + +int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv); +#define vp9_full_search_sad vp9_full_search_sad_c + +void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride); +#define vp9_fwht4x4 vp9_fwht4x4_c + +unsigned int vp9_get_mb_ss_c(const int16_t *); +#define vp9_get_mb_ss vp9_get_mb_ss_c + +void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_h_predictor_16x16 vp9_h_predictor_16x16_c + +void vp9_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_h_predictor_32x32 vp9_h_predictor_32x32_c + +void vp9_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_h_predictor_4x4 vp9_h_predictor_4x4_c + +void vp9_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_h_predictor_8x8 vp9_h_predictor_8x8_c + +void vp9_idct16x16_10_add_c(const int16_t *input, uint8_t *dest, int dest_stride); +#define vp9_idct16x16_10_add vp9_idct16x16_10_add_c + +void vp9_idct16x16_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride); +#define vp9_idct16x16_1_add vp9_idct16x16_1_add_c + +void vp9_idct16x16_256_add_c(const int16_t *input, uint8_t *dest, int dest_stride); +#define vp9_idct16x16_256_add vp9_idct16x16_256_add_c + +void vp9_idct32x32_1024_add_c(const int16_t *input, 
uint8_t *dest, int dest_stride); +#define vp9_idct32x32_1024_add vp9_idct32x32_1024_add_c + +void vp9_idct32x32_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride); +#define vp9_idct32x32_1_add vp9_idct32x32_1_add_c + +void vp9_idct32x32_34_add_c(const int16_t *input, uint8_t *dest, int dest_stride); +#define vp9_idct32x32_34_add vp9_idct32x32_34_add_c + +void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int dest_stride); +#define vp9_idct4x4_16_add vp9_idct4x4_16_add_c + +void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride); +#define vp9_idct4x4_1_add vp9_idct4x4_1_add_c + +void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int dest_stride); +#define vp9_idct8x8_12_add vp9_idct8x8_12_add_c + +void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride); +#define vp9_idct8x8_1_add vp9_idct8x8_1_add_c + +void vp9_idct8x8_64_add_c(const int16_t *input, uint8_t *dest, int dest_stride); +#define vp9_idct8x8_64_add vp9_idct8x8_64_add_c + +void vp9_iht16x16_256_add_c(const int16_t *input, uint8_t *output, int pitch, int tx_type); +#define vp9_iht16x16_256_add vp9_iht16x16_256_add_c + +void vp9_iht4x4_16_add_c(const int16_t *input, uint8_t *dest, int dest_stride, int tx_type); +#define vp9_iht4x4_16_add vp9_iht4x4_16_add_c + +void vp9_iht8x8_64_add_c(const int16_t *input, uint8_t *dest, int dest_stride, int tx_type); +#define vp9_iht8x8_64_add vp9_iht8x8_64_add_c + +void vp9_iwht4x4_16_add_c(const int16_t *input, uint8_t *dest, int dest_stride); +#define vp9_iwht4x4_16_add vp9_iwht4x4_16_add_c + +void vp9_iwht4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride); +#define vp9_iwht4x4_1_add vp9_iwht4x4_1_add_c + +void vp9_lpf_horizontal_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); +#define vp9_lpf_horizontal_16 vp9_lpf_horizontal_16_c + +void vp9_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const 
uint8_t *limit, const uint8_t *thresh, int count); +#define vp9_lpf_horizontal_4 vp9_lpf_horizontal_4_c + +void vp9_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +#define vp9_lpf_horizontal_4_dual vp9_lpf_horizontal_4_dual_c + +void vp9_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); +#define vp9_lpf_horizontal_8 vp9_lpf_horizontal_8_c + +void vp9_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +#define vp9_lpf_horizontal_8_dual vp9_lpf_horizontal_8_dual_c + +void vp9_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define vp9_lpf_vertical_16 vp9_lpf_vertical_16_c + +void vp9_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define vp9_lpf_vertical_16_dual vp9_lpf_vertical_16_dual_c + +void vp9_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); +#define vp9_lpf_vertical_4 vp9_lpf_vertical_4_c + +void vp9_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +#define vp9_lpf_vertical_4_dual vp9_lpf_vertical_4_dual_c + +void vp9_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); +#define vp9_lpf_vertical_8 vp9_lpf_vertical_8_c + +void vp9_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t 
*thresh1); +#define vp9_lpf_vertical_8_dual vp9_lpf_vertical_8_dual_c + +unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vp9_mse16x16 vp9_mse16x16_c + +unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vp9_mse16x8 vp9_mse16x8_c + +unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vp9_mse8x16 vp9_mse8x16_c + +unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vp9_mse8x8 vp9_mse8x8_c + +void vp9_quantize_b_c(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +#define vp9_quantize_b vp9_quantize_b_c + +void vp9_quantize_b_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +#define vp9_quantize_b_32x32 vp9_quantize_b_32x32_c + +int vp9_refining_search_sad_c(const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv); +#define vp9_refining_search_sad vp9_refining_search_sad_c + +unsigned int vp9_sad16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad); +#define vp9_sad16x16 vp9_sad16x16_c + +unsigned int 
vp9_sad16x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad); +#define vp9_sad16x16_avg vp9_sad16x16_avg_c + +void vp9_sad16x16x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); +#define vp9_sad16x16x3 vp9_sad16x16x3_c + +void vp9_sad16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); +#define vp9_sad16x16x4d vp9_sad16x16x4d_c + +void vp9_sad16x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vp9_sad16x16x8 vp9_sad16x16x8_c + +unsigned int vp9_sad16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad); +#define vp9_sad16x32 vp9_sad16x32_c + +unsigned int vp9_sad16x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad); +#define vp9_sad16x32_avg vp9_sad16x32_avg_c + +void vp9_sad16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); +#define vp9_sad16x32x4d vp9_sad16x32x4d_c + +unsigned int vp9_sad16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad); +#define vp9_sad16x8 vp9_sad16x8_c + +unsigned int vp9_sad16x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad); +#define vp9_sad16x8_avg vp9_sad16x8_avg_c + +void vp9_sad16x8x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); +#define vp9_sad16x8x3 vp9_sad16x8x3_c + +void vp9_sad16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); +#define 
vp9_sad16x8x4d vp9_sad16x8x4d_c + +void vp9_sad16x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vp9_sad16x8x8 vp9_sad16x8x8_c + +unsigned int vp9_sad32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad); +#define vp9_sad32x16 vp9_sad32x16_c + +unsigned int vp9_sad32x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad); +#define vp9_sad32x16_avg vp9_sad32x16_avg_c + +void vp9_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); +#define vp9_sad32x16x4d vp9_sad32x16x4d_c + +unsigned int vp9_sad32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad); +#define vp9_sad32x32 vp9_sad32x32_c + +unsigned int vp9_sad32x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad); +#define vp9_sad32x32_avg vp9_sad32x32_avg_c + +void vp9_sad32x32x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); +#define vp9_sad32x32x3 vp9_sad32x32x3_c + +void vp9_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); +#define vp9_sad32x32x4d vp9_sad32x32x4d_c + +void vp9_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vp9_sad32x32x8 vp9_sad32x32x8_c + +unsigned int vp9_sad32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad); +#define vp9_sad32x64 vp9_sad32x64_c + +unsigned int vp9_sad32x64_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t 
*second_pred, unsigned int max_sad); +#define vp9_sad32x64_avg vp9_sad32x64_avg_c + +void vp9_sad32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); +#define vp9_sad32x64x4d vp9_sad32x64x4d_c + +unsigned int vp9_sad4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad); +#define vp9_sad4x4 vp9_sad4x4_c + +unsigned int vp9_sad4x4_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad); +#define vp9_sad4x4_avg vp9_sad4x4_avg_c + +void vp9_sad4x4x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); +#define vp9_sad4x4x3 vp9_sad4x4x3_c + +void vp9_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); +#define vp9_sad4x4x4d vp9_sad4x4x4d_c + +void vp9_sad4x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vp9_sad4x4x8 vp9_sad4x4x8_c + +unsigned int vp9_sad4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad); +#define vp9_sad4x8 vp9_sad4x8_c + +unsigned int vp9_sad4x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad); +#define vp9_sad4x8_avg vp9_sad4x8_avg_c + +void vp9_sad4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); +#define vp9_sad4x8x4d vp9_sad4x8x4d_c + +void vp9_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vp9_sad4x8x8 vp9_sad4x8x8_c + +unsigned int vp9_sad64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int 
max_sad); +#define vp9_sad64x32 vp9_sad64x32_c + +unsigned int vp9_sad64x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad); +#define vp9_sad64x32_avg vp9_sad64x32_avg_c + +void vp9_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); +#define vp9_sad64x32x4d vp9_sad64x32x4d_c + +unsigned int vp9_sad64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad); +#define vp9_sad64x64 vp9_sad64x64_c + +unsigned int vp9_sad64x64_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad); +#define vp9_sad64x64_avg vp9_sad64x64_avg_c + +void vp9_sad64x64x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); +#define vp9_sad64x64x3 vp9_sad64x64x3_c + +void vp9_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); +#define vp9_sad64x64x4d vp9_sad64x64x4d_c + +void vp9_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vp9_sad64x64x8 vp9_sad64x64x8_c + +unsigned int vp9_sad8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad); +#define vp9_sad8x16 vp9_sad8x16_c + +unsigned int vp9_sad8x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad); +#define vp9_sad8x16_avg vp9_sad8x16_avg_c + +void vp9_sad8x16x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); +#define vp9_sad8x16x3 vp9_sad8x16x3_c + +void vp9_sad8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const 
ref_ptr[], int ref_stride, unsigned int *sad_array); +#define vp9_sad8x16x4d vp9_sad8x16x4d_c + +void vp9_sad8x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vp9_sad8x16x8 vp9_sad8x16x8_c + +unsigned int vp9_sad8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad); +#define vp9_sad8x4 vp9_sad8x4_c + +unsigned int vp9_sad8x4_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad); +#define vp9_sad8x4_avg vp9_sad8x4_avg_c + +void vp9_sad8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); +#define vp9_sad8x4x4d vp9_sad8x4x4d_c + +void vp9_sad8x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vp9_sad8x4x8 vp9_sad8x4x8_c + +unsigned int vp9_sad8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad); +#define vp9_sad8x8 vp9_sad8x8_c + +unsigned int vp9_sad8x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad); +#define vp9_sad8x8_avg vp9_sad8x8_avg_c + +void vp9_sad8x8x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); +#define vp9_sad8x8x3 vp9_sad8x8x3_c + +void vp9_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); +#define vp9_sad8x8x4d vp9_sad8x8x4d_c + +void vp9_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define vp9_sad8x8x8 vp9_sad8x8x8_c + +unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t 
*ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); +#define vp9_sub_pixel_avg_variance16x16 vp9_sub_pixel_avg_variance16x16_c + +unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); +#define vp9_sub_pixel_avg_variance16x32 vp9_sub_pixel_avg_variance16x32_c + +unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); +#define vp9_sub_pixel_avg_variance16x8 vp9_sub_pixel_avg_variance16x8_c + +unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); +#define vp9_sub_pixel_avg_variance32x16 vp9_sub_pixel_avg_variance32x16_c + +unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); +#define vp9_sub_pixel_avg_variance32x32 vp9_sub_pixel_avg_variance32x32_c + +unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); +#define vp9_sub_pixel_avg_variance32x64 vp9_sub_pixel_avg_variance32x64_c + +unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); +#define vp9_sub_pixel_avg_variance4x4 vp9_sub_pixel_avg_variance4x4_c + +unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned 
int *sse, const uint8_t *second_pred); +#define vp9_sub_pixel_avg_variance4x8 vp9_sub_pixel_avg_variance4x8_c + +unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); +#define vp9_sub_pixel_avg_variance64x32 vp9_sub_pixel_avg_variance64x32_c + +unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); +#define vp9_sub_pixel_avg_variance64x64 vp9_sub_pixel_avg_variance64x64_c + +unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); +#define vp9_sub_pixel_avg_variance8x16 vp9_sub_pixel_avg_variance8x16_c + +unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); +#define vp9_sub_pixel_avg_variance8x4 vp9_sub_pixel_avg_variance8x4_c + +unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); +#define vp9_sub_pixel_avg_variance8x8 vp9_sub_pixel_avg_variance8x8_c + +unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vp9_sub_pixel_variance16x16 vp9_sub_pixel_variance16x16_c + +unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vp9_sub_pixel_variance16x32 vp9_sub_pixel_variance16x32_c + 
+unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vp9_sub_pixel_variance16x8 vp9_sub_pixel_variance16x8_c + +unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vp9_sub_pixel_variance32x16 vp9_sub_pixel_variance32x16_c + +unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vp9_sub_pixel_variance32x32 vp9_sub_pixel_variance32x32_c + +unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vp9_sub_pixel_variance32x64 vp9_sub_pixel_variance32x64_c + +unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vp9_sub_pixel_variance4x4 vp9_sub_pixel_variance4x4_c + +unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vp9_sub_pixel_variance4x8 vp9_sub_pixel_variance4x8_c + +unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vp9_sub_pixel_variance64x32 vp9_sub_pixel_variance64x32_c + +unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vp9_sub_pixel_variance64x64 vp9_sub_pixel_variance64x64_c + +unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr, int 
source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vp9_sub_pixel_variance8x16 vp9_sub_pixel_variance8x16_c + +unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vp9_sub_pixel_variance8x4 vp9_sub_pixel_variance8x4_c + +unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vp9_sub_pixel_variance8x8 vp9_sub_pixel_variance8x8_c + +void vp9_subtract_block_c(int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride); +#define vp9_subtract_block vp9_subtract_block_c + +void vp9_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count); +#define vp9_temporal_filter_apply vp9_temporal_filter_apply_c + +void vp9_tm_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_tm_predictor_16x16 vp9_tm_predictor_16x16_c + +void vp9_tm_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_tm_predictor_32x32 vp9_tm_predictor_32x32_c + +void vp9_tm_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_tm_predictor_4x4 vp9_tm_predictor_4x4_c + +void vp9_tm_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_tm_predictor_8x8 vp9_tm_predictor_8x8_c + +void vp9_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_v_predictor_16x16 vp9_v_predictor_16x16_c + +void vp9_v_predictor_32x32_c(uint8_t 
*dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_v_predictor_32x32 vp9_v_predictor_32x32_c + +void vp9_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_v_predictor_4x4 vp9_v_predictor_4x4_c + +void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_v_predictor_8x8 vp9_v_predictor_8x8_c + +unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vp9_variance16x16 vp9_variance16x16_c + +unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vp9_variance16x32 vp9_variance16x32_c + +unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vp9_variance16x8 vp9_variance16x8_c + +unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vp9_variance32x16 vp9_variance32x16_c + +unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vp9_variance32x32 vp9_variance32x32_c + +unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vp9_variance32x64 vp9_variance32x64_c + +unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vp9_variance4x4 vp9_variance4x4_c + +unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vp9_variance4x8 vp9_variance4x8_c + +unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, 
const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vp9_variance64x32 vp9_variance64x32_c + +unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vp9_variance64x64 vp9_variance64x64_c + +unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vp9_variance8x16 vp9_variance8x16_c + +unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vp9_variance8x4 vp9_variance8x4_c + +unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vp9_variance8x8 vp9_variance8x8_c + +void vp9_rtcd(void); + +#include "vpx_config.h" + +#ifdef RTCD_C +#include "vpx_ports/arm.h" +static void setup_rtcd_internal(void) +{ + int flags = arm_cpu_caps(); + + (void)flags; + +} +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/source/config/linux/arm64/vpx_config.asm b/source/config/linux/arm64/vpx_config.asm new file mode 100644 index 0000000..d4ae581 --- /dev/null +++ b/source/config/linux/arm64/vpx_config.asm @@ -0,0 +1,84 @@ +@ This file was created from a .asm file +@ using the ads2gas.pl script. 
+ .equ DO1STROUNDING, 0 +.equ ARCH_ARM , 1 +.equ ARCH_MIPS , 0 +.equ ARCH_X86 , 0 +.equ ARCH_X86_64 , 0 +.equ ARCH_PPC32 , 0 +.equ ARCH_PPC64 , 0 +.equ HAVE_EDSP , 0 +.equ HAVE_MEDIA , 0 +.equ HAVE_NEON , 1 +.equ HAVE_NEON_ASM , 0 +.equ HAVE_MIPS32 , 0 +.equ HAVE_DSPR2 , 0 +.equ HAVE_MMX , 0 +.equ HAVE_SSE , 0 +.equ HAVE_SSE2 , 0 +.equ HAVE_SSE3 , 0 +.equ HAVE_SSSE3 , 0 +.equ HAVE_SSE4_1 , 0 +.equ HAVE_AVX , 0 +.equ HAVE_AVX2 , 0 +.equ HAVE_ALTIVEC , 0 +.equ HAVE_VPX_PORTS , 1 +.equ HAVE_STDINT_H , 1 +.equ HAVE_ALT_TREE_LAYOUT , 0 +.equ HAVE_PTHREAD_H , 1 +.equ HAVE_SYS_MMAN_H , 1 +.equ HAVE_UNISTD_H , 1 +.equ CONFIG_EXTERNAL_BUILD , 1 +.equ CONFIG_INSTALL_DOCS , 0 +.equ CONFIG_INSTALL_BINS , 1 +.equ CONFIG_INSTALL_LIBS , 1 +.equ CONFIG_INSTALL_SRCS , 0 +.equ CONFIG_USE_X86INC , 1 +.equ CONFIG_DEBUG , 0 +.equ CONFIG_GPROF , 0 +.equ CONFIG_GCOV , 0 +.equ CONFIG_RVCT , 0 +.equ CONFIG_GCC , 1 +.equ CONFIG_MSVS , 0 +.equ CONFIG_PIC , 1 +.equ CONFIG_BIG_ENDIAN , 0 +.equ CONFIG_CODEC_SRCS , 0 +.equ CONFIG_DEBUG_LIBS , 0 +.equ CONFIG_FAST_UNALIGNED , 1 +.equ CONFIG_MEM_MANAGER , 0 +.equ CONFIG_MEM_TRACKER , 0 +.equ CONFIG_MEM_CHECKS , 0 +.equ CONFIG_DEQUANT_TOKENS , 0 +.equ CONFIG_DC_RECON , 0 +.equ CONFIG_RUNTIME_CPU_DETECT , 0 +.equ CONFIG_POSTPROC , 1 +.equ CONFIG_VP9_POSTPROC , 0 +.equ CONFIG_MULTITHREAD , 1 +.equ CONFIG_INTERNAL_STATS , 0 +.equ CONFIG_VP8_ENCODER , 1 +.equ CONFIG_VP8_DECODER , 1 +.equ CONFIG_VP9_ENCODER , 1 +.equ CONFIG_VP9_DECODER , 1 +.equ CONFIG_VP8 , 1 +.equ CONFIG_VP9 , 1 +.equ CONFIG_ENCODERS , 1 +.equ CONFIG_DECODERS , 1 +.equ CONFIG_STATIC_MSVCRT , 0 +.equ CONFIG_SPATIAL_RESAMPLING , 1 +.equ CONFIG_REALTIME_ONLY , 1 +.equ CONFIG_ONTHEFLY_BITPACKING , 0 +.equ CONFIG_ERROR_CONCEALMENT , 0 +.equ CONFIG_SHARED , 0 +.equ CONFIG_STATIC , 1 +.equ CONFIG_SMALL , 0 +.equ CONFIG_POSTPROC_VISUALIZER , 0 +.equ CONFIG_OS_SUPPORT , 1 +.equ CONFIG_UNIT_TESTS , 0 +.equ CONFIG_WEBM_IO , 1 +.equ CONFIG_DECODE_PERF_TESTS , 0 +.equ CONFIG_MULTI_RES_ENCODING , 1 
+.equ CONFIG_TEMPORAL_DENOISING , 1 +.equ CONFIG_EXPERIMENTAL , 0 +.equ CONFIG_MULTIPLE_ARF , 0 +.equ CONFIG_ALPHA , 0 + .section .note.GNU-stack,"",%progbits diff --git a/source/config/linux/arm64/vpx_config.c b/source/config/linux/arm64/vpx_config.c new file mode 100644 index 0000000..9ef6c2a --- /dev/null +++ b/source/config/linux/arm64/vpx_config.c @@ -0,0 +1,9 @@ +/* Copyright (c) 2011 The WebM project authors. All Rights Reserved. */ +/* */ +/* Use of this source code is governed by a BSD-style license */ +/* that can be found in the LICENSE file in the root of the source */ +/* tree. An additional intellectual property rights grant can be found */ +/* in the file PATENTS. All contributing project authors may */ +/* be found in the AUTHORS file in the root of the source tree. */ +static const char* const cfg = "--force-target=armv8-linux-gcc --enable-pic --enable-realtime-only --disable-edsp --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --disable-avx2"; +const char *vpx_codec_build_config(void) {return cfg;} diff --git a/source/config/linux/arm64/vpx_config.h b/source/config/linux/arm64/vpx_config.h new file mode 100644 index 0000000..8cb9cb0 --- /dev/null +++ b/source/config/linux/arm64/vpx_config.h @@ -0,0 +1,93 @@ +/* Copyright (c) 2011 The WebM project authors. All Rights Reserved. */ +/* */ +/* Use of this source code is governed by a BSD-style license */ +/* that can be found in the LICENSE file in the root of the source */ +/* tree. An additional intellectual property rights grant can be found */ +/* in the file PATENTS. All contributing project authors may */ +/* be found in the AUTHORS file in the root of the source tree. */ +/* This file automatically generated by configure. Do not edit! 
*/ +#ifndef VPX_CONFIG_H +#define VPX_CONFIG_H +#define RESTRICT +#define INLINE __inline__ __attribute__((always_inline)) +#define ARCH_ARM 1 +#define ARCH_MIPS 0 +#define ARCH_X86 0 +#define ARCH_X86_64 0 +#define ARCH_PPC32 0 +#define ARCH_PPC64 0 +#define HAVE_EDSP 0 +#define HAVE_MEDIA 0 +#define HAVE_NEON 1 +#define HAVE_NEON_ASM 0 +#define HAVE_MIPS32 0 +#define HAVE_DSPR2 0 +#define HAVE_MMX 0 +#define HAVE_SSE 0 +#define HAVE_SSE2 0 +#define HAVE_SSE3 0 +#define HAVE_SSSE3 0 +#define HAVE_SSE4_1 0 +#define HAVE_AVX 0 +#define HAVE_AVX2 0 +#define HAVE_ALTIVEC 0 +#define HAVE_VPX_PORTS 1 +#define HAVE_STDINT_H 1 +#define HAVE_ALT_TREE_LAYOUT 0 +#define HAVE_PTHREAD_H 1 +#define HAVE_SYS_MMAN_H 1 +#define HAVE_UNISTD_H 1 +#define CONFIG_EXTERNAL_BUILD 1 +#define CONFIG_INSTALL_DOCS 0 +#define CONFIG_INSTALL_BINS 1 +#define CONFIG_INSTALL_LIBS 1 +#define CONFIG_INSTALL_SRCS 0 +#define CONFIG_USE_X86INC 1 +#define CONFIG_DEBUG 0 +#define CONFIG_GPROF 0 +#define CONFIG_GCOV 0 +#define CONFIG_RVCT 0 +#define CONFIG_GCC 1 +#define CONFIG_MSVS 0 +#define CONFIG_PIC 1 +#define CONFIG_BIG_ENDIAN 0 +#define CONFIG_CODEC_SRCS 0 +#define CONFIG_DEBUG_LIBS 0 +#define CONFIG_FAST_UNALIGNED 1 +#define CONFIG_MEM_MANAGER 0 +#define CONFIG_MEM_TRACKER 0 +#define CONFIG_MEM_CHECKS 0 +#define CONFIG_DEQUANT_TOKENS 0 +#define CONFIG_DC_RECON 0 +#define CONFIG_RUNTIME_CPU_DETECT 0 +#define CONFIG_POSTPROC 1 +#define CONFIG_VP9_POSTPROC 0 +#define CONFIG_MULTITHREAD 1 +#define CONFIG_INTERNAL_STATS 0 +#define CONFIG_VP8_ENCODER 1 +#define CONFIG_VP8_DECODER 1 +#define CONFIG_VP9_ENCODER 1 +#define CONFIG_VP9_DECODER 1 +#define CONFIG_VP8 1 +#define CONFIG_VP9 1 +#define CONFIG_ENCODERS 1 +#define CONFIG_DECODERS 1 +#define CONFIG_STATIC_MSVCRT 0 +#define CONFIG_SPATIAL_RESAMPLING 1 +#define CONFIG_REALTIME_ONLY 1 +#define CONFIG_ONTHEFLY_BITPACKING 0 +#define CONFIG_ERROR_CONCEALMENT 0 +#define CONFIG_SHARED 0 +#define CONFIG_STATIC 1 +#define CONFIG_SMALL 0 +#define 
CONFIG_POSTPROC_VISUALIZER 0 +#define CONFIG_OS_SUPPORT 1 +#define CONFIG_UNIT_TESTS 0 +#define CONFIG_WEBM_IO 1 +#define CONFIG_DECODE_PERF_TESTS 0 +#define CONFIG_MULTI_RES_ENCODING 1 +#define CONFIG_TEMPORAL_DENOISING 1 +#define CONFIG_EXPERIMENTAL 0 +#define CONFIG_MULTIPLE_ARF 0 +#define CONFIG_ALPHA 0 +#endif /* VPX_CONFIG_H */ diff --git a/source/config/linux/arm64/vpx_scale_rtcd.h b/source/config/linux/arm64/vpx_scale_rtcd.h new file mode 100644 index 0000000..0a6d790 --- /dev/null +++ b/source/config/linux/arm64/vpx_scale_rtcd.h @@ -0,0 +1,71 @@ +#ifndef VPX_SCALE_RTCD_H_ +#define VPX_SCALE_RTCD_H_ + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +struct yv12_buffer_config; + +void vp8_horizontal_line_2_1_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); +#define vp8_horizontal_line_2_1_scale vp8_horizontal_line_2_1_scale_c + +void vp8_horizontal_line_5_3_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); +#define vp8_horizontal_line_5_3_scale vp8_horizontal_line_5_3_scale_c + +void vp8_horizontal_line_5_4_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); +#define vp8_horizontal_line_5_4_scale vp8_horizontal_line_5_4_scale_c + +void vp8_vertical_band_2_1_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); +#define vp8_vertical_band_2_1_scale vp8_vertical_band_2_1_scale_c + +void vp8_vertical_band_2_1_scale_i_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); +#define vp8_vertical_band_2_1_scale_i vp8_vertical_band_2_1_scale_i_c + +void vp8_vertical_band_5_3_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, 
unsigned int dest_width); +#define vp8_vertical_band_5_3_scale vp8_vertical_band_5_3_scale_c + +void vp8_vertical_band_5_4_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); +#define vp8_vertical_band_5_4_scale vp8_vertical_band_5_4_scale_c + +void vp8_yv12_copy_frame_c(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc); +#define vp8_yv12_copy_frame vp8_yv12_copy_frame_c + +void vp8_yv12_extend_frame_borders_c(struct yv12_buffer_config *ybf); +#define vp8_yv12_extend_frame_borders vp8_yv12_extend_frame_borders_c + +void vp9_extend_frame_borders_c(struct yv12_buffer_config *ybf); +#define vp9_extend_frame_borders vp9_extend_frame_borders_c + +void vp9_extend_frame_inner_borders_c(struct yv12_buffer_config *ybf); +#define vp9_extend_frame_inner_borders vp9_extend_frame_inner_borders_c + +void vpx_yv12_copy_y_c(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc); +#define vpx_yv12_copy_y vpx_yv12_copy_y_c + +void vpx_scale_rtcd(void); + +#include "vpx_config.h" + +#ifdef RTCD_C +#include "vpx_ports/arm.h" +static void setup_rtcd_internal(void) +{ + int flags = arm_cpu_caps(); + + (void)flags; + +} +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/source/config/linux/generic/vp8_rtcd.h b/source/config/linux/generic/vp8_rtcd.h index d6de728..ef9fa5f 100644 --- a/source/config/linux/generic/vp8_rtcd.h +++ b/source/config/linux/generic/vp8_rtcd.h @@ -71,7 +71,7 @@ void vp8_copy_mem8x8_c(unsigned char *src, int src_pitch, unsigned char *dst, in void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); #define vp8_dc_only_idct_add vp8_dc_only_idct_add_c -int vp8_denoiser_filter_c(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset); +int 
vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising); #define vp8_denoiser_filter vp8_denoiser_filter_c void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride); diff --git a/source/config/linux/generic/vp9_rtcd.h b/source/config/linux/generic/vp9_rtcd.h index c42a60b..fcb14ab 100644 --- a/source/config/linux/generic/vp9_rtcd.h +++ b/source/config/linux/generic/vp9_rtcd.h @@ -259,8 +259,8 @@ void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int dest_stride); void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride); #define vp9_idct4x4_1_add vp9_idct4x4_1_add_c -void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int dest_stride); -#define vp9_idct8x8_10_add vp9_idct8x8_10_add_c +void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int dest_stride); +#define vp9_idct8x8_12_add vp9_idct8x8_12_add_c void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride); #define vp9_idct8x8_1_add vp9_idct8x8_1_add_c diff --git a/source/config/linux/generic/vpx_config.asm b/source/config/linux/generic/vpx_config.asm index 4e01f9c..b94e4f0 100644 --- a/source/config/linux/generic/vpx_config.asm +++ b/source/config/linux/generic/vpx_config.asm @@ -10,6 +10,7 @@ .equ HAVE_EDSP , 0 .equ HAVE_MEDIA , 0 .equ HAVE_NEON , 0 +.equ HAVE_NEON_ASM , 0 .equ HAVE_MIPS32 , 0 .equ HAVE_DSPR2 , 0 .equ HAVE_MMX , 0 diff --git a/source/config/linux/generic/vpx_config.h b/source/config/linux/generic/vpx_config.h index a0ffc4e..122c0e7 100644 --- a/source/config/linux/generic/vpx_config.h +++ b/source/config/linux/generic/vpx_config.h @@ -19,6 +19,7 @@ #define HAVE_EDSP 0 #define HAVE_MEDIA 0 #define HAVE_NEON 0 +#define HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 #define HAVE_MMX 0 diff --git 
a/source/config/linux/ia32/vp8_rtcd.h b/source/config/linux/ia32/vp8_rtcd.h index 7e90462..fc0f7a2 100644 --- a/source/config/linux/ia32/vp8_rtcd.h +++ b/source/config/linux/ia32/vp8_rtcd.h @@ -96,9 +96,9 @@ void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, u void vp8_dc_only_idct_add_mmx(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); RTCD_EXTERN void (*vp8_dc_only_idct_add)(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); -int vp8_denoiser_filter_c(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset); -int vp8_denoiser_filter_sse2(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset); -RTCD_EXTERN int (*vp8_denoiser_filter)(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset); +int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising); +int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising); +RTCD_EXTERN int (*vp8_denoiser_filter)(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising); void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride); void vp8_dequant_idct_add_mmx(short *input, short *dq, unsigned char *output, int 
stride); diff --git a/source/config/linux/ia32/vp9_rtcd.h b/source/config/linux/ia32/vp9_rtcd.h index 40965d0..9482f75 100644 --- a/source/config/linux/ia32/vp9_rtcd.h +++ b/source/config/linux/ia32/vp9_rtcd.h @@ -263,7 +263,8 @@ int vp9_full_search_sadx8(const struct macroblock *x, const struct mv *ref_mv, i RTCD_EXTERN int (*vp9_full_search_sad)(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv); void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride); -#define vp9_fwht4x4 vp9_fwht4x4_c +void vp9_fwht4x4_mmx(const int16_t *input, int16_t *output, int stride); +RTCD_EXTERN void (*vp9_fwht4x4)(const int16_t *input, int16_t *output, int stride); unsigned int vp9_get_mb_ss_c(const int16_t *); unsigned int vp9_get_mb_ss_mmx(const int16_t *); @@ -318,9 +319,9 @@ void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride); void vp9_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride); RTCD_EXTERN void (*vp9_idct4x4_1_add)(const int16_t *input, uint8_t *dest, int dest_stride); -void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int dest_stride); -void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vp9_idct8x8_10_add)(const int16_t *input, uint8_t *dest, int dest_stride); +void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int dest_stride); +void vp9_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vp9_idct8x8_12_add)(const int16_t *input, uint8_t *dest, int dest_stride); void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride); void vp9_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride); @@ -951,6 +952,8 @@ static void setup_rtcd_internal(void) vp9_full_search_sad = vp9_full_search_sad_c; if (flags & HAS_SSE3) vp9_full_search_sad = 
vp9_full_search_sadx3; if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8; + vp9_fwht4x4 = vp9_fwht4x4_c; + if (flags & HAS_MMX) vp9_fwht4x4 = vp9_fwht4x4_mmx; vp9_get_mb_ss = vp9_get_mb_ss_c; if (flags & HAS_MMX) vp9_get_mb_ss = vp9_get_mb_ss_mmx; if (flags & HAS_SSE2) vp9_get_mb_ss = vp9_get_mb_ss_sse2; @@ -978,8 +981,8 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vp9_idct4x4_16_add = vp9_idct4x4_16_add_sse2; vp9_idct4x4_1_add = vp9_idct4x4_1_add_c; if (flags & HAS_SSE2) vp9_idct4x4_1_add = vp9_idct4x4_1_add_sse2; - vp9_idct8x8_10_add = vp9_idct8x8_10_add_c; - if (flags & HAS_SSE2) vp9_idct8x8_10_add = vp9_idct8x8_10_add_sse2; + vp9_idct8x8_12_add = vp9_idct8x8_12_add_c; + if (flags & HAS_SSE2) vp9_idct8x8_12_add = vp9_idct8x8_12_add_sse2; vp9_idct8x8_1_add = vp9_idct8x8_1_add_c; if (flags & HAS_SSE2) vp9_idct8x8_1_add = vp9_idct8x8_1_add_sse2; vp9_idct8x8_64_add = vp9_idct8x8_64_add_c; diff --git a/source/config/linux/ia32/vpx_config.asm b/source/config/linux/ia32/vpx_config.asm index c7b5491..f783cc5 100644 --- a/source/config/linux/ia32/vpx_config.asm +++ b/source/config/linux/ia32/vpx_config.asm @@ -7,6 +7,7 @@ ARCH_PPC64 equ 0 HAVE_EDSP equ 0 HAVE_MEDIA equ 0 HAVE_NEON equ 0 +HAVE_NEON_ASM equ 0 HAVE_MIPS32 equ 0 HAVE_DSPR2 equ 0 HAVE_MMX equ 1 diff --git a/source/config/linux/ia32/vpx_config.h b/source/config/linux/ia32/vpx_config.h index 3ebfb59..0cbf5de 100644 --- a/source/config/linux/ia32/vpx_config.h +++ b/source/config/linux/ia32/vpx_config.h @@ -19,6 +19,7 @@ #define HAVE_EDSP 0 #define HAVE_MEDIA 0 #define HAVE_NEON 0 +#define HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 #define HAVE_MMX 1 diff --git a/source/config/linux/mipsel/vp8_rtcd.h b/source/config/linux/mipsel/vp8_rtcd.h index 72a7d9e..bfb056b 100644 --- a/source/config/linux/mipsel/vp8_rtcd.h +++ b/source/config/linux/mipsel/vp8_rtcd.h @@ -71,7 +71,7 @@ void vp8_copy_mem8x8_c(unsigned char *src, int src_pitch, unsigned char *dst, in void 
vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); #define vp8_dc_only_idct_add vp8_dc_only_idct_add_c -int vp8_denoiser_filter_c(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset); +int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising); #define vp8_denoiser_filter vp8_denoiser_filter_c void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride); diff --git a/source/config/linux/mipsel/vp9_rtcd.h b/source/config/linux/mipsel/vp9_rtcd.h index c42a60b..fcb14ab 100644 --- a/source/config/linux/mipsel/vp9_rtcd.h +++ b/source/config/linux/mipsel/vp9_rtcd.h @@ -259,8 +259,8 @@ void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int dest_stride); void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride); #define vp9_idct4x4_1_add vp9_idct4x4_1_add_c -void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int dest_stride); -#define vp9_idct8x8_10_add vp9_idct8x8_10_add_c +void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int dest_stride); +#define vp9_idct8x8_12_add vp9_idct8x8_12_add_c void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride); #define vp9_idct8x8_1_add vp9_idct8x8_1_add_c diff --git a/source/config/linux/mipsel/vpx_config.h b/source/config/linux/mipsel/vpx_config.h index 7b7fd99..a39eb99 100644 --- a/source/config/linux/mipsel/vpx_config.h +++ b/source/config/linux/mipsel/vpx_config.h @@ -19,6 +19,7 @@ #define HAVE_EDSP 0 #define HAVE_MEDIA 0 #define HAVE_NEON 0 +#define HAVE_NEON_ASM 0 #define HAVE_MIPS32 1 #define HAVE_DSPR2 0 #define HAVE_MMX 0 diff --git a/source/config/linux/x64/vp8_rtcd.h 
b/source/config/linux/x64/vp8_rtcd.h index 9653130..d2ebae0 100644 --- a/source/config/linux/x64/vp8_rtcd.h +++ b/source/config/linux/x64/vp8_rtcd.h @@ -96,8 +96,8 @@ void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, u void vp8_dc_only_idct_add_mmx(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); #define vp8_dc_only_idct_add vp8_dc_only_idct_add_mmx -int vp8_denoiser_filter_c(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset); -int vp8_denoiser_filter_sse2(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset); +int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising); +int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising); #define vp8_denoiser_filter vp8_denoiser_filter_sse2 void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride); diff --git a/source/config/linux/x64/vp9_rtcd.h b/source/config/linux/x64/vp9_rtcd.h index b7056fa..beb342b 100644 --- a/source/config/linux/x64/vp9_rtcd.h +++ b/source/config/linux/x64/vp9_rtcd.h @@ -264,7 +264,8 @@ int vp9_full_search_sadx8(const struct macroblock *x, const struct mv *ref_mv, i RTCD_EXTERN int (*vp9_full_search_sad)(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv); void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride); -#define 
vp9_fwht4x4 vp9_fwht4x4_c +void vp9_fwht4x4_mmx(const int16_t *input, int16_t *output, int stride); +#define vp9_fwht4x4 vp9_fwht4x4_mmx unsigned int vp9_get_mb_ss_c(const int16_t *); unsigned int vp9_get_mb_ss_mmx(const int16_t *); @@ -319,9 +320,10 @@ void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride); void vp9_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride); #define vp9_idct4x4_1_add vp9_idct4x4_1_add_sse2 -void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int dest_stride); -void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride); -#define vp9_idct8x8_10_add vp9_idct8x8_10_add_sse2 +void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int dest_stride); +void vp9_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride); +void vp9_idct8x8_12_add_ssse3(const int16_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vp9_idct8x8_12_add)(const int16_t *input, uint8_t *dest, int dest_stride); void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride); void vp9_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride); @@ -930,6 +932,8 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_ssse3; vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_h_predictor_8x8 = vp9_h_predictor_8x8_ssse3; + vp9_idct8x8_12_add = vp9_idct8x8_12_add_sse2; + if (flags & HAS_SSSE3) vp9_idct8x8_12_add = vp9_idct8x8_12_add_ssse3; vp9_idct8x8_64_add = vp9_idct8x8_64_add_sse2; if (flags & HAS_SSSE3) vp9_idct8x8_64_add = vp9_idct8x8_64_add_ssse3; vp9_quantize_b = vp9_quantize_b_c; diff --git a/source/config/linux/x64/vpx_config.asm b/source/config/linux/x64/vpx_config.asm index 37939c6..9ebc29f 100644 --- a/source/config/linux/x64/vpx_config.asm +++ b/source/config/linux/x64/vpx_config.asm @@ -7,6 +7,7 @@ ARCH_PPC64 equ 0 HAVE_EDSP equ 0 HAVE_MEDIA equ 0 
HAVE_NEON equ 0 +HAVE_NEON_ASM equ 0 HAVE_MIPS32 equ 0 HAVE_DSPR2 equ 0 HAVE_MMX equ 1 diff --git a/source/config/linux/x64/vpx_config.h b/source/config/linux/x64/vpx_config.h index be57865..169de86 100644 --- a/source/config/linux/x64/vpx_config.h +++ b/source/config/linux/x64/vpx_config.h @@ -19,6 +19,7 @@ #define HAVE_EDSP 0 #define HAVE_MEDIA 0 #define HAVE_NEON 0 +#define HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 #define HAVE_MMX 1 diff --git a/source/config/mac/ia32/vp8_rtcd.h b/source/config/mac/ia32/vp8_rtcd.h index 7e90462..fc0f7a2 100644 --- a/source/config/mac/ia32/vp8_rtcd.h +++ b/source/config/mac/ia32/vp8_rtcd.h @@ -96,9 +96,9 @@ void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, u void vp8_dc_only_idct_add_mmx(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); RTCD_EXTERN void (*vp8_dc_only_idct_add)(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); -int vp8_denoiser_filter_c(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset); -int vp8_denoiser_filter_sse2(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset); -RTCD_EXTERN int (*vp8_denoiser_filter)(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset); +int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising); +int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, 
unsigned int motion_magnitude, int increase_denoising); +RTCD_EXTERN int (*vp8_denoiser_filter)(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising); void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride); void vp8_dequant_idct_add_mmx(short *input, short *dq, unsigned char *output, int stride); diff --git a/source/config/mac/ia32/vp9_rtcd.h b/source/config/mac/ia32/vp9_rtcd.h index 45544eb..28ae79b 100644 --- a/source/config/mac/ia32/vp9_rtcd.h +++ b/source/config/mac/ia32/vp9_rtcd.h @@ -292,9 +292,9 @@ void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride); void vp9_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride); RTCD_EXTERN void (*vp9_idct4x4_1_add)(const int16_t *input, uint8_t *dest, int dest_stride); -void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int dest_stride); -void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vp9_idct8x8_10_add)(const int16_t *input, uint8_t *dest, int dest_stride); +void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int dest_stride); +void vp9_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vp9_idct8x8_12_add)(const int16_t *input, uint8_t *dest, int dest_stride); void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride); void vp9_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride); @@ -800,8 +800,8 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vp9_idct4x4_16_add = vp9_idct4x4_16_add_sse2; vp9_idct4x4_1_add = vp9_idct4x4_1_add_c; if (flags & HAS_SSE2) vp9_idct4x4_1_add = vp9_idct4x4_1_add_sse2; - vp9_idct8x8_10_add = vp9_idct8x8_10_add_c; - if (flags & HAS_SSE2) vp9_idct8x8_10_add = vp9_idct8x8_10_add_sse2; + vp9_idct8x8_12_add = 
vp9_idct8x8_12_add_c; + if (flags & HAS_SSE2) vp9_idct8x8_12_add = vp9_idct8x8_12_add_sse2; vp9_idct8x8_1_add = vp9_idct8x8_1_add_c; if (flags & HAS_SSE2) vp9_idct8x8_1_add = vp9_idct8x8_1_add_sse2; vp9_idct8x8_64_add = vp9_idct8x8_64_add_c; diff --git a/source/config/mac/ia32/vpx_config.asm b/source/config/mac/ia32/vpx_config.asm index 3903e04..5884bb1 100644 --- a/source/config/mac/ia32/vpx_config.asm +++ b/source/config/mac/ia32/vpx_config.asm @@ -7,6 +7,7 @@ ARCH_PPC64 equ 0 HAVE_EDSP equ 0 HAVE_MEDIA equ 0 HAVE_NEON equ 0 +HAVE_NEON_ASM equ 0 HAVE_MIPS32 equ 0 HAVE_DSPR2 equ 0 HAVE_MMX equ 1 diff --git a/source/config/mac/ia32/vpx_config.h b/source/config/mac/ia32/vpx_config.h index d38d8ca..c180d92 100644 --- a/source/config/mac/ia32/vpx_config.h +++ b/source/config/mac/ia32/vpx_config.h @@ -19,6 +19,7 @@ #define HAVE_EDSP 0 #define HAVE_MEDIA 0 #define HAVE_NEON 0 +#define HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 #define HAVE_MMX 1 diff --git a/source/config/mac/x64/vp8_rtcd.h b/source/config/mac/x64/vp8_rtcd.h index 9653130..d2ebae0 100644 --- a/source/config/mac/x64/vp8_rtcd.h +++ b/source/config/mac/x64/vp8_rtcd.h @@ -96,8 +96,8 @@ void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, u void vp8_dc_only_idct_add_mmx(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); #define vp8_dc_only_idct_add vp8_dc_only_idct_add_mmx -int vp8_denoiser_filter_c(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset); -int vp8_denoiser_filter_sse2(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset); +int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, 
unsigned int motion_magnitude, int increase_denoising); +int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising); #define vp8_denoiser_filter vp8_denoiser_filter_sse2 void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride); diff --git a/source/config/mac/x64/vp9_rtcd.h b/source/config/mac/x64/vp9_rtcd.h index b7056fa..beb342b 100644 --- a/source/config/mac/x64/vp9_rtcd.h +++ b/source/config/mac/x64/vp9_rtcd.h @@ -264,7 +264,8 @@ int vp9_full_search_sadx8(const struct macroblock *x, const struct mv *ref_mv, i RTCD_EXTERN int (*vp9_full_search_sad)(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv); void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride); -#define vp9_fwht4x4 vp9_fwht4x4_c +void vp9_fwht4x4_mmx(const int16_t *input, int16_t *output, int stride); +#define vp9_fwht4x4 vp9_fwht4x4_mmx unsigned int vp9_get_mb_ss_c(const int16_t *); unsigned int vp9_get_mb_ss_mmx(const int16_t *); @@ -319,9 +320,10 @@ void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride); void vp9_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride); #define vp9_idct4x4_1_add vp9_idct4x4_1_add_sse2 -void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int dest_stride); -void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride); -#define vp9_idct8x8_10_add vp9_idct8x8_10_add_sse2 +void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int dest_stride); +void vp9_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride); +void vp9_idct8x8_12_add_ssse3(const int16_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vp9_idct8x8_12_add)(const int16_t 
*input, uint8_t *dest, int dest_stride); void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride); void vp9_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride); @@ -930,6 +932,8 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_ssse3; vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_h_predictor_8x8 = vp9_h_predictor_8x8_ssse3; + vp9_idct8x8_12_add = vp9_idct8x8_12_add_sse2; + if (flags & HAS_SSSE3) vp9_idct8x8_12_add = vp9_idct8x8_12_add_ssse3; vp9_idct8x8_64_add = vp9_idct8x8_64_add_sse2; if (flags & HAS_SSSE3) vp9_idct8x8_64_add = vp9_idct8x8_64_add_ssse3; vp9_quantize_b = vp9_quantize_b_c; diff --git a/source/config/mac/x64/vpx_config.asm b/source/config/mac/x64/vpx_config.asm index 37939c6..9ebc29f 100644 --- a/source/config/mac/x64/vpx_config.asm +++ b/source/config/mac/x64/vpx_config.asm @@ -7,6 +7,7 @@ ARCH_PPC64 equ 0 HAVE_EDSP equ 0 HAVE_MEDIA equ 0 HAVE_NEON equ 0 +HAVE_NEON_ASM equ 0 HAVE_MIPS32 equ 0 HAVE_DSPR2 equ 0 HAVE_MMX equ 1 diff --git a/source/config/mac/x64/vpx_config.h b/source/config/mac/x64/vpx_config.h index be57865..169de86 100644 --- a/source/config/mac/x64/vpx_config.h +++ b/source/config/mac/x64/vpx_config.h @@ -19,6 +19,7 @@ #define HAVE_EDSP 0 #define HAVE_MEDIA 0 #define HAVE_NEON 0 +#define HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 #define HAVE_MMX 1 diff --git a/source/config/nacl/vp8_rtcd.h b/source/config/nacl/vp8_rtcd.h index d6de728..ef9fa5f 100644 --- a/source/config/nacl/vp8_rtcd.h +++ b/source/config/nacl/vp8_rtcd.h @@ -71,7 +71,7 @@ void vp8_copy_mem8x8_c(unsigned char *src, int src_pitch, unsigned char *dst, in void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); #define vp8_dc_only_idct_add vp8_dc_only_idct_add_c -int vp8_denoiser_filter_c(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, 
struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset); +int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising); #define vp8_denoiser_filter vp8_denoiser_filter_c void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride); diff --git a/source/config/nacl/vp9_rtcd.h b/source/config/nacl/vp9_rtcd.h index c42a60b..fcb14ab 100644 --- a/source/config/nacl/vp9_rtcd.h +++ b/source/config/nacl/vp9_rtcd.h @@ -259,8 +259,8 @@ void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int dest_stride); void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride); #define vp9_idct4x4_1_add vp9_idct4x4_1_add_c -void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int dest_stride); -#define vp9_idct8x8_10_add vp9_idct8x8_10_add_c +void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int dest_stride); +#define vp9_idct8x8_12_add vp9_idct8x8_12_add_c void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride); #define vp9_idct8x8_1_add vp9_idct8x8_1_add_c diff --git a/source/config/nacl/vpx_config.asm b/source/config/nacl/vpx_config.asm index 4e01f9c..b94e4f0 100644 --- a/source/config/nacl/vpx_config.asm +++ b/source/config/nacl/vpx_config.asm @@ -10,6 +10,7 @@ .equ HAVE_EDSP , 0 .equ HAVE_MEDIA , 0 .equ HAVE_NEON , 0 +.equ HAVE_NEON_ASM , 0 .equ HAVE_MIPS32 , 0 .equ HAVE_DSPR2 , 0 .equ HAVE_MMX , 0 diff --git a/source/config/nacl/vpx_config.h b/source/config/nacl/vpx_config.h index a0ffc4e..122c0e7 100644 --- a/source/config/nacl/vpx_config.h +++ b/source/config/nacl/vpx_config.h @@ -19,6 +19,7 @@ #define HAVE_EDSP 0 #define HAVE_MEDIA 0 #define HAVE_NEON 0 +#define HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 #define HAVE_MMX 0 diff --git a/source/config/win/ia32/vp8_rtcd.h 
b/source/config/win/ia32/vp8_rtcd.h index 7e90462..fc0f7a2 100644 --- a/source/config/win/ia32/vp8_rtcd.h +++ b/source/config/win/ia32/vp8_rtcd.h @@ -96,9 +96,9 @@ void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, u void vp8_dc_only_idct_add_mmx(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); RTCD_EXTERN void (*vp8_dc_only_idct_add)(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); -int vp8_denoiser_filter_c(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset); -int vp8_denoiser_filter_sse2(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset); -RTCD_EXTERN int (*vp8_denoiser_filter)(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset); +int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising); +int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising); +RTCD_EXTERN int (*vp8_denoiser_filter)(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising); void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride); void vp8_dequant_idct_add_mmx(short *input, short *dq, unsigned char *output, int stride); diff --git 
a/source/config/win/ia32/vp9_rtcd.h b/source/config/win/ia32/vp9_rtcd.h index 40965d0..9482f75 100644 --- a/source/config/win/ia32/vp9_rtcd.h +++ b/source/config/win/ia32/vp9_rtcd.h @@ -263,7 +263,8 @@ int vp9_full_search_sadx8(const struct macroblock *x, const struct mv *ref_mv, i RTCD_EXTERN int (*vp9_full_search_sad)(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv); void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride); -#define vp9_fwht4x4 vp9_fwht4x4_c +void vp9_fwht4x4_mmx(const int16_t *input, int16_t *output, int stride); +RTCD_EXTERN void (*vp9_fwht4x4)(const int16_t *input, int16_t *output, int stride); unsigned int vp9_get_mb_ss_c(const int16_t *); unsigned int vp9_get_mb_ss_mmx(const int16_t *); @@ -318,9 +319,9 @@ void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride); void vp9_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride); RTCD_EXTERN void (*vp9_idct4x4_1_add)(const int16_t *input, uint8_t *dest, int dest_stride); -void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int dest_stride); -void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vp9_idct8x8_10_add)(const int16_t *input, uint8_t *dest, int dest_stride); +void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int dest_stride); +void vp9_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vp9_idct8x8_12_add)(const int16_t *input, uint8_t *dest, int dest_stride); void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride); void vp9_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride); @@ -951,6 +952,8 @@ static void setup_rtcd_internal(void) vp9_full_search_sad = vp9_full_search_sad_c; if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3; if 
(flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8; + vp9_fwht4x4 = vp9_fwht4x4_c; + if (flags & HAS_MMX) vp9_fwht4x4 = vp9_fwht4x4_mmx; vp9_get_mb_ss = vp9_get_mb_ss_c; if (flags & HAS_MMX) vp9_get_mb_ss = vp9_get_mb_ss_mmx; if (flags & HAS_SSE2) vp9_get_mb_ss = vp9_get_mb_ss_sse2; @@ -978,8 +981,8 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vp9_idct4x4_16_add = vp9_idct4x4_16_add_sse2; vp9_idct4x4_1_add = vp9_idct4x4_1_add_c; if (flags & HAS_SSE2) vp9_idct4x4_1_add = vp9_idct4x4_1_add_sse2; - vp9_idct8x8_10_add = vp9_idct8x8_10_add_c; - if (flags & HAS_SSE2) vp9_idct8x8_10_add = vp9_idct8x8_10_add_sse2; + vp9_idct8x8_12_add = vp9_idct8x8_12_add_c; + if (flags & HAS_SSE2) vp9_idct8x8_12_add = vp9_idct8x8_12_add_sse2; vp9_idct8x8_1_add = vp9_idct8x8_1_add_c; if (flags & HAS_SSE2) vp9_idct8x8_1_add = vp9_idct8x8_1_add_sse2; vp9_idct8x8_64_add = vp9_idct8x8_64_add_c; diff --git a/source/config/win/ia32/vpx_config.asm b/source/config/win/ia32/vpx_config.asm index d5677dd..3b1a8de 100644 --- a/source/config/win/ia32/vpx_config.asm +++ b/source/config/win/ia32/vpx_config.asm @@ -7,6 +7,7 @@ ARCH_PPC64 equ 0 HAVE_EDSP equ 0 HAVE_MEDIA equ 0 HAVE_NEON equ 0 +HAVE_NEON_ASM equ 0 HAVE_MIPS32 equ 0 HAVE_DSPR2 equ 0 HAVE_MMX equ 1 diff --git a/source/config/win/ia32/vpx_config.h b/source/config/win/ia32/vpx_config.h index fb663d0..255ce65 100644 --- a/source/config/win/ia32/vpx_config.h +++ b/source/config/win/ia32/vpx_config.h @@ -19,6 +19,7 @@ #define HAVE_EDSP 0 #define HAVE_MEDIA 0 #define HAVE_NEON 0 +#define HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 #define HAVE_MMX 1 diff --git a/source/config/win/x64/vp8_rtcd.h b/source/config/win/x64/vp8_rtcd.h index 9653130..d2ebae0 100644 --- a/source/config/win/x64/vp8_rtcd.h +++ b/source/config/win/x64/vp8_rtcd.h @@ -96,8 +96,8 @@ void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, u void vp8_dc_only_idct_add_mmx(short input, unsigned char *pred, int 
pred_stride, unsigned char *dst, int dst_stride); #define vp8_dc_only_idct_add vp8_dc_only_idct_add_mmx -int vp8_denoiser_filter_c(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset); -int vp8_denoiser_filter_sse2(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset); +int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising); +int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising); #define vp8_denoiser_filter vp8_denoiser_filter_sse2 void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride); diff --git a/source/config/win/x64/vp9_rtcd.h b/source/config/win/x64/vp9_rtcd.h index b7056fa..beb342b 100644 --- a/source/config/win/x64/vp9_rtcd.h +++ b/source/config/win/x64/vp9_rtcd.h @@ -264,7 +264,8 @@ int vp9_full_search_sadx8(const struct macroblock *x, const struct mv *ref_mv, i RTCD_EXTERN int (*vp9_full_search_sad)(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv); void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride); -#define vp9_fwht4x4 vp9_fwht4x4_c +void vp9_fwht4x4_mmx(const int16_t *input, int16_t *output, int stride); +#define vp9_fwht4x4 vp9_fwht4x4_mmx unsigned int vp9_get_mb_ss_c(const int16_t *); unsigned int vp9_get_mb_ss_mmx(const int16_t *); @@ -319,9 +320,10 @@ void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int 
dest_stride); void vp9_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride); #define vp9_idct4x4_1_add vp9_idct4x4_1_add_sse2 -void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int dest_stride); -void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride); -#define vp9_idct8x8_10_add vp9_idct8x8_10_add_sse2 +void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int dest_stride); +void vp9_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride); +void vp9_idct8x8_12_add_ssse3(const int16_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vp9_idct8x8_12_add)(const int16_t *input, uint8_t *dest, int dest_stride); void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride); void vp9_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride); @@ -930,6 +932,8 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_ssse3; vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_h_predictor_8x8 = vp9_h_predictor_8x8_ssse3; + vp9_idct8x8_12_add = vp9_idct8x8_12_add_sse2; + if (flags & HAS_SSSE3) vp9_idct8x8_12_add = vp9_idct8x8_12_add_ssse3; vp9_idct8x8_64_add = vp9_idct8x8_64_add_sse2; if (flags & HAS_SSSE3) vp9_idct8x8_64_add = vp9_idct8x8_64_add_ssse3; vp9_quantize_b = vp9_quantize_b_c; diff --git a/source/config/win/x64/vpx_config.asm b/source/config/win/x64/vpx_config.asm index 6617125..b9288b7 100644 --- a/source/config/win/x64/vpx_config.asm +++ b/source/config/win/x64/vpx_config.asm @@ -7,6 +7,7 @@ ARCH_PPC64 equ 0 HAVE_EDSP equ 0 HAVE_MEDIA equ 0 HAVE_NEON equ 0 +HAVE_NEON_ASM equ 0 HAVE_MIPS32 equ 0 HAVE_DSPR2 equ 0 HAVE_MMX equ 1 diff --git a/source/config/win/x64/vpx_config.h b/source/config/win/x64/vpx_config.h index 4de3b21..5181177 100644 --- a/source/config/win/x64/vpx_config.h +++ b/source/config/win/x64/vpx_config.h @@ -19,6 +19,7 @@ #define HAVE_EDSP 0 #define 
HAVE_MEDIA 0 #define HAVE_NEON 0 +#define HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 #define HAVE_MMX 1 diff --git a/source/libvpx/README b/source/libvpx/README index ce9c1c6..6d7d5ec 100644 --- a/source/libvpx/README +++ b/source/libvpx/README @@ -12,22 +12,20 @@ COMPILING THE APPLICATIONS/LIBRARIES: * All x86 targets require the Yasm[1] assembler be installed. * All Windows builds require that Cygwin[2] be installed. - * Building the documentation requires PHP[3] and Doxygen[4]. If you do not - have these packages, you must pass --disable-install-docs to the - configure script. - * Downloading the data for the unit tests requires curl[5] and sha1sum. + * Building the documentation requires Doxygen[3]. If you do not + have this package, the install-docs option will be disabled. + * Downloading the data for the unit tests requires curl[4] and sha1sum. sha1sum is provided via the GNU coreutils, installed by default on many *nix platforms, as well as MinGW and Cygwin. If coreutils is not available, a compatible version of sha1sum can be built from - source[6]. These requirements are optional if not running the unit + source[5]. These requirements are optional if not running the unit tests. [1]: http://www.tortall.net/projects/yasm [2]: http://www.cygwin.com - [3]: http://php.net - [4]: http://www.doxygen.org - [5]: http://curl.haxx.se - [6]: http://www.microbrew.org/tools/md5sha1sum/ + [3]: http://www.doxygen.org + [4]: http://curl.haxx.se + [5]: http://www.microbrew.org/tools/md5sha1sum/ 2. Out-of-tree builds Out of tree builds are a supported method of building the application. For diff --git a/source/libvpx/build/make/Android.mk b/source/libvpx/build/make/Android.mk index 369c2a5..816334e 100644 --- a/source/libvpx/build/make/Android.mk +++ b/source/libvpx/build/make/Android.mk @@ -38,8 +38,9 @@ # For this we import the 'cpufeatures' module from the NDK sources. # libvpx can also be configured without this runtime detection method. 
# Configuring with --disable-runtime-cpu-detect will assume presence of NEON. -# Configuring with --disable-runtime-cpu-detect --disable-neon will remove any -# NEON dependency. +# Configuring with --disable-runtime-cpu-detect --disable-neon \ +# --disable-neon-asm +# will remove any NEON dependency. # To change to building armeabi, run ./libvpx/configure again, but with # --target=arm5te-android-gcc and modify the Application.mk file to @@ -61,6 +62,9 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) else ifeq ($(TARGET_ARCH_ABI),armeabi) include $(CONFIG_DIR)libs-armv5te-android-gcc.mk LOCAL_ARM_MODE := arm +else ifeq ($(TARGET_ARCH_ABI),arm64-v8a) + include $(CONFIG_DIR)libs-armv8-android-gcc.mk + LOCAL_ARM_MODE := arm else ifeq ($(TARGET_ARCH_ABI),x86) include $(CONFIG_DIR)libs-x86-android-gcc.mk else ifeq ($(TARGET_ARCH_ABI),mips) @@ -126,7 +130,7 @@ endef ifeq ($(CONFIG_VP8_ENCODER), yes) ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/vp8_asm_enc_offsets.asm endif -ifeq ($(HAVE_NEON), yes) +ifeq ($(HAVE_NEON_ASM), yes) ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/vpx_scale_asm_offsets.asm endif @@ -153,7 +157,11 @@ LOCAL_NEON_SRCS_C = $(filter %_neon.c, $(CODEC_SRCS_C)) LOCAL_CODEC_SRCS_C = $(filter-out vpx_config.c %_neon.c, $(CODEC_SRCS_C)) LOCAL_SRC_FILES += $(foreach file, $(LOCAL_CODEC_SRCS_C), libvpx/$(file)) -LOCAL_SRC_FILES += $(foreach file, $(LOCAL_NEON_SRCS_C), libvpx/$(file).neon) +ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) + LOCAL_SRC_FILES += $(foreach file, $(LOCAL_NEON_SRCS_C), libvpx/$(file).neon) +else # If there are neon sources then we are building for arm64 and do not need to specify .neon + LOCAL_SRC_FILES += $(foreach file, $(LOCAL_NEON_SRCS_C), libvpx/$(file)) +endif # Pull out assembly files, splitting NEON from the rest. This is # done to specify that the NEON assembly files use NEON assembler flags. 
diff --git a/source/libvpx/build/make/configure.sh b/source/libvpx/build/make/configure.sh index 4c3b05f..c07b049 100755 --- a/source/libvpx/build/make/configure.sh +++ b/source/libvpx/build/make/configure.sh @@ -518,7 +518,7 @@ process_common_cmdline() { --enable-?*|--disable-?*) eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'` if echo "${ARCH_EXT_LIST}" | grep "^ *$option\$" >/dev/null; then - [ $action = "disable" ] && RTCD_OPTIONS="${RTCD_OPTIONS}${opt} " + [ $action = "disable" ] && RTCD_OPTIONS="${RTCD_OPTIONS}--disable-${option} " elif [ $action = "disable" ] && ! disabled $option ; then echo "${CMDLINE_SELECT}" | grep "^ *$option\$" >/dev/null || die_unknown $opt @@ -792,8 +792,12 @@ process_common_toolchain() { arm*) # on arm, isa versions are supersets case ${tgt_isa} in + armv8) + soft_enable neon + ;; armv7) soft_enable neon + soft_enable neon_asm soft_enable media soft_enable edsp soft_enable fast_unaligned @@ -831,7 +835,7 @@ EOF check_add_cflags -march=armv7-a -mfloat-abi=${float_abi} check_add_asflags -march=armv7-a -mfloat-abi=${float_abi} - if enabled neon + if enabled neon || enabled neon_asm then check_add_cflags -mfpu=neon #-ftree-vectorize check_add_asflags -mfpu=neon @@ -878,7 +882,7 @@ EOF tune_asflags="--cpu=" if [ -z "${tune_cpu}" ]; then if [ ${tgt_isa} = "armv7" ]; then - if enabled neon + if enabled neon || enabled neon_asm then check_add_cflags --fpu=softvfp+vfpv3 check_add_asflags --fpu=softvfp+vfpv3 diff --git a/source/libvpx/build/make/rtcd.pl b/source/libvpx/build/make/rtcd.pl index 18ee80d..f5f59b1 100755 --- a/source/libvpx/build/make/rtcd.pl +++ b/source/libvpx/build/make/rtcd.pl @@ -272,6 +272,9 @@ sub arm() { # Assign the helper variable for each enabled extension foreach my $opt (@ALL_ARCHS) { my $opt_uc = uc $opt; + # Enable neon assembly based on HAVE_NEON logic instead of adding new + # HAVE_NEON_ASM logic + if ($opt eq 'neon_asm') { $opt_uc = 'NEON' } eval "\$have_${opt}=\"flags & HAS_${opt_uc}\""; } @@ 
-381,7 +384,10 @@ if ($opts{arch} eq 'x86') { @ALL_ARCHS = filter(qw/edsp media/); arm; } elsif ($opts{arch} eq 'armv7') { - @ALL_ARCHS = filter(qw/edsp media neon/); + @ALL_ARCHS = filter(qw/edsp media neon_asm neon/); + arm; +} elsif ($opts{arch} eq 'armv8') { + @ALL_ARCHS = filter(qw/neon/); arm; } else { unoptimized; diff --git a/source/libvpx/configure b/source/libvpx/configure index 690ac48..bd95056 100755 --- a/source/libvpx/configure +++ b/source/libvpx/configure @@ -189,7 +189,7 @@ fi # install everything except the sources, by default. sources will have # to be enabled when doing dist builds, since that's no longer a common # case. -enabled doxygen && php -v >/dev/null 2>&1 && enable_feature install_docs +enabled doxygen && enable_feature install_docs enable_feature install_bins enable_feature install_libs @@ -239,6 +239,7 @@ ARCH_EXT_LIST=" edsp media neon + neon_asm mips32 dspr2 diff --git a/source/libvpx/docs.mk b/source/libvpx/docs.mk index 797b466..889d182 100644 --- a/source/libvpx/docs.mk +++ b/source/libvpx/docs.mk @@ -23,12 +23,6 @@ CODEC_DOX := mainpage.dox \ # Other doxy files sourced in Markdown TXT_DOX = $(call enabled,TXT_DOX) -%.dox: %.txt - @echo " [DOXY] $@" - @$(SRC_PATH_BARE)/examples/gen_example_doxy.php \ - $(@:.dox=) "$($@.DESC)" > $@ < $< - - EXAMPLE_PATH += $(SRC_PATH_BARE) #for CHANGELOG, README, etc EXAMPLE_PATH += $(SRC_PATH_BARE)/examples diff --git a/source/libvpx/examples.mk b/source/libvpx/examples.mk index f6e7c00..28ab33a 100644 --- a/source/libvpx/examples.mk +++ b/source/libvpx/examples.mk @@ -67,32 +67,27 @@ ifeq ($(CONFIG_WEBM_IO),yes) endif vpxenc.GUID = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1 vpxenc.DESCRIPTION = Full featured encoder -EXAMPLES-$(CONFIG_VP9_ENCODER) += vp9_spatial_scalable_encoder.c -vp9_spatial_scalable_encoder.SRCS += args.c args.h -vp9_spatial_scalable_encoder.SRCS += ivfenc.c ivfenc.h -vp9_spatial_scalable_encoder.SRCS += tools_common.c tools_common.h -vp9_spatial_scalable_encoder.SRCS += 
video_common.h -vp9_spatial_scalable_encoder.SRCS += video_writer.h video_writer.c -vp9_spatial_scalable_encoder.SRCS += vpxstats.c vpxstats.h -vp9_spatial_scalable_encoder.GUID = 4A38598D-627D-4505-9C7B-D4020C84100D -vp9_spatial_scalable_encoder.DESCRIPTION = Spatial Scalable Encoder +EXAMPLES-$(CONFIG_VP9_ENCODER) += vp9_spatial_svc_encoder.c +vp9_spatial_svc_encoder.SRCS += args.c args.h +vp9_spatial_svc_encoder.SRCS += ivfenc.c ivfenc.h +vp9_spatial_svc_encoder.SRCS += tools_common.c tools_common.h +vp9_spatial_svc_encoder.SRCS += video_common.h +vp9_spatial_svc_encoder.SRCS += video_writer.h video_writer.c +vp9_spatial_svc_encoder.SRCS += vpxstats.c vpxstats.h +vp9_spatial_svc_encoder.GUID = 4A38598D-627D-4505-9C7B-D4020C84100D +vp9_spatial_svc_encoder.DESCRIPTION = VP9 Spatial SVC Encoder ifneq ($(CONFIG_SHARED),yes) EXAMPLES-$(CONFIG_VP9_ENCODER) += resize_util.c endif -# XMA example disabled for now, not used in VP8 -#UTILS-$(CONFIG_DECODERS) += example_xma.c -#example_xma.GUID = A955FC4A-73F1-44F7-135E-30D84D32F022 -#example_xma.DESCRIPTION = External Memory Allocation mode usage - -EXAMPLES-$(CONFIG_ENCODERS) += vpx_temporal_scalable_patterns.c -vpx_temporal_scalable_patterns.SRCS += ivfenc.c ivfenc.h -vpx_temporal_scalable_patterns.SRCS += tools_common.c tools_common.h -vpx_temporal_scalable_patterns.SRCS += video_common.h -vpx_temporal_scalable_patterns.SRCS += video_writer.h video_writer.c -vpx_temporal_scalable_patterns.GUID = B18C08F2-A439-4502-A78E-849BE3D60947 -vpx_temporal_scalable_patterns.DESCRIPTION = Temporal Scalability Encoder +EXAMPLES-$(CONFIG_ENCODERS) += vpx_temporal_svc_encoder.c +vpx_temporal_svc_encoder.SRCS += ivfenc.c ivfenc.h +vpx_temporal_svc_encoder.SRCS += tools_common.c tools_common.h +vpx_temporal_svc_encoder.SRCS += video_common.h +vpx_temporal_svc_encoder.SRCS += video_writer.h video_writer.c +vpx_temporal_svc_encoder.GUID = B18C08F2-A439-4502-A78E-849BE3D60947 +vpx_temporal_svc_encoder.DESCRIPTION = Temporal SVC Encoder 
EXAMPLES-$(CONFIG_VP8_DECODER) += simple_decoder.c simple_decoder.GUID = D3BBF1E9-2427-450D-BBFF-B2843C1D44CC simple_decoder.SRCS += ivfdec.h ivfdec.c @@ -146,11 +141,6 @@ decode_with_drops.SRCS += vpx_ports/mem_ops_aligned.h endif decode_with_drops.GUID = CE5C53C4-8DDA-438A-86ED-0DDD3CDB8D26 decode_with_drops.DESCRIPTION = Drops frames while decoding -ifeq ($(CONFIG_VP8_DECODER),yes) -EXAMPLES-$(CONFIG_ERROR_CONCEALMENT) += decode_with_partial_drops.c -endif -decode_with_partial_drops.GUID = 61C2D026-5754-46AC-916F-1343ECC5537E -decode_with_partial_drops.DESCRIPTION = Drops parts of frames while decoding EXAMPLES-$(CONFIG_ENCODERS) += set_maps.c set_maps.SRCS += ivfenc.h ivfenc.c set_maps.SRCS += tools_common.h tools_common.c diff --git a/source/libvpx/examples/decode_with_partial_drops.c b/source/libvpx/examples/decode_with_partial_drops.c deleted file mode 100644 index d7132de..0000000 --- a/source/libvpx/examples/decode_with_partial_drops.c +++ /dev/null @@ -1,328 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -// Decode With Partial Drops Example -// ========================= -// -// This is an example utility which drops a series of frames (or parts of -// frames), as specified on the command line. This is useful for observing the -// error recovery features of the codec. -// -// Usage -// ----- -// This example adds a single argument to the `simple_decoder` example, -// which specifies the range or pattern of frames to drop. The parameter is -// parsed as follows. 
-// -// Dropping A Range Of Frames -// -------------------------- -// To drop a range of frames, specify the starting frame and the ending -// frame to drop, separated by a dash. The following command will drop -// frames 5 through 10 (base 1). -// -// $ ./decode_with_partial_drops in.ivf out.i420 5-10 -// -// -// Dropping A Pattern Of Frames -// ---------------------------- -// To drop a pattern of frames, specify the number of frames to drop and -// the number of frames after which to repeat the pattern, separated by -// a forward-slash. The following command will drop 3 of 7 frames. -// Specifically, it will decode 4 frames, then drop 3 frames, and then -// repeat. -// -// $ ./decode_with_partial_drops in.ivf out.i420 3/7 -// -// Dropping Random Parts Of Frames -// ------------------------------- -// A third argument tuple is available to split the frame into 1500 bytes pieces -// and randomly drop pieces rather than frames. The frame will be split at -// partition boundaries where possible. The following example will seed the RNG -// with the seed 123 and drop approximately 5% of the pieces. Pieces which -// are depending on an already dropped piece will also be dropped. -// -// $ ./decode_with_partial_drops in.ivf out.i420 5,123 -// -// Extra Variables -// --------------- -// This example maintains the pattern passed on the command line in the -// `n`, `m`, and `is_range` variables: -// -// Making The Drop Decision -// ------------------------ -// The example decides whether to drop the frame based on the current -// frame number, immediately before decoding the frame. 
- -#include <stdarg.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#define VPX_CODEC_DISABLE_COMPAT 1 -#include "./vpx_config.h" -#include "vpx/vp8dx.h" -#include "vpx/vpx_decoder.h" -#define interface (vpx_codec_vp8_dx()) -#include <time.h> - - -#define IVF_FILE_HDR_SZ (32) -#define IVF_FRAME_HDR_SZ (12) - -static unsigned int mem_get_le32(const unsigned char *mem) { - return (mem[3] << 24)|(mem[2] << 16)|(mem[1] << 8)|(mem[0]); -} - -static void die(const char *fmt, ...) { - va_list ap; - - va_start(ap, fmt); - vprintf(fmt, ap); - if(fmt[strlen(fmt)-1] != '\n') - printf("\n"); - exit(EXIT_FAILURE); -} - -static void die_codec(vpx_codec_ctx_t *ctx, const char *s) { - const char *detail = vpx_codec_error_detail(ctx); - - printf("%s: %s\n", s, vpx_codec_error(ctx)); - if(detail) - printf(" %s\n",detail); - exit(EXIT_FAILURE); -} - -struct parsed_header -{ - char key_frame; - int version; - char show_frame; - int first_part_size; -}; - -int next_packet(struct parsed_header* hdr, int pos, int length, int mtu) -{ - int size = 0; - int remaining = length - pos; - /* Uncompressed part is 3 bytes for P frames and 10 bytes for I frames */ - int uncomp_part_size = (hdr->key_frame ? 
10 : 3); - /* number of bytes yet to send from header and the first partition */ - int remainFirst = uncomp_part_size + hdr->first_part_size - pos; - if (remainFirst > 0) - { - if (remainFirst <= mtu) - { - size = remainFirst; - } - else - { - size = mtu; - } - - return size; - } - - /* second partition; just slot it up according to MTU */ - if (remaining <= mtu) - { - size = remaining; - return size; - } - return mtu; -} - -void throw_packets(unsigned char* frame, int* size, int loss_rate, - int* thrown, int* kept) -{ - unsigned char loss_frame[256*1024]; - int pkg_size = 1; - int pos = 0; - int loss_pos = 0; - struct parsed_header hdr; - unsigned int tmp; - int mtu = 1500; - - if (*size < 3) - { - return; - } - putc('|', stdout); - /* parse uncompressed 3 bytes */ - tmp = (frame[2] << 16) | (frame[1] << 8) | frame[0]; - hdr.key_frame = !(tmp & 0x1); /* inverse logic */ - hdr.version = (tmp >> 1) & 0x7; - hdr.show_frame = (tmp >> 4) & 0x1; - hdr.first_part_size = (tmp >> 5) & 0x7FFFF; - - /* don't drop key frames */ - if (hdr.key_frame) - { - int i; - *kept = *size/mtu + ((*size % mtu > 0) ? 
1 : 0); /* approximate */ - for (i=0; i < *kept; i++) - putc('.', stdout); - return; - } - - while ((pkg_size = next_packet(&hdr, pos, *size, mtu)) > 0) - { - int loss_event = ((rand() + 1.0)/(RAND_MAX + 1.0) < loss_rate/100.0); - if (*thrown == 0 && !loss_event) - { - memcpy(loss_frame + loss_pos, frame + pos, pkg_size); - loss_pos += pkg_size; - (*kept)++; - putc('.', stdout); - } - else - { - (*thrown)++; - putc('X', stdout); - } - pos += pkg_size; - } - memcpy(frame, loss_frame, loss_pos); - memset(frame + loss_pos, 0, *size - loss_pos); - *size = loss_pos; -} - -int main(int argc, char **argv) { - FILE *infile, *outfile; - vpx_codec_ctx_t codec; - int flags = 0, frame_cnt = 0; - unsigned char file_hdr[IVF_FILE_HDR_SZ]; - unsigned char frame_hdr[IVF_FRAME_HDR_SZ]; - unsigned char frame[256*1024]; - vpx_codec_err_t res; - int n, m, mode; - unsigned int seed; - int thrown=0, kept=0; - int thrown_frame=0, kept_frame=0; - vpx_codec_dec_cfg_t dec_cfg = {0}; - - (void)res; - /* Open files */ - if(argc < 4 || argc > 6) - die("Usage: %s <infile> <outfile> [-t <num threads>] <N-M|N/M|L,S>\n", - argv[0]); - { - char *nptr; - int arg_num = 3; - if (argc == 6 && strncmp(argv[arg_num++], "-t", 2) == 0) - dec_cfg.threads = strtol(argv[arg_num++], NULL, 0); - n = strtol(argv[arg_num], &nptr, 0); - mode = (*nptr == '\0' || *nptr == ',') ? 2 : (*nptr == '-') ? 1 : 0; - - m = strtol(nptr+1, NULL, 0); - if((!n && !m) || (*nptr != '-' && *nptr != '/' && - *nptr != '\0' && *nptr != ',')) - die("Couldn't parse pattern %s\n", argv[3]); - } - seed = (m > 0) ? 
m : (unsigned int)time(NULL); - srand(seed);thrown_frame = 0; - printf("Seed: %u\n", seed); - printf("Threads: %d\n", dec_cfg.threads); - if(!(infile = fopen(argv[1], "rb"))) - die("Failed to open %s for reading", argv[1]); - if(!(outfile = fopen(argv[2], "wb"))) - die("Failed to open %s for writing", argv[2]); - - /* Read file header */ - if(!(fread(file_hdr, 1, IVF_FILE_HDR_SZ, infile) == IVF_FILE_HDR_SZ - && file_hdr[0]=='D' && file_hdr[1]=='K' && file_hdr[2]=='I' - && file_hdr[3]=='F')) - die("%s is not an IVF file.", argv[1]); - - printf("Using %s\n",vpx_codec_iface_name(interface)); - /* Initialize codec */ - flags = VPX_CODEC_USE_ERROR_CONCEALMENT; - res = vpx_codec_dec_init(&codec, interface, &dec_cfg, flags); - if(res) - die_codec(&codec, "Failed to initialize decoder"); - - - /* Read each frame */ - while(fread(frame_hdr, 1, IVF_FRAME_HDR_SZ, infile) == IVF_FRAME_HDR_SZ) { - int frame_sz = mem_get_le32(frame_hdr); - vpx_codec_iter_t iter = NULL; - vpx_image_t *img; - - - frame_cnt++; - if(frame_sz > sizeof(frame)) - die("Frame %d data too big for example code buffer", frame_sz); - if(fread(frame, 1, frame_sz, infile) != frame_sz) - die("Frame %d failed to read complete frame", frame_cnt); - - /* Decide whether to throw parts of the frame or the whole frame - depending on the drop mode */ - thrown_frame = 0; - kept_frame = 0; - switch (mode) - { - case 0: - if (m - (frame_cnt-1)%m <= n) - { - frame_sz = 0; - } - break; - case 1: - if (frame_cnt >= n && frame_cnt <= m) - { - frame_sz = 0; - } - break; - case 2: - throw_packets(frame, &frame_sz, n, &thrown_frame, &kept_frame); - break; - default: break; - } - if (mode < 2) - { - if (frame_sz == 0) - { - putc('X', stdout); - thrown_frame++; - } - else - { - putc('.', stdout); - kept_frame++; - } - } - thrown += thrown_frame; - kept += kept_frame; - fflush(stdout); - /* Decode the frame */ - if(vpx_codec_decode(&codec, frame, frame_sz, NULL, 0)) - die_codec(&codec, "Failed to decode frame"); - - /* Write 
decoded data to disk */ - while((img = vpx_codec_get_frame(&codec, &iter))) { - unsigned int plane, y; - - for(plane=0; plane < 3; plane++) { - unsigned char *buf =img->planes[plane]; - - for(y=0; y < (plane ? (img->d_h + 1) >> 1 : img->d_h); y++) { - (void) fwrite(buf, 1, (plane ? (img->d_w + 1) >> 1 : img->d_w), - outfile); - buf += img->stride[plane]; - } - } - } - } - printf("Processed %d frames.\n",frame_cnt); - if(vpx_codec_destroy(&codec)) - die_codec(&codec, "Failed to destroy codec"); - - fclose(outfile); - fclose(infile); - return EXIT_SUCCESS; -} diff --git a/source/libvpx/examples/example_xma.c b/source/libvpx/examples/example_xma.c deleted file mode 100644 index c960c28..0000000 --- a/source/libvpx/examples/example_xma.c +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -/* This is a simple program showing how to initialize the decoder in XMA mode */ -#include <stdio.h> -#include <stdlib.h> -#include <stdarg.h> -#include <string.h> -#define VPX_CODEC_DISABLE_COMPAT 1 -#include "vpx_config.h" -#include "vpx/vpx_decoder.h" -#include "vpx/vpx_integer.h" -#if CONFIG_VP9_DECODER -#include "vpx/vp8dx.h" -#endif - -static char *exec_name; -static int verbose = 0; - -static const struct { - const char *name; - vpx_codec_iface_t *iface; -} ifaces[] = { -#if CONFIG_VP9_DECODER - {"vp9", &vpx_codec_vp8_dx_algo}, -#endif -}; - -static void usage_exit(void) { - int i; - - printf("Usage: %s <options>\n\n" - "Options:\n" - "\t--codec <name>\tCodec to use (default=%s)\n" - "\t-h <height>\tHeight of the simulated video frame, in pixels\n" - "\t-w <width> \tWidth of the simulated video frame, in pixels\n" - "\t-v \tVerbose mode (show individual segment sizes)\n" - "\t--help \tShow this message\n" - "\n" - "Included decoders:\n" - "\n", - exec_name, - ifaces[0].name); - - for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++) - printf(" %-6s - %s\n", - ifaces[i].name, - vpx_codec_iface_name(ifaces[i].iface)); - - exit(EXIT_FAILURE); -} - -static void usage_error(const char *fmt, ...) 
{ - va_list ap; - va_start(ap, fmt); - vprintf(fmt, ap); - printf("\n"); - usage_exit(); -} - -void my_mem_dtor(vpx_codec_mmap_t *mmap) { - if (verbose) - printf("freeing segment %d\n", mmap->id); - - free(mmap->priv); -} - -int main(int argc, char **argv) { - vpx_codec_ctx_t decoder; - vpx_codec_iface_t *iface = ifaces[0].iface; - vpx_codec_iter_t iter; - vpx_codec_dec_cfg_t cfg; - vpx_codec_err_t res = VPX_CODEC_OK; - unsigned int alloc_sz = 0; - unsigned int w = 352; - unsigned int h = 288; - int i; - - exec_name = argv[0]; - - for (i = 1; i < argc; i++) { - if (!strcmp(argv[i], "--codec")) { - if (i + 1 < argc) { - int j, k = -1; - - i++; - - for (j = 0; j < sizeof(ifaces) / sizeof(ifaces[0]); j++) - if (!strcmp(ifaces[j].name, argv[i])) - k = j; - - if (k >= 0) - iface = ifaces[k].iface; - else - usage_error("Error: Unrecognized argument (%s) to --codec\n", - argv[i]); - } else - usage_error("Error: Option --codec requires argument.\n"); - } else if (!strcmp(argv[i], "-v")) - verbose = 1; - else if (!strcmp(argv[i], "-h")) - if (i + 1 < argc) { - h = atoi(argv[++i]); - } else - usage_error("Error: Option -h requires argument.\n"); - else if (!strcmp(argv[i], "-w")) - if (i + 1 < argc) { - w = atoi(argv[++i]); - } else - usage_error("Error: Option -w requires argument.\n"); - else if (!strcmp(argv[i], "--help")) - usage_exit(); - else - usage_error("Error: Unrecognized option %s\n\n", argv[i]); - } - - if (argc == 1) - printf("Using built-in defaults. For options, rerun with --help\n\n"); - - /* XMA mode is not supported on all decoders! */ - if (!(vpx_codec_get_caps(iface) & VPX_CODEC_CAP_XMA)) { - printf("%s does not support XMA mode!\n", vpx_codec_iface_name(iface)); - return EXIT_FAILURE; - } - - /* The codec knows how much memory to allocate based on the size of the - * encoded frames. This data can be parsed from the bitstream with - * vpx_codec_peek_stream_info() if a bitstream is available. 
Otherwise, - * a fixed size can be used that will be the upper limit on the frame - * size the decoder can decode. - */ - cfg.w = w; - cfg.h = h; - - /* Initialize the decoder in XMA mode. */ - if (vpx_codec_dec_init(&decoder, iface, &cfg, VPX_CODEC_USE_XMA)) { - printf("Failed to initialize decoder in XMA mode: %s\n", - vpx_codec_error(&decoder)); - return EXIT_FAILURE; - } - - /* Iterate through the list of memory maps, allocating them with the - * requested alignment. - */ - iter = NULL; - - do { - vpx_codec_mmap_t mmap; - unsigned int align; - - res = vpx_codec_get_mem_map(&decoder, &mmap, &iter); - align = mmap.align ? mmap.align - 1 : 0; - - if (!res) { - if (verbose) - printf("Allocating segment %u, size %lu, align %u %s\n", - mmap.id, mmap.sz, mmap.align, - mmap.flags & VPX_CODEC_MEM_ZERO ? "(ZEROED)" : ""); - - if (mmap.flags & VPX_CODEC_MEM_ZERO) - mmap.priv = calloc(1, mmap.sz + align); - else - mmap.priv = malloc(mmap.sz + align); - - mmap.base = (void *)((((uintptr_t)mmap.priv) + align) & - ~(uintptr_t)align); - mmap.dtor = my_mem_dtor; - alloc_sz += mmap.sz + align; - - if (vpx_codec_set_mem_map(&decoder, &mmap, 1)) { - printf("Failed to set mmap: %s\n", vpx_codec_error(&decoder)); - return EXIT_FAILURE; - } - } else if (res != VPX_CODEC_LIST_END) { - printf("Failed to get mmap: %s\n", vpx_codec_error(&decoder)); - return EXIT_FAILURE; - } - } while (res != VPX_CODEC_LIST_END); - - printf("%s\n %d bytes external memory required for %dx%d.\n", - decoder.name, alloc_sz, cfg.w, cfg.h); - vpx_codec_destroy(&decoder); - return EXIT_SUCCESS; - -} diff --git a/source/libvpx/examples/set_maps.c b/source/libvpx/examples/set_maps.c index 4343832..4ba38ee 100644 --- a/source/libvpx/examples/set_maps.c +++ b/source/libvpx/examples/set_maps.c @@ -64,7 +64,8 @@ void usage_exit() { static void set_roi_map(const vpx_codec_enc_cfg_t *cfg, vpx_codec_ctx_t *codec) { unsigned int i; - vpx_roi_map_t roi = {0}; + vpx_roi_map_t roi; + memset(&roi, 0, sizeof(roi)); roi.rows 
= (cfg->g_h + 15) / 16; roi.cols = (cfg->g_w + 15) / 16; @@ -97,7 +98,7 @@ static void set_roi_map(const vpx_codec_enc_cfg_t *cfg, static void set_active_map(const vpx_codec_enc_cfg_t *cfg, vpx_codec_ctx_t *codec) { unsigned int i; - vpx_active_map_t map = {0}; + vpx_active_map_t map = {0, 0, 0}; map.rows = (cfg->g_h + 15) / 16; map.cols = (cfg->g_w + 15) / 16; @@ -114,7 +115,7 @@ static void set_active_map(const vpx_codec_enc_cfg_t *cfg, static void unset_active_map(const vpx_codec_enc_cfg_t *cfg, vpx_codec_ctx_t *codec) { - vpx_active_map_t map = {0}; + vpx_active_map_t map = {0, 0, 0}; map.rows = (cfg->g_h + 15) / 16; map.cols = (cfg->g_w + 15) / 16; @@ -153,22 +154,23 @@ static void encode_frame(vpx_codec_ctx_t *codec, int main(int argc, char **argv) { FILE *infile = NULL; - vpx_codec_ctx_t codec = {0}; - vpx_codec_enc_cfg_t cfg = {0}; + vpx_codec_ctx_t codec; + vpx_codec_enc_cfg_t cfg; int frame_count = 0; - vpx_image_t raw = {0}; + vpx_image_t raw; vpx_codec_err_t res; - VpxVideoInfo info = {0}; + VpxVideoInfo info; VpxVideoWriter *writer = NULL; const VpxInterface *encoder = NULL; const int fps = 2; // TODO(dkovalev) add command line argument const double bits_per_pixel_per_frame = 0.067; exec_name = argv[0]; - if (argc != 6) die("Invalid number of arguments"); + memset(&info, 0, sizeof(info)); + encoder = get_vpx_encoder_by_name(argv[1]); if (!encoder) die("Unsupported codec."); diff --git a/source/libvpx/examples/vp9_spatial_scalable_encoder.c b/source/libvpx/examples/vp9_spatial_svc_encoder.c index 983f52d..983f52d 100644 --- a/source/libvpx/examples/vp9_spatial_scalable_encoder.c +++ b/source/libvpx/examples/vp9_spatial_svc_encoder.c diff --git a/source/libvpx/examples/vpx_temporal_scalable_patterns.c b/source/libvpx/examples/vpx_temporal_svc_encoder.c index 07dd318..e45b50c 100644 --- a/source/libvpx/examples/vpx_temporal_scalable_patterns.c +++ b/source/libvpx/examples/vpx_temporal_svc_encoder.c @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the 
root of the source tree. */ -// This is an example demonstrating how to implement a multi-layer VP9 +// This is an example demonstrating how to implement a multi-layer VPx // encoding scheme based on temporal scalability for video applications // that benefit from a scalable bitstream. diff --git a/source/libvpx/test/convolve_test.cc b/source/libvpx/test/convolve_test.cc index 37ee0ef..cbb4036 100644 --- a/source/libvpx/test/convolve_test.cc +++ b/source/libvpx/test/convolve_test.cc @@ -634,7 +634,7 @@ INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values( make_tuple(64, 64, &convolve8_ssse3))); #endif -#if HAVE_NEON +#if HAVE_NEON_ASM const ConvolveFunctions convolve8_neon( vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon, vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon, diff --git a/source/libvpx/test/dct16x16_test.cc b/source/libvpx/test/dct16x16_test.cc index cb5562e..143a267 100644 --- a/source/libvpx/test/dct16x16_test.cc +++ b/source/libvpx/test/dct16x16_test.cc @@ -512,7 +512,7 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2), make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3))); -#if HAVE_NEON +#if HAVE_NEON_ASM INSTANTIATE_TEST_CASE_P( NEON, Trans16x16DCT, ::testing::Values( diff --git a/source/libvpx/test/dct32x32_test.cc b/source/libvpx/test/dct32x32_test.cc index 013f451..72c0bd6 100644 --- a/source/libvpx/test/dct32x32_test.cc +++ b/source/libvpx/test/dct32x32_test.cc @@ -248,7 +248,7 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vp9_fdct32x32_c, &vp9_idct32x32_1024_add_c, 0), make_tuple(&vp9_fdct32x32_rd_c, &vp9_idct32x32_1024_add_c, 1))); -#if HAVE_NEON +#if HAVE_NEON_ASM INSTANTIATE_TEST_CASE_P( NEON, Trans32x32Test, ::testing::Values( diff --git a/source/libvpx/test/fdct4x4_test.cc b/source/libvpx/test/fdct4x4_test.cc index 02458db..030665e 100644 --- a/source/libvpx/test/fdct4x4_test.cc +++ b/source/libvpx/test/fdct4x4_test.cc @@ -338,7 +338,7 @@ INSTANTIATE_TEST_CASE_P( ::testing::Values( 
make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0))); -#if HAVE_NEON +#if HAVE_NEON_ASM INSTANTIATE_TEST_CASE_P( NEON, Trans4x4DCT, ::testing::Values( @@ -353,6 +353,13 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3))); #endif +#if CONFIG_USE_X86INC && HAVE_MMX +INSTANTIATE_TEST_CASE_P( + MMX, Trans4x4WHT, + ::testing::Values( + make_tuple(&vp9_fwht4x4_mmx, &vp9_iwht4x4_16_add_c, 0))); +#endif + #if HAVE_SSE2 INSTANTIATE_TEST_CASE_P( SSE2, Trans4x4DCT, diff --git a/source/libvpx/test/fdct8x8_test.cc b/source/libvpx/test/fdct8x8_test.cc index 6f2d7d1..c7cf164 100644 --- a/source/libvpx/test/fdct8x8_test.cc +++ b/source/libvpx/test/fdct8x8_test.cc @@ -313,7 +313,7 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2), make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3))); -#if HAVE_NEON +#if HAVE_NEON_ASM INSTANTIATE_TEST_CASE_P( NEON, FwdTrans8x8DCT, ::testing::Values( @@ -340,4 +340,11 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2), make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3))); #endif + +#if HAVE_SSSE3 && ARCH_X86_64 +INSTANTIATE_TEST_CASE_P( + SSSE3, FwdTrans8x8DCT, + ::testing::Values( + make_tuple(&vp9_fdct8x8_ssse3, &vp9_idct8x8_64_add_ssse3, 0))); +#endif } // namespace diff --git a/source/libvpx/test/partial_idct_test.cc b/source/libvpx/test/partial_idct_test.cc index 8849ce6..79ef521 100644 --- a/source/libvpx/test/partial_idct_test.cc +++ b/source/libvpx/test/partial_idct_test.cc @@ -132,15 +132,15 @@ INSTANTIATE_TEST_CASE_P( &vp9_idct16x16_1_add_c, TX_16X16, 1), make_tuple(&vp9_idct8x8_64_add_c, - &vp9_idct8x8_10_add_c, - TX_8X8, 10), + &vp9_idct8x8_12_add_c, + TX_8X8, 12), make_tuple(&vp9_idct8x8_64_add_c, &vp9_idct8x8_1_add_c, TX_8X8, 1), make_tuple(&vp9_idct4x4_16_add_c, &vp9_idct4x4_1_add_c, TX_4X4, 1))); -#if HAVE_NEON +#if HAVE_NEON_ASM INSTANTIATE_TEST_CASE_P( NEON, PartialIDctTest, ::testing::Values( @@ -154,8 +154,8 @@ 
INSTANTIATE_TEST_CASE_P( &vp9_idct16x16_1_add_neon, TX_16X16, 1), make_tuple(&vp9_idct8x8_64_add_c, - &vp9_idct8x8_10_add_neon, - TX_8X8, 10), + &vp9_idct8x8_12_add_neon, + TX_8X8, 12), make_tuple(&vp9_idct8x8_64_add_c, &vp9_idct8x8_1_add_neon, TX_8X8, 1), @@ -181,8 +181,8 @@ INSTANTIATE_TEST_CASE_P( &vp9_idct16x16_1_add_sse2, TX_16X16, 1), make_tuple(&vp9_idct8x8_64_add_c, - &vp9_idct8x8_10_add_sse2, - TX_8X8, 10), + &vp9_idct8x8_12_add_sse2, + TX_8X8, 12), make_tuple(&vp9_idct8x8_64_add_c, &vp9_idct8x8_1_add_sse2, TX_8X8, 1), @@ -190,4 +190,13 @@ INSTANTIATE_TEST_CASE_P( &vp9_idct4x4_1_add_sse2, TX_4X4, 1))); #endif + +#if HAVE_SSSE3 && ARCH_X86_64 +INSTANTIATE_TEST_CASE_P( + SSSE3, PartialIDctTest, + ::testing::Values( + make_tuple(&vp9_idct8x8_64_add_c, + &vp9_idct8x8_12_add_ssse3, + TX_8X8, 12))); +#endif } // namespace diff --git a/source/libvpx/test/register_state_check.h b/source/libvpx/test/register_state_check.h index 5987fe3..1ee149b 100644 --- a/source/libvpx/test/register_state_check.h +++ b/source/libvpx/test/register_state_check.h @@ -82,8 +82,8 @@ class RegisterStateCheck { } // namespace libvpx_test -#elif defined(CONFIG_SHARED) && defined(HAVE_NEON) && defined(CONFIG_VP9) \ - && !CONFIG_SHARED && HAVE_NEON && CONFIG_VP9 +#elif defined(CONFIG_SHARED) && defined(HAVE_NEON_ASM) && defined(CONFIG_VP9) \ + && !CONFIG_SHARED && HAVE_NEON_ASM && CONFIG_VP9 #include "vpx/vpx_integer.h" diff --git a/source/libvpx/test/resize_util.sh b/source/libvpx/test/resize_util.sh new file mode 100755 index 0000000..2a8e3fb --- /dev/null +++ b/source/libvpx/test/resize_util.sh @@ -0,0 +1,66 @@ +#!/bin/sh +## +## Copyright (c) 2014 The WebM project authors. All Rights Reserved. +## +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. 
All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. +## +## This file tests the libvpx resize_util example code. To add new tests to +## this file, do the following: +## 1. Write a shell function (this is your test). +## 2. Add the function to resize_util_tests (on a new line). +## +. $(dirname $0)/tools_common.sh + +# Environment check: $YUV_RAW_INPUT is required. +resize_util_verify_environment() { + if [ ! -e "${YUV_RAW_INPUT}" ]; then + echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH." + return 1 + fi +} + +# Resizes $YUV_RAW_INPUT using the resize_util example. $1 is the output +# dimensions that will be passed to resize_util. +resize_util() { + local resizer="${LIBVPX_BIN_PATH}/resize_util${VPX_TEST_EXE_SUFFIX}" + local output_file="${VPX_TEST_OUTPUT_DIR}/resize_util.raw" + local frames_to_resize="10" + local target_dimensions="$1" + + # resize_util is available only when CONFIG_SHARED is disabled. + if [ -z "$(vpx_config_option_enabled CONFIG_SHARED)" ]; then + [ -x "${resizer}" ] || return 1 + + eval "${resizer}" "${YUV_RAW_INPUT}" \ + "${YUV_RAW_INPUT_WIDTH}x${YUV_RAW_INPUT_HEIGHT}" \ + "${target_dimensions}" "${output_file}" ${frames_to_resize} \ + ${devnull} + + [ -e "${output_file}" ] || return 1 + fi +} + +# Halves each dimension of $YUV_RAW_INPUT using resize_util(). +resize_down() { + local target_width=$((${YUV_RAW_INPUT_WIDTH} / 2)) + local target_height=$((${YUV_RAW_INPUT_HEIGHT} / 2)) + + resize_util "${target_width}x${target_height}" +} + +# Doubles each dimension of $YUV_RAW_INPUT using resize_util(). 
+resize_up() { + local target_width=$((${YUV_RAW_INPUT_WIDTH} * 2)) + local target_height=$((${YUV_RAW_INPUT_HEIGHT} * 2)) + + resize_util "${target_width}x${target_height}" +} + +resize_util_tests="resize_down + resize_up" + +run_tests resize_util_verify_environment "${resize_util_tests}" diff --git a/source/libvpx/test/subtract_test.cc b/source/libvpx/test/subtract_test.cc index 3efb955..63e999d 100644 --- a/source/libvpx/test/subtract_test.cc +++ b/source/libvpx/test/subtract_test.cc @@ -105,7 +105,7 @@ TEST_P(SubtractBlockTest, SimpleSubtract) { INSTANTIATE_TEST_CASE_P(C, SubtractBlockTest, ::testing::Values(vp8_subtract_b_c)); -#if HAVE_NEON +#if HAVE_NEON_ASM INSTANTIATE_TEST_CASE_P(NEON, SubtractBlockTest, ::testing::Values(vp8_subtract_b_neon)); #endif diff --git a/source/libvpx/test/test-data.sha1 b/source/libvpx/test/test-data.sha1 index cf2ad1e..9c23929 100644 --- a/source/libvpx/test/test-data.sha1 +++ b/source/libvpx/test/test-data.sha1 @@ -635,3 +635,8 @@ be0fe64a1a4933696ff92d93f9bdecdbd886dc13 vp90-2-14-resize-fp-tiles-16-8.webm.md 1765315acccfe6cd12230e731369fcb15325ebfa vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm.md5 4a2b7a683576fe8e330c7d1c4f098ff4e70a43a8 vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm 1ef480392112b3509cb190afbb96f9a38dd9fbac vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm.md5 +e615575ded499ea1d992f3b38e3baa434509cdcd vp90-2-15-segkey.webm +e3ab35d4316c5e81325c50f5236ceca4bc0d35df vp90-2-15-segkey.webm.md5 +9b7ca2cac09d34c4a5d296c1900f93b1e2f69d0d vp90-2-15-segkey_adpq.webm +8f46ba5f785d0c2170591a153e0d0d146a7c8090 vp90-2-15-segkey_adpq.webm.md5 + diff --git a/source/libvpx/test/test.mk b/source/libvpx/test/test.mk index 0dcb6c8..44d2f9c 100644 --- a/source/libvpx/test/test.mk +++ b/source/libvpx/test/test.mk @@ -748,6 +748,10 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2-4-8-16.w LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += 
vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey_adpq.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey_adpq.webm.md5 ifeq ($(CONFIG_DECODE_PERF_TESTS),yes) # BBB VP9 streams diff --git a/source/libvpx/test/test_vectors.cc b/source/libvpx/test/test_vectors.cc index ff3c389..fd8c4c3 100644 --- a/source/libvpx/test/test_vectors.cc +++ b/source/libvpx/test/test_vectors.cc @@ -177,7 +177,8 @@ const char *const kVP9TestVectors[] = { "vp90-2-14-resize-fp-tiles-4-16.webm", "vp90-2-14-resize-fp-tiles-4-1.webm", "vp90-2-14-resize-fp-tiles-4-2.webm", "vp90-2-14-resize-fp-tiles-4-8.webm", "vp90-2-14-resize-fp-tiles-8-16.webm", "vp90-2-14-resize-fp-tiles-8-1.webm", - "vp90-2-14-resize-fp-tiles-8-2.webm", "vp90-2-14-resize-fp-tiles-8-4.webm" + "vp90-2-14-resize-fp-tiles-8-2.webm", "vp90-2-14-resize-fp-tiles-8-4.webm", + "vp90-2-15-segkey.webm", "vp90-2-15-segkey_adpq.webm" }; const int kNumVP9TestVectors = NELEMENTS(kVP9TestVectors); #endif // CONFIG_VP9_DECODER diff --git a/source/libvpx/test/tools_common.sh b/source/libvpx/test/tools_common.sh index 30f0fae..9c10d48 100755 --- a/source/libvpx/test/tools_common.sh +++ b/source/libvpx/test/tools_common.sh @@ -18,7 +18,9 @@ set -e devnull='> /dev/null 2>&1' vlog() { - [ "${VPX_TEST_VERBOSE_OUTPUT}" = "yes" ] && echo "$@" + if [ "${VPX_TEST_VERBOSE_OUTPUT}" = "yes" ]; then + echo "$@" + fi } # Sets $VPX_TOOL_TEST to the name specified by positional parameter one. diff --git a/source/libvpx/test/twopass_encoder.sh b/source/libvpx/test/twopass_encoder.sh new file mode 100755 index 0000000..fe3cbbb --- /dev/null +++ b/source/libvpx/test/twopass_encoder.sh @@ -0,0 +1,59 @@ +#!/bin/sh +## +## Copyright (c) 2014 The WebM project authors. 
All Rights Reserved. +## +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. +## +## This file tests the libvpx twopass_encoder example. To add new tests to this +## file, do the following: +## 1. Write a shell function (this is your test). +## 2. Add the function to twopass_encoder_tests (on a new line). +## +. $(dirname $0)/tools_common.sh + +# Environment check: $YUV_RAW_INPUT is required. +twopass_encoder_verify_environment() { + if [ ! -e "${YUV_RAW_INPUT}" ]; then + echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH." + return 1 + fi +} + +# Runs twopass_encoder using the codec specified by $1. +twopass_encoder() { + local encoder="${LIBVPX_BIN_PATH}/twopass_encoder${VPX_TEST_EXE_SUFFIX}" + local codec="$1" + local output_file="${VPX_TEST_OUTPUT_DIR}/twopass_encoder_${codec}.ivf" + + [ -x "${encoder}" ] || return 1 + + eval "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \ + "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" \ + ${devnull} + + [ -e "${output_file}" ] || return 1 +} + +twopass_encoder_vp8() { + if [ "$(vp8_encode_available)" = "yes" ]; then + twopass_encoder vp8 || return 1 + fi +} + +# TODO(tomfinegan): Add a frame limit param to twopass_encoder and enable this +# test. VP9 is just too slow right now: This test takes 31m16s+ on a fast +# machine. 
+DISABLED_twopass_encoder_vp9() { + if [ "$(vp9_encode_available)" = "yes" ]; then + twopass_encoder vp9 || return 1 + fi +} + +twopass_encoder_tests="twopass_encoder_vp8 + DISABLED_twopass_encoder_vp9" + +run_tests twopass_encoder_verify_environment "${twopass_encoder_tests}" diff --git a/source/libvpx/test/variance_test.cc b/source/libvpx/test/variance_test.cc index 817ba14..c9bf13a 100644 --- a/source/libvpx/test/variance_test.cc +++ b/source/libvpx/test/variance_test.cc @@ -294,41 +294,60 @@ TEST_P(VP8VarianceTest, Zero) { ZeroTest(); } TEST_P(VP8VarianceTest, Ref) { RefTest(); } TEST_P(VP8VarianceTest, OneQuarter) { OneQuarterTest(); } +const vp8_variance_fn_t variance4x4_c = vp8_variance4x4_c; +const vp8_variance_fn_t variance8x8_c = vp8_variance8x8_c; +const vp8_variance_fn_t variance8x16_c = vp8_variance8x16_c; +const vp8_variance_fn_t variance16x8_c = vp8_variance16x8_c; +const vp8_variance_fn_t variance16x16_c = vp8_variance16x16_c; INSTANTIATE_TEST_CASE_P( C, VP8VarianceTest, - ::testing::Values(make_tuple(2, 2, vp8_variance4x4_c), - make_tuple(3, 3, vp8_variance8x8_c), - make_tuple(3, 4, vp8_variance8x16_c), - make_tuple(4, 3, vp8_variance16x8_c), - make_tuple(4, 4, vp8_variance16x16_c))); + ::testing::Values(make_tuple(2, 2, variance4x4_c), + make_tuple(3, 3, variance8x8_c), + make_tuple(3, 4, variance8x16_c), + make_tuple(4, 3, variance16x8_c), + make_tuple(4, 4, variance16x16_c))); #if HAVE_NEON +const vp8_variance_fn_t variance8x8_neon = vp8_variance8x8_neon; +const vp8_variance_fn_t variance8x16_neon = vp8_variance8x16_neon; +const vp8_variance_fn_t variance16x8_neon = vp8_variance16x8_neon; +const vp8_variance_fn_t variance16x16_neon = vp8_variance16x16_neon; INSTANTIATE_TEST_CASE_P( NEON, VP8VarianceTest, - ::testing::Values(make_tuple(3, 3, vp8_variance8x8_neon), - make_tuple(3, 4, vp8_variance8x16_neon), - make_tuple(4, 3, vp8_variance16x8_neon), - make_tuple(4, 4, vp8_variance16x16_neon))); + ::testing::Values(make_tuple(3, 3, 
variance8x8_neon), + make_tuple(3, 4, variance8x16_neon), + make_tuple(4, 3, variance16x8_neon), + make_tuple(4, 4, variance16x16_neon))); #endif #if HAVE_MMX +const vp8_variance_fn_t variance4x4_mmx = vp8_variance4x4_mmx; +const vp8_variance_fn_t variance8x8_mmx = vp8_variance8x8_mmx; +const vp8_variance_fn_t variance8x16_mmx = vp8_variance8x16_mmx; +const vp8_variance_fn_t variance16x8_mmx = vp8_variance16x8_mmx; +const vp8_variance_fn_t variance16x16_mmx = vp8_variance16x16_mmx; INSTANTIATE_TEST_CASE_P( MMX, VP8VarianceTest, - ::testing::Values(make_tuple(2, 2, vp8_variance4x4_mmx), - make_tuple(3, 3, vp8_variance8x8_mmx), - make_tuple(3, 4, vp8_variance8x16_mmx), - make_tuple(4, 3, vp8_variance16x8_mmx), - make_tuple(4, 4, vp8_variance16x16_mmx))); + ::testing::Values(make_tuple(2, 2, variance4x4_mmx), + make_tuple(3, 3, variance8x8_mmx), + make_tuple(3, 4, variance8x16_mmx), + make_tuple(4, 3, variance16x8_mmx), + make_tuple(4, 4, variance16x16_mmx))); #endif #if HAVE_SSE2 +const vp8_variance_fn_t variance4x4_wmt = vp8_variance4x4_wmt; +const vp8_variance_fn_t variance8x8_wmt = vp8_variance8x8_wmt; +const vp8_variance_fn_t variance8x16_wmt = vp8_variance8x16_wmt; +const vp8_variance_fn_t variance16x8_wmt = vp8_variance16x8_wmt; +const vp8_variance_fn_t variance16x16_wmt = vp8_variance16x16_wmt; INSTANTIATE_TEST_CASE_P( SSE2, VP8VarianceTest, - ::testing::Values(make_tuple(2, 2, vp8_variance4x4_wmt), - make_tuple(3, 3, vp8_variance8x8_wmt), - make_tuple(3, 4, vp8_variance8x16_wmt), - make_tuple(4, 3, vp8_variance16x8_wmt), - make_tuple(4, 4, vp8_variance16x16_wmt))); + ::testing::Values(make_tuple(2, 2, variance4x4_wmt), + make_tuple(3, 3, variance8x8_wmt), + make_tuple(3, 4, variance8x16_wmt), + make_tuple(4, 3, variance16x8_wmt), + make_tuple(4, 4, variance16x16_wmt))); #endif #endif // CONFIG_VP8_ENCODER @@ -350,150 +369,337 @@ TEST_P(VP9SubpelVarianceTest, Ref) { RefTest(); } TEST_P(VP9SubpelAvgVarianceTest, Ref) { RefTest(); } TEST_P(VP9VarianceTest, 
OneQuarter) { OneQuarterTest(); } +const vp9_variance_fn_t variance4x4_c = vp9_variance4x4_c; +const vp9_variance_fn_t variance4x8_c = vp9_variance4x8_c; +const vp9_variance_fn_t variance8x4_c = vp9_variance8x4_c; +const vp9_variance_fn_t variance8x8_c = vp9_variance8x8_c; +const vp9_variance_fn_t variance8x16_c = vp9_variance8x16_c; +const vp9_variance_fn_t variance16x8_c = vp9_variance16x8_c; +const vp9_variance_fn_t variance16x16_c = vp9_variance16x16_c; +const vp9_variance_fn_t variance16x32_c = vp9_variance16x32_c; +const vp9_variance_fn_t variance32x16_c = vp9_variance32x16_c; +const vp9_variance_fn_t variance32x32_c = vp9_variance32x32_c; +const vp9_variance_fn_t variance32x64_c = vp9_variance32x64_c; +const vp9_variance_fn_t variance64x32_c = vp9_variance64x32_c; +const vp9_variance_fn_t variance64x64_c = vp9_variance64x64_c; INSTANTIATE_TEST_CASE_P( C, VP9VarianceTest, - ::testing::Values(make_tuple(2, 2, vp9_variance4x4_c), - make_tuple(2, 3, vp9_variance4x8_c), - make_tuple(3, 2, vp9_variance8x4_c), - make_tuple(3, 3, vp9_variance8x8_c), - make_tuple(3, 4, vp9_variance8x16_c), - make_tuple(4, 3, vp9_variance16x8_c), - make_tuple(4, 4, vp9_variance16x16_c), - make_tuple(4, 5, vp9_variance16x32_c), - make_tuple(5, 4, vp9_variance32x16_c), - make_tuple(5, 5, vp9_variance32x32_c), - make_tuple(5, 6, vp9_variance32x64_c), - make_tuple(6, 5, vp9_variance64x32_c), - make_tuple(6, 6, vp9_variance64x64_c))); - + ::testing::Values(make_tuple(2, 2, variance4x4_c), + make_tuple(2, 3, variance4x8_c), + make_tuple(3, 2, variance8x4_c), + make_tuple(3, 3, variance8x8_c), + make_tuple(3, 4, variance8x16_c), + make_tuple(4, 3, variance16x8_c), + make_tuple(4, 4, variance16x16_c), + make_tuple(4, 5, variance16x32_c), + make_tuple(5, 4, variance32x16_c), + make_tuple(5, 5, variance32x32_c), + make_tuple(5, 6, variance32x64_c), + make_tuple(6, 5, variance64x32_c), + make_tuple(6, 6, variance64x64_c))); + +const vp9_subpixvariance_fn_t subpel_variance4x4_c = + 
vp9_sub_pixel_variance4x4_c; +const vp9_subpixvariance_fn_t subpel_variance4x8_c = + vp9_sub_pixel_variance4x8_c; +const vp9_subpixvariance_fn_t subpel_variance8x4_c = + vp9_sub_pixel_variance8x4_c; +const vp9_subpixvariance_fn_t subpel_variance8x8_c = + vp9_sub_pixel_variance8x8_c; +const vp9_subpixvariance_fn_t subpel_variance8x16_c = + vp9_sub_pixel_variance8x16_c; +const vp9_subpixvariance_fn_t subpel_variance16x8_c = + vp9_sub_pixel_variance16x8_c; +const vp9_subpixvariance_fn_t subpel_variance16x16_c = + vp9_sub_pixel_variance16x16_c; +const vp9_subpixvariance_fn_t subpel_variance16x32_c = + vp9_sub_pixel_variance16x32_c; +const vp9_subpixvariance_fn_t subpel_variance32x16_c = + vp9_sub_pixel_variance32x16_c; +const vp9_subpixvariance_fn_t subpel_variance32x32_c = + vp9_sub_pixel_variance32x32_c; +const vp9_subpixvariance_fn_t subpel_variance32x64_c = + vp9_sub_pixel_variance32x64_c; +const vp9_subpixvariance_fn_t subpel_variance64x32_c = + vp9_sub_pixel_variance64x32_c; +const vp9_subpixvariance_fn_t subpel_variance64x64_c = + vp9_sub_pixel_variance64x64_c; INSTANTIATE_TEST_CASE_P( C, VP9SubpelVarianceTest, - ::testing::Values(make_tuple(2, 2, vp9_sub_pixel_variance4x4_c), - make_tuple(2, 3, vp9_sub_pixel_variance4x8_c), - make_tuple(3, 2, vp9_sub_pixel_variance8x4_c), - make_tuple(3, 3, vp9_sub_pixel_variance8x8_c), - make_tuple(3, 4, vp9_sub_pixel_variance8x16_c), - make_tuple(4, 3, vp9_sub_pixel_variance16x8_c), - make_tuple(4, 4, vp9_sub_pixel_variance16x16_c), - make_tuple(4, 5, vp9_sub_pixel_variance16x32_c), - make_tuple(5, 4, vp9_sub_pixel_variance32x16_c), - make_tuple(5, 5, vp9_sub_pixel_variance32x32_c), - make_tuple(5, 6, vp9_sub_pixel_variance32x64_c), - make_tuple(6, 5, vp9_sub_pixel_variance64x32_c), - make_tuple(6, 6, vp9_sub_pixel_variance64x64_c))); - + ::testing::Values(make_tuple(2, 2, subpel_variance4x4_c), + make_tuple(2, 3, subpel_variance4x8_c), + make_tuple(3, 2, subpel_variance8x4_c), + make_tuple(3, 3, subpel_variance8x8_c), + 
make_tuple(3, 4, subpel_variance8x16_c), + make_tuple(4, 3, subpel_variance16x8_c), + make_tuple(4, 4, subpel_variance16x16_c), + make_tuple(4, 5, subpel_variance16x32_c), + make_tuple(5, 4, subpel_variance32x16_c), + make_tuple(5, 5, subpel_variance32x32_c), + make_tuple(5, 6, subpel_variance32x64_c), + make_tuple(6, 5, subpel_variance64x32_c), + make_tuple(6, 6, subpel_variance64x64_c))); + +const vp9_subp_avg_variance_fn_t subpel_avg_variance4x4_c = + vp9_sub_pixel_avg_variance4x4_c; +const vp9_subp_avg_variance_fn_t subpel_avg_variance4x8_c = + vp9_sub_pixel_avg_variance4x8_c; +const vp9_subp_avg_variance_fn_t subpel_avg_variance8x4_c = + vp9_sub_pixel_avg_variance8x4_c; +const vp9_subp_avg_variance_fn_t subpel_avg_variance8x8_c = + vp9_sub_pixel_avg_variance8x8_c; +const vp9_subp_avg_variance_fn_t subpel_avg_variance8x16_c = + vp9_sub_pixel_avg_variance8x16_c; +const vp9_subp_avg_variance_fn_t subpel_avg_variance16x8_c = + vp9_sub_pixel_avg_variance16x8_c; +const vp9_subp_avg_variance_fn_t subpel_avg_variance16x16_c = + vp9_sub_pixel_avg_variance16x16_c; +const vp9_subp_avg_variance_fn_t subpel_avg_variance16x32_c = + vp9_sub_pixel_avg_variance16x32_c; +const vp9_subp_avg_variance_fn_t subpel_avg_variance32x16_c = + vp9_sub_pixel_avg_variance32x16_c; +const vp9_subp_avg_variance_fn_t subpel_avg_variance32x32_c = + vp9_sub_pixel_avg_variance32x32_c; +const vp9_subp_avg_variance_fn_t subpel_avg_variance32x64_c = + vp9_sub_pixel_avg_variance32x64_c; +const vp9_subp_avg_variance_fn_t subpel_avg_variance64x32_c = + vp9_sub_pixel_avg_variance64x32_c; +const vp9_subp_avg_variance_fn_t subpel_avg_variance64x64_c = + vp9_sub_pixel_avg_variance64x64_c; INSTANTIATE_TEST_CASE_P( C, VP9SubpelAvgVarianceTest, - ::testing::Values(make_tuple(2, 2, vp9_sub_pixel_avg_variance4x4_c), - make_tuple(2, 3, vp9_sub_pixel_avg_variance4x8_c), - make_tuple(3, 2, vp9_sub_pixel_avg_variance8x4_c), - make_tuple(3, 3, vp9_sub_pixel_avg_variance8x8_c), - make_tuple(3, 4, 
vp9_sub_pixel_avg_variance8x16_c), - make_tuple(4, 3, vp9_sub_pixel_avg_variance16x8_c), - make_tuple(4, 4, vp9_sub_pixel_avg_variance16x16_c), - make_tuple(4, 5, vp9_sub_pixel_avg_variance16x32_c), - make_tuple(5, 4, vp9_sub_pixel_avg_variance32x16_c), - make_tuple(5, 5, vp9_sub_pixel_avg_variance32x32_c), - make_tuple(5, 6, vp9_sub_pixel_avg_variance32x64_c), - make_tuple(6, 5, vp9_sub_pixel_avg_variance64x32_c), - make_tuple(6, 6, vp9_sub_pixel_avg_variance64x64_c))); + ::testing::Values(make_tuple(2, 2, subpel_avg_variance4x4_c), + make_tuple(2, 3, subpel_avg_variance4x8_c), + make_tuple(3, 2, subpel_avg_variance8x4_c), + make_tuple(3, 3, subpel_avg_variance8x8_c), + make_tuple(3, 4, subpel_avg_variance8x16_c), + make_tuple(4, 3, subpel_avg_variance16x8_c), + make_tuple(4, 4, subpel_avg_variance16x16_c), + make_tuple(4, 5, subpel_avg_variance16x32_c), + make_tuple(5, 4, subpel_avg_variance32x16_c), + make_tuple(5, 5, subpel_avg_variance32x32_c), + make_tuple(5, 6, subpel_avg_variance32x64_c), + make_tuple(6, 5, subpel_avg_variance64x32_c), + make_tuple(6, 6, subpel_avg_variance64x64_c))); #if HAVE_MMX +const vp9_variance_fn_t variance4x4_mmx = vp9_variance4x4_mmx; +const vp9_variance_fn_t variance8x8_mmx = vp9_variance8x8_mmx; +const vp9_variance_fn_t variance8x16_mmx = vp9_variance8x16_mmx; +const vp9_variance_fn_t variance16x8_mmx = vp9_variance16x8_mmx; +const vp9_variance_fn_t variance16x16_mmx = vp9_variance16x16_mmx; INSTANTIATE_TEST_CASE_P( MMX, VP9VarianceTest, - ::testing::Values(make_tuple(2, 2, vp9_variance4x4_mmx), - make_tuple(3, 3, vp9_variance8x8_mmx), - make_tuple(3, 4, vp9_variance8x16_mmx), - make_tuple(4, 3, vp9_variance16x8_mmx), - make_tuple(4, 4, vp9_variance16x16_mmx))); + ::testing::Values(make_tuple(2, 2, variance4x4_mmx), + make_tuple(3, 3, variance8x8_mmx), + make_tuple(3, 4, variance8x16_mmx), + make_tuple(4, 3, variance16x8_mmx), + make_tuple(4, 4, variance16x16_mmx))); #endif #if HAVE_SSE2 #if CONFIG_USE_X86INC +const 
vp9_variance_fn_t variance4x4_sse2 = vp9_variance4x4_sse2; +const vp9_variance_fn_t variance4x8_sse2 = vp9_variance4x8_sse2; +const vp9_variance_fn_t variance8x4_sse2 = vp9_variance8x4_sse2; +const vp9_variance_fn_t variance8x8_sse2 = vp9_variance8x8_sse2; +const vp9_variance_fn_t variance8x16_sse2 = vp9_variance8x16_sse2; +const vp9_variance_fn_t variance16x8_sse2 = vp9_variance16x8_sse2; +const vp9_variance_fn_t variance16x16_sse2 = vp9_variance16x16_sse2; +const vp9_variance_fn_t variance16x32_sse2 = vp9_variance16x32_sse2; +const vp9_variance_fn_t variance32x16_sse2 = vp9_variance32x16_sse2; +const vp9_variance_fn_t variance32x32_sse2 = vp9_variance32x32_sse2; +const vp9_variance_fn_t variance32x64_sse2 = vp9_variance32x64_sse2; +const vp9_variance_fn_t variance64x32_sse2 = vp9_variance64x32_sse2; +const vp9_variance_fn_t variance64x64_sse2 = vp9_variance64x64_sse2; INSTANTIATE_TEST_CASE_P( SSE2, VP9VarianceTest, - ::testing::Values(make_tuple(2, 2, vp9_variance4x4_sse2), - make_tuple(2, 3, vp9_variance4x8_sse2), - make_tuple(3, 2, vp9_variance8x4_sse2), - make_tuple(3, 3, vp9_variance8x8_sse2), - make_tuple(3, 4, vp9_variance8x16_sse2), - make_tuple(4, 3, vp9_variance16x8_sse2), - make_tuple(4, 4, vp9_variance16x16_sse2), - make_tuple(4, 5, vp9_variance16x32_sse2), - make_tuple(5, 4, vp9_variance32x16_sse2), - make_tuple(5, 5, vp9_variance32x32_sse2), - make_tuple(5, 6, vp9_variance32x64_sse2), - make_tuple(6, 5, vp9_variance64x32_sse2), - make_tuple(6, 6, vp9_variance64x64_sse2))); - + ::testing::Values(make_tuple(2, 2, variance4x4_sse2), + make_tuple(2, 3, variance4x8_sse2), + make_tuple(3, 2, variance8x4_sse2), + make_tuple(3, 3, variance8x8_sse2), + make_tuple(3, 4, variance8x16_sse2), + make_tuple(4, 3, variance16x8_sse2), + make_tuple(4, 4, variance16x16_sse2), + make_tuple(4, 5, variance16x32_sse2), + make_tuple(5, 4, variance32x16_sse2), + make_tuple(5, 5, variance32x32_sse2), + make_tuple(5, 6, variance32x64_sse2), + make_tuple(6, 5, 
variance64x32_sse2), + make_tuple(6, 6, variance64x64_sse2))); + +const vp9_subpixvariance_fn_t subpel_variance4x4_sse = + vp9_sub_pixel_variance4x4_sse; +const vp9_subpixvariance_fn_t subpel_variance4x8_sse = + vp9_sub_pixel_variance4x8_sse; +const vp9_subpixvariance_fn_t subpel_variance8x4_sse2 = + vp9_sub_pixel_variance8x4_sse2; +const vp9_subpixvariance_fn_t subpel_variance8x8_sse2 = + vp9_sub_pixel_variance8x8_sse2; +const vp9_subpixvariance_fn_t subpel_variance8x16_sse2 = + vp9_sub_pixel_variance8x16_sse2; +const vp9_subpixvariance_fn_t subpel_variance16x8_sse2 = + vp9_sub_pixel_variance16x8_sse2; +const vp9_subpixvariance_fn_t subpel_variance16x16_sse2 = + vp9_sub_pixel_variance16x16_sse2; +const vp9_subpixvariance_fn_t subpel_variance16x32_sse2 = + vp9_sub_pixel_variance16x32_sse2; +const vp9_subpixvariance_fn_t subpel_variance32x16_sse2 = + vp9_sub_pixel_variance32x16_sse2; +const vp9_subpixvariance_fn_t subpel_variance32x32_sse2 = + vp9_sub_pixel_variance32x32_sse2; +const vp9_subpixvariance_fn_t subpel_variance32x64_sse2 = + vp9_sub_pixel_variance32x64_sse2; +const vp9_subpixvariance_fn_t subpel_variance64x32_sse2 = + vp9_sub_pixel_variance64x32_sse2; +const vp9_subpixvariance_fn_t subpel_variance64x64_sse2 = + vp9_sub_pixel_variance64x64_sse2; INSTANTIATE_TEST_CASE_P( SSE2, VP9SubpelVarianceTest, - ::testing::Values(make_tuple(2, 2, vp9_sub_pixel_variance4x4_sse), - make_tuple(2, 3, vp9_sub_pixel_variance4x8_sse), - make_tuple(3, 2, vp9_sub_pixel_variance8x4_sse2), - make_tuple(3, 3, vp9_sub_pixel_variance8x8_sse2), - make_tuple(3, 4, vp9_sub_pixel_variance8x16_sse2), - make_tuple(4, 3, vp9_sub_pixel_variance16x8_sse2), - make_tuple(4, 4, vp9_sub_pixel_variance16x16_sse2), - make_tuple(4, 5, vp9_sub_pixel_variance16x32_sse2), - make_tuple(5, 4, vp9_sub_pixel_variance32x16_sse2), - make_tuple(5, 5, vp9_sub_pixel_variance32x32_sse2), - make_tuple(5, 6, vp9_sub_pixel_variance32x64_sse2), - make_tuple(6, 5, vp9_sub_pixel_variance64x32_sse2), - make_tuple(6, 
6, vp9_sub_pixel_variance64x64_sse2))); - + ::testing::Values(make_tuple(2, 2, subpel_variance4x4_sse), + make_tuple(2, 3, subpel_variance4x8_sse), + make_tuple(3, 2, subpel_variance8x4_sse2), + make_tuple(3, 3, subpel_variance8x8_sse2), + make_tuple(3, 4, subpel_variance8x16_sse2), + make_tuple(4, 3, subpel_variance16x8_sse2), + make_tuple(4, 4, subpel_variance16x16_sse2), + make_tuple(4, 5, subpel_variance16x32_sse2), + make_tuple(5, 4, subpel_variance32x16_sse2), + make_tuple(5, 5, subpel_variance32x32_sse2), + make_tuple(5, 6, subpel_variance32x64_sse2), + make_tuple(6, 5, subpel_variance64x32_sse2), + make_tuple(6, 6, subpel_variance64x64_sse2))); + +const vp9_subp_avg_variance_fn_t subpel_avg_variance4x4_sse = + vp9_sub_pixel_avg_variance4x4_sse; +const vp9_subp_avg_variance_fn_t subpel_avg_variance4x8_sse = + vp9_sub_pixel_avg_variance4x8_sse; +const vp9_subp_avg_variance_fn_t subpel_avg_variance8x4_sse2 = + vp9_sub_pixel_avg_variance8x4_sse2; +const vp9_subp_avg_variance_fn_t subpel_avg_variance8x8_sse2 = + vp9_sub_pixel_avg_variance8x8_sse2; +const vp9_subp_avg_variance_fn_t subpel_avg_variance8x16_sse2 = + vp9_sub_pixel_avg_variance8x16_sse2; +const vp9_subp_avg_variance_fn_t subpel_avg_variance16x8_sse2 = + vp9_sub_pixel_avg_variance16x8_sse2; +const vp9_subp_avg_variance_fn_t subpel_avg_variance16x16_sse2 = + vp9_sub_pixel_avg_variance16x16_sse2; +const vp9_subp_avg_variance_fn_t subpel_avg_variance16x32_sse2 = + vp9_sub_pixel_avg_variance16x32_sse2; +const vp9_subp_avg_variance_fn_t subpel_avg_variance32x16_sse2 = + vp9_sub_pixel_avg_variance32x16_sse2; +const vp9_subp_avg_variance_fn_t subpel_avg_variance32x32_sse2 = + vp9_sub_pixel_avg_variance32x32_sse2; +const vp9_subp_avg_variance_fn_t subpel_avg_variance32x64_sse2 = + vp9_sub_pixel_avg_variance32x64_sse2; +const vp9_subp_avg_variance_fn_t subpel_avg_variance64x32_sse2 = + vp9_sub_pixel_avg_variance64x32_sse2; +const vp9_subp_avg_variance_fn_t subpel_avg_variance64x64_sse2 = + 
vp9_sub_pixel_avg_variance64x64_sse2; INSTANTIATE_TEST_CASE_P( SSE2, VP9SubpelAvgVarianceTest, - ::testing::Values(make_tuple(2, 2, vp9_sub_pixel_avg_variance4x4_sse), - make_tuple(2, 3, vp9_sub_pixel_avg_variance4x8_sse), - make_tuple(3, 2, vp9_sub_pixel_avg_variance8x4_sse2), - make_tuple(3, 3, vp9_sub_pixel_avg_variance8x8_sse2), - make_tuple(3, 4, vp9_sub_pixel_avg_variance8x16_sse2), - make_tuple(4, 3, vp9_sub_pixel_avg_variance16x8_sse2), - make_tuple(4, 4, vp9_sub_pixel_avg_variance16x16_sse2), - make_tuple(4, 5, vp9_sub_pixel_avg_variance16x32_sse2), - make_tuple(5, 4, vp9_sub_pixel_avg_variance32x16_sse2), - make_tuple(5, 5, vp9_sub_pixel_avg_variance32x32_sse2), - make_tuple(5, 6, vp9_sub_pixel_avg_variance32x64_sse2), - make_tuple(6, 5, vp9_sub_pixel_avg_variance64x32_sse2), - make_tuple(6, 6, vp9_sub_pixel_avg_variance64x64_sse2))); + ::testing::Values(make_tuple(2, 2, subpel_avg_variance4x4_sse), + make_tuple(2, 3, subpel_avg_variance4x8_sse), + make_tuple(3, 2, subpel_avg_variance8x4_sse2), + make_tuple(3, 3, subpel_avg_variance8x8_sse2), + make_tuple(3, 4, subpel_avg_variance8x16_sse2), + make_tuple(4, 3, subpel_avg_variance16x8_sse2), + make_tuple(4, 4, subpel_avg_variance16x16_sse2), + make_tuple(4, 5, subpel_avg_variance16x32_sse2), + make_tuple(5, 4, subpel_avg_variance32x16_sse2), + make_tuple(5, 5, subpel_avg_variance32x32_sse2), + make_tuple(5, 6, subpel_avg_variance32x64_sse2), + make_tuple(6, 5, subpel_avg_variance64x32_sse2), + make_tuple(6, 6, subpel_avg_variance64x64_sse2))); #endif #endif #if HAVE_SSSE3 #if CONFIG_USE_X86INC +const vp9_subpixvariance_fn_t subpel_variance4x4_ssse3 = + vp9_sub_pixel_variance4x4_ssse3; +const vp9_subpixvariance_fn_t subpel_variance4x8_ssse3 = + vp9_sub_pixel_variance4x8_ssse3; +const vp9_subpixvariance_fn_t subpel_variance8x4_ssse3 = + vp9_sub_pixel_variance8x4_ssse3; +const vp9_subpixvariance_fn_t subpel_variance8x8_ssse3 = + vp9_sub_pixel_variance8x8_ssse3; +const vp9_subpixvariance_fn_t 
subpel_variance8x16_ssse3 = + vp9_sub_pixel_variance8x16_ssse3; +const vp9_subpixvariance_fn_t subpel_variance16x8_ssse3 = + vp9_sub_pixel_variance16x8_ssse3; +const vp9_subpixvariance_fn_t subpel_variance16x16_ssse3 = + vp9_sub_pixel_variance16x16_ssse3; +const vp9_subpixvariance_fn_t subpel_variance16x32_ssse3 = + vp9_sub_pixel_variance16x32_ssse3; +const vp9_subpixvariance_fn_t subpel_variance32x16_ssse3 = + vp9_sub_pixel_variance32x16_ssse3; +const vp9_subpixvariance_fn_t subpel_variance32x32_ssse3 = + vp9_sub_pixel_variance32x32_ssse3; +const vp9_subpixvariance_fn_t subpel_variance32x64_ssse3 = + vp9_sub_pixel_variance32x64_ssse3; +const vp9_subpixvariance_fn_t subpel_variance64x32_ssse3 = + vp9_sub_pixel_variance64x32_ssse3; +const vp9_subpixvariance_fn_t subpel_variance64x64_ssse3 = + vp9_sub_pixel_variance64x64_ssse3; INSTANTIATE_TEST_CASE_P( SSSE3, VP9SubpelVarianceTest, - ::testing::Values(make_tuple(2, 2, vp9_sub_pixel_variance4x4_ssse3), - make_tuple(2, 3, vp9_sub_pixel_variance4x8_ssse3), - make_tuple(3, 2, vp9_sub_pixel_variance8x4_ssse3), - make_tuple(3, 3, vp9_sub_pixel_variance8x8_ssse3), - make_tuple(3, 4, vp9_sub_pixel_variance8x16_ssse3), - make_tuple(4, 3, vp9_sub_pixel_variance16x8_ssse3), - make_tuple(4, 4, vp9_sub_pixel_variance16x16_ssse3), - make_tuple(4, 5, vp9_sub_pixel_variance16x32_ssse3), - make_tuple(5, 4, vp9_sub_pixel_variance32x16_ssse3), - make_tuple(5, 5, vp9_sub_pixel_variance32x32_ssse3), - make_tuple(5, 6, vp9_sub_pixel_variance32x64_ssse3), - make_tuple(6, 5, vp9_sub_pixel_variance64x32_ssse3), - make_tuple(6, 6, vp9_sub_pixel_variance64x64_ssse3))); - + ::testing::Values(make_tuple(2, 2, subpel_variance4x4_ssse3), + make_tuple(2, 3, subpel_variance4x8_ssse3), + make_tuple(3, 2, subpel_variance8x4_ssse3), + make_tuple(3, 3, subpel_variance8x8_ssse3), + make_tuple(3, 4, subpel_variance8x16_ssse3), + make_tuple(4, 3, subpel_variance16x8_ssse3), + make_tuple(4, 4, subpel_variance16x16_ssse3), + make_tuple(4, 5, 
subpel_variance16x32_ssse3), + make_tuple(5, 4, subpel_variance32x16_ssse3), + make_tuple(5, 5, subpel_variance32x32_ssse3), + make_tuple(5, 6, subpel_variance32x64_ssse3), + make_tuple(6, 5, subpel_variance64x32_ssse3), + make_tuple(6, 6, subpel_variance64x64_ssse3))); + +const vp9_subp_avg_variance_fn_t subpel_avg_variance4x4_ssse3 = + vp9_sub_pixel_avg_variance4x4_ssse3; +const vp9_subp_avg_variance_fn_t subpel_avg_variance4x8_ssse3 = + vp9_sub_pixel_avg_variance4x8_ssse3; +const vp9_subp_avg_variance_fn_t subpel_avg_variance8x4_ssse3 = + vp9_sub_pixel_avg_variance8x4_ssse3; +const vp9_subp_avg_variance_fn_t subpel_avg_variance8x8_ssse3 = + vp9_sub_pixel_avg_variance8x8_ssse3; +const vp9_subp_avg_variance_fn_t subpel_avg_variance8x16_ssse3 = + vp9_sub_pixel_avg_variance8x16_ssse3; +const vp9_subp_avg_variance_fn_t subpel_avg_variance16x8_ssse3 = + vp9_sub_pixel_avg_variance16x8_ssse3; +const vp9_subp_avg_variance_fn_t subpel_avg_variance16x16_ssse3 = + vp9_sub_pixel_avg_variance16x16_ssse3; +const vp9_subp_avg_variance_fn_t subpel_avg_variance16x32_ssse3 = + vp9_sub_pixel_avg_variance16x32_ssse3; +const vp9_subp_avg_variance_fn_t subpel_avg_variance32x16_ssse3 = + vp9_sub_pixel_avg_variance32x16_ssse3; +const vp9_subp_avg_variance_fn_t subpel_avg_variance32x32_ssse3 = + vp9_sub_pixel_avg_variance32x32_ssse3; +const vp9_subp_avg_variance_fn_t subpel_avg_variance32x64_ssse3 = + vp9_sub_pixel_avg_variance32x64_ssse3; +const vp9_subp_avg_variance_fn_t subpel_avg_variance64x32_ssse3 = + vp9_sub_pixel_avg_variance64x32_ssse3; +const vp9_subp_avg_variance_fn_t subpel_avg_variance64x64_ssse3 = + vp9_sub_pixel_avg_variance64x64_ssse3; INSTANTIATE_TEST_CASE_P( SSSE3, VP9SubpelAvgVarianceTest, - ::testing::Values(make_tuple(2, 2, vp9_sub_pixel_avg_variance4x4_ssse3), - make_tuple(2, 3, vp9_sub_pixel_avg_variance4x8_ssse3), - make_tuple(3, 2, vp9_sub_pixel_avg_variance8x4_ssse3), - make_tuple(3, 3, vp9_sub_pixel_avg_variance8x8_ssse3), - make_tuple(3, 4, 
vp9_sub_pixel_avg_variance8x16_ssse3), - make_tuple(4, 3, vp9_sub_pixel_avg_variance16x8_ssse3), - make_tuple(4, 4, vp9_sub_pixel_avg_variance16x16_ssse3), - make_tuple(4, 5, vp9_sub_pixel_avg_variance16x32_ssse3), - make_tuple(5, 4, vp9_sub_pixel_avg_variance32x16_ssse3), - make_tuple(5, 5, vp9_sub_pixel_avg_variance32x32_ssse3), - make_tuple(5, 6, vp9_sub_pixel_avg_variance32x64_ssse3), - make_tuple(6, 5, vp9_sub_pixel_avg_variance64x32_ssse3), - make_tuple(6, 6, vp9_sub_pixel_avg_variance64x64_ssse3))); + ::testing::Values(make_tuple(2, 2, subpel_avg_variance4x4_ssse3), + make_tuple(2, 3, subpel_avg_variance4x8_ssse3), + make_tuple(3, 2, subpel_avg_variance8x4_ssse3), + make_tuple(3, 3, subpel_avg_variance8x8_ssse3), + make_tuple(3, 4, subpel_avg_variance8x16_ssse3), + make_tuple(4, 3, subpel_avg_variance16x8_ssse3), + make_tuple(4, 4, subpel_avg_variance16x16_ssse3), + make_tuple(4, 5, subpel_avg_variance16x32_ssse3), + make_tuple(5, 4, subpel_avg_variance32x16_ssse3), + make_tuple(5, 5, subpel_avg_variance32x32_ssse3), + make_tuple(5, 6, subpel_avg_variance32x64_ssse3), + make_tuple(6, 5, subpel_avg_variance64x32_ssse3), + make_tuple(6, 6, subpel_avg_variance64x64_ssse3))); #endif #endif #endif // CONFIG_VP9_ENCODER diff --git a/source/libvpx/test/vp8cx_set_ref.sh b/source/libvpx/test/vp8cx_set_ref.sh new file mode 100755 index 0000000..ef9d0c0 --- /dev/null +++ b/source/libvpx/test/vp8cx_set_ref.sh @@ -0,0 +1,54 @@ +#!/bin/sh +## +## Copyright (c) 2014 The WebM project authors. All Rights Reserved. +## +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. +## +## This file tests the libvpx vp8cx_set_ref example. To add new tests to this +## file, do the following: +## 1. 
Write a shell function (this is your test). +## 2. Add the function to vp8cx_set_ref_tests (on a new line). +## +. $(dirname $0)/tools_common.sh + +# Environment check: $YUV_RAW_INPUT is required. +vp8cx_set_ref_verify_environment() { + if [ ! -e "${YUV_RAW_INPUT}" ]; then + echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH." + return 1 + fi +} + +# Runs vp8cx_set_ref and updates the reference frame before encoding frame 90. +# $1 is the codec name, which vp8cx_set_ref does not support at present: It's +# currently used only to name the output file. +# TODO(tomfinegan): Pass the codec param once the example is updated to support +# VP9. +vpx_set_ref() { + local encoder="${LIBVPX_BIN_PATH}/vp8cx_set_ref${VPX_TEST_EXE_SUFFIX}" + local codec="$1" + local output_file="${VPX_TEST_OUTPUT_DIR}/vp8cx_set_ref_${codec}.ivf" + local ref_frame_num=90 + + [ -x "${encoder}" ] || return 1 + + eval "${encoder}" "${YUV_RAW_INPUT_WIDTH}" "${YUV_RAW_INPUT_HEIGHT}" \ + "${YUV_RAW_INPUT}" "${output_file}" "${ref_frame_num}" \ + ${devnull} + + [ -e "${output_file}" ] || return 1 +} + +vp8cx_set_ref_vp8() { + if [ "$(vp8_encode_available)" = "yes" ]; then + vpx_set_ref vp8 || return 1 + fi +} + +vp8cx_set_ref_tests="vp8cx_set_ref_vp8" + +run_tests vp8cx_set_ref_verify_environment "${vp8cx_set_ref_tests}" diff --git a/source/libvpx/vp8/common/arm/dequantize_arm.c b/source/libvpx/vp8/common/arm/dequantize_arm.c index 70e72aa..1f8157f 100644 --- a/source/libvpx/vp8/common/arm/dequantize_arm.c +++ b/source/libvpx/vp8/common/arm/dequantize_arm.c @@ -12,26 +12,9 @@ #include "vpx_config.h" #include "vp8/common/blockd.h" -#if HAVE_NEON -extern void vp8_dequantize_b_loop_neon(short *Q, short *DQC, short *DQ); -#endif - #if HAVE_MEDIA extern void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ); -#endif - -#if HAVE_NEON - -void vp8_dequantize_b_neon(BLOCKD *d, short *DQC) -{ - short *DQ = d->dqcoeff; - short *Q = d->qcoeff; - - vp8_dequantize_b_loop_neon(Q, DQC, DQ); -} -#endif 
-#if HAVE_MEDIA void vp8_dequantize_b_v6(BLOCKD *d, short *DQC) { short *DQ = d->dqcoeff; diff --git a/source/libvpx/vp8/common/arm/loopfilter_arm.c b/source/libvpx/vp8/common/arm/loopfilter_arm.c index 3bdc967..f37ca63 100644 --- a/source/libvpx/vp8/common/arm/loopfilter_arm.c +++ b/source/libvpx/vp8/common/arm/loopfilter_arm.c @@ -25,20 +25,24 @@ extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_armv6); extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_armv6); #endif -#if HAVE_NEON +#if HAVE_NEON_ASM || HAVE_NEON typedef void loopfilter_y_neon(unsigned char *src, int pitch, unsigned char blimit, unsigned char limit, unsigned char thresh); typedef void loopfilter_uv_neon(unsigned char *u, int pitch, unsigned char blimit, unsigned char limit, unsigned char thresh, unsigned char *v); +#endif +#if HAVE_NEON_ASM extern loopfilter_y_neon vp8_loop_filter_horizontal_edge_y_neon; extern loopfilter_y_neon vp8_loop_filter_vertical_edge_y_neon; -extern loopfilter_y_neon vp8_mbloop_filter_horizontal_edge_y_neon; -extern loopfilter_y_neon vp8_mbloop_filter_vertical_edge_y_neon; - extern loopfilter_uv_neon vp8_loop_filter_horizontal_edge_uv_neon; extern loopfilter_uv_neon vp8_loop_filter_vertical_edge_uv_neon; +#endif + +#if HAVE_NEON +extern loopfilter_y_neon vp8_mbloop_filter_horizontal_edge_y_neon; +extern loopfilter_y_neon vp8_mbloop_filter_vertical_edge_y_neon; extern loopfilter_uv_neon vp8_mbloop_filter_horizontal_edge_uv_neon; extern loopfilter_uv_neon vp8_mbloop_filter_vertical_edge_uv_neon; #endif @@ -146,7 +150,9 @@ void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsign if (u_ptr) vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr); } +#endif +#if HAVE_NEON_ASM /* Horizontal B Filtering */ void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) diff --git 
a/source/libvpx/vp8/common/arm/neon/dequantizeb_neon.c b/source/libvpx/vp8/common/arm/neon/dequantizeb_neon.c index 60f69c8..54e709d 100644 --- a/source/libvpx/vp8/common/arm/neon/dequantizeb_neon.c +++ b/source/libvpx/vp8/common/arm/neon/dequantizeb_neon.c @@ -10,18 +10,16 @@ #include <arm_neon.h> -void vp8_dequantize_b_loop_neon( - int16_t *Q, - int16_t *DQC, - int16_t *DQ) { +#include "vp8/common/blockd.h" + +void vp8_dequantize_b_neon(BLOCKD *d, short *DQC) { int16x8x2_t qQ, qDQC, qDQ; - qQ = vld2q_s16(Q); + qQ = vld2q_s16(d->qcoeff); qDQC = vld2q_s16(DQC); qDQ.val[0] = vmulq_s16(qQ.val[0], qDQC.val[0]); qDQ.val[1] = vmulq_s16(qQ.val[1], qDQC.val[1]); - vst2q_s16(DQ, qDQ); - return; + vst2q_s16(d->dqcoeff, qDQ); } diff --git a/source/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm b/source/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm new file mode 100644 index 0000000..3a39210 --- /dev/null +++ b/source/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm @@ -0,0 +1,81 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license and patent +; grant that can be found in the LICENSE file in the root of the source +; tree. All contributing project authors may be found in the AUTHORS +; file in the root of the source tree. 
+; + + + EXPORT |idct_dequant_0_2x_neon| + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 +;void idct_dequant_0_2x_neon(short *q, short dq, +; unsigned char *dst, int stride); +; r0 *q +; r1 dq +; r2 *dst +; r3 stride +|idct_dequant_0_2x_neon| PROC + push {r4, r5} + vpush {d8-d15} + + add r12, r2, #4 + vld1.32 {d2[0]}, [r2], r3 + vld1.32 {d8[0]}, [r12], r3 + vld1.32 {d2[1]}, [r2], r3 + vld1.32 {d8[1]}, [r12], r3 + vld1.32 {d4[0]}, [r2], r3 + vld1.32 {d10[0]}, [r12], r3 + vld1.32 {d4[1]}, [r2], r3 + vld1.32 {d10[1]}, [r12], r3 + + ldrh r12, [r0] ; lo q + ldrh r4, [r0, #32] ; hi q + mov r5, #0 + strh r5, [r0] + strh r5, [r0, #32] + + sxth r12, r12 ; lo + mul r0, r12, r1 + add r0, r0, #4 + asr r0, r0, #3 + vdup.16 q0, r0 + sxth r4, r4 ; hi + mul r0, r4, r1 + add r0, r0, #4 + asr r0, r0, #3 + vdup.16 q3, r0 + + vaddw.u8 q1, q0, d2 ; lo + vaddw.u8 q2, q0, d4 + vaddw.u8 q4, q3, d8 ; hi + vaddw.u8 q5, q3, d10 + + sub r2, r2, r3, lsl #2 ; dst - 4*stride + add r0, r2, #4 + + vqmovun.s16 d2, q1 ; lo + vqmovun.s16 d4, q2 + vqmovun.s16 d8, q4 ; hi + vqmovun.s16 d10, q5 + + vst1.32 {d2[0]}, [r2], r3 ; lo + vst1.32 {d8[0]}, [r0], r3 ; hi + vst1.32 {d2[1]}, [r2], r3 + vst1.32 {d8[1]}, [r0], r3 + vst1.32 {d4[0]}, [r2], r3 + vst1.32 {d10[0]}, [r0], r3 + vst1.32 {d4[1]}, [r2] + vst1.32 {d10[1]}, [r0] + + vpop {d8-d15} + pop {r4, r5} + bx lr + + ENDP ; |idct_dequant_0_2x_neon| + END diff --git a/source/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c b/source/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c deleted file mode 100644 index 967c322..0000000 --- a/source/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <arm_neon.h> - -void idct_dequant_0_2x_neon( - int16_t *q, - int16_t dq, - unsigned char *dst, - int stride) { - unsigned char *dst0; - int i, a0, a1; - int16x8x2_t q2Add; - int32x2_t d2s32, d4s32; - uint8x8_t d2u8, d4u8; - uint16x8_t q1u16, q2u16; - - a0 = ((q[0] * dq) + 4) >> 3; - a1 = ((q[16] * dq) + 4) >> 3; - q[0] = q[16] = 0; - q2Add.val[0] = vdupq_n_s16((int16_t)a0); - q2Add.val[1] = vdupq_n_s16((int16_t)a1); - - for (i = 0; i < 2; i++, dst += 4) { - dst0 = dst; - d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 0); - dst0 += stride; - d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 1); - dst0 += stride; - d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 0); - dst0 += stride; - d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 1); - - q1u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]), - vreinterpret_u8_s32(d2s32)); - q2u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]), - vreinterpret_u8_s32(d4s32)); - - d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16)); - d4u8 = vqmovun_s16(vreinterpretq_s16_u16(q2u16)); - - d2s32 = vreinterpret_s32_u8(d2u8); - d4s32 = vreinterpret_s32_u8(d4u8); - - dst0 = dst; - vst1_lane_s32((int32_t *)dst0, d2s32, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst0, d2s32, 1); - dst0 += stride; - vst1_lane_s32((int32_t *)dst0, d4s32, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst0, d4s32, 1); - } - return; -} diff --git a/source/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm b/source/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm new file mode 100644 index 0000000..8da0fa0 --- /dev/null +++ b/source/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm @@ -0,0 +1,199 @@ +; +; Copyright (c) 2010 The Webm project authors. All Rights Reserved. 
+; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + + EXPORT |idct_dequant_full_2x_neon| + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 +;void idct_dequant_full_2x_neon(short *q, short *dq, +; unsigned char *dst, int stride); +; r0 *q, +; r1 *dq, +; r2 *dst +; r3 stride +|idct_dequant_full_2x_neon| PROC + vpush {d8-d15} + + vld1.16 {q0, q1}, [r1] ; dq (same l/r) + vld1.16 {q2, q3}, [r0] ; l q + add r0, r0, #32 + vld1.16 {q4, q5}, [r0] ; r q + add r12, r2, #4 + + ; interleave the predictors + vld1.32 {d28[0]}, [r2], r3 ; l pre + vld1.32 {d28[1]}, [r12], r3 ; r pre + vld1.32 {d29[0]}, [r2], r3 + vld1.32 {d29[1]}, [r12], r3 + vld1.32 {d30[0]}, [r2], r3 + vld1.32 {d30[1]}, [r12], r3 + vld1.32 {d31[0]}, [r2], r3 + vld1.32 {d31[1]}, [r12] + + adr r1, cospi8sqrt2minus1 ; pointer to the first constant + + ; dequant: q[i] = q[i] * dq[i] + vmul.i16 q2, q2, q0 + vmul.i16 q3, q3, q1 + vmul.i16 q4, q4, q0 + vmul.i16 q5, q5, q1 + + vld1.16 {d0}, [r1] + + ; q2: l0r0 q3: l8r8 + ; q4: l4r4 q5: l12r12 + vswp d5, d8 + vswp d7, d10 + + ; _CONSTANTS_ * 4,12 >> 16 + ; q6: 4 * sinpi : c1/temp1 + ; q7: 12 * sinpi : d1/temp2 + ; q8: 4 * cospi + ; q9: 12 * cospi + vqdmulh.s16 q6, q4, d0[2] ; sinpi8sqrt2 + vqdmulh.s16 q7, q5, d0[2] + vqdmulh.s16 q8, q4, d0[0] ; cospi8sqrt2minus1 + vqdmulh.s16 q9, q5, d0[0] + + vqadd.s16 q10, q2, q3 ; a1 = 0 + 8 + vqsub.s16 q11, q2, q3 ; b1 = 0 - 8 + + ; vqdmulh only accepts signed values. this was a problem because + ; our constant had the high bit set, and was treated as a negative value. + ; vqdmulh also doubles the value before it shifts by 16. we need to + ; compensate for this. 
in the case of sinpi8sqrt2, the lowest bit is 0, + ; so we can shift the constant without losing precision. this avoids + ; shift again afterward, but also avoids the sign issue. win win! + ; for cospi8sqrt2minus1 the lowest bit is 1, so we lose precision if we + ; pre-shift it + vshr.s16 q8, q8, #1 + vshr.s16 q9, q9, #1 + + ; q4: 4 + 4 * cospi : d1/temp1 + ; q5: 12 + 12 * cospi : c1/temp2 + vqadd.s16 q4, q4, q8 + vqadd.s16 q5, q5, q9 + + ; c1 = temp1 - temp2 + ; d1 = temp1 + temp2 + vqsub.s16 q2, q6, q5 + vqadd.s16 q3, q4, q7 + + ; [0]: a1+d1 + ; [1]: b1+c1 + ; [2]: b1-c1 + ; [3]: a1-d1 + vqadd.s16 q4, q10, q3 + vqadd.s16 q5, q11, q2 + vqsub.s16 q6, q11, q2 + vqsub.s16 q7, q10, q3 + + ; rotate + vtrn.32 q4, q6 + vtrn.32 q5, q7 + vtrn.16 q4, q5 + vtrn.16 q6, q7 + ; idct loop 2 + ; q4: l 0, 4, 8,12 r 0, 4, 8,12 + ; q5: l 1, 5, 9,13 r 1, 5, 9,13 + ; q6: l 2, 6,10,14 r 2, 6,10,14 + ; q7: l 3, 7,11,15 r 3, 7,11,15 + + ; q8: 1 * sinpi : c1/temp1 + ; q9: 3 * sinpi : d1/temp2 + ; q10: 1 * cospi + ; q11: 3 * cospi + vqdmulh.s16 q8, q5, d0[2] ; sinpi8sqrt2 + vqdmulh.s16 q9, q7, d0[2] + vqdmulh.s16 q10, q5, d0[0] ; cospi8sqrt2minus1 + vqdmulh.s16 q11, q7, d0[0] + + vqadd.s16 q2, q4, q6 ; a1 = 0 + 2 + vqsub.s16 q3, q4, q6 ; b1 = 0 - 2 + + ; see note on shifting above + vshr.s16 q10, q10, #1 + vshr.s16 q11, q11, #1 + + ; q10: 1 + 1 * cospi : d1/temp1 + ; q11: 3 + 3 * cospi : c1/temp2 + vqadd.s16 q10, q5, q10 + vqadd.s16 q11, q7, q11 + + ; q8: c1 = temp1 - temp2 + ; q9: d1 = temp1 + temp2 + vqsub.s16 q8, q8, q11 + vqadd.s16 q9, q10, q9 + + ; a1+d1 + ; b1+c1 + ; b1-c1 + ; a1-d1 + vqadd.s16 q4, q2, q9 + vqadd.s16 q5, q3, q8 + vqsub.s16 q6, q3, q8 + vqsub.s16 q7, q2, q9 + + ; +4 >> 3 (rounding) + vrshr.s16 q4, q4, #3 ; lo + vrshr.s16 q5, q5, #3 + vrshr.s16 q6, q6, #3 ; hi + vrshr.s16 q7, q7, #3 + + vtrn.32 q4, q6 + vtrn.32 q5, q7 + vtrn.16 q4, q5 + vtrn.16 q6, q7 + + ; adding pre + ; input is still packed. 
pre was read interleaved + vaddw.u8 q4, q4, d28 + vaddw.u8 q5, q5, d29 + vaddw.u8 q6, q6, d30 + vaddw.u8 q7, q7, d31 + + vmov.i16 q14, #0 + vmov q15, q14 + vst1.16 {q14, q15}, [r0] ; write over high input + sub r0, r0, #32 + vst1.16 {q14, q15}, [r0] ; write over low input + + sub r2, r2, r3, lsl #2 ; dst - 4*stride + add r1, r2, #4 ; hi + + ;saturate and narrow + vqmovun.s16 d0, q4 ; lo + vqmovun.s16 d1, q5 + vqmovun.s16 d2, q6 ; hi + vqmovun.s16 d3, q7 + + vst1.32 {d0[0]}, [r2], r3 ; lo + vst1.32 {d0[1]}, [r1], r3 ; hi + vst1.32 {d1[0]}, [r2], r3 + vst1.32 {d1[1]}, [r1], r3 + vst1.32 {d2[0]}, [r2], r3 + vst1.32 {d2[1]}, [r1], r3 + vst1.32 {d3[0]}, [r2] + vst1.32 {d3[1]}, [r1] + + vpop {d8-d15} + bx lr + + ENDP ; |idct_dequant_full_2x_neon| + +; Constant Pool +cospi8sqrt2minus1 DCD 0x4e7b +; because the lowest bit in 0x8a8c is 0, we can pre-shift this +sinpi8sqrt2 DCD 0x4546 + + END diff --git a/source/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c b/source/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c deleted file mode 100644 index a60ed46..0000000 --- a/source/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <arm_neon.h> - -static const int16_t cospi8sqrt2minus1 = 20091; -static const int16_t sinpi8sqrt2 = 17734; -// because the lowest bit in 0x8a8c is 0, we can pre-shift this - -void idct_dequant_full_2x_neon( - int16_t *q, - int16_t *dq, - unsigned char *dst, - int stride) { - unsigned char *dst0, *dst1; - int32x2_t d28, d29, d30, d31; - int16x8_t q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11; - int16x8_t qEmpty = vdupq_n_s16(0); - int32x4x2_t q2tmp0, q2tmp1; - int16x8x2_t q2tmp2, q2tmp3; - int16x4_t dLow0, dLow1, dHigh0, dHigh1; - - d28 = d29 = d30 = d31 = vdup_n_s32(0); - - // load dq - q0 = vld1q_s16(dq); - dq += 8; - q1 = vld1q_s16(dq); - - // load q - q2 = vld1q_s16(q); - vst1q_s16(q, qEmpty); - q += 8; - q3 = vld1q_s16(q); - vst1q_s16(q, qEmpty); - q += 8; - q4 = vld1q_s16(q); - vst1q_s16(q, qEmpty); - q += 8; - q5 = vld1q_s16(q); - vst1q_s16(q, qEmpty); - - // load src from dst - dst0 = dst; - dst1 = dst + 4; - d28 = vld1_lane_s32((const int32_t *)dst0, d28, 0); - dst0 += stride; - d28 = vld1_lane_s32((const int32_t *)dst1, d28, 1); - dst1 += stride; - d29 = vld1_lane_s32((const int32_t *)dst0, d29, 0); - dst0 += stride; - d29 = vld1_lane_s32((const int32_t *)dst1, d29, 1); - dst1 += stride; - - d30 = vld1_lane_s32((const int32_t *)dst0, d30, 0); - dst0 += stride; - d30 = vld1_lane_s32((const int32_t *)dst1, d30, 1); - dst1 += stride; - d31 = vld1_lane_s32((const int32_t *)dst0, d31, 0); - d31 = vld1_lane_s32((const int32_t *)dst1, d31, 1); - - q2 = vmulq_s16(q2, q0); - q3 = vmulq_s16(q3, q1); - q4 = vmulq_s16(q4, q0); - q5 = vmulq_s16(q5, q1); - - // vswp - dLow0 = vget_low_s16(q2); - dHigh0 = vget_high_s16(q2); - dLow1 = vget_low_s16(q4); - dHigh1 = vget_high_s16(q4); - q2 = vcombine_s16(dLow0, dLow1); - q4 = vcombine_s16(dHigh0, dHigh1); - - dLow0 = vget_low_s16(q3); - dHigh0 = vget_high_s16(q3); - dLow1 = vget_low_s16(q5); - dHigh1 = vget_high_s16(q5); - q3 = vcombine_s16(dLow0, dLow1); - q5 = vcombine_s16(dHigh0, dHigh1); - - q6 = 
vqdmulhq_n_s16(q4, sinpi8sqrt2); - q7 = vqdmulhq_n_s16(q5, sinpi8sqrt2); - q8 = vqdmulhq_n_s16(q4, cospi8sqrt2minus1); - q9 = vqdmulhq_n_s16(q5, cospi8sqrt2minus1); - - q10 = vqaddq_s16(q2, q3); - q11 = vqsubq_s16(q2, q3); - - q8 = vshrq_n_s16(q8, 1); - q9 = vshrq_n_s16(q9, 1); - - q4 = vqaddq_s16(q4, q8); - q5 = vqaddq_s16(q5, q9); - - q2 = vqsubq_s16(q6, q5); - q3 = vqaddq_s16(q7, q4); - - q4 = vqaddq_s16(q10, q3); - q5 = vqaddq_s16(q11, q2); - q6 = vqsubq_s16(q11, q2); - q7 = vqsubq_s16(q10, q3); - - q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6)); - q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7)); - q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]), - vreinterpretq_s16_s32(q2tmp1.val[0])); - q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]), - vreinterpretq_s16_s32(q2tmp1.val[1])); - - // loop 2 - q8 = vqdmulhq_n_s16(q2tmp2.val[1], sinpi8sqrt2); - q9 = vqdmulhq_n_s16(q2tmp3.val[1], sinpi8sqrt2); - q10 = vqdmulhq_n_s16(q2tmp2.val[1], cospi8sqrt2minus1); - q11 = vqdmulhq_n_s16(q2tmp3.val[1], cospi8sqrt2minus1); - - q2 = vqaddq_s16(q2tmp2.val[0], q2tmp3.val[0]); - q3 = vqsubq_s16(q2tmp2.val[0], q2tmp3.val[0]); - - q10 = vshrq_n_s16(q10, 1); - q11 = vshrq_n_s16(q11, 1); - - q10 = vqaddq_s16(q2tmp2.val[1], q10); - q11 = vqaddq_s16(q2tmp3.val[1], q11); - - q8 = vqsubq_s16(q8, q11); - q9 = vqaddq_s16(q9, q10); - - q4 = vqaddq_s16(q2, q9); - q5 = vqaddq_s16(q3, q8); - q6 = vqsubq_s16(q3, q8); - q7 = vqsubq_s16(q2, q9); - - q4 = vrshrq_n_s16(q4, 3); - q5 = vrshrq_n_s16(q5, 3); - q6 = vrshrq_n_s16(q6, 3); - q7 = vrshrq_n_s16(q7, 3); - - q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6)); - q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7)); - q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]), - vreinterpretq_s16_s32(q2tmp1.val[0])); - q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]), - vreinterpretq_s16_s32(q2tmp1.val[1])); - - q4 = 
vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[0]), - vreinterpret_u8_s32(d28))); - q5 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[1]), - vreinterpret_u8_s32(d29))); - q6 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[0]), - vreinterpret_u8_s32(d30))); - q7 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[1]), - vreinterpret_u8_s32(d31))); - - d28 = vreinterpret_s32_u8(vqmovun_s16(q4)); - d29 = vreinterpret_s32_u8(vqmovun_s16(q5)); - d30 = vreinterpret_s32_u8(vqmovun_s16(q6)); - d31 = vreinterpret_s32_u8(vqmovun_s16(q7)); - - dst0 = dst; - dst1 = dst + 4; - vst1_lane_s32((int32_t *)dst0, d28, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst1, d28, 1); - dst1 += stride; - vst1_lane_s32((int32_t *)dst0, d29, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst1, d29, 1); - dst1 += stride; - - vst1_lane_s32((int32_t *)dst0, d30, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst1, d30, 1); - dst1 += stride; - vst1_lane_s32((int32_t *)dst0, d31, 0); - vst1_lane_s32((int32_t *)dst1, d31, 1); - return; -} diff --git a/source/libvpx/vp8/common/arm/reconintra_arm.c b/source/libvpx/vp8/common/arm/reconintra_arm.c index 2874896..e55a33c 100644 --- a/source/libvpx/vp8/common/arm/reconintra_arm.c +++ b/source/libvpx/vp8/common/arm/reconintra_arm.c @@ -14,7 +14,7 @@ #include "vp8/common/blockd.h" #include "vpx_mem/vpx_mem.h" -#if HAVE_NEON +#if HAVE_NEON_ASM extern void vp8_build_intra_predictors_mby_neon_func( unsigned char *y_buffer, unsigned char *ypred_ptr, diff --git a/source/libvpx/vp8/common/arm/variance_arm.c b/source/libvpx/vp8/common/arm/variance_arm.c index 467a509..e3f7083 100644 --- a/source/libvpx/vp8/common/arm/variance_arm.c +++ b/source/libvpx/vp8/common/arm/variance_arm.c @@ -95,7 +95,7 @@ unsigned int vp8_sub_pixel_variance16x16_armv6 #endif /* HAVE_MEDIA */ -#if HAVE_NEON +#if HAVE_NEON_ASM extern unsigned int vp8_sub_pixel_variance16x16_neon_func ( diff --git 
a/source/libvpx/vp8/common/rtcd_defs.pl b/source/libvpx/vp8/common/rtcd_defs.pl index 130d965..cbfd76a 100644 --- a/source/libvpx/vp8/common/rtcd_defs.pl +++ b/source/libvpx/vp8/common/rtcd_defs.pl @@ -38,13 +38,15 @@ $vp8_dequant_idct_add_media=vp8_dequant_idct_add_v6; $vp8_dequant_idct_add_dspr2=vp8_dequant_idct_add_dspr2; add_proto qw/void vp8_dequant_idct_add_y_block/, "short *q, short *dq, unsigned char *dst, int stride, char *eobs"; -specialize qw/vp8_dequant_idct_add_y_block mmx sse2 media neon dspr2/; +specialize qw/vp8_dequant_idct_add_y_block mmx sse2 media neon_asm dspr2/; $vp8_dequant_idct_add_y_block_media=vp8_dequant_idct_add_y_block_v6; +$vp8_dequant_idct_add_y_block_neon_asm=vp8_dequant_idct_add_y_block_neon; $vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2; add_proto qw/void vp8_dequant_idct_add_uv_block/, "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs"; -specialize qw/vp8_dequant_idct_add_uv_block mmx sse2 media neon dspr2/; +specialize qw/vp8_dequant_idct_add_uv_block mmx sse2 media neon_asm dspr2/; $vp8_dequant_idct_add_uv_block_media=vp8_dequant_idct_add_uv_block_v6; +$vp8_dequant_idct_add_uv_block_neon_asm=vp8_dequant_idct_add_uv_block_neon; $vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2; # @@ -56,8 +58,9 @@ $vp8_loop_filter_mbv_media=vp8_loop_filter_mbv_armv6; $vp8_loop_filter_mbv_dspr2=vp8_loop_filter_mbv_dspr2; add_proto qw/void vp8_loop_filter_bv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"; -specialize qw/vp8_loop_filter_bv mmx sse2 media neon dspr2/; +specialize qw/vp8_loop_filter_bv mmx sse2 media neon_asm dspr2/; $vp8_loop_filter_bv_media=vp8_loop_filter_bv_armv6; +$vp8_loop_filter_bv_neon_asm=vp8_loop_filter_bv_neon; $vp8_loop_filter_bv_dspr2=vp8_loop_filter_bv_dspr2; add_proto qw/void vp8_loop_filter_mbh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int 
uv_stride, struct loop_filter_info *lfi"; @@ -66,18 +69,19 @@ $vp8_loop_filter_mbh_media=vp8_loop_filter_mbh_armv6; $vp8_loop_filter_mbh_dspr2=vp8_loop_filter_mbh_dspr2; add_proto qw/void vp8_loop_filter_bh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"; -specialize qw/vp8_loop_filter_bh mmx sse2 media neon dspr2/; +specialize qw/vp8_loop_filter_bh mmx sse2 media neon_asm dspr2/; $vp8_loop_filter_bh_media=vp8_loop_filter_bh_armv6; +$vp8_loop_filter_bh_neon_asm=vp8_loop_filter_bh_neon; $vp8_loop_filter_bh_dspr2=vp8_loop_filter_bh_dspr2; add_proto qw/void vp8_loop_filter_simple_mbv/, "unsigned char *y, int ystride, const unsigned char *blimit"; -specialize qw/vp8_loop_filter_simple_mbv mmx sse2 media neon/; +specialize qw/vp8_loop_filter_simple_mbv mmx sse2 media neon_asm/; $vp8_loop_filter_simple_mbv_c=vp8_loop_filter_simple_vertical_edge_c; $vp8_loop_filter_simple_mbv_mmx=vp8_loop_filter_simple_vertical_edge_mmx; $vp8_loop_filter_simple_mbv_sse2=vp8_loop_filter_simple_vertical_edge_sse2; $vp8_loop_filter_simple_mbv_media=vp8_loop_filter_simple_vertical_edge_armv6; -$vp8_loop_filter_simple_mbv_neon=vp8_loop_filter_mbvs_neon; +$vp8_loop_filter_simple_mbv_neon_asm=vp8_loop_filter_mbvs_neon; add_proto qw/void vp8_loop_filter_simple_mbh/, "unsigned char *y, int ystride, const unsigned char *blimit"; specialize qw/vp8_loop_filter_simple_mbh mmx sse2 media neon/; @@ -88,12 +92,12 @@ $vp8_loop_filter_simple_mbh_media=vp8_loop_filter_simple_horizontal_edge_armv6; $vp8_loop_filter_simple_mbh_neon=vp8_loop_filter_mbhs_neon; add_proto qw/void vp8_loop_filter_simple_bv/, "unsigned char *y, int ystride, const unsigned char *blimit"; -specialize qw/vp8_loop_filter_simple_bv mmx sse2 media neon/; +specialize qw/vp8_loop_filter_simple_bv mmx sse2 media neon_asm/; $vp8_loop_filter_simple_bv_c=vp8_loop_filter_bvs_c; $vp8_loop_filter_simple_bv_mmx=vp8_loop_filter_bvs_mmx; 
$vp8_loop_filter_simple_bv_sse2=vp8_loop_filter_bvs_sse2; $vp8_loop_filter_simple_bv_media=vp8_loop_filter_bvs_armv6; -$vp8_loop_filter_simple_bv_neon=vp8_loop_filter_bvs_neon; +$vp8_loop_filter_simple_bv_neon_asm=vp8_loop_filter_bvs_neon; add_proto qw/void vp8_loop_filter_simple_bh/, "unsigned char *y, int ystride, const unsigned char *blimit"; specialize qw/vp8_loop_filter_simple_bh mmx sse2 media neon/; @@ -269,9 +273,10 @@ specialize qw/vp8_sub_pixel_variance4x4 mmx sse2/; $vp8_sub_pixel_variance4x4_sse2=vp8_sub_pixel_variance4x4_wmt; add_proto qw/unsigned int vp8_sub_pixel_variance8x8/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"; -specialize qw/vp8_sub_pixel_variance8x8 mmx sse2 media neon/; +specialize qw/vp8_sub_pixel_variance8x8 mmx sse2 media neon_asm/; $vp8_sub_pixel_variance8x8_sse2=vp8_sub_pixel_variance8x8_wmt; $vp8_sub_pixel_variance8x8_media=vp8_sub_pixel_variance8x8_armv6; +$vp8_sub_pixel_variance8x8_neon_asm=vp8_sub_pixel_variance8x8_neon; add_proto qw/unsigned int vp8_sub_pixel_variance8x16/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"; specialize qw/vp8_sub_pixel_variance8x16 mmx sse2/; @@ -282,24 +287,28 @@ specialize qw/vp8_sub_pixel_variance16x8 mmx sse2 ssse3/; $vp8_sub_pixel_variance16x8_sse2=vp8_sub_pixel_variance16x8_wmt; add_proto qw/unsigned int vp8_sub_pixel_variance16x16/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"; -specialize qw/vp8_sub_pixel_variance16x16 mmx sse2 ssse3 media neon/; +specialize qw/vp8_sub_pixel_variance16x16 mmx sse2 ssse3 media neon_asm/; $vp8_sub_pixel_variance16x16_sse2=vp8_sub_pixel_variance16x16_wmt; $vp8_sub_pixel_variance16x16_media=vp8_sub_pixel_variance16x16_armv6; 
+$vp8_sub_pixel_variance16x16_neon_asm=vp8_sub_pixel_variance16x16_neon; add_proto qw/unsigned int vp8_variance_halfpixvar16x16_h/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp8_variance_halfpixvar16x16_h mmx sse2 media neon/; +specialize qw/vp8_variance_halfpixvar16x16_h mmx sse2 media neon_asm/; $vp8_variance_halfpixvar16x16_h_sse2=vp8_variance_halfpixvar16x16_h_wmt; $vp8_variance_halfpixvar16x16_h_media=vp8_variance_halfpixvar16x16_h_armv6; +$vp8_variance_halfpixvar16x16_h_neon_asm=vp8_variance_halfpixvar16x16_h_neon; add_proto qw/unsigned int vp8_variance_halfpixvar16x16_v/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp8_variance_halfpixvar16x16_v mmx sse2 media neon/; +specialize qw/vp8_variance_halfpixvar16x16_v mmx sse2 media neon_asm/; $vp8_variance_halfpixvar16x16_v_sse2=vp8_variance_halfpixvar16x16_v_wmt; $vp8_variance_halfpixvar16x16_v_media=vp8_variance_halfpixvar16x16_v_armv6; +$vp8_variance_halfpixvar16x16_v_neon_asm=vp8_variance_halfpixvar16x16_v_neon; add_proto qw/unsigned int vp8_variance_halfpixvar16x16_hv/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp8_variance_halfpixvar16x16_hv mmx sse2 media neon/; +specialize qw/vp8_variance_halfpixvar16x16_hv mmx sse2 media neon_asm/; $vp8_variance_halfpixvar16x16_hv_sse2=vp8_variance_halfpixvar16x16_hv_wmt; $vp8_variance_halfpixvar16x16_hv_media=vp8_variance_halfpixvar16x16_hv_armv6; +$vp8_variance_halfpixvar16x16_hv_neon_asm=vp8_variance_halfpixvar16x16_hv_neon; # # Single block SAD @@ -402,12 +411,14 @@ specialize qw/vp8_sub_pixel_mse16x16 mmx sse2/; $vp8_sub_pixel_mse16x16_sse2=vp8_sub_pixel_mse16x16_wmt; add_proto qw/unsigned int vp8_mse16x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, 
unsigned int *sse"; -specialize qw/vp8_mse16x16 mmx sse2 media neon/; +specialize qw/vp8_mse16x16 mmx sse2 media neon_asm/; $vp8_mse16x16_sse2=vp8_mse16x16_wmt; $vp8_mse16x16_media=vp8_mse16x16_armv6; +$vp8_mse16x16_neon_asm=vp8_mse16x16_neon; add_proto qw/unsigned int vp8_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride"; -specialize qw/vp8_get4x4sse_cs mmx neon/; +specialize qw/vp8_get4x4sse_cs mmx neon_asm/; +$vp8_get4x4sse_cs_neon_asm=vp8_get4x4sse_cs_neon; # # Block copy @@ -434,16 +445,19 @@ if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") { # Forward DCT # add_proto qw/void vp8_short_fdct4x4/, "short *input, short *output, int pitch"; -specialize qw/vp8_short_fdct4x4 mmx sse2 media neon/; +specialize qw/vp8_short_fdct4x4 mmx sse2 media neon_asm/; $vp8_short_fdct4x4_media=vp8_short_fdct4x4_armv6; +$vp8_short_fdct4x4_neon_asm=vp8_short_fdct4x4_neon; add_proto qw/void vp8_short_fdct8x4/, "short *input, short *output, int pitch"; -specialize qw/vp8_short_fdct8x4 mmx sse2 media neon/; +specialize qw/vp8_short_fdct8x4 mmx sse2 media neon_asm/; $vp8_short_fdct8x4_media=vp8_short_fdct8x4_armv6; +$vp8_short_fdct8x4_neon_asm=vp8_short_fdct8x4_neon; add_proto qw/void vp8_short_walsh4x4/, "short *input, short *output, int pitch"; -specialize qw/vp8_short_walsh4x4 sse2 media neon/; +specialize qw/vp8_short_walsh4x4 sse2 media neon_asm/; $vp8_short_walsh4x4_media=vp8_short_walsh4x4_armv6; +$vp8_short_walsh4x4_neon_asm=vp8_short_walsh4x4_neon; # # Quantizer @@ -454,14 +468,16 @@ specialize qw/vp8_regular_quantize_b sse2/; #$vp8_regular_quantize_b_sse4_1=vp8_regular_quantize_b_sse4; add_proto qw/void vp8_fast_quantize_b/, "struct block *, struct blockd *"; -specialize qw/vp8_fast_quantize_b sse2 ssse3 media neon/; +specialize qw/vp8_fast_quantize_b sse2 ssse3 media neon_asm/; $vp8_fast_quantize_b_media=vp8_fast_quantize_b_armv6; +$vp8_fast_quantize_b_neon_asm=vp8_fast_quantize_b_neon; add_proto qw/void 
vp8_regular_quantize_b_pair/, "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2"; # no asm yet add_proto qw/void vp8_fast_quantize_b_pair/, "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2"; -specialize qw/vp8_fast_quantize_b_pair neon/; +specialize qw/vp8_fast_quantize_b_pair neon_asm/; +$vp8_fast_quantize_b_pair_neon_asm=vp8_fast_quantize_b_pair_neon; add_proto qw/void vp8_quantize_mb/, "struct macroblock *"; specialize qw/vp8_quantize_mb neon/; @@ -488,16 +504,19 @@ specialize qw/vp8_mbuverror mmx sse2/; $vp8_mbuverror_sse2=vp8_mbuverror_xmm; add_proto qw/void vp8_subtract_b/, "struct block *be, struct blockd *bd, int pitch"; -specialize qw/vp8_subtract_b mmx sse2 media neon/; +specialize qw/vp8_subtract_b mmx sse2 media neon_asm/; $vp8_subtract_b_media=vp8_subtract_b_armv6; +$vp8_subtract_b_neon_asm=vp8_subtract_b_neon; add_proto qw/void vp8_subtract_mby/, "short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride"; -specialize qw/vp8_subtract_mby mmx sse2 media neon/; +specialize qw/vp8_subtract_mby mmx sse2 media neon_asm/; $vp8_subtract_mby_media=vp8_subtract_mby_armv6; +$vp8_subtract_mby_neon_asm=vp8_subtract_mby_neon; add_proto qw/void vp8_subtract_mbuv/, "short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride"; -specialize qw/vp8_subtract_mbuv mmx sse2 media neon/; +specialize qw/vp8_subtract_mbuv mmx sse2 media neon_asm/; $vp8_subtract_mbuv_media=vp8_subtract_mbuv_armv6; +$vp8_subtract_mbuv_neon_asm=vp8_subtract_mbuv_neon; # # Motion search @@ -526,13 +545,14 @@ if (vpx_config("CONFIG_REALTIME_ONLY") ne "yes") { # Pick Loopfilter # add_proto qw/void vp8_yv12_copy_partial_frame/, "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"; -specialize qw/vp8_yv12_copy_partial_frame neon/; +specialize qw/vp8_yv12_copy_partial_frame neon_asm/; 
+$vp8_yv12_copy_partial_frame_neon_asm=vp8_yv12_copy_partial_frame_neon; # # Denoiser filter # if (vpx_config("CONFIG_TEMPORAL_DENOISING") eq "yes") { - add_proto qw/int vp8_denoiser_filter/, "struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset"; + add_proto qw/int vp8_denoiser_filter/, "unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising"; specialize qw/vp8_denoiser_filter sse2 neon/; } diff --git a/source/libvpx/vp8/encoder/arm/neon/denoising_neon.c b/source/libvpx/vp8/encoder/arm/neon/denoising_neon.c index 23dc0a9..32ce65a 100644 --- a/source/libvpx/vp8/encoder/arm/neon/denoising_neon.c +++ b/source/libvpx/vp8/encoder/arm/neon/denoising_neon.c @@ -45,10 +45,13 @@ * [16, 255] 3 6 7 */ -int vp8_denoiser_filter_neon(YV12_BUFFER_CONFIG *mc_running_avg, - YV12_BUFFER_CONFIG *running_avg, - MACROBLOCK *signal, unsigned int motion_magnitude, - int y_offset, int uv_offset) { +int vp8_denoiser_filter_neon(unsigned char *mc_running_avg_y, + int mc_running_avg_y_stride, + unsigned char *running_avg_y, + int running_avg_y_stride, + unsigned char *sig, int sig_stride, + unsigned int motion_magnitude, + int increase_denoising) { /* If motion_magnitude is small, making the denoiser more aggressive by * increasing the adjustment for each level, level1 adjustment is * increased, the deltas stay the same. @@ -60,14 +63,6 @@ int vp8_denoiser_filter_neon(YV12_BUFFER_CONFIG *mc_running_avg, const uint8x16_t v_level1_threshold = vdupq_n_u8(4); const uint8x16_t v_level2_threshold = vdupq_n_u8(8); const uint8x16_t v_level3_threshold = vdupq_n_u8(16); - - /* Local variables for array pointers and strides. 
*/ - unsigned char *sig = signal->thismb; - int sig_stride = 16; - unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset; - int mc_running_avg_y_stride = mc_running_avg->y_stride; - unsigned char *running_avg_y = running_avg->y_buffer + y_offset; - int running_avg_y_stride = running_avg->y_stride; int64x2_t v_sum_diff_total = vdupq_n_s64(0); /* Go over lines. */ diff --git a/source/libvpx/vp8/encoder/block.h b/source/libvpx/vp8/encoder/block.h index dd733e5..34879cf 100644 --- a/source/libvpx/vp8/encoder/block.h +++ b/source/libvpx/vp8/encoder/block.h @@ -125,6 +125,7 @@ typedef struct macroblock int optimize; int q_index; + int increase_denoising; #if CONFIG_TEMPORAL_DENOISING MB_PREDICTION_MODE best_sse_inter_mode; diff --git a/source/libvpx/vp8/encoder/denoising.c b/source/libvpx/vp8/encoder/denoising.c index 7819265..1e645fb 100644 --- a/source/libvpx/vp8/encoder/denoising.c +++ b/source/libvpx/vp8/encoder/denoising.c @@ -21,6 +21,7 @@ static const unsigned int NOISE_MOTION_THRESHOLD = 25 * 25; */ static const unsigned int SSE_DIFF_THRESHOLD = 16 * 16 * 20; static const unsigned int SSE_THRESHOLD = 16 * 16 * 40; +static const unsigned int SSE_THRESHOLD_HIGH = 16 * 16 * 60; /* * The filter function was modified to reduce the computational complexity. 
@@ -51,27 +52,32 @@ static const unsigned int SSE_THRESHOLD = 16 * 16 * 40; * [16, 255] 6 7 */ -int vp8_denoiser_filter_c(YV12_BUFFER_CONFIG *mc_running_avg, - YV12_BUFFER_CONFIG *running_avg, MACROBLOCK *signal, - unsigned int motion_magnitude, int y_offset, - int uv_offset) +int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, + unsigned char *running_avg_y, int avg_y_stride, + unsigned char *sig, int sig_stride, + unsigned int motion_magnitude, + int increase_denoising) { - unsigned char *sig = signal->thismb; - int sig_stride = 16; - unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset; - int mc_avg_y_stride = mc_running_avg->y_stride; - unsigned char *running_avg_y = running_avg->y_buffer + y_offset; - int avg_y_stride = running_avg->y_stride; - int r, c, i; + unsigned char *running_avg_y_start = running_avg_y; + unsigned char *sig_start = sig; + int sum_diff_thresh; + int r, c; int sum_diff = 0; int adj_val[3] = {3, 4, 6}; - + int shift_inc1 = 0; + int shift_inc2 = 1; /* If motion_magnitude is small, making the denoiser more aggressive by - * increasing the adjustment for each level. */ + * increasing the adjustment for each level. Add another increment for + * blocks that are labeled for increase denoising. */ if (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) { - for (i = 0; i < 3; i++) - adj_val[i] += 1; + if (increase_denoising) { + shift_inc1 = 1; + shift_inc2 = 2; + } + adj_val[0] += shift_inc2; + adj_val[1] += shift_inc2; + adj_val[2] += shift_inc2; } for (r = 0; r < 16; ++r) @@ -85,8 +91,9 @@ int vp8_denoiser_filter_c(YV12_BUFFER_CONFIG *mc_running_avg, diff = mc_running_avg_y[c] - sig[c]; absdiff = abs(diff); - /* When |diff| < 4, use pixel value from last denoised raw. */ - if (absdiff <= 3) + // When |diff| <= |3 + shift_inc1|, use pixel value from + // last denoised raw. 
+ if (absdiff <= 3 + shift_inc1) { running_avg_y[c] = mc_running_avg_y[c]; sum_diff += diff; @@ -127,11 +134,12 @@ int vp8_denoiser_filter_c(YV12_BUFFER_CONFIG *mc_running_avg, running_avg_y += avg_y_stride; } - if (abs(sum_diff) > SUM_DIFF_THRESHOLD) + sum_diff_thresh= SUM_DIFF_THRESHOLD; + if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH; + if (abs(sum_diff) > sum_diff_thresh) return COPY_BLOCK; - vp8_copy_mem16x16(running_avg->y_buffer + y_offset, avg_y_stride, - signal->thismb, sig_stride); + vp8_copy_mem16x16(running_avg_y_start, avg_y_stride, sig_start, sig_stride); return FILTER_BLOCK; } @@ -192,7 +200,7 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser, int mv_row; int mv_col; unsigned int motion_magnitude2; - + unsigned int sse_thresh; MV_REFERENCE_FRAME frame = x->best_reference_frame; MV_REFERENCE_FRAME zero_frame = x->best_zeromv_reference_frame; @@ -277,7 +285,10 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser, mv_row = x->best_sse_mv.as_mv.row; mv_col = x->best_sse_mv.as_mv.col; motion_magnitude2 = mv_row * mv_row + mv_col * mv_col; - if (best_sse > SSE_THRESHOLD || motion_magnitude2 + sse_thresh = SSE_THRESHOLD; + if (x->increase_denoising) sse_thresh = SSE_THRESHOLD_HIGH; + + if (best_sse > sse_thresh || motion_magnitude2 > 8 * NOISE_MOTION_THRESHOLD) { decision = COPY_BLOCK; @@ -285,12 +296,18 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser, if (decision == FILTER_BLOCK) { + unsigned char *mc_running_avg_y = + denoiser->yv12_mc_running_avg.y_buffer + recon_yoffset; + int mc_avg_y_stride = denoiser->yv12_mc_running_avg.y_stride; + unsigned char *running_avg_y = + denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset; + int avg_y_stride = denoiser->yv12_running_avg[INTRA_FRAME].y_stride; + /* Filter. 
*/ - decision = vp8_denoiser_filter(&denoiser->yv12_mc_running_avg, - &denoiser->yv12_running_avg[INTRA_FRAME], - x, - motion_magnitude2, - recon_yoffset, recon_uvoffset); + decision = vp8_denoiser_filter(mc_running_avg_y, mc_avg_y_stride, + running_avg_y, avg_y_stride, + x->thismb, 16, motion_magnitude2, + x->increase_denoising); } if (decision == COPY_BLOCK) { diff --git a/source/libvpx/vp8/encoder/denoising.h b/source/libvpx/vp8/encoder/denoising.h index cc9913a..ae744d2 100644 --- a/source/libvpx/vp8/encoder/denoising.h +++ b/source/libvpx/vp8/encoder/denoising.h @@ -18,6 +18,7 @@ extern "C" { #endif #define SUM_DIFF_THRESHOLD (16 * 16 * 2) +#define SUM_DIFF_THRESHOLD_HIGH (16 * 16 * 3) #define MOTION_MAGNITUDE_THRESHOLD (8*3) enum vp8_denoiser_decision diff --git a/source/libvpx/vp8/encoder/pickinter.c b/source/libvpx/vp8/encoder/pickinter.c index 39a3baf..cf6a82f 100644 --- a/source/libvpx/vp8/encoder/pickinter.c +++ b/source/libvpx/vp8/encoder/pickinter.c @@ -1177,6 +1177,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, x->best_reference_frame = best_mbmode.ref_frame; best_sse = best_rd_sse; } + x->increase_denoising = 0; vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse, recon_yoffset, recon_uvoffset); diff --git a/source/libvpx/vp8/encoder/x86/denoising_sse2.c b/source/libvpx/vp8/encoder/x86/denoising_sse2.c index cceb826..5112f89 100644 --- a/source/libvpx/vp8/encoder/x86/denoising_sse2.c +++ b/source/libvpx/vp8/encoder/x86/denoising_sse2.c @@ -22,26 +22,28 @@ union sum_union { signed char e[16]; }; -int vp8_denoiser_filter_sse2(YV12_BUFFER_CONFIG *mc_running_avg, - YV12_BUFFER_CONFIG *running_avg, - MACROBLOCK *signal, unsigned int motion_magnitude, - int y_offset, int uv_offset) +int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y, + int mc_avg_y_stride, + unsigned char *running_avg_y, int avg_y_stride, + unsigned char *sig, int sig_stride, + unsigned int motion_magnitude, + int increase_denoising) { 
- unsigned char *sig = signal->thismb; - int sig_stride = 16; - unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset; - int mc_avg_y_stride = mc_running_avg->y_stride; - unsigned char *running_avg_y = running_avg->y_buffer + y_offset; - int avg_y_stride = running_avg->y_stride; + unsigned char *running_avg_y_start = running_avg_y; + unsigned char *sig_start = sig; + int sum_diff_thresh; int r; + int shift_inc = (increase_denoising && + motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 1 : 0; __m128i acc_diff = _mm_setzero_si128(); const __m128i k_0 = _mm_setzero_si128(); - const __m128i k_4 = _mm_set1_epi8(4); + const __m128i k_4 = _mm_set1_epi8(4 + shift_inc); const __m128i k_8 = _mm_set1_epi8(8); const __m128i k_16 = _mm_set1_epi8(16); /* Modify each level's adjustment according to motion_magnitude. */ const __m128i l3 = _mm_set1_epi8( - (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 7 : 6); + (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? + 7 + shift_inc : 6); /* Difference between level 3 and level 2 is 2. */ const __m128i l32 = _mm_set1_epi8(2); /* Difference between level 2 and level 1 is 1. 
*/ @@ -108,13 +110,14 @@ int vp8_denoiser_filter_sse2(YV12_BUFFER_CONFIG *mc_running_avg, + s.e[6] + s.e[7] + s.e[8] + s.e[9] + s.e[10] + s.e[11] + s.e[12] + s.e[13] + s.e[14] + s.e[15]; - if (abs(sum_diff) > SUM_DIFF_THRESHOLD) + sum_diff_thresh = SUM_DIFF_THRESHOLD; + if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH; + if (abs(sum_diff) > sum_diff_thresh) { return COPY_BLOCK; } } - vp8_copy_mem16x16(running_avg->y_buffer + y_offset, avg_y_stride, - signal->thismb, sig_stride); + vp8_copy_mem16x16(running_avg_y_start, avg_y_stride, sig_start, sig_stride); return FILTER_BLOCK; } diff --git a/source/libvpx/vp8/vp8_common.mk b/source/libvpx/vp8/vp8_common.mk index 2812111..8282547 100644 --- a/source/libvpx/vp8/vp8_common.mk +++ b/source/libvpx/vp8/vp8_common.mk @@ -129,7 +129,6 @@ VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/dequantize_dspr2.c # common (c) VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/filter_arm.c VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/loopfilter_arm.c -VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/reconintra_arm.c VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/dequantize_arm.c VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/variance_arm.c @@ -159,13 +158,16 @@ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance_halfpixvar16x16_ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6$(ASM) # common (neon) -VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfilter_neon$(ASM) -VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfiltersimpleverticaledge_neon$(ASM) -VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/buildintrapredictorsmby_neon$(ASM) -VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_blk_neon.c -VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance8x8_neon$(ASM) -VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance16x16_neon$(ASM) -VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance16x16s_neon$(ASM) +#VP8_COMMON_SRCS-$(HAVE_NEON_ASM) += 
common/arm/reconintra_arm.c +VP8_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/loopfilter_neon$(ASM) +VP8_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/loopfiltersimpleverticaledge_neon$(ASM) +#VP8_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/buildintrapredictorsmby_neon$(ASM) +VP8_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/idct_blk_neon.c +VP8_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/idct_dequant_0_2x_neon$(ASM) +VP8_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/idct_dequant_full_2x_neon$(ASM) +VP8_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp8_subpixelvariance8x8_neon$(ASM) +VP8_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp8_subpixelvariance16x16_neon$(ASM) +VP8_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp8_subpixelvariance16x16s_neon$(ASM) # common (neon intrinsics) VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/bilinearpredict_neon.c @@ -173,14 +175,12 @@ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/copymem_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dc_only_idct_add_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequant_idct_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequantizeb_neon.c -VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_dequant_full_2x_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/iwalsh_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfiltersimplehorizontaledge_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/mbloopfilter_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sad_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/shortidct4x4llm_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict_neon.c -VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_dequant_0_2x_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/variance_neon.c $(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.pl)) diff --git a/source/libvpx/vp8/vp8_cx_iface.c b/source/libvpx/vp8/vp8_cx_iface.c index 6ca6087..501dd3e 100644 --- 
a/source/libvpx/vp8/vp8_cx_iface.c +++ b/source/libvpx/vp8/vp8_cx_iface.c @@ -886,7 +886,7 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, VP8_COMP *cpi = (VP8_COMP *)ctx->cpi; /* Add the frame packet to the list of returned packets. */ - round = (vpx_codec_pts_t)1000000 + round = (vpx_codec_pts_t)10000000 * ctx->cfg.g_timebase.num / 2 - 1; delta = (dst_end_time_stamp - dst_time_stamp); pkt.kind = VPX_CODEC_CX_FRAME_PKT; diff --git a/source/libvpx/vp8/vp8cx_arm.mk b/source/libvpx/vp8/vp8cx_arm.mk index 398172a..5733048 100644 --- a/source/libvpx/vp8/vp8cx_arm.mk +++ b/source/libvpx/vp8/vp8cx_arm.mk @@ -35,11 +35,12 @@ VP8_CX_SRCS-$(HAVE_MEDIA) += encoder/arm/armv6/walsh_v6$(ASM) #File list for neon # encoder -VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/fastquantizeb_neon$(ASM) -VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/picklpf_arm.c +VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/fastquantizeb_neon$(ASM) +VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/picklpf_arm.c +VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/shortfdct_neon$(ASM) +VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/subtract_neon$(ASM) +VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/vp8_mse16x16_neon$(ASM) +VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/vp8_memcpy_neon$(ASM) +VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/vp8_shortwalsh4x4_neon$(ASM) + VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/denoising_neon.c -VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/shortfdct_neon$(ASM) -VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/subtract_neon$(ASM) -VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp8_mse16x16_neon$(ASM) -VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp8_memcpy_neon$(ASM) -VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp8_shortwalsh4x4_neon$(ASM) diff --git a/source/libvpx/vp9/common/arm/neon/vp9_idct8x8_add_neon.asm b/source/libvpx/vp9/common/arm/neon/vp9_idct8x8_add_neon.asm index 5476400..ab5bb69 100644 --- 
a/source/libvpx/vp9/common/arm/neon/vp9_idct8x8_add_neon.asm +++ b/source/libvpx/vp9/common/arm/neon/vp9_idct8x8_add_neon.asm @@ -9,7 +9,7 @@ ; EXPORT |vp9_idct8x8_64_add_neon| - EXPORT |vp9_idct8x8_10_add_neon| + EXPORT |vp9_idct8x8_12_add_neon| ARM REQUIRE8 PRESERVE8 @@ -310,13 +310,13 @@ bx lr ENDP ; |vp9_idct8x8_64_add_neon| -;void vp9_idct8x8_10_add_neon(int16_t *input, uint8_t *dest, int dest_stride) +;void vp9_idct8x8_12_add_neon(int16_t *input, uint8_t *dest, int dest_stride) ; ; r0 int16_t input ; r1 uint8_t *dest ; r2 int dest_stride) -|vp9_idct8x8_10_add_neon| PROC +|vp9_idct8x8_12_add_neon| PROC push {r4-r9} vpush {d8-d15} vld1.s16 {q8,q9}, [r0]! @@ -514,6 +514,6 @@ vpop {d8-d15} pop {r4-r9} bx lr - ENDP ; |vp9_idct8x8_10_add_neon| + ENDP ; |vp9_idct8x8_12_add_neon| END diff --git a/source/libvpx/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c b/source/libvpx/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c index acccaea..fc44ffa 100644 --- a/source/libvpx/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c +++ b/source/libvpx/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c @@ -617,7 +617,7 @@ void vp9_iht8x8_64_add_dspr2(const int16_t *input, uint8_t *dest, } } -void vp9_idct8x8_10_add_dspr2(const int16_t *input, uint8_t *dest, +void vp9_idct8x8_12_add_dspr2(const int16_t *input, uint8_t *dest, int dest_stride) { DECLARE_ALIGNED(32, int16_t, out[8 * 8]); int16_t *outptr = out; diff --git a/source/libvpx/vp9/common/vp9_common.h b/source/libvpx/vp9/common/vp9_common.h index 2dccb70..04db7c0 100644 --- a/source/libvpx/vp9/common/vp9_common.h +++ b/source/libvpx/vp9/common/vp9_common.h @@ -45,7 +45,7 @@ extern "C" { vpx_memcpy(dest, src, n * sizeof(*src)); \ } -#define vp9_zero(dest) vpx_memset(&dest, 0, sizeof(dest)) +#define vp9_zero(dest) vpx_memset(&(dest), 0, sizeof(dest)) #define vp9_zero_array(dest, n) vpx_memset(dest, 0, n * sizeof(*dest)) static INLINE uint8_t clip_pixel(int val) { diff --git a/source/libvpx/vp9/common/vp9_convolve.c b/source/libvpx/vp9/common/vp9_convolve.c 
index d30e0b4..1a8c49d 100644 --- a/source/libvpx/vp9/common/vp9_convolve.c +++ b/source/libvpx/vp9/common/vp9_convolve.c @@ -156,6 +156,9 @@ void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, const InterpKernel *const filters_x = get_filter_base(filter_x); const int x0_q4 = get_filter_offset(filter_x, filters_x); + (void)filter_y; + (void)y_step_q4; + convolve_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4, w, h); } @@ -168,6 +171,9 @@ void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, const InterpKernel *const filters_x = get_filter_base(filter_x); const int x0_q4 = get_filter_offset(filter_x, filters_x); + (void)filter_y; + (void)y_step_q4; + convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4, w, h); } @@ -179,6 +185,10 @@ void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, int w, int h) { const InterpKernel *const filters_y = get_filter_base(filter_y); const int y0_q4 = get_filter_offset(filter_y, filters_y); + + (void)filter_x; + (void)x_step_q4; + convolve_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4, y_step_q4, w, h); } @@ -190,6 +200,10 @@ void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, int w, int h) { const InterpKernel *const filters_y = get_filter_base(filter_y); const int y0_q4 = get_filter_offset(filter_y, filters_y); + + (void)filter_x; + (void)x_step_q4; + convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4, y_step_q4, w, h); } @@ -232,6 +246,9 @@ void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, int w, int h) { int r; + (void)filter_x; (void)filter_x_stride; + (void)filter_y; (void)filter_y_stride; + for (r = h; r > 0; --r) { vpx_memcpy(dst, src, w); src += src_stride; @@ -246,6 +263,9 @@ void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, int w, int h) { int x, y; + (void)filter_x; (void)filter_x_stride; + (void)filter_y; (void)filter_y_stride; + for (y = 0; y < 
h; ++y) { for (x = 0; x < w; ++x) dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); diff --git a/source/libvpx/vp9/common/vp9_debugmodes.c b/source/libvpx/vp9/common/vp9_debugmodes.c index 8f150a4..d2522bb 100644 --- a/source/libvpx/vp9/common/vp9_debugmodes.c +++ b/source/libvpx/vp9/common/vp9_debugmodes.c @@ -24,10 +24,9 @@ static void log_frame_info(VP9_COMMON *cm, const char *str, FILE *f) { */ static void print_mi_data(VP9_COMMON *cm, FILE *file, const char *descriptor, size_t member_offset) { - int mi_row; - int mi_col; + int mi_row, mi_col; int mi_index = 0; - MODE_INFO **mi_8x8 = cm->mi_grid_visible; + MODE_INFO **mi = cm->mi_grid_visible; int rows = cm->mi_rows; int cols = cm->mi_cols; char prefix = descriptor[0]; @@ -38,7 +37,7 @@ static void print_mi_data(VP9_COMMON *cm, FILE *file, const char *descriptor, fprintf(file, "%c ", prefix); for (mi_col = 0; mi_col < cols; mi_col++) { fprintf(file, "%2d ", - *((int*) ((char *) (&mi_8x8[mi_index]->mbmi) + + *((int*) ((char *) (&mi[mi_index]->mbmi) + member_offset))); mi_index++; } @@ -52,7 +51,7 @@ void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, const char *file) { int mi_col; int mi_index = 0; FILE *mvs = fopen(file, "a"); - MODE_INFO **mi_8x8 = cm->mi_grid_visible; + MODE_INFO **mi = cm->mi_grid_visible; int rows = cm->mi_rows; int cols = cm->mi_cols; @@ -67,8 +66,8 @@ void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, const char *file) { for (mi_row = 0; mi_row < rows; mi_row++) { fprintf(mvs, "V "); for (mi_col = 0; mi_col < cols; mi_col++) { - fprintf(mvs, "%4d:%4d ", mi_8x8[mi_index]->mbmi.mv[0].as_mv.row, - mi_8x8[mi_index]->mbmi.mv[0].as_mv.col); + fprintf(mvs, "%4d:%4d ", mi[mi_index]->mbmi.mv[0].as_mv.row, + mi[mi_index]->mbmi.mv[0].as_mv.col); mi_index++; } fprintf(mvs, "\n"); diff --git a/source/libvpx/vp9/common/vp9_idct.c b/source/libvpx/vp9/common/vp9_idct.c index 20b78bf..856d41e 100644 --- a/source/libvpx/vp9/common/vp9_idct.c +++ b/source/libvpx/vp9/common/vp9_idct.c @@ -421,7 
+421,7 @@ void vp9_iht8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride, } } -void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int stride) { +void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int stride) { int16_t out[8 * 8] = { 0 }; int16_t *outptr = out; int i, j; @@ -1348,8 +1348,8 @@ void vp9_idct8x8_add(const int16_t *input, uint8_t *dest, int stride, int eob) { if (eob == 1) // DC only DCT coefficient vp9_idct8x8_1_add(input, dest, stride); - else if (eob <= 10) - vp9_idct8x8_10_add(input, dest, stride); + else if (eob <= 12) + vp9_idct8x8_12_add(input, dest, stride); else vp9_idct8x8_64_add(input, dest, stride); } diff --git a/source/libvpx/vp9/common/vp9_loopfilter.c b/source/libvpx/vp9/common/vp9_loopfilter.c index 3ac5a05..efd0249 100644 --- a/source/libvpx/vp9/common/vp9_loopfilter.c +++ b/source/libvpx/vp9/common/vp9_loopfilter.c @@ -619,12 +619,12 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n, // by mi_row, mi_col. // TODO(JBB): This function only works for yv12. void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, - MODE_INFO **mi_8x8, const int mode_info_stride, + MODE_INFO **mi, const int mode_info_stride, LOOP_FILTER_MASK *lfm) { int idx_32, idx_16, idx_8; const loop_filter_info_n *const lfi_n = &cm->lf_info; - MODE_INFO **mip = mi_8x8; - MODE_INFO **mip2 = mi_8x8; + MODE_INFO **mip = mi; + MODE_INFO **mip2 = mi; // These are offsets to the next mi in the 64x64 block. It is what gets // added to the mi ptr as we go through each loop. It helps us to avoids @@ -1192,39 +1192,41 @@ void vp9_filter_block_plane(VP9_COMMON *const cm, } void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer, - VP9_COMMON *cm, MACROBLOCKD *xd, + VP9_COMMON *cm, + struct macroblockd_plane planes[MAX_MB_PLANE], int start, int stop, int y_only) { const int num_planes = y_only ? 
1 : MAX_MB_PLANE; - int mi_row, mi_col; + const int use_420 = y_only || (planes[1].subsampling_y == 1 && + planes[1].subsampling_x == 1); LOOP_FILTER_MASK lfm; - int use_420 = y_only || (xd->plane[1].subsampling_y == 1 && - xd->plane[1].subsampling_x == 1); + int mi_row, mi_col; for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) { - MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mi_stride; + MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { int plane; - vp9_setup_dst_planes(xd, frame_buffer, mi_row, mi_col); + vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); // TODO(JBB): Make setup_mask work for non 420. if (use_420) - vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mi_stride, + vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm); for (plane = 0; plane < num_planes; ++plane) { if (use_420) - vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm); + vp9_filter_block_plane(cm, &planes[plane], mi_row, &lfm); else - filter_block_plane_non420(cm, &xd->plane[plane], mi_8x8 + mi_col, + filter_block_plane_non420(cm, &planes[plane], mi + mi_col, mi_row, mi_col); } } } } -void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd, +void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame, + VP9_COMMON *cm, MACROBLOCKD *xd, int frame_filter_level, int y_only, int partial_frame) { int start_mi_row, end_mi_row, mi_rows_to_filter; @@ -1238,7 +1240,7 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd, } end_mi_row = start_mi_row + mi_rows_to_filter; vp9_loop_filter_frame_init(cm, frame_filter_level); - vp9_loop_filter_rows(cm->frame_to_show, cm, xd, + vp9_loop_filter_rows(frame, cm, xd->plane, start_mi_row, end_mi_row, y_only); } @@ -1246,7 +1248,7 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd, int vp9_loop_filter_worker(void *arg1, void *arg2) { LFWorkerData *const lf_data = (LFWorkerData*)arg1; (void)arg2; - 
vp9_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, &lf_data->xd, + vp9_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, lf_data->start, lf_data->stop, lf_data->y_only); return 1; } diff --git a/source/libvpx/vp9/common/vp9_loopfilter.h b/source/libvpx/vp9/common/vp9_loopfilter.h index 97ae9d2..6fa2773 100644 --- a/source/libvpx/vp9/common/vp9_loopfilter.h +++ b/source/libvpx/vp9/common/vp9_loopfilter.h @@ -104,22 +104,23 @@ void vp9_loop_filter_init(struct VP9Common *cm); // calls this function directly. void vp9_loop_filter_frame_init(struct VP9Common *cm, int default_filt_lvl); -void vp9_loop_filter_frame(struct VP9Common *cm, +void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame, + struct VP9Common *cm, struct macroblockd *mbd, int filter_level, int y_only, int partial_frame); // Apply the loop filter to [start, stop) macro block rows in frame_buffer. void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer, - struct VP9Common *cm, struct macroblockd *xd, + struct VP9Common *cm, + struct macroblockd_plane planes[MAX_MB_PLANE], int start, int stop, int y_only); typedef struct LoopFilterWorkerData { const YV12_BUFFER_CONFIG *frame_buffer; struct VP9Common *cm; - struct macroblockd xd; // TODO(jzern): most of this is unnecessary to the - // loopfilter. the planes are necessary as their state - // is changed during decode. 
+ struct macroblockd_plane planes[MAX_MB_PLANE]; + int start; int stop; int y_only; diff --git a/source/libvpx/vp9/common/vp9_postproc.c b/source/libvpx/vp9/common/vp9_postproc.c index 5601a93..9f32104 100644 --- a/source/libvpx/vp9/common/vp9_postproc.c +++ b/source/libvpx/vp9/common/vp9_postproc.c @@ -24,61 +24,7 @@ #include "vp9/common/vp9_systemdependent.h" #include "vp9/common/vp9_textblit.h" -#define RGB_TO_YUV(t) \ - ( (0.257*(float)(t >> 16)) + (0.504*(float)(t >> 8 & 0xff)) + \ - (0.098*(float)(t & 0xff)) + 16), \ - (-(0.148*(float)(t >> 16)) - (0.291*(float)(t >> 8 & 0xff)) + \ - (0.439*(float)(t & 0xff)) + 128), \ - ( (0.439*(float)(t >> 16)) - (0.368*(float)(t >> 8 & 0xff)) - \ - (0.071*(float)(t & 0xff)) + 128) - -/* global constants */ -#if 0 && CONFIG_POSTPROC_VISUALIZER -static const unsigned char PREDICTION_MODE_colors[MB_MODE_COUNT][3] = { - { RGB_TO_YUV(0x98FB98) }, /* PaleGreen */ - { RGB_TO_YUV(0x00FF00) }, /* Green */ - { RGB_TO_YUV(0xADFF2F) }, /* GreenYellow */ - { RGB_TO_YUV(0x8F0000) }, /* Dark Red */ - { RGB_TO_YUV(0x008F8F) }, /* Dark Cyan */ - { RGB_TO_YUV(0x008F8F) }, /* Dark Cyan */ - { RGB_TO_YUV(0x008F8F) }, /* Dark Cyan */ - { RGB_TO_YUV(0x8F0000) }, /* Dark Red */ - { RGB_TO_YUV(0x8F0000) }, /* Dark Red */ - { RGB_TO_YUV(0x228B22) }, /* ForestGreen */ - { RGB_TO_YUV(0x006400) }, /* DarkGreen */ - { RGB_TO_YUV(0x98F5FF) }, /* Cadet Blue */ - { RGB_TO_YUV(0x6CA6CD) }, /* Sky Blue */ - { RGB_TO_YUV(0x00008B) }, /* Dark blue */ - { RGB_TO_YUV(0x551A8B) }, /* Purple */ - { RGB_TO_YUV(0xFF0000) } /* Red */ - { RGB_TO_YUV(0xCC33FF) }, /* Magenta */ -}; - -static const unsigned char B_PREDICTION_MODE_colors[INTRA_MODES][3] = { - { RGB_TO_YUV(0x6633ff) }, /* Purple */ - { RGB_TO_YUV(0xcc33ff) }, /* Magenta */ - { RGB_TO_YUV(0xff33cc) }, /* Pink */ - { RGB_TO_YUV(0xff3366) }, /* Coral */ - { RGB_TO_YUV(0x3366ff) }, /* Blue */ - { RGB_TO_YUV(0xed00f5) }, /* Dark Blue */ - { RGB_TO_YUV(0x2e00b8) }, /* Dark Purple */ - { RGB_TO_YUV(0xff6633) 
}, /* Orange */ - { RGB_TO_YUV(0x33ccff) }, /* Light Blue */ - { RGB_TO_YUV(0x8ab800) }, /* Green */ - { RGB_TO_YUV(0xffcc33) }, /* Light Orange */ - { RGB_TO_YUV(0x33ffcc) }, /* Aqua */ - { RGB_TO_YUV(0x66ff33) }, /* Light Green */ - { RGB_TO_YUV(0xccff33) }, /* Yellow */ -}; - -static const unsigned char MV_REFERENCE_FRAME_colors[MAX_REF_FRAMES][3] = { - { RGB_TO_YUV(0x00ff00) }, /* Blue */ - { RGB_TO_YUV(0x0000ff) }, /* Green */ - { RGB_TO_YUV(0xffff00) }, /* Yellow */ - { RGB_TO_YUV(0xff0000) }, /* Red */ -}; -#endif - +#if CONFIG_VP9_POSTPROC static const short kernel5[] = { 1, 1, 4, 1, 1 }; @@ -448,163 +394,6 @@ void vp9_plane_add_noise_c(uint8_t *start, char *noise, } } -/* Blend the macro block with a solid colored square. Leave the - * edges unblended to give distinction to macro blocks in areas - * filled with the same color block. - */ -void vp9_blend_mb_inner_c(uint8_t *y, uint8_t *u, uint8_t *v, - int y1, int u1, int v1, int alpha, int stride) { - int i, j; - int y1_const = y1 * ((1 << 16) - alpha); - int u1_const = u1 * ((1 << 16) - alpha); - int v1_const = v1 * ((1 << 16) - alpha); - - y += 2 * stride + 2; - for (i = 0; i < 12; i++) { - for (j = 0; j < 12; j++) { - y[j] = (y[j] * alpha + y1_const) >> 16; - } - y += stride; - } - - stride >>= 1; - - u += stride + 1; - v += stride + 1; - - for (i = 0; i < 6; i++) { - for (j = 0; j < 6; j++) { - u[j] = (u[j] * alpha + u1_const) >> 16; - v[j] = (v[j] * alpha + v1_const) >> 16; - } - u += stride; - v += stride; - } -} - -/* Blend only the edge of the macro block. Leave center - * unblended to allow for other visualizations to be layered. 
- */ -void vp9_blend_mb_outer_c(uint8_t *y, uint8_t *u, uint8_t *v, - int y1, int u1, int v1, int alpha, int stride) { - int i, j; - int y1_const = y1 * ((1 << 16) - alpha); - int u1_const = u1 * ((1 << 16) - alpha); - int v1_const = v1 * ((1 << 16) - alpha); - - for (i = 0; i < 2; i++) { - for (j = 0; j < 16; j++) { - y[j] = (y[j] * alpha + y1_const) >> 16; - } - y += stride; - } - - for (i = 0; i < 12; i++) { - y[0] = (y[0] * alpha + y1_const) >> 16; - y[1] = (y[1] * alpha + y1_const) >> 16; - y[14] = (y[14] * alpha + y1_const) >> 16; - y[15] = (y[15] * alpha + y1_const) >> 16; - y += stride; - } - - for (i = 0; i < 2; i++) { - for (j = 0; j < 16; j++) { - y[j] = (y[j] * alpha + y1_const) >> 16; - } - y += stride; - } - - stride >>= 1; - - for (j = 0; j < 8; j++) { - u[j] = (u[j] * alpha + u1_const) >> 16; - v[j] = (v[j] * alpha + v1_const) >> 16; - } - u += stride; - v += stride; - - for (i = 0; i < 6; i++) { - u[0] = (u[0] * alpha + u1_const) >> 16; - v[0] = (v[0] * alpha + v1_const) >> 16; - - u[7] = (u[7] * alpha + u1_const) >> 16; - v[7] = (v[7] * alpha + v1_const) >> 16; - - u += stride; - v += stride; - } - - for (j = 0; j < 8; j++) { - u[j] = (u[j] * alpha + u1_const) >> 16; - v[j] = (v[j] * alpha + v1_const) >> 16; - } -} - -void vp9_blend_b_c(uint8_t *y, uint8_t *u, uint8_t *v, - int y1, int u1, int v1, int alpha, int stride) { - int i, j; - int y1_const = y1 * ((1 << 16) - alpha); - int u1_const = u1 * ((1 << 16) - alpha); - int v1_const = v1 * ((1 << 16) - alpha); - - for (i = 0; i < 4; i++) { - for (j = 0; j < 4; j++) { - y[j] = (y[j] * alpha + y1_const) >> 16; - } - y += stride; - } - - stride >>= 1; - - for (i = 0; i < 2; i++) { - for (j = 0; j < 2; j++) { - u[j] = (u[j] * alpha + u1_const) >> 16; - v[j] = (v[j] * alpha + v1_const) >> 16; - } - u += stride; - v += stride; - } -} - -static void constrain_line(int x0, int *x1, int y0, int *y1, - int width, int height) { - int dx; - int dy; - - if (*x1 > width) { - dx = *x1 - x0; - dy = *y1 - y0; - - 
*x1 = width; - if (dx) - *y1 = ((width - x0) * dy) / dx + y0; - } - if (*x1 < 0) { - dx = *x1 - x0; - dy = *y1 - y0; - - *x1 = 0; - if (dx) - *y1 = ((0 - x0) * dy) / dx + y0; - } - if (*y1 > height) { - dx = *x1 - x0; - dy = *y1 - y0; - - *y1 = height; - if (dy) - *x1 = ((height - y0) * dx) / dy + x0; - } - if (*y1 < 0) { - dx = *x1 - x0; - dy = *y1 - y0; - - *y1 = 0; - if (dy) - *x1 = ((0 - y0) * dx) / dy + x0; - } -} - int vp9_post_proc_frame(struct VP9Common *cm, YV12_BUFFER_CONFIG *dest, vp9_ppflags_t *ppflags) { const int q = MIN(63, cm->lf.filter_level * 10 / 6); @@ -643,328 +432,6 @@ int vp9_post_proc_frame(struct VP9Common *cm, ppbuf->y_width, ppbuf->y_height, ppbuf->y_stride); } -#if 0 && CONFIG_POSTPROC_VISUALIZER - if (flags & VP9D_DEBUG_TXT_FRAME_INFO) { - char message[512]; - snprintf(message, sizeof(message) -1, - "F%1dG%1dQ%3dF%3dP%d_s%dx%d", - (cm->frame_type == KEY_FRAME), - cm->refresh_golden_frame, - cm->base_qindex, - cm->filter_level, - flags, - cm->mb_cols, cm->mb_rows); - vp9_blit_text(message, ppbuf->y_buffer, ppbuf->y_stride); - } - - if (flags & VP9D_DEBUG_TXT_MBLK_MODES) { - int i, j; - uint8_t *y_ptr; - int mb_rows = ppbuf->y_height >> 4; - int mb_cols = ppbuf->y_width >> 4; - int mb_index = 0; - MODE_INFO *mi = cm->mi; - - y_ptr = post->y_buffer + 4 * post->y_stride + 4; - - /* vp9_filter each macro block */ - for (i = 0; i < mb_rows; i++) { - for (j = 0; j < mb_cols; j++) { - char zz[4]; - - snprintf(zz, sizeof(zz) - 1, "%c", mi[mb_index].mbmi.mode + 'a'); - - vp9_blit_text(zz, y_ptr, post->y_stride); - mb_index++; - y_ptr += 16; - } - - mb_index++; /* border */ - y_ptr += post->y_stride * 16 - post->y_width; - } - } - - if (flags & VP9D_DEBUG_TXT_DC_DIFF) { - int i, j; - uint8_t *y_ptr; - int mb_rows = ppbuf->y_height >> 4; - int mb_cols = ppbuf->y_width >> 4; - int mb_index = 0; - MODE_INFO *mi = cm->mi; - - y_ptr = post->y_buffer + 4 * post->y_stride + 4; - - /* vp9_filter each macro block */ - for (i = 0; i < mb_rows; i++) { - for 
(j = 0; j < mb_cols; j++) { - char zz[4]; - int dc_diff = !(mi[mb_index].mbmi.mode != I4X4_PRED && - mi[mb_index].mbmi.mode != SPLITMV && - mi[mb_index].mbmi.skip); - - if (cm->frame_type == KEY_FRAME) - snprintf(zz, sizeof(zz) - 1, "a"); - else - snprintf(zz, sizeof(zz) - 1, "%c", dc_diff + '0'); - - vp9_blit_text(zz, y_ptr, post->y_stride); - mb_index++; - y_ptr += 16; - } - - mb_index++; /* border */ - y_ptr += post->y_stride * 16 - post->y_width; - } - } - - if (flags & VP9D_DEBUG_TXT_RATE_INFO) { - char message[512]; - snprintf(message, sizeof(message), - "Bitrate: %10.2f framerate: %10.2f ", - cm->bitrate, cm->framerate); - vp9_blit_text(message, ppbuf->y_buffer, ppbuf->y_stride); - } - - /* Draw motion vectors */ - if ((flags & VP9D_DEBUG_DRAW_MV) && ppflags->display_mv_flag) { - int width = ppbuf->y_width; - int height = ppbuf->y_height; - uint8_t *y_buffer = ppbuf->y_buffer; - int y_stride = ppbuf->y_stride; - MODE_INFO *mi = cm->mi; - int x0, y0; - - for (y0 = 0; y0 < height; y0 += 16) { - for (x0 = 0; x0 < width; x0 += 16) { - int x1, y1; - - if (!(ppflags->display_mv_flag & (1 << mi->mbmi.mode))) { - mi++; - continue; - } - - if (mi->mbmi.mode == SPLITMV) { - switch (mi->mbmi.partitioning) { - case PARTITIONING_16X8 : { /* mv_top_bottom */ - union b_mode_info *bmi = &mi->bmi[0]; - MV *mv = &bmi->mv.as_mv; - - x1 = x0 + 8 + (mv->col >> 3); - y1 = y0 + 4 + (mv->row >> 3); - - constrain_line(x0 + 8, &x1, y0 + 4, &y1, width, height); - vp9_blit_line(x0 + 8, x1, y0 + 4, y1, y_buffer, y_stride); - - bmi = &mi->bmi[8]; - - x1 = x0 + 8 + (mv->col >> 3); - y1 = y0 + 12 + (mv->row >> 3); - - constrain_line(x0 + 8, &x1, y0 + 12, &y1, width, height); - vp9_blit_line(x0 + 8, x1, y0 + 12, y1, y_buffer, y_stride); - - break; - } - case PARTITIONING_8X16 : { /* mv_left_right */ - union b_mode_info *bmi = &mi->bmi[0]; - MV *mv = &bmi->mv.as_mv; - - x1 = x0 + 4 + (mv->col >> 3); - y1 = y0 + 8 + (mv->row >> 3); - - constrain_line(x0 + 4, &x1, y0 + 8, &y1, width, height); 
- vp9_blit_line(x0 + 4, x1, y0 + 8, y1, y_buffer, y_stride); - - bmi = &mi->bmi[2]; - - x1 = x0 + 12 + (mv->col >> 3); - y1 = y0 + 8 + (mv->row >> 3); - - constrain_line(x0 + 12, &x1, y0 + 8, &y1, width, height); - vp9_blit_line(x0 + 12, x1, y0 + 8, y1, y_buffer, y_stride); - - break; - } - case PARTITIONING_8X8 : { /* mv_quarters */ - union b_mode_info *bmi = &mi->bmi[0]; - MV *mv = &bmi->mv.as_mv; - - x1 = x0 + 4 + (mv->col >> 3); - y1 = y0 + 4 + (mv->row >> 3); - - constrain_line(x0 + 4, &x1, y0 + 4, &y1, width, height); - vp9_blit_line(x0 + 4, x1, y0 + 4, y1, y_buffer, y_stride); - - bmi = &mi->bmi[2]; - - x1 = x0 + 12 + (mv->col >> 3); - y1 = y0 + 4 + (mv->row >> 3); - - constrain_line(x0 + 12, &x1, y0 + 4, &y1, width, height); - vp9_blit_line(x0 + 12, x1, y0 + 4, y1, y_buffer, y_stride); - - bmi = &mi->bmi[8]; - - x1 = x0 + 4 + (mv->col >> 3); - y1 = y0 + 12 + (mv->row >> 3); - - constrain_line(x0 + 4, &x1, y0 + 12, &y1, width, height); - vp9_blit_line(x0 + 4, x1, y0 + 12, y1, y_buffer, y_stride); - - bmi = &mi->bmi[10]; - - x1 = x0 + 12 + (mv->col >> 3); - y1 = y0 + 12 + (mv->row >> 3); - - constrain_line(x0 + 12, &x1, y0 + 12, &y1, width, height); - vp9_blit_line(x0 + 12, x1, y0 + 12, y1, y_buffer, y_stride); - break; - } - case PARTITIONING_4X4: - default : { - union b_mode_info *bmi = mi->bmi; - int bx0, by0; - - for (by0 = y0; by0 < (y0 + 16); by0 += 4) { - for (bx0 = x0; bx0 < (x0 + 16); bx0 += 4) { - MV *mv = &bmi->mv.as_mv; - - x1 = bx0 + 2 + (mv->col >> 3); - y1 = by0 + 2 + (mv->row >> 3); - - constrain_line(bx0 + 2, &x1, by0 + 2, &y1, width, height); - vp9_blit_line(bx0 + 2, x1, by0 + 2, y1, y_buffer, y_stride); - - bmi++; - } - } - } - } - } else if (is_inter_mode(mi->mbmi.mode)) { - MV *mv = &mi->mbmi.mv.as_mv; - const int lx0 = x0 + 8; - const int ly0 = y0 + 8; - - x1 = lx0 + (mv->col >> 3); - y1 = ly0 + (mv->row >> 3); - - if (x1 != lx0 && y1 != ly0) { - constrain_line(lx0, &x1, ly0 - 1, &y1, width, height); - vp9_blit_line(lx0, x1, ly0 - 1, y1, 
y_buffer, y_stride); - - constrain_line(lx0, &x1, ly0 + 1, &y1, width, height); - vp9_blit_line(lx0, x1, ly0 + 1, y1, y_buffer, y_stride); - } else { - vp9_blit_line(lx0, x1, ly0, y1, y_buffer, y_stride); - } - } - - mi++; - } - mi++; - } - } - - /* Color in block modes */ - if ((flags & VP9D_DEBUG_CLR_BLK_MODES) - && (ppflags->display_mb_modes_flag || ppflags->display_b_modes_flag)) { - int y, x; - int width = ppbuf->y_width; - int height = ppbuf->y_height; - uint8_t *y_ptr = ppbuf->y_buffer; - uint8_t *u_ptr = ppbuf->u_buffer; - uint8_t *v_ptr = ppbuf->v_buffer; - int y_stride = ppbuf->y_stride; - MODE_INFO *mi = cm->mi; - - for (y = 0; y < height; y += 16) { - for (x = 0; x < width; x += 16) { - int Y = 0, U = 0, V = 0; - - if (mi->mbmi.mode == I4X4_PRED && - ((ppflags->display_mb_modes_flag & I4X4_PRED) || - ppflags->display_b_modes_flag)) { - int by, bx; - uint8_t *yl, *ul, *vl; - union b_mode_info *bmi = mi->bmi; - - yl = y_ptr + x; - ul = u_ptr + (x >> 1); - vl = v_ptr + (x >> 1); - - for (by = 0; by < 16; by += 4) { - for (bx = 0; bx < 16; bx += 4) { - if ((ppflags->display_b_modes_flag & (1 << mi->mbmi.mode)) - || (ppflags->display_mb_modes_flag & I4X4_PRED)) { - Y = B_PREDICTION_MODE_colors[bmi->as_mode][0]; - U = B_PREDICTION_MODE_colors[bmi->as_mode][1]; - V = B_PREDICTION_MODE_colors[bmi->as_mode][2]; - - vp9_blend_b(yl + bx, ul + (bx >> 1), vl + (bx >> 1), Y, U, V, - 0xc000, y_stride); - } - bmi++; - } - - yl += y_stride * 4; - ul += y_stride * 1; - vl += y_stride * 1; - } - } else if (ppflags->display_mb_modes_flag & (1 << mi->mbmi.mode)) { - Y = PREDICTION_MODE_colors[mi->mbmi.mode][0]; - U = PREDICTION_MODE_colors[mi->mbmi.mode][1]; - V = PREDICTION_MODE_colors[mi->mbmi.mode][2]; - - vp9_blend_mb_inner(y_ptr + x, u_ptr + (x >> 1), v_ptr + (x >> 1), - Y, U, V, 0xc000, y_stride); - } - - mi++; - } - y_ptr += y_stride * 16; - u_ptr += y_stride * 4; - v_ptr += y_stride * 4; - - mi++; - } - } - - /* Color in frame reference blocks */ - if ((flags & 
VP9D_DEBUG_CLR_FRM_REF_BLKS) && - ppflags->display_ref_frame_flag) { - int y, x; - int width = ppbuf->y_width; - int height = ppbuf->y_height; - uint8_t *y_ptr = ppbuf->y_buffer; - uint8_t *u_ptr = ppbuf->u_buffer; - uint8_t *v_ptr = ppbuf->v_buffer; - int y_stride = ppbuf->y_stride; - MODE_INFO *mi = cm->mi; - - for (y = 0; y < height; y += 16) { - for (x = 0; x < width; x += 16) { - int Y = 0, U = 0, V = 0; - - if (ppflags->display_ref_frame_flag & (1 << mi->mbmi.ref_frame)) { - Y = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][0]; - U = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][1]; - V = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][2]; - - vp9_blend_mb_outer(y_ptr + x, u_ptr + (x >> 1), v_ptr + (x >> 1), - Y, U, V, 0xc000, y_stride); - } - - mi++; - } - y_ptr += y_stride * 16; - u_ptr += y_stride * 4; - v_ptr += y_stride * 4; - - mi++; - } - } -#endif - *dest = *ppbuf; /* handle problem with extending borders */ @@ -975,3 +442,4 @@ int vp9_post_proc_frame(struct VP9Common *cm, return 0; } +#endif diff --git a/source/libvpx/vp9/common/vp9_ppflags.h b/source/libvpx/vp9/common/vp9_ppflags.h index e8b04d2..1644a1b 100644 --- a/source/libvpx/vp9/common/vp9_ppflags.h +++ b/source/libvpx/vp9/common/vp9_ppflags.h @@ -33,12 +33,6 @@ typedef struct { int post_proc_flag; int deblocking_level; int noise_level; -#if CONFIG_POSTPROC_VISUALIZER - int display_ref_frame_flag; - int display_mb_modes_flag; - int display_b_modes_flag; - int display_mv_flag; -#endif // CONFIG_POSTPROC_VISUALIZER } vp9_ppflags_t; #ifdef __cplusplus diff --git a/source/libvpx/vp9/common/vp9_reconinter.c b/source/libvpx/vp9/common/vp9_reconinter.c index e722d6a..edc36d7 100644 --- a/source/libvpx/vp9/common/vp9_reconinter.c +++ b/source/libvpx/vp9/common/vp9_reconinter.c @@ -409,7 +409,7 @@ void vp9_dec_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, } } -void vp9_setup_dst_planes(MACROBLOCKD *xd, +void vp9_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE], const 
YV12_BUFFER_CONFIG *src, int mi_row, int mi_col) { uint8_t *const buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer, @@ -419,7 +419,7 @@ void vp9_setup_dst_planes(MACROBLOCKD *xd, int i; for (i = 0; i < MAX_MB_PLANE; ++i) { - struct macroblockd_plane *const pd = &xd->plane[i]; + struct macroblockd_plane *const pd = &planes[i]; setup_pred_plane(&pd->dst, buffers[i], strides[i], mi_row, mi_col, NULL, pd->subsampling_x, pd->subsampling_y); } diff --git a/source/libvpx/vp9/common/vp9_reconinter.h b/source/libvpx/vp9/common/vp9_reconinter.h index 86f3158..58c596e 100644 --- a/source/libvpx/vp9/common/vp9_reconinter.h +++ b/source/libvpx/vp9/common/vp9_reconinter.h @@ -57,7 +57,8 @@ static INLINE void setup_pred_plane(struct buf_2d *dst, dst->stride = stride; } -void vp9_setup_dst_planes(MACROBLOCKD *xd, const YV12_BUFFER_CONFIG *src, +void vp9_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE], + const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col); void vp9_setup_pre_planes(MACROBLOCKD *xd, int idx, diff --git a/source/libvpx/vp9/common/vp9_reconintra.c b/source/libvpx/vp9/common/vp9_reconintra.c index 32e4551..403e105 100644 --- a/source/libvpx/vp9/common/vp9_reconintra.c +++ b/source/libvpx/vp9/common/vp9_reconintra.c @@ -31,6 +31,9 @@ const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = { ADST_ADST, // TM }; +// This serves as a wrapper function, so that all the prediction functions +// can be unified and accessed as a pointer array. Note that the boundary +// above and left are not necessarily used all the time. 
#define intra_pred_sized(type, size) \ void vp9_##type##_predictor_##size##x##size##_c(uint8_t *dst, \ ptrdiff_t stride, \ @@ -48,7 +51,7 @@ const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = { static INLINE void d207_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int r, c; - + (void) above; // first column for (r = 0; r < bs - 1; ++r) dst[r * stride] = ROUND_POWER_OF_TWO(left[r] + left[r + 1], 1); @@ -77,6 +80,7 @@ intra_pred_allsizes(d207) static INLINE void d63_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int r, c; + (void) left; for (r = 0; r < bs; ++r) { for (c = 0; c < bs; ++c) dst[c] = r & 1 ? ROUND_POWER_OF_TWO(above[r/2 + c] + @@ -92,6 +96,7 @@ intra_pred_allsizes(d63) static INLINE void d45_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int r, c; + (void) left; for (r = 0; r < bs; ++r) { for (c = 0; c < bs; ++c) dst[c] = r + c + 2 < bs * 2 ? 
ROUND_POWER_OF_TWO(above[r + c] + @@ -184,6 +189,7 @@ intra_pred_allsizes(d153) static INLINE void v_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int r; + (void) left; for (r = 0; r < bs; r++) { vpx_memcpy(dst, above, bs); @@ -195,6 +201,7 @@ intra_pred_allsizes(v) static INLINE void h_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int r; + (void) above; for (r = 0; r < bs; r++) { vpx_memset(dst, left[r], bs); @@ -219,6 +226,8 @@ intra_pred_allsizes(tm) static INLINE void dc_128_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int r; + (void) above; + (void) left; for (r = 0; r < bs; r++) { vpx_memset(dst, 128, bs); @@ -231,6 +240,7 @@ static INLINE void dc_left_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int i, r, expected_dc, sum = 0; + (void) above; for (i = 0; i < bs; i++) sum += left[i]; @@ -246,6 +256,7 @@ intra_pred_allsizes(dc_left) static INLINE void dc_top_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int i, r, expected_dc, sum = 0; + (void) left; for (i = 0; i < bs; i++) sum += above[i]; diff --git a/source/libvpx/vp9/common/vp9_rtcd_defs.pl b/source/libvpx/vp9/common/vp9_rtcd_defs.pl index 63380d6..1037bfb 100644 --- a/source/libvpx/vp9/common/vp9_rtcd_defs.pl +++ b/source/libvpx/vp9/common/vp9_rtcd_defs.pl @@ -58,7 +58,8 @@ add_proto qw/void vp9_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, con specialize qw/vp9_d63_predictor_4x4/, "$ssse3_x86inc"; add_proto qw/void vp9_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_h_predictor_4x4 neon dspr2/, "$ssse3_x86inc"; +specialize qw/vp9_h_predictor_4x4 neon_asm dspr2/, "$ssse3_x86inc"; +$vp9_h_predictor_4x4_neon_asm=vp9_h_predictor_4x4_neon; add_proto qw/void vp9_d117_predictor_4x4/, "uint8_t 
*dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vp9_d117_predictor_4x4/; @@ -70,10 +71,12 @@ add_proto qw/void vp9_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, co specialize qw/vp9_d153_predictor_4x4/, "$ssse3_x86inc"; add_proto qw/void vp9_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_v_predictor_4x4 neon/, "$sse_x86inc"; +specialize qw/vp9_v_predictor_4x4 neon_asm/, "$sse_x86inc"; +$vp9_v_predictor_4x4_neon_asm=vp9_v_predictor_4x4_neon; add_proto qw/void vp9_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_tm_predictor_4x4 neon dspr2/, "$sse_x86inc"; +specialize qw/vp9_tm_predictor_4x4 neon_asm dspr2/, "$sse_x86inc"; +$vp9_tm_predictor_4x4_neon_asm=vp9_tm_predictor_4x4_neon; add_proto qw/void vp9_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vp9_dc_predictor_4x4 dspr2/, "$sse_x86inc"; @@ -97,7 +100,8 @@ add_proto qw/void vp9_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, con specialize qw/vp9_d63_predictor_8x8/, "$ssse3_x86inc"; add_proto qw/void vp9_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_h_predictor_8x8 neon dspr2/, "$ssse3_x86inc"; +specialize qw/vp9_h_predictor_8x8 neon_asm dspr2/, "$ssse3_x86inc"; +$vp9_h_predictor_8x8_neon_asm=vp9_h_predictor_8x8_neon; add_proto qw/void vp9_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vp9_d117_predictor_8x8/; @@ -109,10 +113,12 @@ add_proto qw/void vp9_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, co specialize qw/vp9_d153_predictor_8x8/, "$ssse3_x86inc"; add_proto qw/void vp9_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_v_predictor_8x8 neon/, 
"$sse_x86inc"; +specialize qw/vp9_v_predictor_8x8 neon_asm/, "$sse_x86inc"; +$vp9_v_predictor_8x8_neon_asm=vp9_v_predictor_8x8_neon; add_proto qw/void vp9_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_tm_predictor_8x8 neon dspr2/, "$sse2_x86inc"; +specialize qw/vp9_tm_predictor_8x8 neon_asm dspr2/, "$sse2_x86inc"; +$vp9_tm_predictor_8x8_neon_asm=vp9_tm_predictor_8x8_neon; add_proto qw/void vp9_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vp9_dc_predictor_8x8 dspr2/, "$sse_x86inc"; @@ -136,7 +142,8 @@ add_proto qw/void vp9_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, c specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc"; add_proto qw/void vp9_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_h_predictor_16x16 neon dspr2/, "$ssse3_x86inc"; +specialize qw/vp9_h_predictor_16x16 neon_asm dspr2/, "$ssse3_x86inc"; +$vp9_h_predictor_16x16_neon_asm=vp9_h_predictor_16x16_neon; add_proto qw/void vp9_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vp9_d117_predictor_16x16/; @@ -148,10 +155,12 @@ add_proto qw/void vp9_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, specialize qw/vp9_d153_predictor_16x16/, "$ssse3_x86inc"; add_proto qw/void vp9_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_v_predictor_16x16 neon/, "$sse2_x86inc"; +specialize qw/vp9_v_predictor_16x16 neon_asm/, "$sse2_x86inc"; +$vp9_v_predictor_16x16_neon_asm=vp9_v_predictor_16x16_neon; add_proto qw/void vp9_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_tm_predictor_16x16 neon/, "$sse2_x86inc"; +specialize qw/vp9_tm_predictor_16x16 neon_asm/, "$sse2_x86inc"; 
+$vp9_tm_predictor_16x16_neon_asm=vp9_tm_predictor_16x16_neon; add_proto qw/void vp9_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vp9_dc_predictor_16x16 dspr2/, "$sse2_x86inc"; @@ -175,7 +184,8 @@ add_proto qw/void vp9_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, c specialize qw/vp9_d63_predictor_32x32/, "$ssse3_x86inc"; add_proto qw/void vp9_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_h_predictor_32x32 neon/, "$ssse3_x86inc"; +specialize qw/vp9_h_predictor_32x32 neon_asm/, "$ssse3_x86inc"; +$vp9_h_predictor_32x32_neon_asm=vp9_h_predictor_32x32_neon; add_proto qw/void vp9_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vp9_d117_predictor_32x32/; @@ -187,10 +197,12 @@ add_proto qw/void vp9_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, specialize qw/vp9_d153_predictor_32x32/; add_proto qw/void vp9_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_v_predictor_32x32 neon/, "$sse2_x86inc"; +specialize qw/vp9_v_predictor_32x32 neon_asm/, "$sse2_x86inc"; +$vp9_v_predictor_32x32_neon_asm=vp9_v_predictor_32x32_neon; add_proto qw/void vp9_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_tm_predictor_32x32 neon/, "$sse2_x86_64"; +specialize qw/vp9_tm_predictor_32x32 neon_asm/, "$sse2_x86_64"; +$vp9_tm_predictor_32x32_neon_asm=vp9_tm_predictor_32x32_neon; add_proto qw/void vp9_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vp9_dc_predictor_32x32/, "$sse2_x86inc"; @@ -208,37 +220,48 @@ specialize qw/vp9_dc_128_predictor_32x32/; # Loopfilter # add_proto qw/void vp9_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t 
*limit, const uint8_t *thresh"; -specialize qw/vp9_lpf_vertical_16 sse2 neon dspr2/; +specialize qw/vp9_lpf_vertical_16 sse2 neon_asm dspr2/; +$vp9_lpf_vertical_16_neon_asm=vp9_lpf_vertical_16_neon; add_proto qw/void vp9_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; -specialize qw/vp9_lpf_vertical_16_dual sse2 neon dspr2/; +specialize qw/vp9_lpf_vertical_16_dual sse2 neon_asm dspr2/; +$vp9_lpf_vertical_16_dual_neon_asm=vp9_lpf_vertical_16_dual_neon; add_proto qw/void vp9_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; -specialize qw/vp9_lpf_vertical_8 sse2 neon dspr2/; +specialize qw/vp9_lpf_vertical_8 sse2 neon_asm dspr2/; +$vp9_lpf_vertical_8_neon_asm=vp9_lpf_vertical_8_neon; add_proto qw/void vp9_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; -specialize qw/vp9_lpf_vertical_8_dual sse2 neon dspr2/; +specialize qw/vp9_lpf_vertical_8_dual sse2 neon_asm dspr2/; +$vp9_lpf_vertical_8_dual_neon_asm=vp9_lpf_vertical_8_dual_neon; add_proto qw/void vp9_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; -specialize qw/vp9_lpf_vertical_4 mmx neon dspr2/; +specialize qw/vp9_lpf_vertical_4 mmx neon_asm dspr2/; +$vp9_lpf_vertical_4_neon_asm=vp9_lpf_vertical_4_neon; add_proto qw/void vp9_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; -specialize qw/vp9_lpf_vertical_4_dual sse2 neon dspr2/; +specialize qw/vp9_lpf_vertical_4_dual sse2 neon_asm dspr2/; +$vp9_lpf_vertical_4_dual_neon_asm=vp9_lpf_vertical_4_dual_neon; add_proto qw/void vp9_lpf_horizontal_16/, "uint8_t *s, int pitch, const 
uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; -specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon dspr2/; +specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon_asm dspr2/; +$vp9_lpf_horizontal_16_neon_asm=vp9_lpf_horizontal_16_neon; add_proto qw/void vp9_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; -specialize qw/vp9_lpf_horizontal_8 sse2 neon dspr2/; +specialize qw/vp9_lpf_horizontal_8 sse2 neon_asm dspr2/; +$vp9_lpf_horizontal_8_neon_asm=vp9_lpf_horizontal_8_neon; add_proto qw/void vp9_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; -specialize qw/vp9_lpf_horizontal_8_dual sse2 neon dspr2/; +specialize qw/vp9_lpf_horizontal_8_dual sse2 neon_asm dspr2/; +$vp9_lpf_horizontal_8_dual_neon_asm=vp9_lpf_horizontal_8_dual_neon; add_proto qw/void vp9_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; -specialize qw/vp9_lpf_horizontal_4 mmx neon dspr2/; +specialize qw/vp9_lpf_horizontal_4 mmx neon_asm dspr2/; +$vp9_lpf_horizontal_4_neon_asm=vp9_lpf_horizontal_4_neon; add_proto qw/void vp9_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; -specialize qw/vp9_lpf_horizontal_4_dual sse2 neon dspr2/; +specialize qw/vp9_lpf_horizontal_4_dual sse2 neon_asm dspr2/; +$vp9_lpf_horizontal_4_dual_neon_asm=vp9_lpf_horizontal_4_dual_neon; # # post proc @@ -274,71 +297,91 @@ specialize qw/vp9_blend_b/; # Sub Pixel Filters # add_proto qw/void vp9_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; 
-specialize qw/vp9_convolve_copy neon dspr2/, "$sse2_x86inc"; +specialize qw/vp9_convolve_copy neon_asm dspr2/, "$sse2_x86inc"; +$vp9_convolve_copy_neon_asm=vp9_convolve_copy_neon; add_proto qw/void vp9_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; -specialize qw/vp9_convolve_avg neon dspr2/, "$sse2_x86inc"; +specialize qw/vp9_convolve_avg neon_asm dspr2/, "$sse2_x86inc"; +$vp9_convolve_avg_neon_asm=vp9_convolve_avg_neon; add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; -specialize qw/vp9_convolve8 sse2 ssse3 avx2 neon dspr2/; +specialize qw/vp9_convolve8 sse2 ssse3 avx2 neon_asm dspr2/; +$vp9_convolve8_neon_asm=vp9_convolve8_neon; add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; -specialize qw/vp9_convolve8_horiz sse2 ssse3 avx2 neon dspr2/; +specialize qw/vp9_convolve8_horiz sse2 ssse3 avx2 neon_asm dspr2/; +$vp9_convolve8_horiz_neon_asm=vp9_convolve8_horiz_neon; add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; -specialize qw/vp9_convolve8_vert sse2 ssse3 avx2 neon dspr2/; +specialize qw/vp9_convolve8_vert sse2 ssse3 avx2 neon_asm dspr2/; +$vp9_convolve8_vert_neon_asm=vp9_convolve8_vert_neon; add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; -specialize qw/vp9_convolve8_avg sse2 ssse3 
neon dspr2/; +specialize qw/vp9_convolve8_avg sse2 ssse3 neon_asm dspr2/; +$vp9_convolve8_avg_neon_asm=vp9_convolve8_avg_neon; add_proto qw/void vp9_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; -specialize qw/vp9_convolve8_avg_horiz sse2 ssse3 neon dspr2/; +specialize qw/vp9_convolve8_avg_horiz sse2 ssse3 neon_asm dspr2/; +$vp9_convolve8_avg_horiz_neon_asm=vp9_convolve8_avg_horiz_neon; add_proto qw/void vp9_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; -specialize qw/vp9_convolve8_avg_vert sse2 ssse3 neon dspr2/; +specialize qw/vp9_convolve8_avg_vert sse2 ssse3 neon_asm dspr2/; +$vp9_convolve8_avg_vert_neon_asm=vp9_convolve8_avg_vert_neon; # # dct # add_proto qw/void vp9_idct4x4_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; -specialize qw/vp9_idct4x4_1_add sse2 neon dspr2/; +specialize qw/vp9_idct4x4_1_add sse2 neon_asm dspr2/; +$vp9_idct4x4_1_add_neon_asm=vp9_idct4x4_1_add_neon; add_proto qw/void vp9_idct4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; -specialize qw/vp9_idct4x4_16_add sse2 neon dspr2/; +specialize qw/vp9_idct4x4_16_add sse2 neon_asm dspr2/; +$vp9_idct4x4_16_add_neon_asm=vp9_idct4x4_16_add_neon; add_proto qw/void vp9_idct8x8_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; -specialize qw/vp9_idct8x8_1_add sse2 neon dspr2/; +specialize qw/vp9_idct8x8_1_add sse2 neon_asm dspr2/; +$vp9_idct8x8_1_add_neon_asm=vp9_idct8x8_1_add_neon; add_proto qw/void vp9_idct8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; -specialize qw/vp9_idct8x8_64_add sse2 neon dspr2/, "$ssse3_x86_64"; +specialize qw/vp9_idct8x8_64_add sse2 neon_asm dspr2/, "$ssse3_x86_64"; 
+$vp9_idct8x8_64_add_neon_asm=vp9_idct8x8_64_add_neon; -add_proto qw/void vp9_idct8x8_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; -specialize qw/vp9_idct8x8_10_add sse2 neon dspr2/; +add_proto qw/void vp9_idct8x8_12_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct8x8_12_add sse2 neon_asm dspr2/, "$ssse3_x86_64"; +$vp9_idct8x8_12_add_neon_asm=vp9_idct8x8_12_add_neon; add_proto qw/void vp9_idct16x16_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; -specialize qw/vp9_idct16x16_1_add sse2 neon dspr2/; +specialize qw/vp9_idct16x16_1_add sse2 neon_asm dspr2/; +$vp9_idct16x16_1_add_neon_asm=vp9_idct16x16_1_add_neon; add_proto qw/void vp9_idct16x16_256_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; -specialize qw/vp9_idct16x16_256_add sse2 neon dspr2/; +specialize qw/vp9_idct16x16_256_add sse2 neon_asm dspr2/; +$vp9_idct16x16_256_add_neon_asm=vp9_idct16x16_256_add_neon; add_proto qw/void vp9_idct16x16_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; -specialize qw/vp9_idct16x16_10_add sse2 neon dspr2/; +specialize qw/vp9_idct16x16_10_add sse2 neon_asm dspr2/; +$vp9_idct16x16_10_add_neon_asm=vp9_idct16x16_10_add_neon; add_proto qw/void vp9_idct32x32_1024_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; -specialize qw/vp9_idct32x32_1024_add sse2 neon dspr2/; +specialize qw/vp9_idct32x32_1024_add sse2 neon_asm dspr2/; +$vp9_idct32x32_1024_add_neon_asm=vp9_idct32x32_1024_add_neon; add_proto qw/void vp9_idct32x32_34_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; -specialize qw/vp9_idct32x32_34_add sse2 neon dspr2/; -$vp9_idct32x32_34_add_neon=vp9_idct32x32_1024_add_neon; +specialize qw/vp9_idct32x32_34_add sse2 neon_asm dspr2/; +$vp9_idct32x32_34_add_neon_asm=vp9_idct32x32_1024_add_neon; add_proto qw/void vp9_idct32x32_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; -specialize qw/vp9_idct32x32_1_add sse2 neon dspr2/; +specialize 
qw/vp9_idct32x32_1_add sse2 neon_asm dspr2/; +$vp9_idct32x32_1_add_neon_asm=vp9_idct32x32_1_add_neon; add_proto qw/void vp9_iht4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type"; -specialize qw/vp9_iht4x4_16_add sse2 neon dspr2/; +specialize qw/vp9_iht4x4_16_add sse2 neon_asm dspr2/; +$vp9_iht4x4_16_add_neon_asm=vp9_iht4x4_16_add_neon; add_proto qw/void vp9_iht8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type"; -specialize qw/vp9_iht8x8_64_add sse2 neon dspr2/; +specialize qw/vp9_iht8x8_64_add sse2 neon_asm dspr2/; +$vp9_iht8x8_64_add_neon_asm=vp9_iht8x8_64_add_neon; add_proto qw/void vp9_iht16x16_256_add/, "const int16_t *input, uint8_t *output, int pitch, int tx_type"; specialize qw/vp9_iht16x16_256_add sse2 dspr2/; @@ -660,7 +703,7 @@ specialize qw/vp9_get_mb_ss mmx sse2/; # ENCODEMB INVOKE add_proto qw/int64_t vp9_block_error/, "const int16_t *coeff, const int16_t *dqcoeff, intptr_t block_size, int64_t *ssz"; -specialize qw/vp9_block_error/, "$sse2_x86inc"; +specialize qw/vp9_block_error avx2/, "$sse2_x86inc"; add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"; specialize qw/vp9_subtract_block/, "$sse2_x86inc"; @@ -693,7 +736,7 @@ add_proto qw/void vp9_fht16x16/, "const int16_t *input, int16_t *output, int str specialize qw/vp9_fht16x16 sse2 avx2/; add_proto qw/void vp9_fwht4x4/, "const int16_t *input, int16_t *output, int stride"; -specialize qw/vp9_fwht4x4/; +specialize qw/vp9_fwht4x4/, "$mmx_x86inc"; add_proto qw/void vp9_fdct4x4/, "const int16_t *input, int16_t *output, int stride"; specialize qw/vp9_fdct4x4 sse2 avx2/; diff --git a/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c b/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c index 13a5b5a..0231726 100644 --- a/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c +++ 
b/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c @@ -995,7 +995,7 @@ void vp9_iht8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride, RECON_AND_STORE(dest, in[7]); } -void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int stride) { +void vp9_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int stride) { const __m128i zero = _mm_setzero_si128(); const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i final_rounding = _mm_set1_epi16(1<<4); diff --git a/source/libvpx/vp9/common/x86/vp9_idct_ssse3.asm b/source/libvpx/vp9/common/x86/vp9_idct_ssse3.asm index f2a120f..2c10607 100644 --- a/source/libvpx/vp9/common/x86/vp9_idct_ssse3.asm +++ b/source/libvpx/vp9/common/x86/vp9_idct_ssse3.asm @@ -28,6 +28,29 @@ TRANSFORM_COEFFS 6270, 15137 TRANSFORM_COEFFS 3196, 16069 TRANSFORM_COEFFS 13623, 9102 +%macro PAIR_PP_COEFFS 2 +dpw_%1_%2: dw %1, %1, %1, %1, %2, %2, %2, %2 +%endmacro + +%macro PAIR_MP_COEFFS 2 +dpw_m%1_%2: dw -%1, -%1, -%1, -%1, %2, %2, %2, %2 +%endmacro + +%macro PAIR_MM_COEFFS 2 +dpw_m%1_m%2: dw -%1, -%1, -%1, -%1, -%2, -%2, -%2, -%2 +%endmacro + +PAIR_PP_COEFFS 30274, 12540 +PAIR_PP_COEFFS 6392, 32138 +PAIR_MP_COEFFS 18204, 27246 + +PAIR_PP_COEFFS 12540, 12540 +PAIR_PP_COEFFS 30274, 30274 +PAIR_PP_COEFFS 6392, 6392 +PAIR_PP_COEFFS 32138, 32138 +PAIR_MM_COEFFS 18204, 18204 +PAIR_PP_COEFFS 27246, 27246 + SECTION .text %if ARCH_X86_64 @@ -128,6 +151,7 @@ SECTION .text %endmacro INIT_XMM ssse3 +; full inverse 8x8 2D-DCT transform cglobal idct8x8_64_add, 3, 5, 13, input, output, stride mova m8, [pd_8192] mova m11, [pw_16] @@ -159,4 +183,118 @@ cglobal idct8x8_64_add, 3, 5, 13, input, output, stride ADD_STORE_8P_2X 6, 7, 9, 10, 12 RET + +; inverse 8x8 2D-DCT transform with only first 10 coeffs non-zero +cglobal idct8x8_12_add, 3, 5, 13, input, output, stride + mova m8, [pd_8192] + mova m11, [pw_16] + mova m12, [pw_11585x2] + + lea r3, [2 * strideq] + + mova m0, [inputq + 0] + mova m1, [inputq + 16] + mova m2, 
[inputq + 32] + mova m3, [inputq + 48] + + punpcklwd m0, m1 + punpcklwd m2, m3 + punpckhdq m9, m0, m2 + punpckldq m0, m2 + SWAP 2, 9 + + ; m0 -> [0], [0] + ; m1 -> [1], [1] + ; m2 -> [2], [2] + ; m3 -> [3], [3] + punpckhqdq m10, m0, m0 + punpcklqdq m0, m0 + punpckhqdq m9, m2, m2 + punpcklqdq m2, m2 + SWAP 1, 10 + SWAP 3, 9 + + pmulhrsw m0, m12 + pmulhrsw m2, [dpw_30274_12540] + pmulhrsw m1, [dpw_6392_32138] + pmulhrsw m3, [dpw_m18204_27246] + + SUM_SUB 0, 2, 9 + SUM_SUB 1, 3, 9 + + punpcklqdq m9, m3, m3 + punpckhqdq m5, m3, m9 + + SUM_SUB 3, 5, 9 + punpckhqdq m5, m3 + pmulhrsw m5, m12 + + punpckhqdq m9, m1, m5 + punpcklqdq m1, m5 + SWAP 5, 9 + + SUM_SUB 0, 5, 9 + SUM_SUB 2, 1, 9 + + punpckhqdq m3, m0, m0 + punpckhqdq m4, m1, m1 + punpckhqdq m6, m5, m5 + punpckhqdq m7, m2, m2 + + punpcklwd m0, m3 + punpcklwd m7, m2 + punpcklwd m1, m4 + punpcklwd m6, m5 + + punpckhdq m4, m0, m7 + punpckldq m0, m7 + punpckhdq m10, m1, m6 + punpckldq m5, m1, m6 + + punpckhqdq m1, m0, m5 + punpcklqdq m0, m5 + punpckhqdq m3, m4, m10 + punpcklqdq m2, m4, m10 + + + pmulhrsw m0, m12 + pmulhrsw m6, m2, [dpw_30274_30274] + pmulhrsw m4, m2, [dpw_12540_12540] + + pmulhrsw m7, m1, [dpw_32138_32138] + pmulhrsw m1, [dpw_6392_6392] + pmulhrsw m5, m3, [dpw_m18204_m18204] + pmulhrsw m3, [dpw_27246_27246] + + mova m2, m0 + SUM_SUB 0, 6, 9 + SUM_SUB 2, 4, 9 + SUM_SUB 1, 5, 9 + SUM_SUB 7, 3, 9 + + SUM_SUB 3, 5, 9 + pmulhrsw m3, m12 + pmulhrsw m5, m12 + + SUM_SUB 0, 7, 9 + SUM_SUB 2, 3, 9 + SUM_SUB 4, 5, 9 + SUM_SUB 6, 1, 9 + + SWAP 3, 6 + SWAP 1, 2 + SWAP 2, 4 + + + pxor m12, m12 + ADD_STORE_8P_2X 0, 1, 9, 10, 12 + lea outputq, [outputq + r3] + ADD_STORE_8P_2X 2, 3, 9, 10, 12 + lea outputq, [outputq + r3] + ADD_STORE_8P_2X 4, 5, 9, 10, 12 + lea outputq, [outputq + r3] + ADD_STORE_8P_2X 6, 7, 9, 10, 12 + + RET + %endif diff --git a/source/libvpx/vp9/decoder/vp9_decodeframe.c b/source/libvpx/vp9/decoder/vp9_decodeframe.c index 45ebb2f..3124158 100644 --- a/source/libvpx/vp9/decoder/vp9_decodeframe.c +++ 
b/source/libvpx/vp9/decoder/vp9_decodeframe.c @@ -316,7 +316,7 @@ static MB_MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, // as they are always compared to values that are in 1/8th pel units set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); - vp9_setup_dst_planes(xd, get_frame_new_buffer(cm), mi_row, mi_col); + vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); return &xd->mi[0]->mbmi; } @@ -676,17 +676,17 @@ static void setup_frame_size_with_refs(VP9_COMMON *cm, } static void decode_tile(VP9Decoder *pbi, const TileInfo *const tile, - vp9_reader *r) { - const int num_threads = pbi->oxcf.max_threads; + int do_loopfilter_inline, vp9_reader *r) { + const int num_threads = pbi->max_threads; VP9_COMMON *const cm = &pbi->common; int mi_row, mi_col; MACROBLOCKD *xd = &pbi->mb; - if (pbi->do_loopfilter_inline) { + if (do_loopfilter_inline) { LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; lf_data->frame_buffer = get_frame_new_buffer(cm); lf_data->cm = cm; - lf_data->xd = pbi->mb; + vp9_copy(lf_data->planes, pbi->mb.plane); lf_data->stop = 0; lf_data->y_only = 0; vp9_loop_filter_frame_init(cm, cm->lf.filter_level); @@ -702,7 +702,7 @@ static void decode_tile(VP9Decoder *pbi, const TileInfo *const tile, decode_partition(cm, xd, tile, mi_row, mi_col, r, BLOCK_64X64); } - if (pbi->do_loopfilter_inline) { + if (do_loopfilter_inline) { const int lf_start = mi_row - MI_BLOCK_SIZE; LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; @@ -723,7 +723,7 @@ static void decode_tile(VP9Decoder *pbi, const TileInfo *const tile, } } - if (pbi->do_loopfilter_inline) { + if (do_loopfilter_inline) { LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; vp9_worker_sync(&pbi->lf_worker); @@ -749,14 +749,20 @@ static void setup_tile_info(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { cm->log2_tile_rows += vp9_rb_read_bit(rb); } +typedef struct TileBuffer { + const uint8_t 
*data; + size_t size; + int col; // only used with multi-threaded decoding +} TileBuffer; + // Reads the next tile returning its size and adjusting '*data' accordingly // based on 'is_last'. -static size_t get_tile(const uint8_t *const data_end, - int is_last, - struct vpx_internal_error_info *error_info, - const uint8_t **data, - vpx_decrypt_cb decrypt_cb, - void *decrypt_state) { +static void get_tile_buffer(const uint8_t *const data_end, + int is_last, + struct vpx_internal_error_info *error_info, + const uint8_t **data, + vpx_decrypt_cb decrypt_cb, void *decrypt_state, + TileBuffer *buf) { size_t size; if (!is_last) { @@ -779,18 +785,34 @@ static size_t get_tile(const uint8_t *const data_end, } else { size = data_end - *data; } - return size; + + buf->data = *data; + buf->size = size; + + *data += size; } -typedef struct TileBuffer { - const uint8_t *data; - size_t size; - int col; // only used with multi-threaded decoding -} TileBuffer; +static void get_tile_buffers(VP9Decoder *pbi, + const uint8_t *data, const uint8_t *data_end, + int tile_cols, int tile_rows, + TileBuffer (*tile_buffers)[1 << 6]) { + int r, c; + + for (r = 0; r < tile_rows; ++r) { + for (c = 0; c < tile_cols; ++c) { + const int is_last = (r == tile_rows - 1) && (c == tile_cols - 1); + TileBuffer *const buf = &tile_buffers[r][c]; + buf->col = c; + get_tile_buffer(data_end, is_last, &pbi->common.error, &data, + pbi->decrypt_cb, pbi->decrypt_state, buf); + } + } +} static const uint8_t *decode_tiles(VP9Decoder *pbi, const uint8_t *data, - const uint8_t *data_end) { + const uint8_t *data_end, + int do_loopfilter_inline) { VP9_COMMON *const cm = &pbi->common; const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); const int tile_cols = 1 << cm->log2_tile_cols; @@ -811,25 +833,12 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, vpx_memset(cm->above_seg_context, 0, sizeof(*cm->above_seg_context) * aligned_cols); - // Load tile data into tile_buffers - for (tile_row = 0; tile_row < 
tile_rows; ++tile_row) { - for (tile_col = 0; tile_col < tile_cols; ++tile_col) { - const int last_tile = tile_row == tile_rows - 1 && - tile_col == tile_cols - 1; - const size_t size = get_tile(data_end, last_tile, &cm->error, &data, - pbi->decrypt_cb, pbi->decrypt_state); - TileBuffer *const buf = &tile_buffers[tile_row][tile_col]; - buf->data = data; - buf->size = size; - data += size; - } - } + get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers); // Decode tiles using data from tile_buffers for (tile_row = 0; tile_row < tile_rows; ++tile_row) { for (tile_col = 0; tile_col < tile_cols; ++tile_col) { - const int col = pbi->oxcf.inv_tile_order ? tile_cols - tile_col - 1 - : tile_col; + const int col = pbi->inv_tile_order ? tile_cols - tile_col - 1 : tile_col; const int last_tile = tile_row == tile_rows - 1 && col == tile_cols - 1; const TileBuffer *const buf = &tile_buffers[tile_row][col]; @@ -838,7 +847,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, vp9_tile_init(&tile, cm, tile_row, col); setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &r, pbi->decrypt_cb, pbi->decrypt_state); - decode_tile(pbi, &tile, &r); + decode_tile(pbi, &tile, do_loopfilter_inline, &r); if (last_tile) end = vp9_reader_find_end(&r); @@ -887,8 +896,8 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; - const int num_workers = MIN(pbi->oxcf.max_threads & ~1, tile_cols); - TileBuffer tile_buffers[1 << 6]; + const int num_workers = MIN(pbi->max_threads & ~1, tile_cols); + TileBuffer tile_buffers[1][1 << 6]; int n; int final_worker = -1; @@ -899,7 +908,7 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, // TODO(jzern): See if we can remove the restriction of passing in max // threads to the decoder. 
if (pbi->num_tile_workers == 0) { - const int num_threads = pbi->oxcf.max_threads & ~1; + const int num_threads = pbi->max_threads & ~1; int i; // TODO(jzern): Allocate one less worker, as in the current code we only // use num_threads - 1 workers. @@ -933,19 +942,11 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, sizeof(*cm->above_seg_context) * aligned_mi_cols); // Load tile data into tile_buffers - for (n = 0; n < tile_cols; ++n) { - const size_t size = - get_tile(data_end, n == tile_cols - 1, &cm->error, &data, - pbi->decrypt_cb, pbi->decrypt_state); - TileBuffer *const buf = &tile_buffers[n]; - buf->data = data; - buf->size = size; - buf->col = n; - data += size; - } + get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers); // Sort the buffers based on size in descending order. - qsort(tile_buffers, tile_cols, sizeof(tile_buffers[0]), compare_tile_buffers); + qsort(tile_buffers[0], tile_cols, sizeof(tile_buffers[0][0]), + compare_tile_buffers); // Rearrange the tile buffers such that per-tile group the largest, and // presumably the most difficult, tile will be decoded in the main thread. 
@@ -954,11 +955,11 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, { int group_start = 0; while (group_start < tile_cols) { - const TileBuffer largest = tile_buffers[group_start]; + const TileBuffer largest = tile_buffers[0][group_start]; const int group_end = MIN(group_start + num_workers, tile_cols) - 1; - memmove(tile_buffers + group_start, tile_buffers + group_start + 1, - (group_end - group_start) * sizeof(tile_buffers[0])); - tile_buffers[group_end] = largest; + memmove(tile_buffers[0] + group_start, tile_buffers[0] + group_start + 1, + (group_end - group_start) * sizeof(tile_buffers[0][0])); + tile_buffers[0][group_end] = largest; group_start = group_end + 1; } } @@ -970,7 +971,7 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, VP9Worker *const worker = &pbi->tile_workers[i]; TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; TileInfo *const tile = (TileInfo*)worker->data2; - TileBuffer *const buf = &tile_buffers[n]; + TileBuffer *const buf = &tile_buffers[0][n]; tile_data->cm = cm; tile_data->xd = pbi->mb; @@ -1278,6 +1279,7 @@ static struct vp9_read_bit_buffer* init_read_bit_buffer( const uint8_t *data, const uint8_t *data_end, uint8_t *clear_data /* buffer size MAX_VP9_HEADER_SIZE */) { + vp9_zero(*rb); rb->bit_offset = 0; rb->error_handler = error_handler; rb->error_handler_data = &pbi->common; @@ -1298,7 +1300,7 @@ int vp9_decode_frame(VP9Decoder *pbi, const uint8_t **p_data_end) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; - struct vp9_read_bit_buffer rb = { 0 }; + struct vp9_read_bit_buffer rb; uint8_t clear_data[MAX_VP9_HEADER_SIZE]; const size_t first_partition_size = read_uncompressed_header(pbi, init_read_bit_buffer(pbi, &rb, data, data_end, clear_data)); @@ -1306,6 +1308,8 @@ int vp9_decode_frame(VP9Decoder *pbi, const int tile_rows = 1 << cm->log2_tile_rows; const int tile_cols = 1 << cm->log2_tile_cols; YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm); + const int 
do_loopfilter_inline = tile_rows == 1 && tile_cols == 1 && + cm->lf.filter_level; xd->cur_buf = new_fb; if (!first_partition_size) { @@ -1322,18 +1326,6 @@ int vp9_decode_frame(VP9Decoder *pbi, vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet or corrupt header length"); - pbi->do_loopfilter_inline = - (cm->log2_tile_rows | cm->log2_tile_cols) == 0 && cm->lf.filter_level; - if (pbi->do_loopfilter_inline && pbi->lf_worker.data1 == NULL) { - CHECK_MEM_ERROR(cm, pbi->lf_worker.data1, - vpx_memalign(32, sizeof(LFWorkerData))); - pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker; - if (pbi->oxcf.max_threads > 1 && !vp9_worker_reset(&pbi->lf_worker)) { - vpx_internal_error(&cm->error, VPX_CODEC_ERROR, - "Loop filter thread creation failed"); - } - } - init_macroblockd(cm, &pbi->mb); if (cm->coding_use_prev_mi) @@ -1353,11 +1345,26 @@ int vp9_decode_frame(VP9Decoder *pbi, // TODO(jzern): remove frame_parallel_decoding_mode restriction for // single-frame tile decoding. - if (pbi->oxcf.max_threads > 1 && tile_rows == 1 && tile_cols > 1 && + if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1 && cm->frame_parallel_decoding_mode) { *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end); + // If multiple threads are used to decode tiles, then we use those threads + // to do parallel loopfiltering. 
+ vp9_loop_filter_frame_mt(new_fb, pbi, cm, cm->lf.filter_level, 0); } else { - *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end); + if (do_loopfilter_inline && pbi->lf_worker.data1 == NULL) { + CHECK_MEM_ERROR(cm, pbi->lf_worker.data1, + vpx_memalign(32, sizeof(LFWorkerData))); + pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker; + if (pbi->max_threads > 1 && !vp9_worker_reset(&pbi->lf_worker)) { + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, + "Loop filter thread creation failed"); + } + } + *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end, + do_loopfilter_inline); + if (!do_loopfilter_inline) + vp9_loop_filter_frame(new_fb, cm, &pbi->mb, cm->lf.filter_level, 0, 0); } new_fb->corrupted |= xd->corrupted; @@ -1370,16 +1377,17 @@ int vp9_decode_frame(VP9Decoder *pbi, "A stream must start with a complete key frame"); } - if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode && - !new_fb->corrupted) { - vp9_adapt_coef_probs(cm); + if (!new_fb->corrupted) { + if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) { + vp9_adapt_coef_probs(cm); - if (!frame_is_intra_only(cm)) { - vp9_adapt_mode_probs(cm); - vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv); + if (!frame_is_intra_only(cm)) { + vp9_adapt_mode_probs(cm); + vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv); + } + } else { + debug_check_frame_counts(cm); } - } else { - debug_check_frame_counts(cm); } if (cm->refresh_frame_context) diff --git a/source/libvpx/vp9/decoder/vp9_decoder.c b/source/libvpx/vp9/decoder/vp9_decoder.c index abcff9f..9e0811f 100644 --- a/source/libvpx/vp9/decoder/vp9_decoder.c +++ b/source/libvpx/vp9/decoder/vp9_decoder.c @@ -42,7 +42,7 @@ void vp9_initialize_dec() { } } -VP9Decoder *vp9_decoder_create(const VP9DecoderConfig *oxcf) { +VP9Decoder *vp9_decoder_create() { VP9Decoder *const pbi = vpx_memalign(32, sizeof(*pbi)); VP9_COMMON *const cm = pbi ? 
&pbi->common : NULL; @@ -66,7 +66,6 @@ VP9Decoder *vp9_decoder_create(const VP9DecoderConfig *oxcf) { vpx_memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); cm->current_video_frame = 0; - pbi->oxcf = *oxcf; pbi->ready_for_new_data = 1; pbi->decoded_key_frame = 0; @@ -280,16 +279,6 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, swap_frame_buffers(pbi); - if (!pbi->do_loopfilter_inline) { - // If multiple threads are used to decode tiles, then we use those threads - // to do parallel loopfiltering. - if (pbi->num_tile_workers) { - vp9_loop_filter_frame_mt(pbi, cm, cm->lf.filter_level, 0, 0); - } else { - vp9_loop_filter_frame(cm, &pbi->mb, cm->lf.filter_level, 0, 0); - } - } - vp9_clear_system_state(); cm->last_width = cm->width; @@ -315,11 +304,14 @@ int vp9_get_raw_frame(VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp9_ppflags_t *flags) { int ret = -1; +#if !CONFIG_VP9_POSTPROC + (void)*flags; +#endif if (pbi->ready_for_new_data == 1) return ret; - /* ie no raw frame to show!!! */ + /* no raw frame to show!!! 
*/ if (pbi->common.show_frame == 0) return ret; @@ -330,8 +322,8 @@ int vp9_get_raw_frame(VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, #if CONFIG_VP9_POSTPROC ret = vp9_post_proc_frame(&pbi->common, sd, flags); #else - *sd = *pbi->common.frame_to_show; - ret = 0; + *sd = *pbi->common.frame_to_show; + ret = 0; #endif /*!CONFIG_POSTPROC*/ vp9_clear_system_state(); return ret; diff --git a/source/libvpx/vp9/decoder/vp9_decoder.h b/source/libvpx/vp9/decoder/vp9_decoder.h index ebcbb90..d6110c4 100644 --- a/source/libvpx/vp9/decoder/vp9_decoder.h +++ b/source/libvpx/vp9/decoder/vp9_decoder.h @@ -27,21 +27,11 @@ extern "C" { #endif -typedef struct VP9DecoderConfig { - int width; - int height; - int version; - int max_threads; - int inv_tile_order; -} VP9DecoderConfig; - typedef struct VP9Decoder { DECLARE_ALIGNED(16, MACROBLOCKD, mb); DECLARE_ALIGNED(16, VP9_COMMON, common); - VP9DecoderConfig oxcf; - int64_t last_time_stamp; int ready_for_new_data; @@ -49,7 +39,6 @@ typedef struct VP9Decoder { int decoded_key_frame; - int do_loopfilter_inline; // apply loopfilter to available rows immediately VP9Worker lf_worker; VP9Worker *tile_workers; @@ -59,6 +48,9 @@ typedef struct VP9Decoder { vpx_decrypt_cb decrypt_cb; void *decrypt_state; + + int max_threads; + int inv_tile_order; } VP9Decoder; void vp9_initialize_dec(); @@ -83,8 +75,7 @@ vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, int vp9_get_reference_dec(struct VP9Decoder *pbi, int index, YV12_BUFFER_CONFIG **fb); - -struct VP9Decoder *vp9_decoder_create(const VP9DecoderConfig *oxcf); +struct VP9Decoder *vp9_decoder_create(); void vp9_decoder_remove(struct VP9Decoder *pbi); diff --git a/source/libvpx/vp9/decoder/vp9_dthread.c b/source/libvpx/vp9/decoder/vp9_dthread.c index 9098063..bc6c418 100644 --- a/source/libvpx/vp9/decoder/vp9_dthread.c +++ b/source/libvpx/vp9/decoder/vp9_dthread.c @@ -89,7 +89,8 @@ static INLINE void sync_write(VP9LfSync *const lf_sync, int r, int c, // Implement row loopfiltering for each thread. 
static void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer, - VP9_COMMON *const cm, MACROBLOCKD *const xd, + VP9_COMMON *const cm, + struct macroblockd_plane planes[MAX_MB_PLANE], int start, int stop, int y_only, VP9LfSync *const lf_sync, int num_lf_workers) { const int num_planes = y_only ? 1 : MAX_MB_PLANE; @@ -107,11 +108,11 @@ static void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer, sync_read(lf_sync, r, c); - vp9_setup_dst_planes(xd, frame_buffer, mi_row, mi_col); + vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm); for (plane = 0; plane < num_planes; ++plane) { - vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm); + vp9_filter_block_plane(cm, &planes[plane], mi_row, &lfm); } sync_write(lf_sync, r, c, sb_cols); @@ -124,7 +125,7 @@ static int loop_filter_row_worker(void *arg1, void *arg2) { TileWorkerData *const tile_data = (TileWorkerData*)arg1; LFWorkerData *const lf_data = &tile_data->lfdata; - loop_filter_rows_mt(lf_data->frame_buffer, lf_data->cm, &lf_data->xd, + loop_filter_rows_mt(lf_data->frame_buffer, lf_data->cm, lf_data->planes, lf_data->start, lf_data->stop, lf_data->y_only, lf_data->lf_sync, lf_data->num_lf_workers); return 1; @@ -132,15 +133,15 @@ static int loop_filter_row_worker(void *arg1, void *arg2) { // VP9 decoder: Implement multi-threaded loopfilter that uses the tile // threads. 
-void vp9_loop_filter_frame_mt(VP9Decoder *pbi, - VP9_COMMON *cm, +void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, + VP9Decoder *pbi, VP9_COMMON *cm, int frame_filter_level, - int y_only, int partial_frame) { + int y_only) { VP9LfSync *const lf_sync = &pbi->lf_row_sync; // Number of superblock rows and cols const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; const int tile_cols = 1 << cm->log2_tile_cols; - const int num_workers = MIN(pbi->oxcf.max_threads & ~1, tile_cols); + const int num_workers = MIN(pbi->max_threads & ~1, tile_cols); int i; // Allocate memory used in thread synchronization. @@ -184,9 +185,9 @@ void vp9_loop_filter_frame_mt(VP9Decoder *pbi, worker->hook = (VP9WorkerHook)loop_filter_row_worker; // Loopfilter data - lf_data->frame_buffer = get_frame_new_buffer(cm); + lf_data->frame_buffer = frame; lf_data->cm = cm; - lf_data->xd = pbi->mb; + vp9_copy(lf_data->planes, pbi->mb.plane); lf_data->start = i; lf_data->stop = sb_rows; lf_data->y_only = y_only; // always do all planes in decoder diff --git a/source/libvpx/vp9/decoder/vp9_dthread.h b/source/libvpx/vp9/decoder/vp9_dthread.h index 8738cee..a727e2a 100644 --- a/source/libvpx/vp9/decoder/vp9_dthread.h +++ b/source/libvpx/vp9/decoder/vp9_dthread.h @@ -48,9 +48,10 @@ void vp9_loop_filter_alloc(struct VP9Common *cm, VP9LfSync *lf_sync, void vp9_loop_filter_dealloc(VP9LfSync *lf_sync, int rows); // Multi-threaded loopfilter that uses the tile threads. 
-void vp9_loop_filter_frame_mt(struct VP9Decoder *pbi, +void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, + struct VP9Decoder *pbi, struct VP9Common *cm, int frame_filter_level, - int y_only, int partial_frame); + int y_only); #endif // VP9_DECODER_VP9_DTHREAD_H_ diff --git a/source/libvpx/vp9/encoder/vp9_bitstream.c b/source/libvpx/vp9/encoder/vp9_bitstream.c index 35d2ecf..8ef2b2e 100644 --- a/source/libvpx/vp9/encoder/vp9_bitstream.c +++ b/source/libvpx/vp9/encoder/vp9_bitstream.c @@ -485,8 +485,8 @@ static void write_modes(VP9_COMP *cpi, } static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size, - vp9_coeff_stats *coef_branch_ct) { - vp9_coeff_probs_model *coef_probs = cpi->frame_coef_probs[tx_size]; + vp9_coeff_stats *coef_branch_ct, + vp9_coeff_probs_model *coef_probs) { vp9_coeff_count *coef_counts = cpi->coef_counts[tx_size]; unsigned int (*eob_branch_ct)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] = cpi->common.counts.eob_branch[tx_size]; @@ -513,10 +513,9 @@ static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size, static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi, TX_SIZE tx_size, - vp9_coeff_stats *frame_branch_ct) { - vp9_coeff_probs_model *new_frame_coef_probs = cpi->frame_coef_probs[tx_size]; - vp9_coeff_probs_model *old_frame_coef_probs = - cpi->common.fc.coef_probs[tx_size]; + vp9_coeff_stats *frame_branch_ct, + vp9_coeff_probs_model *new_coef_probs) { + vp9_coeff_probs_model *old_coef_probs = cpi->common.fc.coef_probs[tx_size]; const vp9_prob upd = DIFF_UPDATE_PROB; const int entropy_nodes_update = UNCONSTRAINED_NODES; int i, j, k, l, t; @@ -530,14 +529,14 @@ static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi, for (k = 0; k < COEF_BANDS; ++k) { for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) { for (t = 0; t < entropy_nodes_update; ++t) { - vp9_prob newp = new_frame_coef_probs[i][j][k][l][t]; - const vp9_prob oldp = old_frame_coef_probs[i][j][k][l][t]; + vp9_prob newp = 
new_coef_probs[i][j][k][l][t]; + const vp9_prob oldp = old_coef_probs[i][j][k][l][t]; int s; int u = 0; if (t == PIVOT_NODE) s = vp9_prob_diff_update_savings_search_model( frame_branch_ct[i][j][k][l][0], - old_frame_coef_probs[i][j][k][l], &newp, upd); + old_coef_probs[i][j][k][l], &newp, upd); else s = vp9_prob_diff_update_savings_search( frame_branch_ct[i][j][k][l][t], oldp, &newp, upd); @@ -567,15 +566,15 @@ static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi, for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) { // calc probs and branch cts for this frame only for (t = 0; t < entropy_nodes_update; ++t) { - vp9_prob newp = new_frame_coef_probs[i][j][k][l][t]; - vp9_prob *oldp = old_frame_coef_probs[i][j][k][l] + t; + vp9_prob newp = new_coef_probs[i][j][k][l][t]; + vp9_prob *oldp = old_coef_probs[i][j][k][l] + t; const vp9_prob upd = DIFF_UPDATE_PROB; int s; int u = 0; if (t == PIVOT_NODE) s = vp9_prob_diff_update_savings_search_model( frame_branch_ct[i][j][k][l][0], - old_frame_coef_probs[i][j][k][l], &newp, upd); + old_coef_probs[i][j][k][l], &newp, upd); else s = vp9_prob_diff_update_savings_search( frame_branch_ct[i][j][k][l][t], @@ -612,8 +611,8 @@ static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi, for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) { // calc probs and branch cts for this frame only for (t = 0; t < entropy_nodes_update; ++t) { - vp9_prob newp = new_frame_coef_probs[i][j][k][l][t]; - vp9_prob *oldp = old_frame_coef_probs[i][j][k][l] + t; + vp9_prob newp = new_coef_probs[i][j][k][l][t]; + vp9_prob *oldp = old_coef_probs[i][j][k][l] + t; int s; int u = 0; if (l >= prev_coef_contexts_to_update || @@ -623,7 +622,7 @@ static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi, if (t == PIVOT_NODE) s = vp9_prob_diff_update_savings_search_model( frame_branch_ct[i][j][k][l][0], - old_frame_coef_probs[i][j][k][l], &newp, upd); + old_coef_probs[i][j][k][l], &newp, upd); else s = 
vp9_prob_diff_update_savings_search( frame_branch_ct[i][j][k][l][t], @@ -670,14 +669,17 @@ static void update_coef_probs(VP9_COMP *cpi, vp9_writer* w) { const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode]; TX_SIZE tx_size; vp9_coeff_stats frame_branch_ct[TX_SIZES][PLANE_TYPES]; + vp9_coeff_probs_model frame_coef_probs[TX_SIZES][PLANE_TYPES]; vp9_clear_system_state(); for (tx_size = TX_4X4; tx_size <= TX_32X32; ++tx_size) - build_tree_distribution(cpi, tx_size, frame_branch_ct[tx_size]); + build_tree_distribution(cpi, tx_size, frame_branch_ct[tx_size], + frame_coef_probs[tx_size]); for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) - update_coef_probs_common(w, cpi, tx_size, frame_branch_ct[tx_size]); + update_coef_probs_common(w, cpi, tx_size, frame_branch_ct[tx_size], + frame_coef_probs[tx_size]); } static void encode_loopfilter(struct loopfilter *lf, diff --git a/source/libvpx/vp9/encoder/vp9_context_tree.c b/source/libvpx/vp9/encoder/vp9_context_tree.c index 659935c..ac9b562 100644 --- a/source/libvpx/vp9/encoder/vp9_context_tree.c +++ b/source/libvpx/vp9/encoder/vp9_context_tree.c @@ -8,14 +8,13 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ - #include "vp9/encoder/vp9_context_tree.h" static const BLOCK_SIZE square[] = { - BLOCK_8X8, - BLOCK_16X16, - BLOCK_32X32, - BLOCK_64X64, + BLOCK_8X8, + BLOCK_16X16, + BLOCK_32X32, + BLOCK_64X64, }; static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk, @@ -62,23 +61,25 @@ static void free_mode_context(PICK_MODE_CONTEXT *ctx) { } } } -static void free_tree_contexts(PC_TREE *this_pc) { - free_mode_context(&this_pc->none); - free_mode_context(&this_pc->horizontal[0]); - free_mode_context(&this_pc->horizontal[1]); - free_mode_context(&this_pc->vertical[0]); - free_mode_context(&this_pc->vertical[1]); -} -static void alloc_tree_contexts(VP9_COMMON *cm, PC_TREE *this_pc, + +static void alloc_tree_contexts(VP9_COMMON *cm, PC_TREE *tree, int num_4x4_blk) { - alloc_mode_context(cm, num_4x4_blk, &this_pc->none); - alloc_mode_context(cm, num_4x4_blk/2, &this_pc->horizontal[0]); - alloc_mode_context(cm, num_4x4_blk/2, &this_pc->vertical[0]); + alloc_mode_context(cm, num_4x4_blk, &tree->none); + alloc_mode_context(cm, num_4x4_blk/2, &tree->horizontal[0]); + alloc_mode_context(cm, num_4x4_blk/2, &tree->vertical[0]); /* TODO(Jbb): for 4x8 and 8x4 these allocated values are not used. * Figure out a better way to do this. 
*/ - alloc_mode_context(cm, num_4x4_blk/2, &this_pc->horizontal[1]); - alloc_mode_context(cm, num_4x4_blk/2, &this_pc->vertical[1]); + alloc_mode_context(cm, num_4x4_blk/2, &tree->horizontal[1]); + alloc_mode_context(cm, num_4x4_blk/2, &tree->vertical[1]); +} + +static void free_tree_contexts(PC_TREE *tree) { + free_mode_context(&tree->none); + free_mode_context(&tree->horizontal[0]); + free_mode_context(&tree->horizontal[1]); + free_mode_context(&tree->vertical[0]); + free_mode_context(&tree->vertical[1]); } // This function sets up a tree of contexts such that at each square @@ -97,9 +98,9 @@ void vp9_setup_pc_tree(VP9_COMMON *cm, MACROBLOCK *x) { vpx_free(x->leaf_tree); CHECK_MEM_ERROR(cm, x->leaf_tree, vpx_calloc(leaf_nodes, - sizeof(PICK_MODE_CONTEXT))); + sizeof(*x->leaf_tree))); vpx_free(x->pc_tree); - CHECK_MEM_ERROR(cm, x->pc_tree, vpx_calloc(tree_nodes, sizeof(PC_TREE))); + CHECK_MEM_ERROR(cm, x->pc_tree, vpx_calloc(tree_nodes, sizeof(*x->pc_tree))); this_pc = &x->pc_tree[0]; this_leaf = &x->leaf_tree[0]; @@ -111,45 +112,45 @@ void vp9_setup_pc_tree(VP9_COMMON *cm, MACROBLOCK *x) { // Sets up all the leaf nodes in the tree. for (pc_tree_index = 0; pc_tree_index < leaf_nodes; ++pc_tree_index) { - x->pc_tree[pc_tree_index].block_size = square[0]; - alloc_tree_contexts(cm, &x->pc_tree[pc_tree_index], 4); - x->pc_tree[pc_tree_index].leaf_split[0] = this_leaf++; - for (j = 1; j < 4; j++) { - x->pc_tree[pc_tree_index].leaf_split[j] = - x->pc_tree[pc_tree_index].leaf_split[0]; - } + PC_TREE *const tree = &x->pc_tree[pc_tree_index]; + tree->block_size = square[0]; + alloc_tree_contexts(cm, tree, 4); + tree->leaf_split[0] = this_leaf++; + for (j = 1; j < 4; j++) + tree->leaf_split[j] = tree->leaf_split[0]; } // Each node has 4 leaf nodes, fill each block_size level of the tree // from leafs to the root. 
- for (nodes = 16; nodes > 0; nodes >>= 2, ++square_index) { - for (i = 0; i < nodes; ++pc_tree_index, ++i) { - alloc_tree_contexts(cm, &x->pc_tree[pc_tree_index], - 4 << (2 * square_index)); - x->pc_tree[pc_tree_index].block_size = square[square_index]; - for (j = 0; j < 4; j++) { - x->pc_tree[pc_tree_index].split[j] = this_pc++; - } + for (nodes = 16; nodes > 0; nodes >>= 2) { + for (i = 0; i < nodes; ++i) { + PC_TREE *const tree = &x->pc_tree[pc_tree_index]; + alloc_tree_contexts(cm, tree, 4 << (2 * square_index)); + tree->block_size = square[square_index]; + for (j = 0; j < 4; j++) + tree->split[j] = this_pc++; + ++pc_tree_index; } + ++square_index; } - x->pc_root = &x->pc_tree[tree_nodes-1]; + x->pc_root = &x->pc_tree[tree_nodes - 1]; x->pc_root[0].none.best_mode_index = 2; } -void vp9_free_pc_tree(MACROBLOCK *m) { +void vp9_free_pc_tree(MACROBLOCK *x) { const int tree_nodes = 64 + 16 + 4 + 1; int i; // Set up all 4x4 mode contexts for (i = 0; i < 64; ++i) - free_mode_context(&m->leaf_tree[i]); + free_mode_context(&x->leaf_tree[i]); // Sets up all the leaf nodes in the tree. - for (i = 0; i < tree_nodes; i++) { - free_tree_contexts(&m->pc_tree[i]); - } - vpx_free(m->pc_tree); - m->pc_tree = 0; - vpx_free(m->leaf_tree); - m->leaf_tree = 0; + for (i = 0; i < tree_nodes; ++i) + free_tree_contexts(&x->pc_tree[i]); + + vpx_free(x->pc_tree); + x->pc_tree = NULL; + vpx_free(x->leaf_tree); + x->leaf_tree = NULL; } diff --git a/source/libvpx/vp9/encoder/vp9_encodeframe.c b/source/libvpx/vp9/encoder/vp9_encodeframe.c index 87051d5..86e5986 100644 --- a/source/libvpx/vp9/encoder/vp9_encodeframe.c +++ b/source/libvpx/vp9/encoder/vp9_encodeframe.c @@ -201,7 +201,7 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, mbmi = &xd->mi[0]->mbmi; // Set up destination pointers. 
- vp9_setup_dst_planes(xd, get_frame_new_buffer(cm), mi_row, mi_col); + vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); // Set up limit values for MV components. // Mv beyond the range do not produce new/different prediction block. @@ -254,7 +254,6 @@ static void duplicate_mode_info_in_sb(VP9_COMMON * const cm, } static void set_block_size(VP9_COMP * const cpi, - const TileInfo *const tile, int mi_row, int mi_col, BLOCK_SIZE bsize) { if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) { @@ -377,11 +376,9 @@ static void fill_variance_tree(void *data, BLOCK_SIZE bsize) { static int set_vt_partitioning(VP9_COMP *cpi, void *data, - const TileInfo *const tile, BLOCK_SIZE bsize, int mi_row, - int mi_col, - int mi_size) { + int mi_col) { VP9_COMMON * const cm = &cpi->common; variance_node vt; const int block_width = num_8x8_blocks_wide_lookup[bsize]; @@ -398,7 +395,7 @@ static int set_vt_partitioning(VP9_COMP *cpi, if (mi_col + block_width / 2 < cm->mi_cols && mi_row + block_height / 2 < cm->mi_rows && vt.part_variances->none.variance < threshold) { - set_block_size(cpi, tile, mi_row, mi_col, bsize); + set_block_size(cpi, mi_row, mi_col, bsize); return 1; } @@ -407,8 +404,8 @@ static int set_vt_partitioning(VP9_COMP *cpi, vt.part_variances->vert[0].variance < threshold && vt.part_variances->vert[1].variance < threshold) { BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT); - set_block_size(cpi, tile, mi_row, mi_col, subsize); - set_block_size(cpi, tile, mi_row, mi_col + block_width / 2, subsize); + set_block_size(cpi, mi_row, mi_col, subsize); + set_block_size(cpi, mi_row, mi_col + block_width / 2, subsize); return 1; } @@ -417,8 +414,8 @@ static int set_vt_partitioning(VP9_COMP *cpi, vt.part_variances->horz[0].variance < threshold && vt.part_variances->horz[1].variance < threshold) { BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ); - set_block_size(cpi, tile, mi_row, mi_col, subsize); - set_block_size(cpi, tile, 
mi_row + block_height / 2, mi_col, subsize); + set_block_size(cpi, mi_row, mi_col, subsize); + set_block_size(cpi, mi_row + block_height / 2, mi_col, subsize); return 1; } return 0; @@ -505,13 +502,13 @@ static void choose_partitioning(VP9_COMP *cpi, // Now go through the entire structure, splitting every block size until // we get to one that's got a variance lower than our threshold, or we // hit 8x8. - if (!set_vt_partitioning(cpi, &vt, tile, BLOCK_64X64, - mi_row, mi_col, 8)) { + if (!set_vt_partitioning(cpi, &vt, BLOCK_64X64, + mi_row, mi_col)) { for (i = 0; i < 4; ++i) { const int x32_idx = ((i & 1) << 2); const int y32_idx = ((i >> 1) << 2); - if (!set_vt_partitioning(cpi, &vt.split[i], tile, BLOCK_32X32, - (mi_row + y32_idx), (mi_col + x32_idx), 4)) { + if (!set_vt_partitioning(cpi, &vt.split[i], BLOCK_32X32, + (mi_row + y32_idx), (mi_col + x32_idx))) { for (j = 0; j < 4; ++j) { const int x16_idx = ((j & 1) << 1); const int y16_idx = ((j >> 1) << 1); @@ -521,7 +518,7 @@ static void choose_partitioning(VP9_COMP *cpi, #ifdef DISABLE_8X8_VAR_BASED_PARTITION if (mi_row + y32_idx + y16_idx + 1 < cm->mi_rows && mi_row + x32_idx + x16_idx + 1 < cm->mi_cols) { - set_block_size(cpi, tile, + set_block_size(cpi, (mi_row + y32_idx + y16_idx), (mi_col + x32_idx + x16_idx), BLOCK_16X16); @@ -529,7 +526,7 @@ static void choose_partitioning(VP9_COMP *cpi, for (k = 0; k < 4; ++k) { const int x8_idx = (k & 1); const int y8_idx = (k >> 1); - set_block_size(cpi, tile, + set_block_size(cpi, (mi_row + y32_idx + y16_idx + y8_idx), (mi_col + x32_idx + x16_idx + x8_idx), BLOCK_8X8); @@ -543,7 +540,7 @@ static void choose_partitioning(VP9_COMP *cpi, for (k = 0; k < 4; ++k) { const int x8_idx = (k & 1); const int y8_idx = (k >> 1); - set_block_size(cpi, tile, + set_block_size(cpi, (mi_row + y32_idx + y16_idx + y8_idx), (mi_col + x32_idx + x16_idx + x8_idx), BLOCK_8X8); @@ -1456,8 +1453,7 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO **mi_8x8, TOKENEXTRA **tp, int mi_row, 
int mi_col, BLOCK_SIZE bsize, int *rate, int64_t *dist, - int do_recon, PC_TREE *pc_tree, - int block) { + int do_recon, PC_TREE *pc_tree) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; @@ -1624,7 +1620,7 @@ static void rd_use_partition(VP9_COMP *cpi, rd_use_partition(cpi, tile, mi_8x8 + jj * bss * mis + ii * bss, tp, mi_row + y_idx, mi_col + x_idx, subsize, &rt, &dt, - i != 3, pc_tree->split[i], i); + i != 3, pc_tree->split[i]); if (rt == INT_MAX || dt == INT64_MAX) { last_part_rate = INT_MAX; last_part_dist = INT64_MAX; @@ -1809,15 +1805,11 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, BLOCK_SIZE *max_block_size) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->mb.e_mbd; - MODE_INFO **mi_8x8 = xd->mi; - const int left_in_image = xd->left_available && mi_8x8[-1]; - const int above_in_image = xd->up_available && - mi_8x8[-xd->mi_stride]; - MODE_INFO **above_sb64_mi_8x8; - MODE_INFO **left_sb64_mi_8x8; - - int row8x8_remaining = tile->mi_row_end - mi_row; - int col8x8_remaining = tile->mi_col_end - mi_col; + MODE_INFO **mi = xd->mi; + const int left_in_image = xd->left_available && mi[-1]; + const int above_in_image = xd->up_available && mi[-xd->mi_stride]; + const int row8x8_remaining = tile->mi_row_end - mi_row; + const int col8x8_remaining = tile->mi_col_end - mi_col; int bh, bw; BLOCK_SIZE min_size = BLOCK_4X4; BLOCK_SIZE max_size = BLOCK_64X64; @@ -1837,15 +1829,13 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, } // Find the min and max partition sizes used in the left SB64 if (left_in_image) { - left_sb64_mi_8x8 = &mi_8x8[-MI_BLOCK_SIZE]; - get_sb_partition_size_range(cpi, left_sb64_mi_8x8, - &min_size, &max_size); + MODE_INFO **left_sb64_mi = &mi[-MI_BLOCK_SIZE]; + get_sb_partition_size_range(cpi, left_sb64_mi, &min_size, &max_size); } // Find the min and max partition sizes used in the above SB64. 
if (above_in_image) { - above_sb64_mi_8x8 = &mi_8x8[-xd->mi_stride * MI_BLOCK_SIZE]; - get_sb_partition_size_range(cpi, above_sb64_mi_8x8, - &min_size, &max_size); + MODE_INFO **above_sb64_mi = &mi[-xd->mi_stride * MI_BLOCK_SIZE]; + get_sb_partition_size_range(cpi, above_sb64_mi, &min_size, &max_size); } // adjust observed min and max if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) { @@ -1871,6 +1861,67 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, *max_block_size = max_size; } +static void auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, + BLOCK_SIZE *min_block_size, + BLOCK_SIZE *max_block_size) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; + MODE_INFO **mi_8x8 = xd->mi; + const int left_in_image = xd->left_available && mi_8x8[-1]; + const int above_in_image = xd->up_available && + mi_8x8[-xd->mi_stride]; + int row8x8_remaining = tile->mi_row_end - mi_row; + int col8x8_remaining = tile->mi_col_end - mi_col; + int bh, bw; + BLOCK_SIZE min_size = BLOCK_32X32; + BLOCK_SIZE max_size = BLOCK_8X8; + int bsl = mi_width_log2_lookup[BLOCK_64X64]; + int search_range_ctrl = (((mi_row + mi_col) >> bsl) + + cpi->sf.chessboard_index) & 0x01; + // Trap case where we do not have a prediction. + if (search_range_ctrl && + (left_in_image || above_in_image || cm->frame_type != KEY_FRAME)) { + int block; + MODE_INFO **mi; + BLOCK_SIZE sb_type; + + // Find the min and max partition sizes used in the left SB64. + if (left_in_image) { + MODE_INFO *cur_mi; + mi = &mi_8x8[-1]; + for (block = 0; block < MI_BLOCK_SIZE; ++block) { + cur_mi = mi[block * xd->mi_stride]; + sb_type = cur_mi ? cur_mi->mbmi.sb_type : 0; + min_size = MIN(min_size, sb_type); + max_size = MAX(max_size, sb_type); + } + } + // Find the min and max partition sizes used in the above SB64. 
+ if (above_in_image) { + mi = &mi_8x8[-xd->mi_stride * MI_BLOCK_SIZE]; + for (block = 0; block < MI_BLOCK_SIZE; ++block) { + sb_type = mi[block] ? mi[block]->mbmi.sb_type : 0; + min_size = MIN(min_size, sb_type); + max_size = MAX(max_size, sb_type); + } + } + + min_size = min_partition_size[min_size]; + max_size = find_partition_size(max_size, row8x8_remaining, col8x8_remaining, + &bh, &bw); + min_size = MIN(min_size, max_size); + min_size = MAX(min_size, BLOCK_8X8); + max_size = MIN(max_size, BLOCK_32X32); + } else { + min_size = BLOCK_8X8; + max_size = BLOCK_32X32; + } + + *min_block_size = min_size; + *max_block_size = max_size; +} + static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { vpx_memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv)); } @@ -1886,7 +1937,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE bsize, int *rate, int64_t *dist, int do_recon, int64_t best_rd, - PC_TREE *pc_tree, int block) { + PC_TREE *pc_tree) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; @@ -2038,7 +2089,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, rd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, subsize, &this_rate, &this_dist, i != 3, - best_rd - sum_rd, pc_tree->split[i], i); + best_rd - sum_rd, pc_tree->split[i]); if (this_rate == INT_MAX) { sum_rd = INT64_MAX; @@ -2239,26 +2290,26 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, sf->partition_search_type == VAR_BASED_PARTITION || sf->partition_search_type == VAR_BASED_FIXED_PARTITION) { const int idx_str = cm->mi_stride * mi_row + mi_col; - MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str; - MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str; + MODE_INFO **mi = cm->mi_grid_visible + idx_str; + MODE_INFO **prev_mi = cm->prev_mi_grid_visible + idx_str; cpi->mb.source_variance = 
UINT_MAX; if (sf->partition_search_type == FIXED_PARTITION) { set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); - set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, + set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, sf->always_this_block_size); - rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, - &dummy_rate, &dummy_dist, 1, x->pc_root, 0); + rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, + &dummy_rate, &dummy_dist, 1, x->pc_root); } else if (sf->partition_search_type == VAR_BASED_FIXED_PARTITION) { BLOCK_SIZE bsize; set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col); - set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize); - rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, - &dummy_rate, &dummy_dist, 1, x->pc_root, 0); + set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize); + rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, + &dummy_rate, &dummy_dist, 1, x->pc_root); } else if (sf->partition_search_type == VAR_BASED_PARTITION) { choose_partitioning(cpi, tile, mi_row, mi_col); - rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, - &dummy_rate, &dummy_dist, 1, x->pc_root, 0); + rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, + &dummy_rate, &dummy_dist, 1, x->pc_root); } else { if ((cm->current_video_frame % sf->last_partitioning_redo_frequency) == 0 @@ -2268,7 +2319,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, || cpi->rc.is_src_frame_alt_ref || ((sf->use_lastframe_partitioning == LAST_FRAME_PARTITION_LOW_MOTION) && - sb_has_motion(cm, prev_mi_8x8))) { + sb_has_motion(cm, prev_mi))) { // If required set upper and lower partition size limits if (sf->auto_min_max_partition_size) { set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); @@ -2277,17 +2328,16 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, 
&sf->max_partition_size); } rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64, - &dummy_rate, &dummy_dist, 1, INT64_MAX, x->pc_root, - 0); + &dummy_rate, &dummy_dist, 1, INT64_MAX, x->pc_root); } else { if (sf->constrain_copy_partition && - sb_has_motion(cm, prev_mi_8x8)) - constrain_copy_partitioning(cpi, tile, mi_8x8, prev_mi_8x8, + sb_has_motion(cm, prev_mi)) + constrain_copy_partitioning(cpi, tile, mi, prev_mi, mi_row, mi_col, BLOCK_16X16); else - copy_partitioning(cm, mi_8x8, prev_mi_8x8); - rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, - &dummy_rate, &dummy_dist, 1, x->pc_root, 0); + copy_partitioning(cm, mi, prev_mi); + rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, + &dummy_rate, &dummy_dist, 1, x->pc_root); } } } else { @@ -2299,7 +2349,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, &sf->max_partition_size); } rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64, - &dummy_rate, &dummy_dist, 1, INT64_MAX, x->pc_root, 0); + &dummy_rate, &dummy_dist, 1, INT64_MAX, x->pc_root); } } } @@ -2623,9 +2673,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) continue; - load_pred_mv(x, ctx); - nonrd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, subsize, &this_rate, &this_dist, 0, best_rd - sum_rd, pc_tree->split[i]); @@ -2768,7 +2816,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, static void nonrd_use_partition(VP9_COMP *cpi, const TileInfo *const tile, - MODE_INFO **mi_8x8, + MODE_INFO **mi, TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE bsize, int output_enabled, @@ -2787,7 +2835,7 @@ static void nonrd_use_partition(VP9_COMP *cpi, if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - subsize = (bsize >= BLOCK_8X8) ? mi_8x8[0]->mbmi.sb_type : BLOCK_4X4; + subsize = (bsize >= BLOCK_8X8) ? 
mi[0]->mbmi.sb_type : BLOCK_4X4; partition = partition_lookup[bsl][subsize]; switch (partition) { @@ -2815,7 +2863,7 @@ static void nonrd_use_partition(VP9_COMP *cpi, if (mi_row + hbs < cm->mi_rows) { nonrd_pick_sb_modes(cpi, tile, mi_row + hbs, mi_col, &rate, &dist, subsize); - pc_tree->horizontal[1].mic.mbmi = mi_8x8[0]->mbmi; + pc_tree->horizontal[1].mic.mbmi = mi[0]->mbmi; if (rate != INT_MAX && dist != INT64_MAX && *totrate != INT_MAX && *totdist != INT64_MAX) { *totrate += rate; @@ -2825,10 +2873,10 @@ static void nonrd_use_partition(VP9_COMP *cpi, break; case PARTITION_SPLIT: subsize = get_subsize(bsize, PARTITION_SPLIT); - nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, + nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, subsize, output_enabled, totrate, totdist, pc_tree->split[0]); - nonrd_use_partition(cpi, tile, mi_8x8 + hbs, tp, + nonrd_use_partition(cpi, tile, mi + hbs, tp, mi_row, mi_col + hbs, subsize, output_enabled, &rate, &dist, pc_tree->split[1]); if (rate != INT_MAX && dist != INT64_MAX && @@ -2836,7 +2884,7 @@ static void nonrd_use_partition(VP9_COMP *cpi, *totrate += rate; *totdist += dist; } - nonrd_use_partition(cpi, tile, mi_8x8 + hbs * mis, tp, + nonrd_use_partition(cpi, tile, mi + hbs * mis, tp, mi_row + hbs, mi_col, subsize, output_enabled, &rate, &dist, pc_tree->split[2]); if (rate != INT_MAX && dist != INT64_MAX && @@ -2844,7 +2892,7 @@ static void nonrd_use_partition(VP9_COMP *cpi, *totrate += rate; *totdist += dist; } - nonrd_use_partition(cpi, tile, mi_8x8 + hbs * mis + hbs, tp, + nonrd_use_partition(cpi, tile, mi + hbs * mis + hbs, tp, mi_row + hbs, mi_col + hbs, subsize, output_enabled, &rate, &dist, pc_tree->split[3]); if (rate != INT_MAX && dist != INT64_MAX && @@ -2883,8 +2931,8 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, int dummy_rate = 0; int64_t dummy_dist = 0; const int idx_str = cm->mi_stride * mi_row + mi_col; - MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str; - 
MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str; + MODE_INFO **mi = cm->mi_grid_visible + idx_str; + MODE_INFO **prev_mi = cm->prev_mi_grid_visible + idx_str; BLOCK_SIZE bsize; x->in_static_area = 0; @@ -2895,12 +2943,12 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, switch (cpi->sf.partition_search_type) { case VAR_BASED_PARTITION: choose_partitioning(cpi, tile, mi_row, mi_col); - nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, + nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, 1, &dummy_rate, &dummy_dist, x->pc_root); break; case SOURCE_VAR_BASED_PARTITION: - set_source_var_based_partition(cpi, tile, mi_8x8, mi_row, mi_col); - nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, + set_source_var_based_partition(cpi, tile, mi, mi_row, mi_col); + nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, 1, &dummy_rate, &dummy_dist, x->pc_root); break; case VAR_BASED_FIXED_PARTITION: @@ -2908,19 +2956,23 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, bsize = cpi->sf.partition_search_type == FIXED_PARTITION ? 
cpi->sf.always_this_block_size : get_nonrd_var_based_fixed_partition(cpi, mi_row, mi_col); - set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize); - nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, + set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize); + nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, 1, &dummy_rate, &dummy_dist, x->pc_root); break; case REFERENCE_PARTITION: if (cpi->sf.partition_check || !is_background(cpi, tile, mi_row, mi_col)) { + set_modeinfo_offsets(cm, xd, mi_row, mi_col); + auto_partition_range(cpi, tile, mi_row, mi_col, + &cpi->sf.min_partition_size, + &cpi->sf.max_partition_size); nonrd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1, INT64_MAX, x->pc_root); } else { - copy_partitioning(cm, mi_8x8, prev_mi_8x8); - nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, + copy_partitioning(cm, mi, prev_mi); + nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, 1, &dummy_rate, &dummy_dist, x->pc_root); } diff --git a/source/libvpx/vp9/encoder/vp9_encodemb.c b/source/libvpx/vp9/encoder/vp9_encodemb.c index d71b16f..3b231b7 100644 --- a/source/libvpx/vp9/encoder/vp9_encodemb.c +++ b/source/libvpx/vp9/encoder/vp9_encodemb.c @@ -99,7 +99,7 @@ static int trellis_get_coeff_context(const int16_t *scan, } static int optimize_b(MACROBLOCK *mb, int plane, int block, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int ctx) { + TX_SIZE tx_size, int ctx) { MACROBLOCKD *const xd = &mb->e_mbd; struct macroblock_plane *const p = &mb->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; @@ -381,7 +381,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { const int ctx = combine_entropy_contexts(*a, *l); - *a = *l = optimize_b(x, plane, block, plane_bsize, tx_size, ctx) > 0; + *a = *l = optimize_b(x, plane, block, tx_size, ctx) > 0; } else { *a 
= *l = p->eobs[block] > 0; } diff --git a/source/libvpx/vp9/encoder/vp9_encoder.c b/source/libvpx/vp9/encoder/vp9_encoder.c index cc2c552..911ce7c 100644 --- a/source/libvpx/vp9/encoder/vp9_encoder.c +++ b/source/libvpx/vp9/encoder/vp9_encoder.c @@ -115,22 +115,6 @@ static void set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv) { } } -static void setup_key_frame(VP9_COMP *cpi) { - vp9_setup_past_independence(&cpi->common); - - // All buffers are implicitly updated on key frames. - cpi->refresh_golden_frame = 1; - cpi->refresh_alt_ref_frame = 1; -} - -static void setup_inter_frame(VP9_COMMON *cm) { - if (cm->error_resilient_mode || cm->intra_only) - vp9_setup_past_independence(cm); - - assert(cm->frame_context_idx < FRAME_CONTEXTS); - cm->fc = cm->frame_contexts[cm->frame_context_idx]; -} - static void setup_frame(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; // Set up entropy context depending on frame type. The decoder mandates @@ -138,17 +122,21 @@ static void setup_frame(VP9_COMP *cpi) { // frames where the error_resilient_mode or intra_only flag is set. For // other inter-frames the encoder currently uses only two contexts; // context 1 for ALTREF frames and context 0 for the others. 
+ if (frame_is_intra_only(cm) || cm->error_resilient_mode) { + vp9_setup_past_independence(cm); + } else { + if (!cpi->use_svc) + cm->frame_context_idx = cpi->refresh_alt_ref_frame; + } + if (cm->frame_type == KEY_FRAME) { - setup_key_frame(cpi); + cpi->refresh_golden_frame = 1; + cpi->refresh_alt_ref_frame = 1; } else { - if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) - cm->frame_context_idx = cpi->refresh_alt_ref_frame; - setup_inter_frame(cm); + cm->fc = cm->frame_contexts[cm->frame_context_idx]; } } - - void vp9_initialize_enc() { static int init_done = 0; @@ -761,7 +749,7 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) { VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) { - int i, j; + unsigned int i, j; VP9_COMP *const cpi = vpx_memalign(32, sizeof(VP9_COMP)); VP9_COMMON *const cm = cpi != NULL ? &cpi->common : NULL; @@ -1054,7 +1042,7 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) { } void vp9_remove_compressor(VP9_COMP *cpi) { - int i; + unsigned int i; if (!cpi) return; @@ -1617,7 +1605,7 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { } if (lf->filter_level > 0) { - vp9_loop_filter_frame(cm, xd, lf->filter_level, 0, 0); + vp9_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0); } vp9_extend_frame_inner_borders(cm->frame_to_show); @@ -1737,8 +1725,6 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) { #endif static void encode_without_recode_loop(VP9_COMP *cpi, - size_t *size, - uint8_t *dest, int q) { VP9_COMMON *const cm = &cpi->common; vp9_clear_system_state(); @@ -2174,7 +2160,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } if (cpi->sf.recode_loop == DISALLOW_RECODE) { - encode_without_recode_loop(cpi, size, dest, q); + encode_without_recode_loop(cpi, q); } else { encode_with_recode_loop(cpi, size, dest, q, bottom_index, top_index); } @@ -2236,9 +2222,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } } -#if 0 - 
output_frame_level_debug_stats(cpi); -#endif if (cpi->refresh_golden_frame == 1) cpi->frame_flags |= FRAMEFLAGS_GOLDEN; else @@ -2254,6 +2237,10 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, cm->last_frame_type = cm->frame_type; vp9_rc_postencode_update(cpi, *size); +#if 0 + output_frame_level_debug_stats(cpi); +#endif + if (cm->frame_type == KEY_FRAME) { // Tell the caller that the frame was coded as a key frame *frame_flags = cpi->frame_flags | FRAMEFLAGS_KEY; @@ -2790,6 +2777,9 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp9_ppflags_t *flags) { VP9_COMMON *cm = &cpi->common; +#if !CONFIG_VP9_POSTPROC + (void)flags; +#endif if (!cm->show_frame) { return -1; @@ -2798,7 +2788,6 @@ int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest, #if CONFIG_VP9_POSTPROC ret = vp9_post_proc_frame(cm, dest, flags); #else - if (cm->frame_to_show) { *dest = *cm->frame_to_show; dest->y_width = cm->width; @@ -2809,64 +2798,13 @@ int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest, } else { ret = -1; } - #endif // !CONFIG_VP9_POSTPROC vp9_clear_system_state(); return ret; } } -int vp9_set_roimap(VP9_COMP *cpi, unsigned char *map, unsigned int rows, - unsigned int cols, int delta_q[MAX_SEGMENTS], - int delta_lf[MAX_SEGMENTS], - unsigned int threshold[MAX_SEGMENTS]) { - signed char feature_data[SEG_LVL_MAX][MAX_SEGMENTS]; - struct segmentation *seg = &cpi->common.seg; - const VP9_COMMON *const cm = &cpi->common; - int i; - - if (cm->mb_rows != rows || cm->mb_cols != cols) - return -1; - - if (!map) { - vp9_disable_segmentation(seg); - return 0; - } - - vpx_memcpy(cpi->segmentation_map, map, cm->mi_rows * cm->mi_cols); - - // Activate segmentation. 
- vp9_enable_segmentation(seg); - - // Set up the quant, LF and breakout threshold segment data - for (i = 0; i < MAX_SEGMENTS; i++) { - feature_data[SEG_LVL_ALT_Q][i] = delta_q[i]; - feature_data[SEG_LVL_ALT_LF][i] = delta_lf[i]; - cpi->segment_encode_breakout[i] = threshold[i]; - } - - // Enable the loop and quant changes in the feature mask - for (i = 0; i < MAX_SEGMENTS; i++) { - if (delta_q[i]) - vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q); - else - vp9_disable_segfeature(seg, i, SEG_LVL_ALT_Q); - - if (delta_lf[i]) - vp9_enable_segfeature(seg, i, SEG_LVL_ALT_LF); - else - vp9_disable_segfeature(seg, i, SEG_LVL_ALT_LF); - } - - // Initialize the feature data structure - // SEGMENT_DELTADATA 0, SEGMENT_ABSDATA 1 - vp9_set_segment_data(seg, &feature_data[0][0], SEGMENT_DELTADATA); - - return 0; -} - -int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map, - unsigned int rows, unsigned int cols) { +int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols) { if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) { if (map) { vpx_memcpy(cpi->active_map, map, rows * cols); diff --git a/source/libvpx/vp9/encoder/vp9_encoder.h b/source/libvpx/vp9/encoder/vp9_encoder.h index f48909e..17c826f 100644 --- a/source/libvpx/vp9/encoder/vp9_encoder.h +++ b/source/libvpx/vp9/encoder/vp9_encoder.h @@ -391,7 +391,6 @@ typedef struct VP9_COMP { RATE_CONTROL rc; vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES]; - vp9_coeff_probs_model frame_coef_probs[TX_SIZES][PLANE_TYPES]; struct vpx_codec_pkt_list *output_pkt_list; @@ -552,14 +551,7 @@ int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag, int vp9_update_entropy(VP9_COMP *cpi, int update); -int vp9_set_roimap(VP9_COMP *cpi, unsigned char *map, - unsigned int rows, unsigned int cols, - int delta_q[MAX_SEGMENTS], - int delta_lf[MAX_SEGMENTS], - unsigned int threshold[MAX_SEGMENTS]); - -int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map, - unsigned int rows, unsigned int 
cols); +int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols); int vp9_set_internal_size(VP9_COMP *cpi, VPX_SCALING horiz_mode, VPX_SCALING vert_mode); diff --git a/source/libvpx/vp9/encoder/vp9_firstpass.c b/source/libvpx/vp9/encoder/vp9_firstpass.c index efa320f..ed72d78 100644 --- a/source/libvpx/vp9/encoder/vp9_firstpass.c +++ b/source/libvpx/vp9/encoder/vp9_firstpass.c @@ -61,6 +61,7 @@ #define MIN_GF_INTERVAL 4 #endif + // #define LONG_TERM_VBR_CORRECTION static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) { @@ -540,7 +541,7 @@ void vp9_first_pass(VP9_COMP *cpi) { vp9_setup_src_planes(x, cpi->Source, 0, 0); vp9_setup_pre_planes(xd, 0, first_ref_buf, 0, 0, NULL); - vp9_setup_dst_planes(xd, new_yv12, 0, 0); + vp9_setup_dst_planes(xd->plane, new_yv12, 0, 0); xd->mi = cm->mi_grid_visible; xd->mi[0] = cm->mi; @@ -1417,12 +1418,90 @@ void define_fixed_arf_period(VP9_COMP *cpi) { } #endif +// Calculate a section intra ratio used in setting max loop filter. +static void calculate_section_intra_ratio(struct twopass_rc *twopass, + const FIRSTPASS_STATS *start_pos, + int section_length) { + FIRSTPASS_STATS next_frame; + FIRSTPASS_STATS sectionstats; + int i; + + vp9_zero(next_frame); + vp9_zero(sectionstats); + + reset_fpf_position(twopass, start_pos); + + for (i = 0; i < section_length; ++i) { + input_stats(twopass, &next_frame); + accumulate_stats(&sectionstats, &next_frame); + } + + avg_stats(&sectionstats); + + twopass->section_intra_rating = + (int)(sectionstats.intra_error / + DOUBLE_DIVIDE_CHECK(sectionstats.coded_error)); + + reset_fpf_position(twopass, start_pos); +} + +// Calculate the total bits to allocate in this GF/ARF group. 
+static int64_t calculate_total_gf_group_bits(VP9_COMP *cpi, + double gf_group_err) { + const RATE_CONTROL *const rc = &cpi->rc; + const struct twopass_rc *const twopass = &cpi->twopass; + const int max_bits = frame_max_bits(rc, &cpi->oxcf); + int64_t total_group_bits; + + // Calculate the bits to be allocated to the group as a whole. + if ((twopass->kf_group_bits > 0) && (twopass->kf_group_error_left > 0)) { + total_group_bits = (int64_t)(twopass->kf_group_bits * + (gf_group_err / twopass->kf_group_error_left)); + } else { + total_group_bits = 0; + } + + // Clamp odd edge cases. + total_group_bits = (total_group_bits < 0) ? + 0 : (total_group_bits > twopass->kf_group_bits) ? + twopass->kf_group_bits : total_group_bits; + + // Clip based on user supplied data rate variability limit. + if (total_group_bits > (int64_t)max_bits * rc->baseline_gf_interval) + total_group_bits = (int64_t)max_bits * rc->baseline_gf_interval; + + return total_group_bits; +} + +// Calculate the number bits extra to assign to boosted frames in a group. +static int calculate_boost_bits(int frame_count, + int boost, int64_t total_group_bits) { + int allocation_chunks; + + // return 0 for invalid inputs (could arise e.g. through rounding errors) + if (!boost || (total_group_bits <= 0) || (frame_count <= 0) ) + return 0; + + allocation_chunks = (frame_count * 100) + boost; + + // Prevent overflow. + if (boost > 1023) { + int divisor = boost >> 10; + boost /= divisor; + allocation_chunks /= divisor; + } + + // Calculate the number of extra bits for use in the boosted frame or frames. + return MAX((int)(((int64_t)boost * total_group_bits) / allocation_chunks), 0); +} + + // Analyse and define a gf/arf group. 
static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { RATE_CONTROL *const rc = &cpi->rc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; struct twopass_rc *const twopass = &cpi->twopass; - FIRSTPASS_STATS next_frame = { 0 }; + FIRSTPASS_STATS next_frame; const FIRSTPASS_STATS *start_pos; int i; double boost_score = 0.0; @@ -1442,8 +1521,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { double mv_in_out_accumulator = 0.0; double abs_mv_in_out_accumulator = 0.0; double mv_ratio_accumulator_thresh; - // Max bits for a single frame. - const int max_bits = frame_max_bits(rc, oxcf); unsigned int allow_alt_ref = oxcf->play_alternate && oxcf->lag_in_frames; int f_boost = 0; @@ -1451,10 +1528,10 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { int flash_detected; int active_max_gf_interval; - twopass->gf_group_bits = 0; - vp9_clear_system_state(); + vp9_zero(next_frame); + twopass->gf_group_bits = 0; start_pos = twopass->stats_in; // Load stats for the current frame. @@ -1657,149 +1734,57 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { } #endif #endif - - // Calculate the bits to be allocated to the group as a whole. - if (twopass->kf_group_bits > 0 && twopass->kf_group_error_left > 0) { - twopass->gf_group_bits = (int64_t)(twopass->kf_group_bits * - (gf_group_err / twopass->kf_group_error_left)); - } else { - twopass->gf_group_bits = 0; - } - twopass->gf_group_bits = (twopass->gf_group_bits < 0) ? - 0 : (twopass->gf_group_bits > twopass->kf_group_bits) ? - twopass->kf_group_bits : twopass->gf_group_bits; - - // Clip cpi->twopass.gf_group_bits based on user supplied data rate - // variability limit, cpi->oxcf.two_pass_vbrmax_section. - if (twopass->gf_group_bits > (int64_t)max_bits * rc->baseline_gf_interval) - twopass->gf_group_bits = (int64_t)max_bits * rc->baseline_gf_interval; - // Reset the file position. 
reset_fpf_position(twopass, start_pos); - // Assign bits to the arf or gf. - for (i = 0; i <= (rc->source_alt_ref_pending && - cpi->common.frame_type != KEY_FRAME); ++i) { - int allocation_chunks; - int q = rc->last_q[INTER_FRAME]; - int gf_bits; + // Calculate the bits to be allocated to the gf/arf group as a whole + twopass->gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err); + // Calculate the extra bits to be used for boosted frame(s) + { + int q = rc->last_q[INTER_FRAME]; int boost = (rc->gfu_boost * gfboost_qadjust(q)) / 100; // Set max and minimum boost and hence minimum allocation. boost = clamp(boost, 125, (rc->baseline_gf_interval + 1) * 200); - if (rc->source_alt_ref_pending && i == 0) - allocation_chunks = ((rc->baseline_gf_interval + 1) * 100) + boost; - else - allocation_chunks = (rc->baseline_gf_interval * 100) + (boost - 100); - - // Prevent overflow. - if (boost > 1023) { - int divisor = boost >> 10; - boost /= divisor; - allocation_chunks /= divisor; - } - - // Calculate the number of bits to be spent on the gf or arf based on - // the boost number. - gf_bits = (int)((double)boost * (twopass->gf_group_bits / - (double)allocation_chunks)); - - // If the frame that is to be boosted is simpler than the average for - // the gf/arf group then use an alternative calculation - // based on the error score of the frame itself. 
- if (rc->baseline_gf_interval < 1 || - mod_frame_err < gf_group_err / (double)rc->baseline_gf_interval) { - double alt_gf_grp_bits = (double)twopass->kf_group_bits * - (mod_frame_err * (double)rc->baseline_gf_interval) / - DOUBLE_DIVIDE_CHECK(twopass->kf_group_error_left); - - int alt_gf_bits = (int)((double)boost * (alt_gf_grp_bits / - (double)allocation_chunks)); - - if (gf_bits > alt_gf_bits) - gf_bits = alt_gf_bits; - } else { - // If it is harder than other frames in the group make sure it at - // least receives an allocation in keeping with its relative error - // score, otherwise it may be worse off than an "un-boosted" frame. - int alt_gf_bits = (int)((double)twopass->kf_group_bits * - mod_frame_err / - DOUBLE_DIVIDE_CHECK(twopass->kf_group_error_left)); - - if (alt_gf_bits > gf_bits) - gf_bits = alt_gf_bits; - } + // Calculate the extra bits to be used for boosted frame(s) + twopass->gf_bits = calculate_boost_bits(rc->baseline_gf_interval, + boost, twopass->gf_group_bits); - // Don't allow a negative value for gf_bits. - if (gf_bits < 0) - gf_bits = 0; - if (i == 0) { - twopass->gf_bits = gf_bits; - } - if (i == 1 || - (!rc->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME && - !vp9_is_upper_layer_key_frame(cpi))) { - // Calculate the per frame bit target for this frame. - vp9_rc_set_frame_target(cpi, gf_bits); + // For key frames the frame target rate is set already. + // NOTE: We dont bother to check for the special case of ARF overlay + // frames here, as there is clamping code for this in the function + // vp9_rc_clamp_pframe_target_size(), which applies to one and two pass + // encodes. + if (cpi->common.frame_type != KEY_FRAME && + !vp9_is_upper_layer_key_frame(cpi)) { + vp9_rc_set_frame_target(cpi, twopass->gf_bits); } } - { - // Adjust KF group bits and error remaining. 
- twopass->kf_group_error_left -= (int64_t)gf_group_err; - - // If this is an arf update we want to remove the score for the overlay - // frame at the end which will usually be very cheap to code. - // The overlay frame has already, in effect, been coded so we want to spread - // the remaining bits among the other frames. - // For normal GFs remove the score for the GF itself unless this is - // also a key frame in which case it has already been accounted for. - if (rc->source_alt_ref_pending) { - twopass->gf_group_error_left = (int64_t)(gf_group_err - mod_frame_err); - } else if (cpi->common.frame_type != KEY_FRAME) { - twopass->gf_group_error_left = (int64_t)(gf_group_err - - gf_first_frame_err); - } else { - twopass->gf_group_error_left = (int64_t)gf_group_err; - } - - // This condition could fail if there are two kfs very close together - // despite MIN_GF_INTERVAL and would cause a divide by 0 in the - // calculation of alt_extra_bits. - if (rc->baseline_gf_interval >= 3) { - const int boost = rc->source_alt_ref_pending ? b_boost : rc->gfu_boost; - - if (boost >= 150) { - const int pct_extra = MIN(20, (boost - 100) / 50); - const int alt_extra_bits = (int)(( - MAX(twopass->gf_group_bits - twopass->gf_bits, 0) * - pct_extra) / 100); - twopass->gf_group_bits -= alt_extra_bits; - } - } + // Adjust KF group bits and error remaining. + twopass->kf_group_error_left -= (int64_t)gf_group_err; + + // If this is an arf update we want to remove the score for the overlay + // frame at the end which will usually be very cheap to code. + // The overlay frame has already, in effect, been coded so we want to spread + // the remaining bits among the other frames. + // For normal GFs remove the score for the GF itself unless this is + // also a key frame in which case it has already been accounted for. 
+ if (rc->source_alt_ref_pending) { + twopass->gf_group_error_left = (int64_t)(gf_group_err - mod_frame_err); + } else if (cpi->common.frame_type != KEY_FRAME) { + twopass->gf_group_error_left = (int64_t)(gf_group_err + - gf_first_frame_err); + } else { + twopass->gf_group_error_left = (int64_t)gf_group_err; } + // Calculate a section intra ratio used in setting max loop filter. if (cpi->common.frame_type != KEY_FRAME) { - FIRSTPASS_STATS sectionstats; - - zero_stats(&sectionstats); - reset_fpf_position(twopass, start_pos); - - for (i = 0; i < rc->baseline_gf_interval; ++i) { - input_stats(twopass, &next_frame); - accumulate_stats(&sectionstats, &next_frame); - } - - avg_stats(&sectionstats); - - twopass->section_intra_rating = (int) - (sectionstats.intra_error / - DOUBLE_DIVIDE_CHECK(sectionstats.coded_error)); - - reset_fpf_position(twopass, start_pos); + calculate_section_intra_ratio(twopass, start_pos, rc->baseline_gf_interval); } } @@ -2050,15 +2035,15 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { } else { twopass->kf_group_bits = 0; } + twopass->kf_group_bits = MAX(0, twopass->kf_group_bits); + // Reset the first pass file position. reset_fpf_position(twopass, start_position); - // Determine how big to make this keyframe based on how well the subsequent - // frames use inter blocks. + // Scan through the kf group collating various stats used to deteermine + // how many bits to spend on it. decay_accumulator = 1.0; boost_score = 0.0; - - // Scan through the kf group collating various stats. 
for (i = 0; i < rc->frames_to_key; ++i) { if (EOF == input_stats(twopass, &next_frame)) break; @@ -2095,101 +2080,27 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { } } - { - FIRSTPASS_STATS sectionstats; + // Store the zero motion percentage + twopass->kf_zeromotion_pct = (int)(zero_motion_accumulator * 100.0); - zero_stats(&sectionstats); - reset_fpf_position(twopass, start_position); + // Calculate a section intra ratio used in setting max loop filter. + calculate_section_intra_ratio(twopass, start_position, rc->frames_to_key); - for (i = 0; i < rc->frames_to_key; ++i) { - input_stats(twopass, &next_frame); - accumulate_stats(&sectionstats, &next_frame); - } - - avg_stats(&sectionstats); - - twopass->section_intra_rating = (int) (sectionstats.intra_error / - DOUBLE_DIVIDE_CHECK(sectionstats.coded_error)); - } + // Work out how many bits to allocate for the key frame itself. + rc->kf_boost = (int)boost_score; - // Reset the first pass file position. - reset_fpf_position(twopass, start_position); + if (rc->kf_boost < (rc->frames_to_key * 3)) + rc->kf_boost = (rc->frames_to_key * 3); + if (rc->kf_boost < MIN_KF_BOOST) + rc->kf_boost = MIN_KF_BOOST; - // Work out how many bits to allocate for the key frame itself. - if (1) { - int kf_boost = (int)boost_score; - int allocation_chunks; - - if (kf_boost < (rc->frames_to_key * 3)) - kf_boost = (rc->frames_to_key * 3); - - if (kf_boost < MIN_KF_BOOST) - kf_boost = MIN_KF_BOOST; - - // Make a note of baseline boost and the zero motion - // accumulator value for use elsewhere. 
- rc->kf_boost = kf_boost; - twopass->kf_zeromotion_pct = (int)(zero_motion_accumulator * 100.0); - - // Key frame size depends on: - // (1) the error score for the whole key frame group, - // (2) the key frames' own error if this is smaller than the - // average for the group (optional), - // (3) insuring that the frame receives at least the allocation it would - // have received based on its own error score vs the error score - // remaining. - // Special case: - // If the sequence appears almost totally static we want to spend almost - // all of the bits on the key frame. - // - // We use (cpi->rc.frames_to_key - 1) below because the key frame itself is - // taken care of by kf_boost. - if (zero_motion_accumulator >= 0.99) { - allocation_chunks = ((rc->frames_to_key - 1) * 10) + kf_boost; - } else { - allocation_chunks = ((rc->frames_to_key - 1) * 100) + kf_boost; - } + twopass->kf_bits = calculate_boost_bits((rc->frames_to_key - 1), + rc->kf_boost, twopass->kf_group_bits); - // Prevent overflow. - if (kf_boost > 1028) { - const int divisor = kf_boost >> 10; - kf_boost /= divisor; - allocation_chunks /= divisor; - } + twopass->kf_group_bits -= twopass->kf_bits; - twopass->kf_group_bits = MAX(0, twopass->kf_group_bits); - // Calculate the number of bits to be spent on the key frame. - twopass->kf_bits = (int)((double)kf_boost * - ((double)twopass->kf_group_bits / allocation_chunks)); - - // If the key frame is actually easier than the average for the - // kf group (which does sometimes happen, e.g. a blank intro frame) - // then use an alternate calculation based on the kf error score - // which should give a smaller key frame. 
- if (kf_mod_err < kf_group_err / rc->frames_to_key) { - double alt_kf_grp_bits = ((double)twopass->bits_left * - (kf_mod_err * (double)rc->frames_to_key) / - DOUBLE_DIVIDE_CHECK(twopass->modified_error_left)); - - const int alt_kf_bits = (int)((double)kf_boost * - (alt_kf_grp_bits / (double)allocation_chunks)); - - if (twopass->kf_bits > alt_kf_bits) - twopass->kf_bits = alt_kf_bits; - } else { - // Else if it is much harder than other frames in the group make sure - // it at least receives an allocation in keeping with its relative - // error score. - const int alt_kf_bits = (int)((double)twopass->bits_left * (kf_mod_err / - DOUBLE_DIVIDE_CHECK(twopass->modified_error_left))); - - if (alt_kf_bits > twopass->kf_bits) - twopass->kf_bits = alt_kf_bits; - } - twopass->kf_group_bits -= twopass->kf_bits; - // Per frame bit target for this frame. - vp9_rc_set_frame_target(cpi, twopass->kf_bits); - } + // Per frame bit target for this frame. + vp9_rc_set_frame_target(cpi, twopass->kf_bits); // Note the total error score of the kf group minus the key frame itself. 
twopass->kf_group_error_left = (int)(kf_group_err - kf_mod_err); @@ -2242,8 +2153,8 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { double this_frame_coded_error; int target; LAYER_CONTEXT *lc = NULL; - int is_spatial_svc = (cpi->use_svc && cpi->svc.number_temporal_layers == 1); - + const int is_spatial_svc = (cpi->use_svc && + cpi->svc.number_temporal_layers == 1); if (is_spatial_svc) { lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; frames_left = (int)(twopass->total_stats.count - @@ -2303,14 +2214,14 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { this_frame_copy = this_frame; find_next_key_frame(cpi, &this_frame_copy); // Don't place key frame in any enhancement layers in spatial svc - if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) { + if (is_spatial_svc) { lc->is_key_frame = 1; if (cpi->svc.spatial_layer_id > 0) { cm->frame_type = INTER_FRAME; } } } else { - if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) { + if (is_spatial_svc) { lc->is_key_frame = 0; } cm->frame_type = INTER_FRAME; diff --git a/source/libvpx/vp9/encoder/vp9_mbgraph.c b/source/libvpx/vp9/encoder/vp9_mbgraph.c index e7dcc7a..5e87d28 100644 --- a/source/libvpx/vp9/encoder/vp9_mbgraph.c +++ b/source/libvpx/vp9/encoder/vp9_mbgraph.c @@ -20,7 +20,6 @@ #include "vp9/common/vp9_systemdependent.h" - static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, const MV *ref_mv, MV *dst_mv, @@ -237,8 +236,9 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi, int mb_col, mb_row, offset = 0; int mb_y_offset = 0, arf_y_offset = 0, gld_y_offset = 0; MV arf_top_mv = {0, 0}, gld_top_mv = {0, 0}; - MODE_INFO mi_local = { { 0 } }; + MODE_INFO mi_local; + vp9_zero(mi_local); // Set up limit values for motion vectors to prevent them extending outside // the UMV borders. 
x->mv_row_min = -BORDER_MV_PIXELS_B16; diff --git a/source/libvpx/vp9/encoder/vp9_mcomp.c b/source/libvpx/vp9/encoder/vp9_mcomp.c index 43c8ab8..4f7d6f1 100644 --- a/source/libvpx/vp9/encoder/vp9_mcomp.c +++ b/source/libvpx/vp9/encoder/vp9_mcomp.c @@ -886,6 +886,10 @@ int vp9_full_range_search_c(const MACROBLOCK *x, int r, c, i; int start_col, end_col, start_row, end_row; + // The cfg and search_param parameters are not used in this search variant + (void)cfg; + (void)search_param; + clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); *best_mv = *ref_mv; *num00 = 11; @@ -1551,7 +1555,7 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, int search_range, const vp9_variance_fn_ptr_t *fn_ptr, const MV *center_mv, - const uint8_t *second_pred, int w, int h) { + const uint8_t *second_pred) { const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}, {-1, -1}, {1, -1}, {-1, 1}, {1, 1}}; const MACROBLOCKD *const xd = &x->e_mbd; diff --git a/source/libvpx/vp9/encoder/vp9_mcomp.h b/source/libvpx/vp9/encoder/vp9_mcomp.h index 827957d..873edf3 100644 --- a/source/libvpx/vp9/encoder/vp9_mcomp.h +++ b/source/libvpx/vp9/encoder/vp9_mcomp.h @@ -144,8 +144,7 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit, int search_range, const vp9_variance_fn_ptr_t *fn_ptr, - const MV *center_mv, const uint8_t *second_pred, - int w, int h); + const MV *center_mv, const uint8_t *second_pred); #ifdef __cplusplus } // extern "C" #endif diff --git a/source/libvpx/vp9/encoder/vp9_picklpf.c b/source/libvpx/vp9/encoder/vp9_picklpf.c index 7c42bb8..5328465 100644 --- a/source/libvpx/vp9/encoder/vp9_picklpf.c +++ b/source/libvpx/vp9/encoder/vp9_picklpf.c @@ -24,8 +24,12 @@ #include "vp9/encoder/vp9_quantize.h" static int get_max_filter_level(const VP9_COMP *cpi) { - return cpi->twopass.section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4 - : MAX_LOOP_FILTER; + if (cpi->pass == 2) { + return cpi->twopass.section_intra_rating > 8 ? 
MAX_LOOP_FILTER * 3 / 4 + : MAX_LOOP_FILTER; + } else { + return MAX_LOOP_FILTER; + } } @@ -34,7 +38,8 @@ static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi, VP9_COMMON *const cm = &cpi->common; int filt_err; - vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_level, 1, partial_frame); + vp9_loop_filter_frame(cm->frame_to_show, cm, &cpi->mb.e_mbd, filt_level, 1, + partial_frame); filt_err = vp9_get_y_sse(sd, cm->frame_to_show); // Re-instate the unfiltered frame @@ -77,8 +82,8 @@ static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, // Bias against raising loop filter in favor of lowering it. int bias = (best_err >> (15 - (filt_mid / 8))) * filter_step; - if (cpi->twopass.section_intra_rating < 20) - bias = bias * cpi->twopass.section_intra_rating / 20; + if ((cpi->pass == 2) && (cpi->twopass.section_intra_rating < 20)) + bias = (bias * cpi->twopass.section_intra_rating) / 20; // yx, bias less for large block size if (cm->tx_mode != ONLY_4X4) diff --git a/source/libvpx/vp9/encoder/vp9_pickmode.c b/source/libvpx/vp9/encoder/vp9_pickmode.c index adaa044..1e9887c 100644 --- a/source/libvpx/vp9/encoder/vp9_pickmode.c +++ b/source/libvpx/vp9/encoder/vp9_pickmode.c @@ -27,12 +27,11 @@ #include "vp9/encoder/vp9_rdopt.h" static void full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, - const TileInfo *const tile, BLOCK_SIZE bsize, int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv) { MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; - struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; + struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}}; int step_param; int sadpb = x->sadperbit16; MV mvp_full; @@ -107,12 +106,11 @@ static void full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, } static void sub_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, - const TileInfo *const tile, BLOCK_SIZE bsize, int mi_row, int mi_col, MV *tmp_mv) { MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; - struct 
buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; + struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}}; int ref = mbmi->ref_frame[0]; MV ref_mv = mbmi->ref_mvs[ref][0].as_mv; int dis; @@ -290,7 +288,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (this_rd < (int64_t)(1 << num_pels_log2_lookup[bsize])) continue; - full_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col, + full_pixel_motion_search(cpi, x, bsize, mi_row, mi_col, &frame_mv[NEWMV][ref_frame], &rate_mv); if (frame_mv[NEWMV][ref_frame].as_int == INVALID_MV) @@ -301,7 +299,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (RDCOST(x->rdmult, x->rddiv, rate_mv + rate_mode, 0) > best_rd) continue; - sub_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col, + sub_pixel_motion_search(cpi, x, bsize, mi_row, mi_col, &frame_mv[NEWMV][ref_frame].as_mv); } diff --git a/source/libvpx/vp9/encoder/vp9_quantize.c b/source/libvpx/vp9/encoder/vp9_quantize.c index 5206bb6..4d3086d 100644 --- a/source/libvpx/vp9/encoder/vp9_quantize.c +++ b/source/libvpx/vp9/encoder/vp9_quantize.c @@ -32,6 +32,7 @@ void vp9_quantize_b_c(const int16_t *coeff_ptr, intptr_t count, zbin_ptr[1] + zbin_oq_value }; const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 }; + (void)iscan; vpx_memset(qcoeff_ptr, 0, count * sizeof(int16_t)); vpx_memset(dqcoeff_ptr, 0, count * sizeof(int16_t)); @@ -87,6 +88,7 @@ void vp9_quantize_b_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs, int idx = 0; int idx_arr[1024]; int i, eob = -1; + (void)iscan; vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(int16_t)); vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(int16_t)); diff --git a/source/libvpx/vp9/encoder/vp9_ratectrl.c b/source/libvpx/vp9/encoder/vp9_ratectrl.c index fe43f3a..a04622c 100644 --- a/source/libvpx/vp9/encoder/vp9_ratectrl.c +++ b/source/libvpx/vp9/encoder/vp9_ratectrl.c @@ -48,6 +48,7 @@ static int kf_high_motion_minq[QINDEX_RANGE]; static int arfgf_low_motion_minq[QINDEX_RANGE]; static int 
arfgf_high_motion_minq[QINDEX_RANGE]; static int inter_minq[QINDEX_RANGE]; +static int rtc_minq[QINDEX_RANGE]; static int gf_high = 2000; static int gf_low = 400; static int kf_high = 5000; @@ -84,6 +85,7 @@ void vp9_rc_init_minq_luts() { arfgf_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.30); arfgf_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.50); inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.90); + rtc_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.70); } } @@ -549,14 +551,14 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, // Use the lower of active_worst_quality and recent/average Q. if (cm->current_video_frame > 1) { if (rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) - active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]]; + active_best_quality = rtc_minq[rc->avg_frame_qindex[INTER_FRAME]]; else - active_best_quality = inter_minq[active_worst_quality]; + active_best_quality = rtc_minq[active_worst_quality]; } else { if (rc->avg_frame_qindex[KEY_FRAME] < active_worst_quality) - active_best_quality = inter_minq[rc->avg_frame_qindex[KEY_FRAME]]; + active_best_quality = rtc_minq[rc->avg_frame_qindex[KEY_FRAME]]; else - active_best_quality = inter_minq[active_worst_quality]; + active_best_quality = rtc_minq[active_worst_quality]; } } @@ -972,11 +974,7 @@ int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi, q = rc_pick_q_and_bounds_two_pass(cpi, bottom_index, top_index); } - // Q of 0 is disabled because we force tx size to be - // 16x16... if (cpi->sf.use_nonrd_pick_mode) { - if (q == 0) - q++; if (cpi->sf.force_frame_boost == 1) q -= cpi->sf.max_delta_qindex; @@ -1149,10 +1147,6 @@ void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) { cpi->rc.frames_to_key--; } -static int test_for_kf_one_pass(VP9_COMP *cpi) { - // Placeholder function for auto key frame - return 0; -} // Use this macro to turn on/off use of alt-refs in one-pass mode. 
#define USE_ALTREF_FOR_ONE_PASS 1 @@ -1184,11 +1178,12 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; int target; + // TODO(yaowu): replace the "auto_key && 0" below with proper decision logic. if (!cpi->refresh_alt_ref_frame && (cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY) || rc->frames_to_key == 0 || - (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) { + (cpi->oxcf.auto_key && 0))) { cm->frame_type = KEY_FRAME; rc->this_key_frame_forced = cm->current_video_frame != 0 && rc->frames_to_key == 0; @@ -1315,10 +1310,11 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; int target; + // TODO(yaowu): replace the "auto_key && 0" below with proper decision logic. if ((cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY) || rc->frames_to_key == 0 || - (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) { + (cpi->oxcf.auto_key && 0))) { cm->frame_type = KEY_FRAME; rc->this_key_frame_forced = cm->current_video_frame != 0 && rc->frames_to_key == 0; diff --git a/source/libvpx/vp9/encoder/vp9_rdopt.c b/source/libvpx/vp9/encoder/vp9_rdopt.c index f4def1e..64f3e5a 100644 --- a/source/libvpx/vp9/encoder/vp9_rdopt.c +++ b/source/libvpx/vp9/encoder/vp9_rdopt.c @@ -745,7 +745,8 @@ static void txfm_rd_in_plane(MACROBLOCK *x, int use_fast_coef_casting) { MACROBLOCKD *const xd = &x->e_mbd; const struct macroblockd_plane *const pd = &xd->plane[plane]; - struct rdcost_block_args args = { 0 }; + struct rdcost_block_args args; + vp9_zero(args); args.x = x; args.best_rd = ref_best_rd; args.use_fast_coef_costing = use_fast_coef_casting; @@ -806,7 +807,7 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, {INT64_MAX, INT64_MAX}, {INT64_MAX, INT64_MAX}, {INT64_MAX, INT64_MAX}}; - int n, m; + TX_SIZE n, m; int s0, s1; const TX_SIZE max_mode_tx_size = 
tx_mode_to_biggest_tx_size[cm->tx_mode]; int64_t best_rd = INT64_MAX; @@ -889,7 +890,7 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x, {INT64_MAX, INT64_MAX}, {INT64_MAX, INT64_MAX}, {INT64_MAX, INT64_MAX}}; - int n, m; + TX_SIZE n, m; int s0, s1; double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00}; const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode]; @@ -961,7 +962,7 @@ static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, vp9_subtract_plane(x, bs, 0); - if (cpi->sf.tx_size_search_method == USE_LARGESTALL) { + if (cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless) { vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t)); choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse, ref_best_rd, bs); @@ -999,7 +1000,7 @@ static void intra_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; assert(bs == mbmi->sb_type); - if (cpi->sf.tx_size_search_method != USE_FULL_RD) { + if (cpi->sf.tx_size_search_method != USE_FULL_RD || xd->lossless) { vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t)); choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse, ref_best_rd, bs); @@ -2312,7 +2313,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd = &x->e_mbd; const VP9_COMMON *cm = &cpi->common; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; - struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; + struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}}; int bestsme = INT_MAX; int step_param; int sadpb = x->sadperbit16; @@ -2514,8 +2515,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb, search_range, &cpi->fn_ptr[bsize], - &ref_mv[id].as_mv, second_pred, - pw, ph); + &ref_mv[id].as_mv, second_pred); if (bestsme < INT_MAX) bestsme = vp9_get_mvpred_av_var(x, &tmp_mv, &ref_mv[id].as_mv, second_pred, &cpi->fn_ptr[bsize], 1); @@ -3069,7 +3069,7 @@ int64_t 
vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int64_t best_pred_rd[REFERENCE_MODES]; int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS]; int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; - MB_MODE_INFO best_mbmode = { 0 }; + MB_MODE_INFO best_mbmode; int mode_index, best_mode_index = -1; unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; vp9_prob comp_mode_p; @@ -3095,7 +3095,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, const int intra_y_mode_mask = cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]]; int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize]; - + vp9_zero(best_mbmode); x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp, @@ -3678,7 +3678,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int64_t best_pred_rd[REFERENCE_MODES]; int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS]; int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; - MB_MODE_INFO best_mbmode = { 0 }; + MB_MODE_INFO best_mbmode; int ref_index, best_ref_index = 0; unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; vp9_prob comp_mode_p; @@ -3698,6 +3698,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4); + vp9_zero(best_mbmode); for (i = 0; i < 4; i++) { int j; diff --git a/source/libvpx/vp9/encoder/vp9_segmentation.c b/source/libvpx/vp9/encoder/vp9_segmentation.c index 7537d1b..574df62 100644 --- a/source/libvpx/vp9/encoder/vp9_segmentation.c +++ b/source/libvpx/vp9/encoder/vp9_segmentation.c @@ -109,7 +109,7 @@ static int cost_segmap(int *segcounts, vp9_prob *probs) { } static void count_segs(VP9_COMP *cpi, const TileInfo *const tile, - MODE_INFO **mi_8x8, + MODE_INFO **mi, int *no_pred_segcounts, int 
(*temporal_predictor_count)[2], int *t_unpred_seg_counts, @@ -121,7 +121,7 @@ static void count_segs(VP9_COMP *cpi, const TileInfo *const tile, if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - xd->mi = mi_8x8; + xd->mi = mi; segment_id = xd->mi[0]->mbmi.segment_id; set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); @@ -131,7 +131,7 @@ static void count_segs(VP9_COMP *cpi, const TileInfo *const tile, // Temporal prediction not allowed on key frames if (cm->frame_type != KEY_FRAME) { - const BLOCK_SIZE bsize = mi_8x8[0]->mbmi.sb_type; + const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; // Test to see if the segment id matches the predicted value. const int pred_segment_id = vp9_get_segment_id(cm, cm->last_frame_seg_map, bsize, mi_row, mi_col); @@ -143,14 +143,14 @@ static void count_segs(VP9_COMP *cpi, const TileInfo *const tile, xd->mi[0]->mbmi.seg_id_predicted = pred_flag; temporal_predictor_count[pred_context][pred_flag]++; + // Update the "unpredicted" segment count if (!pred_flag) - // Update the "unpredicted" segment count t_unpred_seg_counts[segment_id]++; } } static void count_segs_sb(VP9_COMP *cpi, const TileInfo *const tile, - MODE_INFO **mi_8x8, + MODE_INFO **mi, int *no_pred_segcounts, int (*temporal_predictor_count)[2], int *t_unpred_seg_counts, @@ -164,22 +164,22 @@ static void count_segs_sb(VP9_COMP *cpi, const TileInfo *const tile, if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - bw = num_8x8_blocks_wide_lookup[mi_8x8[0]->mbmi.sb_type]; - bh = num_8x8_blocks_high_lookup[mi_8x8[0]->mbmi.sb_type]; + bw = num_8x8_blocks_wide_lookup[mi[0]->mbmi.sb_type]; + bh = num_8x8_blocks_high_lookup[mi[0]->mbmi.sb_type]; if (bw == bs && bh == bs) { - count_segs(cpi, tile, mi_8x8, no_pred_segcounts, temporal_predictor_count, + count_segs(cpi, tile, mi, no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts, bs, bs, mi_row, mi_col); } else if (bw == bs && bh < bs) { - count_segs(cpi, tile, mi_8x8, 
no_pred_segcounts, temporal_predictor_count, + count_segs(cpi, tile, mi, no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts, bs, hbs, mi_row, mi_col); - count_segs(cpi, tile, mi_8x8 + hbs * mis, no_pred_segcounts, + count_segs(cpi, tile, mi + hbs * mis, no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts, bs, hbs, mi_row + hbs, mi_col); } else if (bw < bs && bh == bs) { - count_segs(cpi, tile, mi_8x8, no_pred_segcounts, temporal_predictor_count, + count_segs(cpi, tile, mi, no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts, hbs, bs, mi_row, mi_col); - count_segs(cpi, tile, mi_8x8 + hbs, + count_segs(cpi, tile, mi + hbs, no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts, hbs, bs, mi_row, mi_col + hbs); } else { @@ -192,7 +192,7 @@ static void count_segs_sb(VP9_COMP *cpi, const TileInfo *const tile, const int mi_dc = hbs * (n & 1); const int mi_dr = hbs * (n >> 1); - count_segs_sb(cpi, tile, &mi_8x8[mi_dr * mis + mi_dc], + count_segs_sb(cpi, tile, &mi[mi_dr * mis + mi_dc], no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts, mi_row + mi_dr, mi_col + mi_dc, subsize); @@ -217,9 +217,6 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { vp9_prob t_pred_tree[SEG_TREE_PROBS]; vp9_prob t_nopred_prob[PREDICTION_PROBS]; - const int mis = cm->mi_stride; - MODE_INFO **mi_ptr, **mi; - // Set default state for the segment tree probabilities and the // temporal coding probabilities vpx_memset(seg->tree_probs, 255, sizeof(seg->tree_probs)); @@ -229,12 +226,13 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { // predicts this one for (tile_col = 0; tile_col < 1 << cm->log2_tile_cols; tile_col++) { TileInfo tile; - + MODE_INFO **mi_ptr; vp9_tile_init(&tile, cm, 0, tile_col); + mi_ptr = cm->mi_grid_visible + tile.mi_col_start; for (mi_row = 0; mi_row < cm->mi_rows; - mi_row += 8, mi_ptr += 8 * mis) { - mi = mi_ptr; + mi_row += 8, mi_ptr += 8 * cm->mi_stride) { + MODE_INFO **mi = mi_ptr; for (mi_col = 
tile.mi_col_start; mi_col < tile.mi_col_end; mi_col += 8, mi += 8) count_segs_sb(cpi, &tile, mi, no_pred_segcounts, diff --git a/source/libvpx/vp9/encoder/vp9_speed_features.h b/source/libvpx/vp9/encoder/vp9_speed_features.h index a384a43..46806c9 100644 --- a/source/libvpx/vp9/encoder/vp9_speed_features.h +++ b/source/libvpx/vp9/encoder/vp9_speed_features.h @@ -176,7 +176,7 @@ typedef struct SPEED_FEATURES { // a log search that iterates 4 times (check around mv for last for best // error of combined predictor then check around mv for alt). If 0 we // we just use the best motion vector found for each frame by itself. - int comp_inter_joint_search_thresh; + BLOCK_SIZE comp_inter_joint_search_thresh; // This variable is used to cap the maximum number of times we skip testing a // mode to be evaluated. A high value means we will be faster. diff --git a/source/libvpx/vp9/encoder/vp9_tokenize.c b/source/libvpx/vp9/encoder/vp9_tokenize.c index 8ce98d9..17214c3 100644 --- a/source/libvpx/vp9/encoder/vp9_tokenize.c +++ b/source/libvpx/vp9/encoder/vp9_tokenize.c @@ -232,7 +232,6 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, cpi->common.fc.coef_probs[tx_size][type][ref]; unsigned int (*const eob_branch)[COEFF_CONTEXTS] = cpi->common.counts.eob_branch[tx_size][type][ref]; - const uint8_t *const band = get_band_translate(tx_size); const int seg_eob = get_tx_eob(&cpi->common.seg, segment_id, tx_size); @@ -294,6 +293,8 @@ static void is_skippable(int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *argv) { struct is_skippable_args *args = argv; + (void)plane_bsize; + (void)tx_size; args->skippable[0] &= (!args->x->plane[plane].eobs[block]); } diff --git a/source/libvpx/vp9/encoder/x86/vp9_dct_mmx.asm b/source/libvpx/vp9/encoder/x86/vp9_dct_mmx.asm new file mode 100644 index 0000000..f71181c --- /dev/null +++ b/source/libvpx/vp9/encoder/x86/vp9_dct_mmx.asm @@ -0,0 +1,70 @@ +; +; Copyright (c) 2014 The WebM project authors. 
All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; +%include "third_party/x86inc/x86inc.asm" + +SECTION .text + +%macro TRANSFORM_COLS 0 + paddw m0, m1 + movq m4, m0 + psubw m3, m2 + psubw m4, m3 + psraw m4, 1 + movq m5, m4 + psubw m5, m1 ;b1 + psubw m4, m2 ;c1 + psubw m0, m4 + paddw m3, m5 + ; m0 a0 + SWAP 1, 4 ; m1 c1 + SWAP 2, 3 ; m2 d1 + SWAP 3, 5 ; m3 b1 +%endmacro + +%macro TRANSPOSE_4X4 0 + movq m4, m0 + movq m5, m2 + punpcklwd m4, m1 + punpckhwd m0, m1 + punpcklwd m5, m3 + punpckhwd m2, m3 + movq m1, m4 + movq m3, m0 + punpckldq m1, m5 + punpckhdq m4, m5 + punpckldq m3, m2 + punpckhdq m0, m2 + SWAP 2, 3, 0, 1, 4 +%endmacro + +INIT_MMX mmx +cglobal fwht4x4, 3, 4, 8, input, output, stride + lea r3q, [inputq + strideq*4] + movq m0, [inputq] ;a1 + movq m1, [inputq + strideq*2] ;b1 + movq m2, [r3q] ;c1 + movq m3, [r3q + strideq*2] ;d1 + + TRANSFORM_COLS + TRANSPOSE_4X4 + TRANSFORM_COLS + TRANSPOSE_4X4 + + psllw m0, 2 + psllw m1, 2 + psllw m2, 2 + psllw m3, 2 + + movq [outputq], m0 + movq [outputq + 8], m1 + movq [outputq + 16], m2 + movq [outputq + 24], m3 + + RET diff --git a/source/libvpx/vp9/encoder/x86/vp9_error_intrin_avx2.c b/source/libvpx/vp9/encoder/x86/vp9_error_intrin_avx2.c new file mode 100644 index 0000000..c67490f --- /dev/null +++ b/source/libvpx/vp9/encoder/x86/vp9_error_intrin_avx2.c @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Usee of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <immintrin.h> // AVX2 +#include "vpx/vpx_integer.h" + + +int64_t vp9_block_error_avx2(const int16_t *coeff, + const int16_t *dqcoeff, + intptr_t block_size, + int64_t *ssz) { + __m256i sse_reg, ssz_reg, coeff_reg, dqcoeff_reg; + __m256i exp_dqcoeff_lo, exp_dqcoeff_hi, exp_coeff_lo, exp_coeff_hi; + __m256i sse_reg_64hi, ssz_reg_64hi; + __m128i sse_reg128, ssz_reg128; + int64_t sse; + int i; + const __m256i zero_reg = _mm256_set1_epi16(0); + + // init sse and ssz registerd to zero + sse_reg = _mm256_set1_epi16(0); + ssz_reg = _mm256_set1_epi16(0); + + for (i = 0 ; i < block_size ; i+= 16) { + // load 32 bytes from coeff and dqcoeff + coeff_reg = _mm256_loadu_si256((const __m256i *)(coeff + i)); + dqcoeff_reg = _mm256_loadu_si256((const __m256i *)(dqcoeff + i)); + // dqcoeff - coeff + dqcoeff_reg = _mm256_sub_epi16(dqcoeff_reg, coeff_reg); + // madd (dqcoeff - coeff) + dqcoeff_reg = _mm256_madd_epi16(dqcoeff_reg, dqcoeff_reg); + // madd coeff + coeff_reg = _mm256_madd_epi16(coeff_reg, coeff_reg); + // expand each double word of madd (dqcoeff - coeff) to quad word + exp_dqcoeff_lo = _mm256_unpacklo_epi32(dqcoeff_reg, zero_reg); + exp_dqcoeff_hi = _mm256_unpackhi_epi32(dqcoeff_reg, zero_reg); + // expand each double word of madd (coeff) to quad word + exp_coeff_lo = _mm256_unpacklo_epi32(coeff_reg, zero_reg); + exp_coeff_hi = _mm256_unpackhi_epi32(coeff_reg, zero_reg); + // add each quad word of madd (dqcoeff - coeff) and madd (coeff) + sse_reg = _mm256_add_epi64(sse_reg, exp_dqcoeff_lo); + ssz_reg = _mm256_add_epi64(ssz_reg, exp_coeff_lo); + sse_reg = _mm256_add_epi64(sse_reg, exp_dqcoeff_hi); + ssz_reg = _mm256_add_epi64(ssz_reg, exp_coeff_hi); + } + // save the higher 64 bit of each 128 bit lane + sse_reg_64hi = _mm256_srli_si256(sse_reg, 8); + ssz_reg_64hi = _mm256_srli_si256(ssz_reg, 8); + // add the higher 64 bit to the low 64 bit + sse_reg = 
_mm256_add_epi64(sse_reg, sse_reg_64hi); + ssz_reg = _mm256_add_epi64(ssz_reg, ssz_reg_64hi); + + // add each 64 bit from each of the 128 bit lane of the 256 bit + sse_reg128 = _mm_add_epi64(_mm256_castsi256_si128(sse_reg), + _mm256_extractf128_si256(sse_reg, 1)); + + ssz_reg128 = _mm_add_epi64(_mm256_castsi256_si128(ssz_reg), + _mm256_extractf128_si256(ssz_reg, 1)); + + // store the results + _mm_storel_epi64((__m128i*)(&sse), sse_reg128); + + _mm_storel_epi64((__m128i*)(ssz), ssz_reg128); + return sse; +} diff --git a/source/libvpx/vp9/vp9_common.mk b/source/libvpx/vp9/vp9_common.mk index eaff60a..3b4d6b9 100644 --- a/source/libvpx/vp9/vp9_common.mk +++ b/source/libvpx/vp9/vp9_common.mk @@ -124,28 +124,28 @@ ifeq ($(ARCH_X86_64), yes) VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_idct_ssse3.asm endif -VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve_neon.c -VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct16x16_neon.c -VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_16_neon.c -VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve8_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve8_avg_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_16_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_dc_only_idct_add_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct4x4_1_add_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct4x4_add_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct8x8_1_add_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct8x8_add_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct16x16_1_add_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct16x16_add_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct32x32_1_add_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_NEON) += 
common/arm/neon/vp9_idct32x32_add_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht8x8_add_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_mb_lpf_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_copy_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_avg_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_save_reg_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_reconintra_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_convolve_neon.c +VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct16x16_neon.c +VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_loopfilter_16_neon.c +VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_convolve8_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_convolve8_avg_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_loopfilter_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_loopfilter_16_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_dc_only_idct_add_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct4x4_1_add_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct4x4_add_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct8x8_1_add_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct8x8_add_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct16x16_1_add_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct16x16_add_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct32x32_1_add_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct32x32_add_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_iht4x4_add_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_iht8x8_add_neon$(ASM) 
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_mb_lpf_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_copy_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_avg_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_save_reg_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_reconintra_neon$(ASM) $(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.pl)) diff --git a/source/libvpx/vp9/vp9_cx_iface.c b/source/libvpx/vp9/vp9_cx_iface.c index 449e7d8..2a3964a 100644 --- a/source/libvpx/vp9/vp9_cx_iface.c +++ b/source/libvpx/vp9/vp9_cx_iface.c @@ -42,7 +42,7 @@ struct vp9_extracfg { }; struct extraconfig_map { - int usage; + unsigned int usage; struct vp9_extracfg cfg; }; @@ -245,7 +245,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, layer_id = (int)stats->spatial_layer_id; if (layer_id >= cfg->ss_number_layers - ||(int)(stats->count + 0.5) != n_packets_per_layer[layer_id] - 1) + ||(unsigned int)(stats->count + 0.5) != + n_packets_per_layer[layer_id] - 1) ERROR("rc_twopass_stats_in missing EOS stats packet"); } } else { @@ -823,7 +824,7 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, } // Add the frame packet to the list of returned packets. 
- round = (vpx_codec_pts_t)1000000 * ctx->cfg.g_timebase.num / 2 - 1; + round = (vpx_codec_pts_t)10000000 * ctx->cfg.g_timebase.num / 2 - 1; delta = (dst_end_time_stamp - dst_time_stamp); pkt.kind = VPX_CODEC_CX_FRAME_PKT; pkt.data.frame.pts = @@ -1003,7 +1004,8 @@ static vpx_codec_err_t ctrl_set_active_map(vpx_codec_alg_priv_t *ctx, vpx_active_map_t *const map = va_arg(args, vpx_active_map_t *); if (map) { - if (!vp9_set_active_map(ctx->cpi, map->active_map, map->rows, map->cols)) + if (!vp9_set_active_map(ctx->cpi, map->active_map, + (int)map->rows, (int)map->cols)) return VPX_CODEC_OK; else return VPX_CODEC_INVALID_PARAM; diff --git a/source/libvpx/vp9/vp9_dx_iface.c b/source/libvpx/vp9/vp9_dx_iface.c index 06b4823..1d29815 100644 --- a/source/libvpx/vp9/vp9_dx_iface.c +++ b/source/libvpx/vp9/vp9_dx_iface.c @@ -32,21 +32,12 @@ struct vpx_codec_alg_priv { vpx_codec_priv_t base; vpx_codec_dec_cfg_t cfg; vp9_stream_info_t si; - int decoder_init; struct VP9Decoder *pbi; int postproc_cfg_set; vp8_postproc_cfg_t postproc_cfg; -#if CONFIG_POSTPROC_VISUALIZER - unsigned int dbg_postproc_flag; - int dbg_color_ref_frame_flag; - int dbg_color_mb_modes_flag; - int dbg_color_b_modes_flag; - int dbg_display_mv_flag; -#endif vpx_decrypt_cb decrypt_cb; void *decrypt_state; vpx_image_t img; - int img_setup; int img_avail; int invert_tile_order; @@ -226,36 +217,20 @@ static void set_default_ppflags(vp8_postproc_cfg_t *cfg) { static void set_ppflags(const vpx_codec_alg_priv_t *ctx, vp9_ppflags_t *flags) { flags->post_proc_flag = -#if CONFIG_POSTPROC_VISUALIZER - (ctx->dbg_color_ref_frame_flag ? VP9D_DEBUG_CLR_FRM_REF_BLKS : 0) | - (ctx->dbg_color_mb_modes_flag ? VP9D_DEBUG_CLR_BLK_MODES : 0) | - (ctx->dbg_color_b_modes_flag ? VP9D_DEBUG_CLR_BLK_MODES : 0) | - (ctx->dbg_display_mv_flag ? 
VP9D_DEBUG_DRAW_MV : 0) | -#endif ctx->postproc_cfg.post_proc_flag; flags->deblocking_level = ctx->postproc_cfg.deblocking_level; flags->noise_level = ctx->postproc_cfg.noise_level; -#if CONFIG_POSTPROC_VISUALIZER - flags->display_ref_frame_flag = ctx->dbg_color_ref_frame_flag; - flags->display_mb_modes_flag = ctx->dbg_color_mb_modes_flag; - flags->display_b_modes_flag = ctx->dbg_color_b_modes_flag; - flags->display_mv_flag = ctx->dbg_display_mv_flag; -#endif } static void init_decoder(vpx_codec_alg_priv_t *ctx) { - VP9DecoderConfig oxcf; - oxcf.width = ctx->si.w; - oxcf.height = ctx->si.h; - oxcf.version = 9; - oxcf.max_threads = ctx->cfg.threads; - oxcf.inv_tile_order = ctx->invert_tile_order; - - ctx->pbi = vp9_decoder_create(&oxcf); + ctx->pbi = vp9_decoder_create(); if (ctx->pbi == NULL) return; + ctx->pbi->max_threads = ctx->cfg.threads; + ctx->pbi->inv_tile_order = ctx->invert_tile_order; + vp9_initialize_dec(); // If postprocessing was enabled by the application and a @@ -289,12 +264,10 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, } // Initialize the decoder instance on the first frame - if (!ctx->decoder_init) { + if (ctx->pbi == NULL) { init_decoder(ctx); if (ctx->pbi == NULL) return VPX_CODEC_ERROR; - - ctx->decoder_init = 1; } // Set these even if already initialized. The caller may have changed the @@ -375,80 +348,70 @@ static void parse_superframe_index(const uint8_t *data, size_t data_sz, } } +static vpx_codec_err_t decode_one_iter(vpx_codec_alg_priv_t *ctx, + const uint8_t **data_start_ptr, + const uint8_t *data_end, + uint32_t frame_size, void *user_priv, + long deadline) { + const vpx_codec_err_t res = decode_one(ctx, data_start_ptr, frame_size, + user_priv, deadline); + if (res != VPX_CODEC_OK) + return res; + + // Account for suboptimal termination by the encoder. 
+ while (*data_start_ptr < data_end) { + const uint8_t marker = read_marker(ctx->decrypt_cb, ctx->decrypt_state, + *data_start_ptr); + if (marker) + break; + (*data_start_ptr)++; + } + + return VPX_CODEC_OK; +} + static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx, const uint8_t *data, unsigned int data_sz, void *user_priv, long deadline) { const uint8_t *data_start = data; - const uint8_t *data_end = data + data_sz; - vpx_codec_err_t res = VPX_CODEC_OK; - uint32_t sizes[8]; - int frames_this_pts, frame_count = 0; + const uint8_t *const data_end = data + data_sz; + vpx_codec_err_t res; + uint32_t frame_sizes[8]; + int frame_count; if (data == NULL || data_sz == 0) return VPX_CODEC_INVALID_PARAM; - parse_superframe_index(data, data_sz, sizes, &frames_this_pts, + parse_superframe_index(data, data_sz, frame_sizes, &frame_count, ctx->decrypt_cb, ctx->decrypt_state); - do { - if (data_sz) { - uint8_t marker = read_marker(ctx->decrypt_cb, ctx->decrypt_state, - data_start); - // Skip over the superframe index, if present - if ((marker & 0xe0) == 0xc0) { - const uint32_t frames = (marker & 0x7) + 1; - const uint32_t mag = ((marker >> 3) & 0x3) + 1; - const uint32_t index_sz = 2 + mag * frames; - - if (data_sz >= index_sz) { - uint8_t marker2 = read_marker(ctx->decrypt_cb, ctx->decrypt_state, - data_start + index_sz - 1); - if (marker2 == marker) { - data_start += index_sz; - data_sz -= index_sz; - if (data_start < data_end) - continue; - else - break; - } - } - } - } - - // Use the correct size for this frame, if an index is present. 
- if (frames_this_pts) { - uint32_t this_sz = sizes[frame_count]; + if (frame_count > 0) { + int i; - if (data_sz < this_sz) { + for (i = 0; i < frame_count; ++i) { + const uint32_t frame_size = frame_sizes[i]; + if (data_start < data || + frame_size > (uint32_t)(data_end - data_start)) { ctx->base.err_detail = "Invalid frame size in index"; return VPX_CODEC_CORRUPT_FRAME; } - data_sz = this_sz; - frame_count++; + res = decode_one_iter(ctx, &data_start, data_end, frame_size, + user_priv, deadline); + if (res != VPX_CODEC_OK) + return res; } - - res = decode_one(ctx, &data_start, data_sz, user_priv, deadline); - assert(data_start >= data); - assert(data_start <= data_end); - - // Early exit if there was a decode error - if (res) - break; - - // Account for suboptimal termination by the encoder. + } else { while (data_start < data_end) { - uint8_t marker3 = read_marker(ctx->decrypt_cb, ctx->decrypt_state, - data_start); - if (marker3) - break; - data_start++; + res = decode_one_iter(ctx, &data_start, data_end, + (uint32_t)(data_end - data_start), + user_priv, deadline); + if (res != VPX_CODEC_OK) + return res; } + } - data_sz = (unsigned int)(data_end - data_start); - } while (data_start < data_end); - - return res; + return VPX_CODEC_OK; } static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx, @@ -553,22 +516,7 @@ static vpx_codec_err_t ctrl_set_postproc(vpx_codec_alg_priv_t *ctx, static vpx_codec_err_t ctrl_set_dbg_options(vpx_codec_alg_priv_t *ctx, int ctrl_id, va_list args) { -#if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC - int data = va_arg(args, int); - -#define MAP(id, var) case id: var = data; break; - - switch (ctrl_id) { - MAP(VP8_SET_DBG_COLOR_REF_FRAME, ctx->dbg_color_ref_frame_flag); - MAP(VP8_SET_DBG_COLOR_MB_MODES, ctx->dbg_color_mb_modes_flag); - MAP(VP8_SET_DBG_COLOR_B_MODES, ctx->dbg_color_b_modes_flag); - MAP(VP8_SET_DBG_DISPLAY_MV, ctx->dbg_display_mv_flag); - } - - return VPX_CODEC_OK; -#else return VPX_CODEC_INCAPABLE; -#endif } 
static vpx_codec_err_t ctrl_get_last_ref_updates(vpx_codec_alg_priv_t *ctx, diff --git a/source/libvpx/vp9/vp9cx.mk b/source/libvpx/vp9/vp9cx.mk index 5e88793..6e5c521 100644 --- a/source/libvpx/vp9/vp9cx.mk +++ b/source/libvpx/vp9/vp9cx.mk @@ -101,7 +101,9 @@ VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm VP9_CX_SRCS-$(HAVE_SSE3) += encoder/x86/vp9_sad_sse3.asm ifeq ($(CONFIG_USE_X86INC),yes) +VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm +VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_error_intrin_avx2.c VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad_sse2.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subtract_sse2.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_sse2.c diff --git a/source/libvpx/vpx/src/svc_encodeframe.c b/source/libvpx/vpx/src/svc_encodeframe.c index 38c2d26..b874be7 100644 --- a/source/libvpx/vpx/src/svc_encodeframe.c +++ b/source/libvpx/vpx/src/svc_encodeframe.c @@ -234,7 +234,8 @@ static void svc_log_reset(SvcContext *svc_ctx) { si->message_buffer[0] = '\0'; } -static int svc_log(SvcContext *svc_ctx, int level, const char *fmt, ...) { +static int svc_log(SvcContext *svc_ctx, SVC_LOG_LEVEL level, + const char *fmt, ...) 
{ char buf[512]; int retval = 0; va_list ap; diff --git a/source/libvpx/vpx_scale/vpx_scale.mk b/source/libvpx/vpx_scale/vpx_scale.mk index ded8e0b..95e7483 100644 --- a/source/libvpx/vpx_scale/vpx_scale.mk +++ b/source/libvpx/vpx_scale/vpx_scale.mk @@ -10,10 +10,10 @@ SCALE_SRCS-yes += vpx_scale_rtcd.c SCALE_SRCS-yes += vpx_scale_rtcd.pl #neon -SCALE_SRCS-$(HAVE_NEON) += arm/neon/vp8_vpxyv12_copyframe_func_neon$(ASM) -SCALE_SRCS-$(HAVE_NEON) += arm/neon/vp8_vpxyv12_copysrcframe_func_neon$(ASM) -SCALE_SRCS-$(HAVE_NEON) += arm/neon/vp8_vpxyv12_extendframeborders_neon$(ASM) -SCALE_SRCS-$(HAVE_NEON) += arm/neon/yv12extend_arm.c +SCALE_SRCS-$(HAVE_NEON_ASM) += arm/neon/vp8_vpxyv12_copyframe_func_neon$(ASM) +SCALE_SRCS-$(HAVE_NEON_ASM) += arm/neon/vp8_vpxyv12_copysrcframe_func_neon$(ASM) +SCALE_SRCS-$(HAVE_NEON_ASM) += arm/neon/vp8_vpxyv12_extendframeborders_neon$(ASM) +SCALE_SRCS-$(HAVE_NEON_ASM) += arm/neon/yv12extend_arm.c #mips(dspr2) SCALE_SRCS-$(HAVE_DSPR2) += mips/dspr2/yv12extend_dspr2.c diff --git a/source/libvpx/vpx_scale/vpx_scale_rtcd.pl b/source/libvpx/vpx_scale/vpx_scale_rtcd.pl index 8c92570..2e3f1ff 100644 --- a/source/libvpx/vpx_scale/vpx_scale_rtcd.pl +++ b/source/libvpx/vpx_scale/vpx_scale_rtcd.pl @@ -17,10 +17,12 @@ if (vpx_config("CONFIG_SPATIAL_RESAMPLING") eq "yes") { } add_proto qw/void vp8_yv12_extend_frame_borders/, "struct yv12_buffer_config *ybf"; -specialize qw/vp8_yv12_extend_frame_borders neon/; +specialize qw/vp8_yv12_extend_frame_borders neon_asm/; +$vp8_yv12_extend_frame_borders_neon_asm=vp8_yv12_extend_frame_borders_neon; add_proto qw/void vp8_yv12_copy_frame/, "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"; -specialize qw/vp8_yv12_copy_frame neon/; +specialize qw/vp8_yv12_copy_frame neon_asm/; +$vp8_yv12_copy_frame_neon_asm=vp8_yv12_copy_frame_neon; add_proto qw/void vpx_yv12_copy_y/, "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"; diff --git a/source/libvpx/vpxdec.c 
b/source/libvpx/vpxdec.c index 6356961..ed37c70 100644 --- a/source/libvpx/vpxdec.c +++ b/source/libvpx/vpxdec.c @@ -33,7 +33,9 @@ #include "./md5_utils.h" #include "./tools_common.h" +#if CONFIG_WEBM_IO #include "./webmdec.h" +#endif #include "./y4menc.h" static const char *exec_name; @@ -528,9 +530,11 @@ int main_loop(int argc, const char **argv_) { struct VpxDecInputContext input = {0}; struct VpxInputContext vpx_input_ctx = {0}; +#if CONFIG_WEBM_IO struct WebmInputContext webm_ctx = {0}; - input.vpx_input_ctx = &vpx_input_ctx; input.webm_ctx = &webm_ctx; +#endif + input.vpx_input_ctx = &vpx_input_ctx; /* Parse command line */ exec_name = argv_[0]; diff --git a/source/libvpx/vpxenc.c b/source/libvpx/vpxenc.c index 8e8ed23..96a7ab6 100644 --- a/source/libvpx/vpxenc.c +++ b/source/libvpx/vpxenc.c @@ -42,7 +42,9 @@ #include "./rate_hist.h" #include "./vpxstats.h" #include "./warnings.h" +#if CONFIG_WEBM_IO #include "./webmenc.h" +#endif #include "./y4minput.h" /* Swallow warnings about unused results of fread/fwrite */ @@ -207,6 +209,7 @@ static const arg_def_t width = ARG_DEF("w", "width", 1, "Frame width"); static const arg_def_t height = ARG_DEF("h", "height", 1, "Frame height"); +#if CONFIG_WEBM_IO static const struct arg_enum_list stereo_mode_enum[] = { {"mono", STEREO_FORMAT_MONO}, {"left-right", STEREO_FORMAT_LEFT_RIGHT}, @@ -217,6 +220,7 @@ static const struct arg_enum_list stereo_mode_enum[] = { }; static const arg_def_t stereo_mode = ARG_DEF_ENUM(NULL, "stereo-mode", 1, "Stereo 3D video format", stereo_mode_enum); +#endif static const arg_def_t timebase = ARG_DEF(NULL, "timebase", 1, "Output timestamp precision (fractional seconds)"); static const arg_def_t error_resilient = ARG_DEF(NULL, "error-resilient", 1, @@ -226,7 +230,11 @@ static const arg_def_t lag_in_frames = ARG_DEF(NULL, "lag-in-frames", 1, static const arg_def_t *global_args[] = { &use_yv12, &use_i420, &usage, &threads, &profile, - &width, &height, &stereo_mode, &timebase, &framerate, + 
&width, &height, +#if CONFIG_WEBM_IO + &stereo_mode, +#endif + &timebase, &framerate, &error_resilient, &lag_in_frames, NULL }; @@ -554,6 +562,11 @@ static int compare_img(const vpx_image_t *const img1, NELEMENTS(vp9_arg_ctrl_map)) #endif +#if !CONFIG_WEBM_IO +typedef int stereo_format_t; +struct EbmlGlobal { int debug; }; +#endif + /* Per-stream configuration */ struct stream_config { struct vpx_codec_enc_cfg cfg; @@ -792,9 +805,9 @@ static struct stream_state *new_stream(struct VpxEncoderConfig *global, stream->config.cfg.g_h = 0; /* Initialize remaining stream parameters */ - stream->config.stereo_fmt = STEREO_FORMAT_MONO; stream->config.write_webm = 1; #if CONFIG_WEBM_IO + stream->config.stereo_fmt = STEREO_FORMAT_MONO; stream->ebml.last_pts_ns = -1; stream->ebml.writer = NULL; stream->ebml.segment = NULL; @@ -869,8 +882,10 @@ static int parse_stream_params(struct VpxEncoderConfig *global, config->cfg.g_w = arg_parse_uint(&arg); } else if (arg_match(&arg, &height, argi)) { config->cfg.g_h = arg_parse_uint(&arg); +#if CONFIG_WEBM_IO } else if (arg_match(&arg, &stereo_mode, argi)) { config->stereo_fmt = arg_parse_enum_or_int(&arg); +#endif } else if (arg_match(&arg, &timebase, argi)) { config->cfg.g_timebase = arg_parse_rational(&arg); validate_positive_rational(arg.name, &config->cfg.g_timebase); diff --git a/unpack_lib_posix.gypi b/unpack_lib_posix.gypi index 3716314..f26ea13 100644 --- a/unpack_lib_posix.gypi +++ b/unpack_lib_posix.gypi @@ -30,7 +30,7 @@ 'ar_cmd': [], 'conditions': [ ['android_webview_build==1', { - 'ar_cmd': ['-r', '$(realpath $($(GYP_VAR_PREFIX)TARGET_AR))'], + 'ar_cmd': ['-r', '$(abspath $($(gyp_var_prefix)TARGET_AR))'], }], ], }, diff --git a/update_libvpx.sh b/update_libvpx.sh index 2328add..c00b3b0 100755 --- a/update_libvpx.sh +++ b/update_libvpx.sh @@ -11,7 +11,7 @@ # Usage: # -# $ ./update_libvpx.sh [branch | revision | file containing a revision] +# $ ./update_libvpx.sh [branch | revision | file or url containing a revision] # When 
specifying a branch it may be necessary to prefix with origin/ # Tools required for running this tool: @@ -33,6 +33,8 @@ if [ -n "$1" ]; then GIT_BRANCH="$1" if [ -f "$1" ]; then GIT_BRANCH=$(<"$1") + elif [[ $1 = http* ]]; then + GIT_BRANCH=`curl $1` fi fi |