diff options
author | Android Chromium Automerger <chromium-automerger@android> | 2014-08-21 23:33:20 +0000 |
---|---|---|
committer | Android Chromium Automerger <chromium-automerger@android> | 2014-08-21 23:33:20 +0000 |
commit | 9ab1ab288fd34aec2bc658c25a73523b3ede2c4d (patch) | |
tree | d21d89da4efcd91623988f0c2b5a4434faaa4957 | |
parent | 454f0eb794a3da5dca23d04f1fd6bd4e23e3a297 (diff) | |
parent | ace65784417788374f0b19ce5a8abd06c9ccd007 (diff) | |
download | libvpx-9ab1ab288fd34aec2bc658c25a73523b3ede2c4d.tar.gz |
Merge third_party/libvpx from https://chromium.googlesource.com/chromium/deps/libvpx.git at ace65784417788374f0b19ce5a8abd06c9ccd007
This commit was generated by merge_from_chromium.py.
Change-Id: Id05fddc517eb4e1abd0acb8a601b2a09c09fb602
75 files changed, 1294 insertions, 1087 deletions
diff --git a/README.chromium b/README.chromium index 6c6c318..749aa13 100644 --- a/README.chromium +++ b/README.chromium @@ -5,9 +5,9 @@ License: BSD License File: source/libvpx/LICENSE Security Critical: yes -Date: Friday August 15 2014 +Date: Thursday August 21 2014 Branch: master -Commit: 49fef904054b668881dfb21e37bcb60d571cb2c5 +Commit: 23c88870ec514b0dd7d22b9db99ae63f46c7d87f Description: Contains the sources used to compile libvpx binaries used by Google Chrome and diff --git a/libvpx_srcs_arm64.gypi b/libvpx_srcs_arm64.gypi index a9209e1..cfaa3c4 100644 --- a/libvpx_srcs_arm64.gypi +++ b/libvpx_srcs_arm64.gypi @@ -95,6 +95,8 @@ '<(libvpx_source)/vp8/decoder/treereader.h', '<(libvpx_source)/vp8/encoder/arm/dct_arm.c', '<(libvpx_source)/vp8/encoder/arm/neon/denoising_neon.c', + '<(libvpx_source)/vp8/encoder/arm/neon/subtract_neon.c', + '<(libvpx_source)/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c', '<(libvpx_source)/vp8/encoder/arm/quantize_arm.c', '<(libvpx_source)/vp8/encoder/bitstream.c', '<(libvpx_source)/vp8/encoder/bitstream.h', diff --git a/libvpx_srcs_arm_neon.gypi b/libvpx_srcs_arm_neon.gypi index f07ea42..515251d 100644 --- a/libvpx_srcs_arm_neon.gypi +++ b/libvpx_srcs_arm_neon.gypi @@ -134,10 +134,10 @@ '<(libvpx_source)/vp8/encoder/arm/neon/fastquantizeb_neon.asm', '<(libvpx_source)/vp8/encoder/arm/neon/picklpf_arm.c', '<(libvpx_source)/vp8/encoder/arm/neon/shortfdct_neon.asm', - '<(libvpx_source)/vp8/encoder/arm/neon/subtract_neon.asm', + '<(libvpx_source)/vp8/encoder/arm/neon/subtract_neon.c', '<(libvpx_source)/vp8/encoder/arm/neon/vp8_memcpy_neon.asm', '<(libvpx_source)/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm', - '<(libvpx_source)/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm', + '<(libvpx_source)/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c', '<(libvpx_source)/vp8/encoder/arm/quantize_arm.c', '<(libvpx_source)/vp8/encoder/bitstream.c', '<(libvpx_source)/vp8/encoder/bitstream.h', diff --git a/libvpx_srcs_arm_neon_cpu_detect.gypi b/libvpx_srcs_arm_neon_cpu_detect.gypi index 30b5463..53be7d8 100644 --- a/libvpx_srcs_arm_neon_cpu_detect.gypi +++ b/libvpx_srcs_arm_neon_cpu_detect.gypi @@ -120,10 +120,8 @@ '<(libvpx_source)/vp8/encoder/arm/neon/fastquantizeb_neon.asm', '<(libvpx_source)/vp8/encoder/arm/neon/picklpf_arm.c', '<(libvpx_source)/vp8/encoder/arm/neon/shortfdct_neon.asm', - '<(libvpx_source)/vp8/encoder/arm/neon/subtract_neon.asm', '<(libvpx_source)/vp8/encoder/arm/neon/vp8_memcpy_neon.asm', '<(libvpx_source)/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm', - '<(libvpx_source)/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm', '<(libvpx_source)/vp8/encoder/arm/quantize_arm.c', '<(libvpx_source)/vp8/encoder/bitstream.c', '<(libvpx_source)/vp8/encoder/bitstream.h', diff --git a/libvpx_srcs_arm_neon_cpu_detect_intrinsics.gypi b/libvpx_srcs_arm_neon_cpu_detect_intrinsics.gypi index 7eb4e57..6d72a65 100644 --- a/libvpx_srcs_arm_neon_cpu_detect_intrinsics.gypi +++ b/libvpx_srcs_arm_neon_cpu_detect_intrinsics.gypi @@ -27,6 +27,8 @@ '<(libvpx_source)/vp8/common/arm/neon/sixtappredict_neon.c', '<(libvpx_source)/vp8/common/arm/neon/variance_neon.c', '<(libvpx_source)/vp8/encoder/arm/neon/denoising_neon.c', + '<(libvpx_source)/vp8/encoder/arm/neon/subtract_neon.c', + '<(libvpx_source)/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c', '<(libvpx_source)/vp9/common/arm/neon/vp9_convolve_neon.c', '<(libvpx_source)/vp9/common/arm/neon/vp9_idct16x16_neon.c', '<(libvpx_source)/vp9/common/arm/neon/vp9_loopfilter_16_neon.c', diff --git a/source/config/linux/arm-neon-cpu-detect/vpx_config.asm b/source/config/linux/arm-neon-cpu-detect/vpx_config.asm index 3d6c831..f8b3a15 100644 --- a/source/config/linux/arm-neon-cpu-detect/vpx_config.asm +++ b/source/config/linux/arm-neon-cpu-detect/vpx_config.asm @@ -13,6 +13,7 @@ .equ HAVE_NEON_ASM , 1 .equ HAVE_MIPS32 , 0 .equ HAVE_DSPR2 , 0 +.equ HAVE_MIPS64 , 0 .equ HAVE_MMX , 0 .equ HAVE_SSE , 0 .equ HAVE_SSE2 , 0 diff --git a/source/config/linux/arm-neon-cpu-detect/vpx_config.h b/source/config/linux/arm-neon-cpu-detect/vpx_config.h index 59a41e5..9cfd076 100644 --- a/source/config/linux/arm-neon-cpu-detect/vpx_config.h +++ b/source/config/linux/arm-neon-cpu-detect/vpx_config.h @@ -22,6 +22,7 @@ #define HAVE_NEON_ASM 1 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 +#define HAVE_MIPS64 0 #define HAVE_MMX 0 #define HAVE_SSE 0 #define HAVE_SSE2 0 diff --git a/source/config/linux/arm-neon/vpx_config.asm b/source/config/linux/arm-neon/vpx_config.asm index d60a149..a9eab27 100644 --- a/source/config/linux/arm-neon/vpx_config.asm +++ b/source/config/linux/arm-neon/vpx_config.asm @@ -13,6 +13,7 @@ .equ HAVE_NEON_ASM , 1 .equ HAVE_MIPS32 , 0 .equ HAVE_DSPR2 , 0 +.equ HAVE_MIPS64 , 0 .equ HAVE_MMX , 0 .equ HAVE_SSE , 0 .equ HAVE_SSE2 , 0 diff --git a/source/config/linux/arm-neon/vpx_config.h b/source/config/linux/arm-neon/vpx_config.h index fde4ff7..c497ddb 100644 --- a/source/config/linux/arm-neon/vpx_config.h +++ b/source/config/linux/arm-neon/vpx_config.h @@ -22,6 +22,7 @@ #define HAVE_NEON_ASM 1 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 +#define HAVE_MIPS64 0 #define HAVE_MMX 0 #define HAVE_SSE 0 #define HAVE_SSE2 0 diff --git a/source/config/linux/arm/vpx_config.asm b/source/config/linux/arm/vpx_config.asm index c183ed2..dd8be51 100644 --- a/source/config/linux/arm/vpx_config.asm +++ b/source/config/linux/arm/vpx_config.asm @@ -13,6 +13,7 @@ .equ HAVE_NEON_ASM , 0 .equ HAVE_MIPS32 , 0 .equ HAVE_DSPR2 , 0 +.equ HAVE_MIPS64 , 0 .equ HAVE_MMX , 0 .equ HAVE_SSE , 0 .equ HAVE_SSE2 , 0 diff --git a/source/config/linux/arm/vpx_config.h b/source/config/linux/arm/vpx_config.h index 4712aa5..ee5f10d 100644 --- a/source/config/linux/arm/vpx_config.h +++ b/source/config/linux/arm/vpx_config.h @@ -22,6 +22,7 @@ #define HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 +#define HAVE_MIPS64 0 #define HAVE_MMX 0 #define HAVE_SSE 0 #define HAVE_SSE2 0 diff --git a/source/config/linux/arm64/vp8_rtcd.h b/source/config/linux/arm64/vp8_rtcd.h index daf90f6..9d45b89 100644 --- a/source/config/linux/arm64/vp8_rtcd.h +++ b/source/config/linux/arm64/vp8_rtcd.h @@ -284,7 +284,8 @@ void vp8_short_inv_walsh4x4_1_c(short *input, short *output); #define vp8_short_inv_walsh4x4_1 vp8_short_inv_walsh4x4_1_c void vp8_short_walsh4x4_c(short *input, short *output, int pitch); -#define vp8_short_walsh4x4 vp8_short_walsh4x4_c +void vp8_short_walsh4x4_neon(short *input, short *output, int pitch); +#define vp8_short_walsh4x4 vp8_short_walsh4x4_neon void vp8_sixtap_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); void vp8_sixtap_predict16x16_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); @@ -320,13 +321,16 @@ unsigned int vp8_sub_pixel_variance8x8_c(const unsigned char *src_ptr, int sou #define vp8_sub_pixel_variance8x8 vp8_sub_pixel_variance8x8_c void vp8_subtract_b_c(struct block *be, struct blockd *bd, int pitch); -#define vp8_subtract_b vp8_subtract_b_c +void vp8_subtract_b_neon(struct block *be, struct blockd *bd, int pitch); +#define vp8_subtract_b vp8_subtract_b_neon void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride); -#define vp8_subtract_mbuv vp8_subtract_mbuv_c +void vp8_subtract_mbuv_neon(short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride); +#define vp8_subtract_mbuv vp8_subtract_mbuv_neon void vp8_subtract_mby_c(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride); -#define vp8_subtract_mby vp8_subtract_mby_c +void vp8_subtract_mby_neon(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride); +#define vp8_subtract_mby vp8_subtract_mby_neon unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance16x16_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); diff --git a/source/config/linux/arm64/vpx_config.asm b/source/config/linux/arm64/vpx_config.asm index b6286bd..bb141a1 100644 --- a/source/config/linux/arm64/vpx_config.asm +++ b/source/config/linux/arm64/vpx_config.asm @@ -13,6 +13,7 @@ .equ HAVE_NEON_ASM , 0 .equ HAVE_MIPS32 , 0 .equ HAVE_DSPR2 , 0 +.equ HAVE_MIPS64 , 0 .equ HAVE_MMX , 0 .equ HAVE_SSE , 0 .equ HAVE_SSE2 , 0 diff --git a/source/config/linux/arm64/vpx_config.h b/source/config/linux/arm64/vpx_config.h index bc1da49..e791223 100644 --- a/source/config/linux/arm64/vpx_config.h +++ b/source/config/linux/arm64/vpx_config.h @@ -22,6 +22,7 @@ #define HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 +#define HAVE_MIPS64 0 #define HAVE_MMX 0 #define HAVE_SSE 0 #define HAVE_SSE2 0 diff --git a/source/config/linux/generic/vpx_config.asm b/source/config/linux/generic/vpx_config.asm index 57105b1..42f23e4 100644 --- a/source/config/linux/generic/vpx_config.asm +++ b/source/config/linux/generic/vpx_config.asm @@ -13,6 +13,7 @@ .equ HAVE_NEON_ASM , 0 .equ HAVE_MIPS32 , 0 .equ HAVE_DSPR2 , 0 +.equ HAVE_MIPS64 , 0 .equ HAVE_MMX , 0 .equ HAVE_SSE , 0 .equ HAVE_SSE2 , 0 diff --git a/source/config/linux/generic/vpx_config.h b/source/config/linux/generic/vpx_config.h index b8ee73b..75d1415 100644 --- a/source/config/linux/generic/vpx_config.h +++ b/source/config/linux/generic/vpx_config.h @@ -22,6 +22,7 @@ #define HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 +#define HAVE_MIPS64 0 #define HAVE_MMX 0 #define HAVE_SSE 0 #define HAVE_SSE2 0 diff --git a/source/config/linux/ia32/vpx_config.asm b/source/config/linux/ia32/vpx_config.asm index ae350e2..64b2391 100644 --- a/source/config/linux/ia32/vpx_config.asm +++ b/source/config/linux/ia32/vpx_config.asm @@ -10,6 +10,7 @@ HAVE_NEON equ 0 HAVE_NEON_ASM equ 0 HAVE_MIPS32 equ 0 HAVE_DSPR2 equ 0 +HAVE_MIPS64 equ 0 HAVE_MMX equ 1 HAVE_SSE equ 1 HAVE_SSE2 equ 1 diff --git a/source/config/linux/ia32/vpx_config.h b/source/config/linux/ia32/vpx_config.h index fe3b8ec..705af6e 100644 --- a/source/config/linux/ia32/vpx_config.h +++ b/source/config/linux/ia32/vpx_config.h @@ -22,6 +22,7 @@ #define HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 +#define HAVE_MIPS64 0 #define HAVE_MMX 1 #define HAVE_SSE 1 #define HAVE_SSE2 1 diff --git a/source/config/linux/mipsel/vpx_config.h b/source/config/linux/mipsel/vpx_config.h index 44e20fb..5e0b6f2 100644 --- a/source/config/linux/mipsel/vpx_config.h +++ b/source/config/linux/mipsel/vpx_config.h @@ -22,6 +22,7 @@ #define HAVE_NEON_ASM 0 #define HAVE_MIPS32 1 #define HAVE_DSPR2 0 +#define HAVE_MIPS64 0 #define HAVE_MMX 0 #define HAVE_SSE 0 #define HAVE_SSE2 0 diff --git a/source/config/linux/x64/vpx_config.asm b/source/config/linux/x64/vpx_config.asm index ec7f36a..fbc78fb 100644 --- a/source/config/linux/x64/vpx_config.asm +++ b/source/config/linux/x64/vpx_config.asm @@ -10,6 +10,7 @@ HAVE_NEON equ 0 HAVE_NEON_ASM equ 0 HAVE_MIPS32 equ 0 HAVE_DSPR2 equ 0 +HAVE_MIPS64 equ 0 HAVE_MMX equ 1 HAVE_SSE equ 1 HAVE_SSE2 equ 1 diff --git a/source/config/linux/x64/vpx_config.h b/source/config/linux/x64/vpx_config.h index 43851b2..8b99a23 100644 --- a/source/config/linux/x64/vpx_config.h +++ b/source/config/linux/x64/vpx_config.h @@ -22,6 +22,7 @@ #define HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 +#define HAVE_MIPS64 0 #define HAVE_MMX 1 #define HAVE_SSE 1 #define HAVE_SSE2 1 diff --git a/source/config/mac/ia32/vpx_config.asm b/source/config/mac/ia32/vpx_config.asm index c3f1be3..2c14e7a 100644 --- a/source/config/mac/ia32/vpx_config.asm +++ b/source/config/mac/ia32/vpx_config.asm @@ -10,6 +10,7 @@ HAVE_NEON equ 0 HAVE_NEON_ASM equ 0 HAVE_MIPS32 equ 0 HAVE_DSPR2 equ 0 +HAVE_MIPS64 equ 0 HAVE_MMX equ 1 HAVE_SSE equ 1 HAVE_SSE2 equ 1 diff --git a/source/config/mac/ia32/vpx_config.h b/source/config/mac/ia32/vpx_config.h index 3c98250..9b7b399 100644 --- a/source/config/mac/ia32/vpx_config.h +++ b/source/config/mac/ia32/vpx_config.h @@ -22,6 +22,7 @@ #define HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 +#define HAVE_MIPS64 0 #define HAVE_MMX 1 #define HAVE_SSE 1 #define HAVE_SSE2 1 diff --git a/source/config/mac/x64/vpx_config.asm b/source/config/mac/x64/vpx_config.asm index ec7f36a..fbc78fb 100644 --- a/source/config/mac/x64/vpx_config.asm +++ b/source/config/mac/x64/vpx_config.asm @@ -10,6 +10,7 @@ HAVE_NEON equ 0 HAVE_NEON_ASM equ 0 HAVE_MIPS32 equ 0 HAVE_DSPR2 equ 0 +HAVE_MIPS64 equ 0 HAVE_MMX equ 1 HAVE_SSE equ 1 HAVE_SSE2 equ 1 diff --git a/source/config/mac/x64/vpx_config.h b/source/config/mac/x64/vpx_config.h index 43851b2..8b99a23 100644 --- a/source/config/mac/x64/vpx_config.h +++ b/source/config/mac/x64/vpx_config.h @@ -22,6 +22,7 @@ #define HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 +#define HAVE_MIPS64 0 #define HAVE_MMX 1 #define HAVE_SSE 1 #define HAVE_SSE2 1 diff --git a/source/config/nacl/vpx_config.asm b/source/config/nacl/vpx_config.asm index 57105b1..42f23e4 100644 --- a/source/config/nacl/vpx_config.asm +++ b/source/config/nacl/vpx_config.asm @@ -13,6 +13,7 @@ .equ HAVE_NEON_ASM , 0 .equ HAVE_MIPS32 , 0 .equ HAVE_DSPR2 , 0 +.equ HAVE_MIPS64 , 0 .equ HAVE_MMX , 0 .equ HAVE_SSE , 0 .equ HAVE_SSE2 , 0 diff --git a/source/config/nacl/vpx_config.h b/source/config/nacl/vpx_config.h index b8ee73b..75d1415 100644 --- a/source/config/nacl/vpx_config.h +++ b/source/config/nacl/vpx_config.h @@ -22,6 +22,7 @@ #define HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 +#define HAVE_MIPS64 0 #define HAVE_MMX 0 #define HAVE_SSE 0 #define HAVE_SSE2 0 diff --git a/source/config/win/ia32/vpx_config.asm b/source/config/win/ia32/vpx_config.asm index 97bee89..d5afba1 100644 --- a/source/config/win/ia32/vpx_config.asm +++ b/source/config/win/ia32/vpx_config.asm @@ -10,6 +10,7 @@ HAVE_NEON equ 0 HAVE_NEON_ASM equ 0 HAVE_MIPS32 equ 0 HAVE_DSPR2 equ 0 +HAVE_MIPS64 equ 0 HAVE_MMX equ 1 HAVE_SSE equ 1 HAVE_SSE2 equ 1 diff --git a/source/config/win/ia32/vpx_config.h b/source/config/win/ia32/vpx_config.h index 8284813..601cd8d 100644 --- a/source/config/win/ia32/vpx_config.h +++ b/source/config/win/ia32/vpx_config.h @@ -22,6 +22,7 @@ #define HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 +#define HAVE_MIPS64 0 #define HAVE_MMX 1 #define HAVE_SSE 1 #define HAVE_SSE2 1 diff --git a/source/config/win/x64/vpx_config.asm b/source/config/win/x64/vpx_config.asm index 57bbb92..bb81709 100644 --- a/source/config/win/x64/vpx_config.asm +++ b/source/config/win/x64/vpx_config.asm @@ -10,6 +10,7 @@ HAVE_NEON equ 0 HAVE_NEON_ASM equ 0 HAVE_MIPS32 equ 0 HAVE_DSPR2 equ 0 +HAVE_MIPS64 equ 0 HAVE_MMX equ 1 HAVE_SSE equ 1 HAVE_SSE2 equ 1 diff --git a/source/config/win/x64/vpx_config.h b/source/config/win/x64/vpx_config.h index 80d8c44..9747cec 100644 --- a/source/config/win/x64/vpx_config.h +++ b/source/config/win/x64/vpx_config.h @@ -22,6 +22,7 @@ #define HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 #define HAVE_DSPR2 0 +#define HAVE_MIPS64 0 #define HAVE_MMX 1 #define HAVE_SSE 1 #define HAVE_SSE2 1 diff --git a/source/libvpx/README b/source/libvpx/README index f9c24ff..6f864d8 100644 --- a/source/libvpx/README +++ b/source/libvpx/README @@ -65,6 +65,7 @@ COMPILING THE APPLICATIONS/LIBRARIES: armv7-win32-vs12 armv7s-darwin-gcc mips32-linux-gcc + mips64-linux-gcc ppc32-darwin8-gcc ppc32-darwin9-gcc ppc32-linux-gcc diff --git a/source/libvpx/build/make/rtcd.pl b/source/libvpx/build/make/rtcd.pl index 28ef69c..5b0cefa 100755 --- a/source/libvpx/build/make/rtcd.pl +++ b/source/libvpx/build/make/rtcd.pl @@ -365,13 +365,13 @@ if ($opts{arch} eq 'x86') { @REQUIRES = filter(keys %required ? keys %required : qw/mmx sse sse2/); &require(@REQUIRES); x86; -} elsif ($opts{arch} eq 'mips32') { - @ALL_ARCHS = filter(qw/mips32/); +} elsif ($opts{arch} eq 'mips32' || $opts{arch} eq 'mips64') { + @ALL_ARCHS = filter("$opts{arch}"); open CONFIG_FILE, $opts{config} or die "Error opening config file '$opts{config}': $!\n"; while (<CONFIG_FILE>) { if (/HAVE_DSPR2=yes/) { - @ALL_ARCHS = filter(qw/mips32 dspr2/); + @ALL_ARCHS = filter("$opts{arch}", qw/dspr2/); last; } } diff --git a/source/libvpx/configure b/source/libvpx/configure index 92ca061..2708b45 100755 --- a/source/libvpx/configure +++ b/source/libvpx/configure @@ -111,6 +111,7 @@ all_platforms="${all_platforms} armv7-win32-vs11" all_platforms="${all_platforms} armv7-win32-vs12" all_platforms="${all_platforms} armv7s-darwin-gcc" all_platforms="${all_platforms} mips32-linux-gcc" +all_platforms="${all_platforms} mips64-linux-gcc" all_platforms="${all_platforms} ppc32-darwin8-gcc" all_platforms="${all_platforms} ppc32-darwin9-gcc" all_platforms="${all_platforms} ppc32-linux-gcc" @@ -254,6 +255,8 @@ ARCH_EXT_LIST=" mips32 dspr2 + mips64 + mmx sse sse2 diff --git a/source/libvpx/examples/set_maps.c b/source/libvpx/examples/set_maps.c index af8c582..ff60d51 100644 --- a/source/libvpx/examples/set_maps.c +++ b/source/libvpx/examples/set_maps.c @@ -125,10 +125,11 @@ static void unset_active_map(const vpx_codec_enc_cfg_t *cfg, die_codec(codec, "Failed to set active map"); } -static void encode_frame(vpx_codec_ctx_t *codec, - vpx_image_t *img, - int frame_index, - VpxVideoWriter *writer) { +static int encode_frame(vpx_codec_ctx_t *codec, + vpx_image_t *img, + int frame_index, + VpxVideoWriter *writer) { + int got_pkts = 0; vpx_codec_iter_t iter = NULL; const vpx_codec_cx_pkt_t *pkt = NULL; const vpx_codec_err_t res = vpx_codec_encode(codec, img, frame_index, 1, 0, @@ -137,6 +138,8 @@ static void encode_frame(vpx_codec_ctx_t *codec, die_codec(codec, "Failed to encode frame"); while ((pkt = vpx_codec_get_cx_data(codec, &iter)) != NULL) { + got_pkts = 1; + if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) { const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0; if (!vpx_video_writer_write_frame(writer, @@ -150,6 +153,8 @@ static void encode_frame(vpx_codec_ctx_t *codec, fflush(stdout); } } + + return got_pkts; } int main(int argc, char **argv) { @@ -217,6 +222,7 @@ int main(int argc, char **argv) { if (vpx_codec_enc_init(&codec, encoder->codec_interface(), &cfg, 0)) die_codec(&codec, "Failed to initialize encoder"); + // Encode frames. while (vpx_img_read(&raw, infile)) { ++frame_count; @@ -230,7 +236,10 @@ int main(int argc, char **argv) { encode_frame(&codec, &raw, frame_count, writer); } - encode_frame(&codec, NULL, -1, writer); + + // Flush encoder. + while (encode_frame(&codec, NULL, -1, writer)) {} + printf("\n"); fclose(infile); printf("Processed %d frames.\n", frame_count); diff --git a/source/libvpx/test/convolve_test.cc b/source/libvpx/test/convolve_test.cc index 5b4a20e..1724db3 100644 --- a/source/libvpx/test/convolve_test.cc +++ b/source/libvpx/test/convolve_test.cc @@ -646,26 +646,6 @@ INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values( #endif #if HAVE_AVX2 -// TODO(jzern): these prototypes can be removed after the avx2 versions are -// reenabled in vp9_rtcd_defs.pl. -extern "C" { -void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h); -void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h); -void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h); -} - const ConvolveFunctions convolve8_avx2( vp9_convolve8_horiz_avx2, vp9_convolve8_avg_horiz_ssse3, vp9_convolve8_vert_avx2, vp9_convolve8_avg_vert_ssse3, @@ -676,9 +656,7 @@ INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values( make_tuple(8, 4, &convolve8_avx2), make_tuple(4, 8, &convolve8_avx2), make_tuple(8, 8, &convolve8_avx2), - make_tuple(8, 16, &convolve8_avx2))); - -INSTANTIATE_TEST_CASE_P(DISABLED_AVX2, ConvolveTest, ::testing::Values( + make_tuple(8, 16, &convolve8_avx2), make_tuple(16, 8, &convolve8_avx2), make_tuple(16, 16, &convolve8_avx2), make_tuple(32, 16, &convolve8_avx2), diff --git a/source/libvpx/test/frame_size_tests.cc b/source/libvpx/test/frame_size_tests.cc index 2400c20..db27975 100644 --- a/source/libvpx/test/frame_size_tests.cc +++ b/source/libvpx/test/frame_size_tests.cc @@ -72,7 +72,13 @@ TEST_F(VP9FrameSizeTestsLarge, ValidSizes) { // one for each lag in frames (for 2 pass), and then one for each possible // reference buffer (8) - we can end up with up to 30 buffers of roughly this // size or almost 1 gig of memory. + // In total the allocations will exceed 2GiB which may cause a failure with + // mingw + wine, use a smaller size in that case. +#if defined(_WIN32) && !defined(_WIN64) + video.SetSize(4096, 3072); +#else video.SetSize(4096, 4096); +#endif video.set_limit(2); expected_res_ = VPX_CODEC_OK; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); diff --git a/source/libvpx/test/set_maps.sh b/source/libvpx/test/set_maps.sh new file mode 100755 index 0000000..e7c8d43 --- /dev/null +++ b/source/libvpx/test/set_maps.sh @@ -0,0 +1,59 @@ +#!/bin/sh +## +## Copyright (c) 2014 The WebM project authors. All Rights Reserved. +## +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. +## +## This file tests the libvpx set_maps example. To add new tests to this file, +## do the following: +## 1. Write a shell function (this is your test). +## 2. Add the function to set_maps_tests (on a new line). +## +. $(dirname $0)/tools_common.sh + +# Environment check: $YUV_RAW_INPUT is required, and set_maps must exist in +# $LIBVPX_BIN_PATH. +set_maps_verify_environment() { + if [ ! -e "${YUV_RAW_INPUT}" ]; then + echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH." + return 1 + fi + if [ -z "$(vpx_tool_path set_maps)" ]; then + elog "set_maps not found. It must exist in LIBVPX_BIN_PATH or its parent." + return 1 + fi +} + +# Runs set_maps using the codec specified by $1. +set_maps() { + local encoder="$(vpx_tool_path set_maps)" + local codec="$1" + local output_file="${VPX_TEST_OUTPUT_DIR}/set_maps_${codec}.ivf" + + eval "${VPX_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \ + "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" \ + ${devnull} + + [ -e "${output_file}" ] || return 1 +} + +set_maps_vp8() { + if [ "$(vp8_encode_available)" = "yes" ]; then + set_maps vp8 || return 1 + fi +} + +set_maps_vp9() { + if [ "$(vp9_encode_available)" = "yes" ]; then + set_maps vp9 || return 1 + fi +} + +set_maps_tests="set_maps_vp8 + set_maps_vp9" + +run_tests set_maps_verify_environment "${set_maps_tests}" diff --git a/source/libvpx/test/subtract_test.cc b/source/libvpx/test/subtract_test.cc index 6619fb1..ff42725 100644 --- a/source/libvpx/test/subtract_test.cc +++ b/source/libvpx/test/subtract_test.cc @@ -105,7 +105,7 @@ TEST_P(SubtractBlockTest, SimpleSubtract) { INSTANTIATE_TEST_CASE_P(C, SubtractBlockTest, ::testing::Values(vp8_subtract_b_c)); -#if HAVE_NEON_ASM +#if HAVE_NEON INSTANTIATE_TEST_CASE_P(NEON, SubtractBlockTest, ::testing::Values(vp8_subtract_b_neon)); #endif diff --git a/source/libvpx/test/svc_test.cc b/source/libvpx/test/svc_test.cc index e9cf38d..1cb01a4 100644 --- a/source/libvpx/test/svc_test.cc +++ b/source/libvpx/test/svc_test.cc @@ -13,6 +13,9 @@ #include "test/codec_factory.h" #include "test/decode_test_driver.h" #include "test/i420_video_source.h" + +#include "vp9/decoder/vp9_decoder.h" + #include "vpx/svc_context.h" #include "vpx/vp8cx.h" #include "vpx/vpx_encoder.h" @@ -21,6 +24,7 @@ namespace { using libvpx_test::CodecFactory; using libvpx_test::Decoder; +using libvpx_test::DxDataIterator; using libvpx_test::VP9CodecFactory; class SvcTest : public ::testing::Test { @@ -62,9 +66,213 @@ class SvcTest : public ::testing::Test { } virtual void TearDown() { - vpx_svc_release(&svc_); + ReleaseEncoder(); delete(decoder_); + } + + void InitializeEncoder() { + const vpx_codec_err_t res = + vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); + EXPECT_EQ(VPX_CODEC_OK, res); + codec_initialized_ = true; + } + + void ReleaseEncoder() { + vpx_svc_release(&svc_); if (codec_initialized_) vpx_codec_destroy(&codec_); + codec_initialized_ = false; + } + + void Pass1EncodeNFrames(const int n, const int layers, + std::string *const stats_buf) { + vpx_codec_err_t res; + size_t stats_size = 0; + const char *stats_data = NULL; + + ASSERT_GT(n, 0); + ASSERT_GT(layers, 0); + svc_.spatial_layers = layers; + codec_enc_.g_pass = VPX_RC_FIRST_PASS; + InitializeEncoder(); + + libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight, + codec_enc_.g_timebase.den, + codec_enc_.g_timebase.num, 0, 30); + video.Begin(); + + for (int i = 0; i < n; ++i) { + res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), + video.duration(), VPX_DL_GOOD_QUALITY); + ASSERT_EQ(VPX_CODEC_OK, res); + stats_size = vpx_svc_get_rc_stats_buffer_size(&svc_); + EXPECT_GT(stats_size, 0U); + stats_data = vpx_svc_get_rc_stats_buffer(&svc_); + ASSERT_TRUE(stats_data != NULL); + stats_buf->append(stats_data, stats_size); + video.Next(); + } + + // Flush encoder and test EOS packet + res = vpx_svc_encode(&svc_, &codec_, NULL, video.pts(), + video.duration(), VPX_DL_GOOD_QUALITY); + stats_size = vpx_svc_get_rc_stats_buffer_size(&svc_); + EXPECT_GT(stats_size, 0U); + stats_data = vpx_svc_get_rc_stats_buffer(&svc_); + ASSERT_TRUE(stats_data != NULL); + stats_buf->append(stats_data, stats_size); + + ReleaseEncoder(); + } + + void StoreFrames(const size_t max_frame_received, + struct vpx_fixed_buf *const outputs, + size_t *const frame_received) { + size_t frame_size; + while ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) { + ASSERT_LT(*frame_received, max_frame_received); + + if (*frame_received == 0) { + EXPECT_EQ(1, vpx_svc_is_keyframe(&svc_)); + } + + outputs[*frame_received].buf = malloc(frame_size); + ASSERT_TRUE(outputs[*frame_received].buf != NULL); + memcpy(outputs[*frame_received].buf, vpx_svc_get_buffer(&svc_), + frame_size); + outputs[*frame_received].sz = frame_size; + ++(*frame_received); + } + } + + void Pass2EncodeNFrames(std::string *const stats_buf, + const int n, const int layers, + struct vpx_fixed_buf *const outputs) { + vpx_codec_err_t res; + size_t frame_received = 0; + + ASSERT_TRUE(outputs != NULL); + ASSERT_GT(n, 0); + ASSERT_GT(layers, 0); + svc_.spatial_layers = layers; + codec_enc_.rc_target_bitrate = 500; + if (codec_enc_.g_pass == VPX_RC_LAST_PASS) { + ASSERT_TRUE(stats_buf != NULL); + ASSERT_GT(stats_buf->size(), 0U); + codec_enc_.rc_twopass_stats_in.buf = &(*stats_buf)[0]; + codec_enc_.rc_twopass_stats_in.sz = stats_buf->size(); + } + InitializeEncoder(); + + libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight, + codec_enc_.g_timebase.den, + codec_enc_.g_timebase.num, 0, 30); + video.Begin(); + + for (int i = 0; i < n; ++i) { + res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), + video.duration(), VPX_DL_GOOD_QUALITY); + ASSERT_EQ(VPX_CODEC_OK, res); + StoreFrames(n, outputs, &frame_received); + video.Next(); + } + + // Flush Encoder + res = vpx_svc_encode(&svc_, &codec_, NULL, 0, + video.duration(), VPX_DL_GOOD_QUALITY); + EXPECT_EQ(VPX_CODEC_OK, res); + StoreFrames(n, outputs, &frame_received); + + EXPECT_EQ(frame_received, (size_t)n); + + ReleaseEncoder(); + } + + void DecodeNFrames(const struct vpx_fixed_buf *const inputs, const int n) { + int decoded_frames = 0; + int received_frames = 0; + + ASSERT_TRUE(inputs != NULL); + ASSERT_GT(n, 0); + + for (int i = 0; i < n; ++i) { + ASSERT_TRUE(inputs[i].buf != NULL); + ASSERT_GT(inputs[i].sz, 0U); + const vpx_codec_err_t res_dec = + decoder_->DecodeFrame(static_cast<const uint8_t *>(inputs[i].buf), + inputs[i].sz); + ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); + ++decoded_frames; + + DxDataIterator dec_iter = decoder_->GetDxData(); + while (dec_iter.Next()) { + ++received_frames; + } + } + EXPECT_EQ(decoded_frames, n); + EXPECT_EQ(received_frames, n); + } + + void DropEnhancementLayers(struct vpx_fixed_buf *const inputs, + const int num_super_frames, + const int remained_layers) { + ASSERT_TRUE(inputs != NULL); + ASSERT_GT(num_super_frames, 0); + ASSERT_GT(remained_layers, 0); + + for (int i = 0; i < num_super_frames; ++i) { + uint32_t frame_sizes[8] = {0}; + int frame_count = 0; + int frames_found = 0; + int frame; + ASSERT_TRUE(inputs[i].buf != NULL); + ASSERT_GT(inputs[i].sz, 0U); + + vpx_codec_err_t res = + vp9_parse_superframe_index(static_cast<const uint8_t*>(inputs[i].buf), + inputs[i].sz, frame_sizes, &frame_count, + NULL, NULL); + ASSERT_EQ(VPX_CODEC_OK, res); + + uint8_t *frame_data = static_cast<uint8_t *>(inputs[i].buf); + uint8_t *frame_start = frame_data; + for (frame = 0; frame < frame_count; ++frame) { + // Looking for a visible frame + if (frame_data[0] & 0x02) { + ++frames_found; + if (frames_found == remained_layers) + break; + } + frame_data += frame_sizes[frame]; + } + ASSERT_LT(frame, frame_count); + if (frame == frame_count - 1) + continue; + + frame_data += frame_sizes[frame]; + uint8_t marker = + static_cast<const uint8_t *>(inputs[i].buf)[inputs[i].sz - 1]; + const uint32_t mag = ((marker >> 3) & 0x3) + 1; + const size_t index_sz = 2 + mag * frame_count; + const size_t new_index_sz = 2 + mag * (frame + 1); + marker &= 0x0f8; + marker |= frame; + frame_data[0] = marker; + memcpy(frame_data + 1, frame_start + inputs[i].sz - index_sz + 1, + new_index_sz - 2); + frame_data[new_index_sz - 1] = marker; + inputs[i].sz = frame_data - frame_start + new_index_sz; + } + } + + void FreeBitstreamBuffers(struct vpx_fixed_buf *const inputs, const int n) { + ASSERT_TRUE(inputs != NULL); + ASSERT_GT(n, 0); + + for (int i = 0; i < n; ++i) { + free(inputs[i].buf); + inputs[i].buf = NULL; + inputs[i].sz = 0; + } } SvcContext svc_; @@ -93,9 +301,7 @@ TEST_F(SvcTest, SvcInit) { EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); svc_.spatial_layers = 0; // use default layers - res = vpx_svc_init(&svc_, &codec_, codec_iface_, &codec_enc_); - EXPECT_EQ(VPX_CODEC_OK, res); - codec_initialized_ = true; + InitializeEncoder(); EXPECT_EQ(VPX_SS_DEFAULT_LAYERS, svc_.spatial_layers); } @@ -106,9 +312,7 @@ TEST_F(SvcTest, InitTwoLayers) { EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); vpx_svc_set_scale_factors(&svc_, "4/16,16/16"); // valid scale values - res = vpx_svc_init(&svc_, &codec_, codec_iface_, &codec_enc_); - EXPECT_EQ(VPX_CODEC_OK, res); - codec_initialized_ = true; + InitializeEncoder(); } TEST_F(SvcTest, InvalidOptions) { @@ -124,18 +328,15 @@ TEST_F(SvcTest, InvalidOptions) { TEST_F(SvcTest, SetLayersOption) { vpx_codec_err_t res = vpx_svc_set_options(&svc_, "layers=3"); EXPECT_EQ(VPX_CODEC_OK, res); - res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); - EXPECT_EQ(VPX_CODEC_OK, res); - codec_initialized_ = true; + InitializeEncoder(); EXPECT_EQ(3, svc_.spatial_layers); } TEST_F(SvcTest, SetMultipleOptions) { vpx_codec_err_t res = vpx_svc_set_options(&svc_, "layers=2 scale-factors=1/3,2/3"); - res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); EXPECT_EQ(VPX_CODEC_OK, res); - codec_initialized_ = true; + InitializeEncoder(); EXPECT_EQ(2, svc_.spatial_layers); } @@ -149,9 +350,7 @@ TEST_F(SvcTest, SetScaleFactorsOption) { res = vpx_svc_set_options(&svc_, "scale-factors=1/3,2/3"); EXPECT_EQ(VPX_CODEC_OK, res); - res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); - EXPECT_EQ(VPX_CODEC_OK, res); - codec_initialized_ = true; + InitializeEncoder(); } TEST_F(SvcTest, SetQuantizersOption) { @@ -162,9 +361,7 @@ TEST_F(SvcTest, SetQuantizersOption) { EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); vpx_svc_set_options(&svc_, "quantizers=40,45"); - res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); - EXPECT_EQ(VPX_CODEC_OK, res); - codec_initialized_ = true; + InitializeEncoder(); } TEST_F(SvcTest, SetAutoAltRefOption) { @@ -180,9 +377,7 @@ TEST_F(SvcTest, SetAutoAltRefOption) { EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); vpx_svc_set_options(&svc_, "auto-alt-refs=0,1,1,1,0"); - res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); - EXPECT_EQ(VPX_CODEC_OK, res); - codec_initialized_ = true; + InitializeEncoder(); } TEST_F(SvcTest, SetQuantizers) { @@ -200,9 +395,7 @@ TEST_F(SvcTest, SetQuantizers) { res = vpx_svc_set_quantizers(&svc_, "40,30"); EXPECT_EQ(VPX_CODEC_OK, res); - res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); - EXPECT_EQ(VPX_CODEC_OK, res); - codec_initialized_ = true; + InitializeEncoder(); } TEST_F(SvcTest, SetScaleFactors) { @@ -220,121 +413,25 @@ TEST_F(SvcTest, SetScaleFactors) { res = vpx_svc_set_scale_factors(&svc_, "4/16,16/16"); EXPECT_EQ(VPX_CODEC_OK, res); - res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); - EXPECT_EQ(VPX_CODEC_OK, res); - codec_initialized_ = true; + InitializeEncoder(); } // Test that decoder can handle an SVC frame as the first frame in a sequence. -TEST_F(SvcTest, FirstFrameHasLayers) { - svc_.spatial_layers = 2; - vpx_svc_set_scale_factors(&svc_, "4/16,16/16"); - vpx_svc_set_quantizers(&svc_, "40,30"); - - vpx_codec_err_t res = - vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); - EXPECT_EQ(VPX_CODEC_OK, res); - codec_initialized_ = true; - - libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight, - codec_enc_.g_timebase.den, - codec_enc_.g_timebase.num, 0, 30); - video.Begin(); - - res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), - video.duration(), VPX_DL_GOOD_QUALITY); - EXPECT_EQ(VPX_CODEC_OK, res); - - if (vpx_svc_get_frame_size(&svc_) == 0) { - // Flush encoder - res = vpx_svc_encode(&svc_, &codec_, NULL, 0, - video.duration(), VPX_DL_GOOD_QUALITY); - EXPECT_EQ(VPX_CODEC_OK, res); - } - - int frame_size = vpx_svc_get_frame_size(&svc_); - EXPECT_GT(frame_size, 0); - const vpx_codec_err_t res_dec = decoder_->DecodeFrame( - static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size); - - // this test fails with a decoder error - ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); +TEST_F(SvcTest, OnePassEncodeOneFrame) { + codec_enc_.g_pass = VPX_RC_ONE_PASS; + vpx_fixed_buf output = {0}; + Pass2EncodeNFrames(NULL, 1, 2, &output); + DecodeNFrames(&output, 1); + FreeBitstreamBuffers(&output, 1); } -TEST_F(SvcTest, EncodeThreeFrames) { - svc_.spatial_layers = 2; - vpx_svc_set_scale_factors(&svc_, "4/16,16/16"); - vpx_svc_set_quantizers(&svc_, "40,30"); - int decoded_frames = 0; - vpx_codec_err_t res_dec; - int frame_size; - - vpx_codec_err_t res = - vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); - ASSERT_EQ(VPX_CODEC_OK, res); - codec_initialized_ = true; - - libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight, - codec_enc_.g_timebase.den, - codec_enc_.g_timebase.num, 0, 30); - // FRAME 0 - video.Begin(); - // This frame is a keyframe. - res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), - video.duration(), VPX_DL_GOOD_QUALITY); - - if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) { - EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_)); - res_dec = decoder_->DecodeFrame( - static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size); - ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); - ++decoded_frames; - } - - // FRAME 1 - video.Next(); - // This is a P-frame. - res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), - video.duration(), VPX_DL_GOOD_QUALITY); - ASSERT_EQ(VPX_CODEC_OK, res); - - if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) { - EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_)); - res_dec = decoder_->DecodeFrame( - static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size); - ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); - ++decoded_frames; - } - - // FRAME 2 - video.Next(); - // This is a P-frame. - res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), - video.duration(), VPX_DL_GOOD_QUALITY); - ASSERT_EQ(VPX_CODEC_OK, res); - - if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) { - EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_)); - res_dec = decoder_->DecodeFrame( - static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size); - ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); - ++decoded_frames; - } - - // Flush encoder - res = vpx_svc_encode(&svc_, &codec_, NULL, 0, - video.duration(), VPX_DL_GOOD_QUALITY); - EXPECT_EQ(VPX_CODEC_OK, res); - - while ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) { - EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_)); - res_dec = decoder_->DecodeFrame( - static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size); - ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); - ++decoded_frames; - } - - EXPECT_EQ(decoded_frames, 3); +TEST_F(SvcTest, OnePassEncodeThreeFrames) { + codec_enc_.g_pass = VPX_RC_ONE_PASS; + vpx_fixed_buf outputs[3]; + memset(&outputs[0], 0, sizeof(outputs)); + Pass2EncodeNFrames(NULL, 3, 2, &outputs[0]); + DecodeNFrames(&outputs[0], 3); + FreeBitstreamBuffers(&outputs[0], 3); } TEST_F(SvcTest, GetLayerResolution) { @@ -342,14 +439,11 @@ TEST_F(SvcTest, GetLayerResolution) { vpx_svc_set_scale_factors(&svc_, "4/16,8/16"); vpx_svc_set_quantizers(&svc_, "40,30"); - vpx_codec_err_t res = - vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); - EXPECT_EQ(VPX_CODEC_OK, res); - codec_initialized_ = true; + InitializeEncoder(); // ensure that requested layer is a valid layer uint32_t layer_width, layer_height; - res = vpx_svc_get_layer_resolution(&svc_, svc_.spatial_layers, + vpx_codec_err_t res = vpx_svc_get_layer_resolution(&svc_, svc_.spatial_layers, &layer_width, &layer_height); EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); @@ -373,131 +467,113 @@ TEST_F(SvcTest, GetLayerResolution) { EXPECT_EQ(kHeight * 8 / 16, layer_height); } -TEST_F(SvcTest, TwoPassEncode) { +TEST_F(SvcTest, TwoPassEncode10Frames) { // First pass encode std::string stats_buf; - svc_.spatial_layers = 2; - codec_enc_.g_pass = VPX_RC_FIRST_PASS; - vpx_svc_set_scale_factors(&svc_, "4/16,16/16"); - vpx_svc_set_quantizers(&svc_, "40,30"); + Pass1EncodeNFrames(10, 2, &stats_buf); + + // Second pass encode + codec_enc_.g_pass = VPX_RC_LAST_PASS; + vpx_fixed_buf outputs[10]; + memset(&outputs[0], 0, sizeof(outputs)); + Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]); + DecodeNFrames(&outputs[0], 10); + FreeBitstreamBuffers(&outputs[0], 10); +} + +TEST_F(SvcTest, TwoPassEncode20FramesWithAltRef) { + // First pass encode + std::string stats_buf; + Pass1EncodeNFrames(20, 2, &stats_buf); + + // Second pass encode + codec_enc_.g_pass = VPX_RC_LAST_PASS; vpx_svc_set_options(&svc_, "auto-alt-refs=1,1"); + vpx_fixed_buf outputs[20]; + memset(&outputs[0], 0, sizeof(outputs)); + Pass2EncodeNFrames(&stats_buf, 20, 2, &outputs[0]); + DecodeNFrames(&outputs[0], 20); + FreeBitstreamBuffers(&outputs[0], 20); +} - vpx_codec_err_t res = - vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); - ASSERT_EQ(VPX_CODEC_OK, res); - codec_initialized_ = true; - - libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight, - codec_enc_.g_timebase.den, - codec_enc_.g_timebase.num, 0, 30); - // FRAME 0 - video.Begin(); - res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), - video.duration(), VPX_DL_GOOD_QUALITY); - ASSERT_EQ(VPX_CODEC_OK, res); - size_t stats_size = vpx_svc_get_rc_stats_buffer_size(&svc_); - EXPECT_GT(stats_size, 0U); - const char *stats_data = vpx_svc_get_rc_stats_buffer(&svc_); - ASSERT_TRUE(stats_data != NULL); - stats_buf.append(stats_data, stats_size); - - // FRAME 1 - video.Next(); - res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), - video.duration(), VPX_DL_GOOD_QUALITY); - stats_size = vpx_svc_get_rc_stats_buffer_size(&svc_); - EXPECT_GT(stats_size, 0U); - stats_data = vpx_svc_get_rc_stats_buffer(&svc_); - ASSERT_TRUE(stats_data != NULL); - stats_buf.append(stats_data, stats_size); - - // Flush encoder and test EOS packet - res = vpx_svc_encode(&svc_, &codec_, NULL, video.pts(), - video.duration(), VPX_DL_GOOD_QUALITY); - stats_size = vpx_svc_get_rc_stats_buffer_size(&svc_); - EXPECT_GT(stats_size, 0U); - stats_data = vpx_svc_get_rc_stats_buffer(&svc_); - ASSERT_TRUE(stats_data != NULL); - stats_buf.append(stats_data, stats_size); - - // Tear down encoder - vpx_svc_release(&svc_); - vpx_codec_destroy(&codec_); +TEST_F(SvcTest, TwoPassEncode2LayersDecodeBaseLayerOnly) { + // First pass encode + std::string stats_buf; + Pass1EncodeNFrames(10, 2, &stats_buf); // Second pass encode - int decoded_frames = 0; - vpx_codec_err_t res_dec; - int frame_size; codec_enc_.g_pass = VPX_RC_LAST_PASS; - vpx_svc_set_scale_factors(&svc_, "4/16,16/16"); - vpx_svc_set_quantizers(&svc_, "40,30"); vpx_svc_set_options(&svc_, "auto-alt-refs=1,1"); - codec_enc_.rc_twopass_stats_in.buf = &stats_buf[0]; - codec_enc_.rc_twopass_stats_in.sz = stats_buf.size(); + vpx_fixed_buf outputs[10]; + memset(&outputs[0], 0, sizeof(outputs)); + Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]); + DropEnhancementLayers(&outputs[0], 10, 1); + DecodeNFrames(&outputs[0], 10); + FreeBitstreamBuffers(&outputs[0], 10); +} - res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); - ASSERT_EQ(VPX_CODEC_OK, res); - codec_initialized_ = true; - - // FRAME 0 - video.Begin(); - // This frame is a keyframe. - res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), - video.duration(), VPX_DL_GOOD_QUALITY); - ASSERT_EQ(VPX_CODEC_OK, res); - - if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) { - EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_)); - res_dec = decoder_->DecodeFrame( - static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size); - ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); - ++decoded_frames; - } +TEST_F(SvcTest, TwoPassEncode5LayersDecode54321Layers) { + // First pass encode + std::string stats_buf; + Pass1EncodeNFrames(10, 5, &stats_buf); - // FRAME 1 - video.Next(); - // This is a P-frame. - res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), - video.duration(), VPX_DL_GOOD_QUALITY); - ASSERT_EQ(VPX_CODEC_OK, res); - - if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) { - EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_)); - res_dec = decoder_->DecodeFrame( - static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size); - ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); - ++decoded_frames; - } + // Second pass encode + codec_enc_.g_pass = VPX_RC_LAST_PASS; + vpx_svc_set_options(&svc_, "auto-alt-refs=0,1,1,1,0"); + vpx_fixed_buf outputs[10]; + memset(&outputs[0], 0, sizeof(outputs)); + Pass2EncodeNFrames(&stats_buf, 10, 5, &outputs[0]); + + DecodeNFrames(&outputs[0], 10); + DropEnhancementLayers(&outputs[0], 10, 4); + DecodeNFrames(&outputs[0], 10); + DropEnhancementLayers(&outputs[0], 10, 3); + DecodeNFrames(&outputs[0], 10); + DropEnhancementLayers(&outputs[0], 10, 2); + DecodeNFrames(&outputs[0], 10); + DropEnhancementLayers(&outputs[0], 10, 1); + DecodeNFrames(&outputs[0], 10); + + FreeBitstreamBuffers(&outputs[0], 10); +} - // FRAME 2 - video.Next(); - // This is a P-frame. - res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), - video.duration(), VPX_DL_GOOD_QUALITY); - ASSERT_EQ(VPX_CODEC_OK, res); - - if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) { - EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_)); - res_dec = decoder_->DecodeFrame( - static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size); - ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); - ++decoded_frames; - } +TEST_F(SvcTest, TwoPassEncode2SNRLayers) { + // First pass encode + std::string stats_buf; + vpx_svc_set_options(&svc_, "scale-factors=1/1,1/1"); + Pass1EncodeNFrames(20, 2, &stats_buf); - // Flush encoder - res = vpx_svc_encode(&svc_, &codec_, NULL, 0, - video.duration(), VPX_DL_GOOD_QUALITY); - EXPECT_EQ(VPX_CODEC_OK, res); + // Second pass encode + codec_enc_.g_pass = VPX_RC_LAST_PASS; + vpx_svc_set_options(&svc_, + "auto-alt-refs=1,1 scale-factors=1/1,1/1"); + vpx_fixed_buf outputs[20]; + memset(&outputs[0], 0, sizeof(outputs)); + Pass2EncodeNFrames(&stats_buf, 20, 2, &outputs[0]); + DecodeNFrames(&outputs[0], 20); + FreeBitstreamBuffers(&outputs[0], 20); +} - while ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) { - EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_)); - res_dec = decoder_->DecodeFrame( - static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size); - ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); - ++decoded_frames; - } +TEST_F(SvcTest, TwoPassEncode3SNRLayersDecode321Layers) { + // First pass encode + std::string stats_buf; + vpx_svc_set_options(&svc_, "scale-factors=1/1,1/1,1/1"); + Pass1EncodeNFrames(20, 3, &stats_buf); - EXPECT_EQ(decoded_frames, 3); + // Second pass encode + codec_enc_.g_pass = VPX_RC_LAST_PASS; + vpx_svc_set_options(&svc_, + "auto-alt-refs=1,1,1 scale-factors=1/1,1/1,1/1"); + vpx_fixed_buf outputs[20]; + memset(&outputs[0], 0, sizeof(outputs)); + Pass2EncodeNFrames(&stats_buf, 20, 3, &outputs[0]); + DecodeNFrames(&outputs[0], 20); + DropEnhancementLayers(&outputs[0], 20, 2); + DecodeNFrames(&outputs[0], 20); + DropEnhancementLayers(&outputs[0], 20, 1); + DecodeNFrames(&outputs[0], 20); + + FreeBitstreamBuffers(&outputs[0], 20); } } // namespace diff --git a/source/libvpx/test/vpxenc.sh b/source/libvpx/test/vpxenc.sh index 83818aa..b6482c6 100755 --- a/source/libvpx/test/vpxenc.sh +++ b/source/libvpx/test/vpxenc.sh @@ -20,7 +20,7 @@ readonly TEST_FRAMES=10 # Environment check: Make sure input is available. vpxenc_verify_environment() { if [ ! -e "${YUV_RAW_INPUT}" ]; then - echo "The file ${YUV_RAW_INPUT##*/} must exist in LIBVPX_TEST_DATA_PATH." + elog "The file ${YUV_RAW_INPUT##*/} must exist in LIBVPX_TEST_DATA_PATH." return 1 fi if [ -z "$(vpx_tool_path vpxenc)" ]; then @@ -49,7 +49,9 @@ vpxenc_pipe() { local readonly encoder="$(vpx_tool_path vpxenc)" local readonly input="$1" shift - cat "${input}" | eval "${VPX_TEST_PREFIX}" "${encoder}" - "$@" ${devnull} + cat "${input}" | eval "${VPX_TEST_PREFIX}" "${encoder}" - \ + --test-decode=fatal \ + "$@" ${devnull} } # Wrapper function for running vpxenc. Requires that LIBVPX_BIN_PATH points to @@ -59,7 +61,9 @@ vpxenc() { local readonly encoder="$(vpx_tool_path vpxenc)" local readonly input="${1}" shift - eval "${VPX_TEST_PREFIX}" "${encoder}" "$input" "$@" ${devnull} + eval "${VPX_TEST_PREFIX}" "${encoder}" "$input" \ + --test-decode=fatal \ + "$@" ${devnull} } vpxenc_vp8_ivf() { @@ -80,6 +84,66 @@ vpxenc_vp8_ivf() { fi } +vpxenc_vp8_webm() { + if [ "$(vpxenc_can_encode_vp8)" = "yes" ] && \ + [ "$(webm_io_available)" = "yes" ]; then + local readonly output="${VPX_TEST_OUTPUT_DIR}/vp8.webm" + vpxenc --codec=vp8 \ + --width="${YUV_RAW_INPUT_WIDTH}" \ + --height="${YUV_RAW_INPUT_HEIGHT}" \ + --limit="${TEST_FRAMES}" \ + --output="${output}" \ + "${YUV_RAW_INPUT}" + + if [ ! -e "${output}" ]; then + elog "Output file does not exist." + return 1 + fi + fi +} + +vpxenc_vp8_webm_2pass() { + if [ "$(vpxenc_can_encode_vp8)" = "yes" ] && \ + [ "$(webm_io_available)" = "yes" ]; then + local readonly output="${VPX_TEST_OUTPUT_DIR}/vp8.webm" + vpxenc --codec=vp8 \ + --width="${YUV_RAW_INPUT_WIDTH}" \ + --height="${YUV_RAW_INPUT_HEIGHT}" \ + --limit="${TEST_FRAMES}" \ + --output="${output}" \ + --passes=2 \ + "${YUV_RAW_INPUT}" + + if [ ! -e "${output}" ]; then + elog "Output file does not exist." + return 1 + fi + fi +} + +vpxenc_vp8_webm_lag10_frames20() { + if [ "$(vpxenc_can_encode_vp8)" = "yes" ] && \ + [ "$(webm_io_available)" = "yes" ]; then + local readonly lag_total_frames=20 + local readonly lag_frames=10 + local readonly output="${VPX_TEST_OUTPUT_DIR}/vp8_lag10_frames20.webm" + vpxenc --codec=vp8 \ + --width="${YUV_RAW_INPUT_WIDTH}" \ + --height="${YUV_RAW_INPUT_HEIGHT}" \ + --limit="${lag_total_frames}" \ + --lag-in-frames="${lag_frames}" \ + --output="${output}" \ + --auto-alt-ref=1 \ + --passes=2 \ + "${YUV_RAW_INPUT}" + + if [ ! -e "${output}" ]; then + elog "Output file does not exist." + return 1 + fi + fi +} + vpxenc_vp8_ivf_piped_input() { if [ "$(vpxenc_can_encode_vp8)" = "yes" ]; then local readonly output="${VPX_TEST_OUTPUT_DIR}/vp8_piped_input.ivf" @@ -99,14 +163,14 @@ vpxenc_vp8_ivf_piped_input() { fi } -vpxenc_vp8_webm() { - if [ "$(vpxenc_can_encode_vp8)" = "yes" ] && \ - [ "$(webm_io_available)" = "yes" ]; then - local readonly output="${VPX_TEST_OUTPUT_DIR}/vp8.webm" - vpxenc --codec=vp8 \ +vpxenc_vp9_ivf() { + if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then + local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9.ivf" + vpxenc --codec=vp9 \ --width="${YUV_RAW_INPUT_WIDTH}" \ --height="${YUV_RAW_INPUT_HEIGHT}" \ --limit="${TEST_FRAMES}" \ + --ivf \ --output="${output}" \ "${YUV_RAW_INPUT}" @@ -117,15 +181,14 @@ vpxenc_vp8_webm() { fi } -vpxenc_vp9_ivf() { - if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then - local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9.ivf" +vpxenc_vp9_webm() { + if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \ + [ "$(webm_io_available)" = "yes" ]; then + local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9.webm" vpxenc --codec=vp9 \ --width="${YUV_RAW_INPUT_WIDTH}" \ --height="${YUV_RAW_INPUT_HEIGHT}" \ --limit="${TEST_FRAMES}" \ - --ivf \ - --test-decode=fatal \ --output="${output}" \ "${YUV_RAW_INPUT}" @@ -136,7 +199,7 @@ vpxenc_vp9_ivf() { fi } -vpxenc_vp9_webm() { +vpxenc_vp9_webm_2pass() { if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \ [ "$(webm_io_available)" = "yes" ]; then local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9.webm" @@ -146,6 +209,7 @@ vpxenc_vp9_webm() { --limit="${TEST_FRAMES}" \ --test-decode=fatal \ --output="${output}" \ + --passes=2 \ "${YUV_RAW_INPUT}" if [ ! -e "${output}" ]; then @@ -165,7 +229,6 @@ vpxenc_vp9_ivf_lossless() { --ivf \ --output="${output}" \ --lossless=1 \ - --test-decode=fatal \ "${YUV_RAW_INPUT}" if [ ! -e "${output}" ]; then @@ -186,7 +249,30 @@ vpxenc_vp9_ivf_minq0_maxq0() { --output="${output}" \ --min-q=0 \ --max-q=0 \ + "${YUV_RAW_INPUT}" + + if [ ! -e "${output}" ]; then + elog "Output file does not exist." + return 1 + fi + fi +} + +vpxenc_vp9_webm_lag10_frames20() { + if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \ + [ "$(webm_io_available)" = "yes" ]; then + local readonly lag_total_frames=20 + local readonly lag_frames=10 + local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9_lag10_frames20.webm" + vpxenc --codec=vp9 \ + --width="${YUV_RAW_INPUT_WIDTH}" \ + --height="${YUV_RAW_INPUT_HEIGHT}" \ + --limit="${lag_total_frames}" \ + --lag-in-frames="${lag_frames}" \ + --output="${output}" \ --test-decode=fatal \ + --passes=2 \ + --auto-alt-ref=1 \ "${YUV_RAW_INPUT}" if [ ! -e "${output}" ]; then @@ -198,10 +284,14 @@ vpxenc_vp9_ivf_minq0_maxq0() { vpxenc_tests="vpxenc_vp8_ivf vpxenc_vp8_webm + vpxenc_vp8_webm_2pass + vpxenc_vp8_webm_lag10_frames20 vpxenc_vp8_ivf_piped_input vpxenc_vp9_ivf vpxenc_vp9_webm + vpxenc_vp9_webm_2pass vpxenc_vp9_ivf_lossless - vpxenc_vp9_ivf_minq0_maxq0" + vpxenc_vp9_ivf_minq0_maxq0 + vpxenc_vp9_webm_lag10_frames20" run_tests vpxenc_verify_environment "${vpxenc_tests}" diff --git a/source/libvpx/vp8/common/rtcd_defs.pl b/source/libvpx/vp8/common/rtcd_defs.pl index fd9afd2..204cbf0 100644 --- a/source/libvpx/vp8/common/rtcd_defs.pl +++ b/source/libvpx/vp8/common/rtcd_defs.pl @@ -456,9 +456,8 @@ $vp8_short_fdct8x4_media=vp8_short_fdct8x4_armv6; $vp8_short_fdct8x4_neon_asm=vp8_short_fdct8x4_neon; add_proto qw/void vp8_short_walsh4x4/, "short *input, short *output, int pitch"; -specialize qw/vp8_short_walsh4x4 sse2 media neon_asm/; +specialize qw/vp8_short_walsh4x4 sse2 media neon/; $vp8_short_walsh4x4_media=vp8_short_walsh4x4_armv6; -$vp8_short_walsh4x4_neon_asm=vp8_short_walsh4x4_neon; # # Quantizer @@ -503,19 +502,16 @@ specialize qw/vp8_mbuverror mmx sse2/; $vp8_mbuverror_sse2=vp8_mbuverror_xmm; add_proto qw/void vp8_subtract_b/, "struct block *be, struct blockd *bd, int pitch"; -specialize qw/vp8_subtract_b mmx sse2 media neon_asm/; +specialize qw/vp8_subtract_b mmx sse2 media neon/; $vp8_subtract_b_media=vp8_subtract_b_armv6; -$vp8_subtract_b_neon_asm=vp8_subtract_b_neon; add_proto qw/void vp8_subtract_mby/, "short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride"; -specialize qw/vp8_subtract_mby mmx sse2 media neon_asm/; +specialize qw/vp8_subtract_mby mmx sse2 media neon/; $vp8_subtract_mby_media=vp8_subtract_mby_armv6; -$vp8_subtract_mby_neon_asm=vp8_subtract_mby_neon; add_proto qw/void vp8_subtract_mbuv/, "short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride"; -specialize qw/vp8_subtract_mbuv mmx sse2 media neon_asm/; +specialize qw/vp8_subtract_mbuv mmx sse2 media neon/; $vp8_subtract_mbuv_media=vp8_subtract_mbuv_armv6; -$vp8_subtract_mbuv_neon_asm=vp8_subtract_mbuv_neon; # # Motion search diff --git a/source/libvpx/vp8/encoder/arm/neon/subtract_neon.asm b/source/libvpx/vp8/encoder/arm/neon/subtract_neon.asm deleted file mode 100644 index 840cb33..0000000 --- a/source/libvpx/vp8/encoder/arm/neon/subtract_neon.asm +++ /dev/null @@ -1,205 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - EXPORT |vp8_subtract_b_neon| - EXPORT |vp8_subtract_mby_neon| - EXPORT |vp8_subtract_mbuv_neon| - - INCLUDE vp8_asm_enc_offsets.asm - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -;void vp8_subtract_b_neon(BLOCK *be, BLOCKD *bd, int pitch) -|vp8_subtract_b_neon| PROC - - stmfd sp!, {r4-r7} - - ldr r3, [r0, #vp8_block_base_src] - ldr r4, [r0, #vp8_block_src] - ldr r5, [r0, #vp8_block_src_diff] - ldr r3, [r3] - ldr r6, [r0, #vp8_block_src_stride] - add r3, r3, r4 ; src = *base_src + src - ldr r7, [r1, #vp8_blockd_predictor] - - vld1.8 {d0}, [r3], r6 ;load src - vld1.8 {d1}, [r7], r2 ;load pred - vld1.8 {d2}, [r3], r6 - vld1.8 {d3}, [r7], r2 - vld1.8 {d4}, [r3], r6 - vld1.8 {d5}, [r7], r2 - vld1.8 {d6}, [r3], r6 - vld1.8 {d7}, [r7], r2 - - vsubl.u8 q10, d0, d1 - vsubl.u8 q11, d2, d3 - vsubl.u8 q12, d4, d5 - vsubl.u8 q13, d6, d7 - - mov r2, r2, lsl #1 - - vst1.16 {d20}, [r5], r2 ;store diff - vst1.16 {d22}, [r5], r2 - vst1.16 {d24}, [r5], r2 - vst1.16 {d26}, [r5], r2 - - ldmfd sp!, {r4-r7} - bx lr - - ENDP - - -;========================================== -;void vp8_subtract_mby_neon(short *diff, unsigned char *src, int src_stride -; unsigned char *pred, int pred_stride) -|vp8_subtract_mby_neon| PROC - push {r4-r7} - vpush {d8-d15} - - mov r12, #4 - ldr r4, [sp, #80] ; pred_stride - mov r6, #32 ; "diff" stride x2 - add r5, r0, #16 ; second diff pointer - -subtract_mby_loop - vld1.8 {q0}, [r1], r2 ;load src - vld1.8 {q1}, [r3], r4 ;load pred - vld1.8 {q2}, [r1], r2 - vld1.8 {q3}, [r3], r4 - vld1.8 {q4}, [r1], r2 - vld1.8 {q5}, [r3], r4 - vld1.8 {q6}, [r1], r2 - vld1.8 {q7}, [r3], r4 - - vsubl.u8 q8, d0, d2 - vsubl.u8 q9, d1, d3 - vsubl.u8 q10, d4, d6 - vsubl.u8 q11, d5, d7 - vsubl.u8 q12, d8, d10 - vsubl.u8 q13, d9, d11 - vsubl.u8 q14, d12, d14 - vsubl.u8 q15, d13, d15 - - vst1.16 {q8}, [r0], r6 ;store diff - vst1.16 {q9}, [r5], r6 - vst1.16 {q10}, [r0], r6 - vst1.16 {q11}, [r5], r6 - vst1.16 {q12}, [r0], r6 - vst1.16 {q13}, [r5], r6 - vst1.16 {q14}, [r0], r6 - vst1.16 {q15}, [r5], r6 - - subs r12, r12, #1 - bne subtract_mby_loop - - vpop {d8-d15} - pop {r4-r7} - bx lr - ENDP - -;================================= -;void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, -; int src_stride, unsigned char *upred, -; unsigned char *vpred, int pred_stride) - -|vp8_subtract_mbuv_neon| PROC - push {r4-r7} - vpush {d8-d15} - - ldr r4, [sp, #80] ; upred - ldr r5, [sp, #84] ; vpred - ldr r6, [sp, #88] ; pred_stride - add r0, r0, #512 ; short *udiff = diff + 256; - mov r12, #32 ; "diff" stride x2 - add r7, r0, #16 ; second diff pointer - -;u - vld1.8 {d0}, [r1], r3 ;load usrc - vld1.8 {d1}, [r4], r6 ;load upred - vld1.8 {d2}, [r1], r3 - vld1.8 {d3}, [r4], r6 - vld1.8 {d4}, [r1], r3 - vld1.8 {d5}, [r4], r6 - vld1.8 {d6}, [r1], r3 - vld1.8 {d7}, [r4], r6 - vld1.8 {d8}, [r1], r3 - vld1.8 {d9}, [r4], r6 - vld1.8 {d10}, [r1], r3 - vld1.8 {d11}, [r4], r6 - vld1.8 {d12}, [r1], r3 - vld1.8 {d13}, [r4], r6 - vld1.8 {d14}, [r1], r3 - vld1.8 {d15}, [r4], r6 - - vsubl.u8 q8, d0, d1 - vsubl.u8 q9, d2, d3 - vsubl.u8 q10, d4, d5 - vsubl.u8 q11, d6, d7 - vsubl.u8 q12, d8, d9 - vsubl.u8 q13, d10, d11 - vsubl.u8 q14, d12, d13 - vsubl.u8 q15, d14, d15 - - vst1.16 {q8}, [r0], r12 ;store diff - vst1.16 {q9}, [r7], r12 - vst1.16 {q10}, [r0], r12 - vst1.16 {q11}, [r7], r12 - vst1.16 {q12}, [r0], r12 - vst1.16 {q13}, [r7], r12 - vst1.16 {q14}, [r0], r12 - vst1.16 {q15}, [r7], r12 - -;v - vld1.8 {d0}, [r2], r3 ;load vsrc - vld1.8 {d1}, [r5], r6 ;load vpred - vld1.8 {d2}, [r2], r3 - vld1.8 {d3}, [r5], r6 - vld1.8 {d4}, [r2], r3 - vld1.8 {d5}, [r5], r6 - vld1.8 {d6}, [r2], r3 - vld1.8 {d7}, [r5], r6 - vld1.8 {d8}, [r2], r3 - vld1.8 {d9}, [r5], r6 - vld1.8 {d10}, [r2], r3 - vld1.8 {d11}, [r5], r6 - vld1.8 {d12}, [r2], r3 - vld1.8 {d13}, [r5], r6 - vld1.8 {d14}, [r2], r3 - vld1.8 {d15}, [r5], r6 - - vsubl.u8 q8, d0, d1 - vsubl.u8 q9, d2, d3 - vsubl.u8 q10, d4, d5 - vsubl.u8 q11, d6, d7 - vsubl.u8 q12, d8, d9 - vsubl.u8 q13, d10, d11 - vsubl.u8 q14, d12, d13 - vsubl.u8 q15, d14, d15 - - vst1.16 {q8}, [r0], r12 ;store diff - vst1.16 {q9}, [r7], r12 - vst1.16 {q10}, [r0], r12 - vst1.16 {q11}, [r7], r12 - vst1.16 {q12}, [r0], r12 - vst1.16 {q13}, [r7], r12 - vst1.16 {q14}, [r0], r12 - vst1.16 {q15}, [r7], r12 - - vpop {d8-d15} - pop {r4-r7} - bx lr - - ENDP - - END diff --git a/source/libvpx/vp8/encoder/arm/neon/subtract_neon.c b/source/libvpx/vp8/encoder/arm/neon/subtract_neon.c new file mode 100644 index 0000000..d3ab7b1 --- /dev/null +++ b/source/libvpx/vp8/encoder/arm/neon/subtract_neon.c @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <arm_neon.h> +#include "vp8/encoder/block.h" + +void vp8_subtract_b_neon( + BLOCK *be, + BLOCKD *bd, + int pitch) { + unsigned char *src_ptr, *predictor; + int src_stride; + int16_t *src_diff; + uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8; + uint16x8_t q10u16, q11u16, q12u16, q13u16; + + src_ptr = *be->base_src + be->src; + src_stride = be->src_stride; + predictor = bd->predictor; + + d0u8 = vld1_u8(src_ptr); + src_ptr += src_stride; + d2u8 = vld1_u8(src_ptr); + src_ptr += src_stride; + d4u8 = vld1_u8(src_ptr); + src_ptr += src_stride; + d6u8 = vld1_u8(src_ptr); + + d1u8 = vld1_u8(predictor); + predictor += pitch; + d3u8 = vld1_u8(predictor); + predictor += pitch; + d5u8 = vld1_u8(predictor); + predictor += pitch; + d7u8 = vld1_u8(predictor); + + q10u16 = vsubl_u8(d0u8, d1u8); + q11u16 = vsubl_u8(d2u8, d3u8); + q12u16 = vsubl_u8(d4u8, d5u8); + q13u16 = vsubl_u8(d6u8, d7u8); + + src_diff = be->src_diff; + vst1_u16((uint16_t *)src_diff, vget_low_u16(q10u16)); + src_diff += pitch; + vst1_u16((uint16_t *)src_diff, vget_low_u16(q11u16)); + src_diff += pitch; + vst1_u16((uint16_t *)src_diff, vget_low_u16(q12u16)); + src_diff += pitch; + vst1_u16((uint16_t *)src_diff, vget_low_u16(q13u16)); + return; +} + +void vp8_subtract_mby_neon( + int16_t *diff, + unsigned char *src, + int src_stride, + unsigned char *pred, + int pred_stride) { + int i; + uint8x16_t q0u8, q1u8, q2u8, q3u8; + uint16x8_t q8u16, q9u16, q10u16, q11u16; + + for (i = 0; i < 8; i++) { // subtract_mby_loop + q0u8 = vld1q_u8(src); + src += src_stride; + q2u8 = vld1q_u8(src); + src += src_stride; + q1u8 = vld1q_u8(pred); + pred += pred_stride; + q3u8 = vld1q_u8(pred); + pred += pred_stride; + + q8u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q1u8)); + q9u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q1u8)); + q10u16 = vsubl_u8(vget_low_u8(q2u8), vget_low_u8(q3u8)); + q11u16 = vsubl_u8(vget_high_u8(q2u8), vget_high_u8(q3u8)); + + vst1q_u16((uint16_t *)diff, q8u16); + diff += 8; + vst1q_u16((uint16_t *)diff, q9u16); + diff += 8; + vst1q_u16((uint16_t *)diff, q10u16); + diff += 8; + vst1q_u16((uint16_t *)diff, q11u16); + diff += 8; + } + return; +} + +void vp8_subtract_mbuv_neon( + int16_t *diff, + unsigned char *usrc, + unsigned char *vsrc, + int src_stride, + unsigned char *upred, + unsigned char *vpred, + int pred_stride) { + int i, j; + unsigned char *src_ptr, *pred_ptr; + uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8; + uint16x8_t q8u16, q9u16, q10u16, q11u16; + + diff += 256; + for (i = 0; i < 2; i++) { + if (i == 0) { + src_ptr = usrc; + pred_ptr = upred; + } else if (i == 1) { + src_ptr = vsrc; + pred_ptr = vpred; + } + + for (j = 0; j < 2; j++) { + d0u8 = vld1_u8(src_ptr); + src_ptr += src_stride; + d1u8 = vld1_u8(pred_ptr); + pred_ptr += pred_stride; + d2u8 = vld1_u8(src_ptr); + src_ptr += src_stride; + d3u8 = vld1_u8(pred_ptr); + pred_ptr += pred_stride; + d4u8 = vld1_u8(src_ptr); + src_ptr += src_stride; + d5u8 = vld1_u8(pred_ptr); + pred_ptr += pred_stride; + d6u8 = vld1_u8(src_ptr); + src_ptr += src_stride; + d7u8 = vld1_u8(pred_ptr); + pred_ptr += pred_stride; + + q8u16 = vsubl_u8(d0u8, d1u8); + q9u16 = vsubl_u8(d2u8, d3u8); + q10u16 = vsubl_u8(d4u8, d5u8); + q11u16 = vsubl_u8(d6u8, d7u8); + + vst1q_u16((uint16_t *)diff, q8u16); + diff += 8; + vst1q_u16((uint16_t *)diff, q9u16); + diff += 8; + vst1q_u16((uint16_t *)diff, q10u16); + diff += 8; + vst1q_u16((uint16_t *)diff, q11u16); + diff += 8; + } + } + return; +} diff --git a/source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm b/source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm deleted file mode 100644 index 2226629..0000000 --- a/source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm +++ /dev/null @@ -1,103 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_short_walsh4x4_neon| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -;void vp8_short_walsh4x4_neon(short *input, short *output, int pitch) -; r0 short *input, -; r1 short *output, -; r2 int pitch -|vp8_short_walsh4x4_neon| PROC - - vld1.16 {d0}, [r0@64], r2 ; load input - vld1.16 {d1}, [r0@64], r2 - vld1.16 {d2}, [r0@64], r2 - vld1.16 {d3}, [r0@64] - - ;First for-loop - ;transpose d0, d1, d2, d3. Then, d0=ip[0], d1=ip[1], d2=ip[2], d3=ip[3] - vtrn.32 d0, d2 - vtrn.32 d1, d3 - - vmov.s32 q15, #3 ; add 3 to all values - - vtrn.16 d0, d1 - vtrn.16 d2, d3 - - vadd.s16 d4, d0, d2 ; ip[0] + ip[2] - vadd.s16 d5, d1, d3 ; ip[1] + ip[3] - vsub.s16 d6, d1, d3 ; ip[1] - ip[3] - vsub.s16 d7, d0, d2 ; ip[0] - ip[2] - - vshl.s16 d4, d4, #2 ; a1 = (ip[0] + ip[2]) << 2 - vshl.s16 d5, d5, #2 ; d1 = (ip[1] + ip[3]) << 2 - vshl.s16 d6, d6, #2 ; c1 = (ip[1] - ip[3]) << 2 - vceq.s16 d16, d4, #0 ; a1 == 0 - vshl.s16 d7, d7, #2 ; b1 = (ip[0] - ip[2]) << 2 - - vadd.s16 d0, d4, d5 ; a1 + d1 - vmvn d16, d16 ; a1 != 0 - vsub.s16 d3, d4, d5 ; op[3] = a1 - d1 - vadd.s16 d1, d7, d6 ; op[1] = b1 + c1 - vsub.s16 d2, d7, d6 ; op[2] = b1 - c1 - vsub.s16 d0, d0, d16 ; op[0] = a1 + d1 + (a1 != 0) - - ;Second for-loop - ;transpose d0, d1, d2, d3, Then, d0=ip[0], d1=ip[4], d2=ip[8], d3=ip[12] - vtrn.32 d1, d3 - vtrn.32 d0, d2 - vtrn.16 d2, d3 - vtrn.16 d0, d1 - - vaddl.s16 q8, d0, d2 ; a1 = ip[0]+ip[8] - vaddl.s16 q9, d1, d3 ; d1 = ip[4]+ip[12] - vsubl.s16 q10, d1, d3 ; c1 = ip[4]-ip[12] - vsubl.s16 q11, d0, d2 ; b1 = ip[0]-ip[8] - - vadd.s32 q0, q8, q9 ; a2 = a1 + d1 - vadd.s32 q1, q11, q10 ; b2 = b1 + c1 - vsub.s32 q2, q11, q10 ; c2 = b1 - c1 - vsub.s32 q3, q8, q9 ; d2 = a1 - d1 - - vclt.s32 q8, q0, #0 - vclt.s32 q9, q1, #0 - vclt.s32 q10, q2, #0 - vclt.s32 q11, q3, #0 - - ; subtract -1 (or 0) - vsub.s32 q0, q0, q8 ; a2 += a2 < 0 - vsub.s32 q1, q1, q9 ; b2 += b2 < 0 - vsub.s32 q2, q2, q10 ; c2 += c2 < 0 - vsub.s32 q3, q3, q11 ; d2 += d2 < 0 - - vadd.s32 q8, q0, q15 ; a2 + 3 - vadd.s32 q9, q1, q15 ; b2 + 3 - vadd.s32 q10, q2, q15 ; c2 + 3 - vadd.s32 q11, q3, q15 ; d2 + 3 - - ; vrshrn? would add 1 << 3-1 = 2 - vshrn.s32 d0, q8, #3 - vshrn.s32 d1, q9, #3 - vshrn.s32 d2, q10, #3 - vshrn.s32 d3, q11, #3 - - vst1.16 {q0, q1}, [r1@128] - - bx lr - - ENDP - - END diff --git a/source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c b/source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c new file mode 100644 index 0000000..d6b67f8 --- /dev/null +++ b/source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <arm_neon.h> + +void vp8_short_walsh4x4_neon( + int16_t *input, + int16_t *output, + int pitch) { + uint16x4_t d16u16; + int16x8_t q0s16, q1s16; + int16x4_t dEmptys16, d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16; + int32x4_t qEmptys32, q0s32, q1s32, q2s32, q3s32, q8s32; + int32x4_t q9s32, q10s32, q11s32, q15s32; + uint32x4_t q8u32, q9u32, q10u32, q11u32; + int16x4x2_t v2tmp0, v2tmp1; + int32x2x2_t v2tmp2, v2tmp3; + + dEmptys16 = vdup_n_s16(0); + qEmptys32 = vdupq_n_s32(0); + q15s32 = vdupq_n_s32(3); + + d0s16 = vld1_s16(input); + input += pitch/2; + d1s16 = vld1_s16(input); + input += pitch/2; + d2s16 = vld1_s16(input); + input += pitch/2; + d3s16 = vld1_s16(input); + + v2tmp2 = vtrn_s32(vreinterpret_s32_s16(d0s16), + vreinterpret_s32_s16(d2s16)); + v2tmp3 = vtrn_s32(vreinterpret_s32_s16(d1s16), + vreinterpret_s32_s16(d3s16)); + v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[0]), // d0 + vreinterpret_s16_s32(v2tmp3.val[0])); // d1 + v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[1]), // d2 + vreinterpret_s16_s32(v2tmp3.val[1])); // d3 + + d4s16 = vadd_s16(v2tmp0.val[0], v2tmp1.val[0]); + d5s16 = vadd_s16(v2tmp0.val[1], v2tmp1.val[1]); + d6s16 = vsub_s16(v2tmp0.val[1], v2tmp1.val[1]); + d7s16 = vsub_s16(v2tmp0.val[0], v2tmp1.val[0]); + + d4s16 = vshl_n_s16(d4s16, 2); + d5s16 = vshl_n_s16(d5s16, 2); + d6s16 = vshl_n_s16(d6s16, 2); + d7s16 = vshl_n_s16(d7s16, 2); + + d16u16 = vceq_s16(d4s16, dEmptys16); + d16u16 = vmvn_u16(d16u16); + + d0s16 = vadd_s16(d4s16, d5s16); + d3s16 = vsub_s16(d4s16, d5s16); + d1s16 = vadd_s16(d7s16, d6s16); + d2s16 = vsub_s16(d7s16, d6s16); + + d0s16 = vsub_s16(d0s16, vreinterpret_s16_u16(d16u16)); + + // Second for-loop + v2tmp2 = vtrn_s32(vreinterpret_s32_s16(d1s16), + vreinterpret_s32_s16(d3s16)); + v2tmp3 = vtrn_s32(vreinterpret_s32_s16(d0s16), + vreinterpret_s32_s16(d2s16)); + v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp3.val[1]), // d2 + vreinterpret_s16_s32(v2tmp2.val[1])); // d3 + v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp3.val[0]), // d0 + vreinterpret_s16_s32(v2tmp2.val[0])); // d1 + + q8s32 = vaddl_s16(v2tmp1.val[0], v2tmp0.val[0]); + q9s32 = vaddl_s16(v2tmp1.val[1], v2tmp0.val[1]); + q10s32 = vsubl_s16(v2tmp1.val[1], v2tmp0.val[1]); + q11s32 = vsubl_s16(v2tmp1.val[0], v2tmp0.val[0]); + + q0s32 = vaddq_s32(q8s32, q9s32); + q1s32 = vaddq_s32(q11s32, q10s32); + q2s32 = vsubq_s32(q11s32, q10s32); + q3s32 = vsubq_s32(q8s32, q9s32); + + q8u32 = vcltq_s32(q0s32, qEmptys32); + q9u32 = vcltq_s32(q1s32, qEmptys32); + q10u32 = vcltq_s32(q2s32, qEmptys32); + q11u32 = vcltq_s32(q3s32, qEmptys32); + + q8s32 = vreinterpretq_s32_u32(q8u32); + q9s32 = vreinterpretq_s32_u32(q9u32); + q10s32 = vreinterpretq_s32_u32(q10u32); + q11s32 = vreinterpretq_s32_u32(q11u32); + + q0s32 = vsubq_s32(q0s32, q8s32); + q1s32 = vsubq_s32(q1s32, q9s32); + q2s32 = vsubq_s32(q2s32, q10s32); + q3s32 = vsubq_s32(q3s32, q11s32); + + q8s32 = vaddq_s32(q0s32, q15s32); + q9s32 = vaddq_s32(q1s32, q15s32); + q10s32 = vaddq_s32(q2s32, q15s32); + q11s32 = vaddq_s32(q3s32, q15s32); + + d0s16 = vshrn_n_s32(q8s32, 3); + d1s16 = vshrn_n_s32(q9s32, 3); + d2s16 = vshrn_n_s32(q10s32, 3); + d3s16 = vshrn_n_s32(q11s32, 3); + + q0s16 = vcombine_s16(d0s16, d1s16); + q1s16 = vcombine_s16(d2s16, d3s16); + + vst1q_s16(output, q0s16); + vst1q_s16(output + 8, q1s16); + return; +} diff --git a/source/libvpx/vp8/encoder/onyx_if.c b/source/libvpx/vp8/encoder/onyx_if.c index 93f8825..38b8999 100644 --- a/source/libvpx/vp8/encoder/onyx_if.c +++ b/source/libvpx/vp8/encoder/onyx_if.c @@ -615,19 +615,21 @@ static void cyclic_background_refresh(VP8_COMP *cpi, int Q, int lf_adjustment) cpi->cyclic_refresh_mode_index = i; #if CONFIG_TEMPORAL_DENOISING - if (cpi->denoiser.denoiser_mode == kDenoiserOnYUVAggressive && - Q < (int)cpi->denoiser.denoise_pars.qp_thresh) { - // Under aggressive denoising mode, use segmentation to turn off loop - // filter below some qp thresh. The loop filter is turned off for all - // blocks that have been encoded as ZEROMV LAST x frames in a row, - // where x is set by cpi->denoiser.denoise_pars.consec_zerolast. - // This is to avoid "dot" artifacts that can occur from repeated - // loop filtering on noisy input source. - cpi->cyclic_refresh_q = Q; - lf_adjustment = -MAX_LOOP_FILTER; - for (i = 0; i < mbs_in_frame; ++i) { - seg_map[i] = (cpi->consec_zero_last[i] > - cpi->denoiser.denoise_pars.consec_zerolast) ? 1 : 0; + if (cpi->oxcf.noise_sensitivity > 0) { + if (cpi->denoiser.denoiser_mode == kDenoiserOnYUVAggressive && + Q < (int)cpi->denoiser.denoise_pars.qp_thresh) { + // Under aggressive denoising, use segmentation to turn off loop + // filter below some qp thresh. The filter is turned off for all + // blocks that have been encoded as ZEROMV LAST x frames in a row, + // where x is set by cpi->denoiser.denoise_pars.consec_zerolast. + // This is to avoid "dot" artifacts that can occur from repeated + // loop filtering on noisy input source. + cpi->cyclic_refresh_q = Q; + lf_adjustment = -MAX_LOOP_FILTER; + for (i = 0; i < mbs_in_frame; ++i) { + seg_map[i] = (cpi->consec_zero_last[i] > + cpi->denoiser.denoise_pars.consec_zerolast) ? 1 : 0; + } } } #endif @@ -3301,15 +3303,17 @@ static void process_denoiser_mode_change(VP8_COMP *cpi) { int skip = 2; // Only select blocks for computing nmse that have been encoded // as ZERO LAST min_consec_zero_last frames in a row. - int min_consec_zero_last = 10; + // Scale with number of temporal layers. + int min_consec_zero_last = 8 / cpi->oxcf.number_of_layers; // Decision is tested for changing the denoising mode every // num_mode_change times this function is called. Note that this // function called every 8 frames, so (8 * num_mode_change) is number // of frames where denoising mode change is tested for switch. int num_mode_change = 15; // Framerate factor, to compensate for larger mse at lower framerates. - // TODO(marpan): Adjust this factor, - int fac_framerate = cpi->output_framerate < 25.0f ? 80 : 100; + // Use ref_framerate, which is full source framerate for temporal layers. + // TODO(marpan): Adjust this factor. + int fac_framerate = cpi->ref_framerate < 25.0f ? 80 : 100; int tot_num_blocks = cm->mb_rows * cm->mb_cols; int ystride = cpi->Source->y_stride; unsigned char *src = cpi->Source->y_buffer; @@ -3378,13 +3382,13 @@ static void process_denoiser_mode_change(VP8_COMP *cpi) { // num_mode_change. if (cpi->denoiser.nmse_source_diff_count == num_mode_change) { // Check for going up: from normal to aggressive mode. - if ((cpi->denoiser.denoiser_mode = kDenoiserOnYUV) && + if ((cpi->denoiser.denoiser_mode == kDenoiserOnYUV) && (cpi->denoiser.nmse_source_diff > cpi->denoiser.threshold_aggressive_mode)) { vp8_denoiser_set_parameters(&cpi->denoiser, kDenoiserOnYUVAggressive); } else { // Check for going down: from aggressive to normal mode. - if ((cpi->denoiser.denoiser_mode = kDenoiserOnYUVAggressive) && + if ((cpi->denoiser.denoiser_mode == kDenoiserOnYUVAggressive) && (cpi->denoiser.nmse_source_diff < cpi->denoiser.threshold_aggressive_mode)) { vp8_denoiser_set_parameters(&cpi->denoiser, kDenoiserOnYUV); diff --git a/source/libvpx/vp8/vp8_cx_iface.c b/source/libvpx/vp8/vp8_cx_iface.c index 0c522bd..2f394ef 100644 --- a/source/libvpx/vp8/vp8_cx_iface.c +++ b/source/libvpx/vp8/vp8_cx_iface.c @@ -650,7 +650,6 @@ static vpx_codec_err_t vp8e_init(vpx_codec_ctx_t *ctx, ctx->priv = &priv->base; ctx->priv->sz = sizeof(*ctx->priv); - ctx->priv->iface = ctx->iface; ctx->priv->alg_priv = priv; ctx->priv->init_flags = ctx->init_flags; diff --git a/source/libvpx/vp8/vp8_dx_iface.c b/source/libvpx/vp8/vp8_dx_iface.c index c76ac14..0deda50 100644 --- a/source/libvpx/vp8/vp8_dx_iface.c +++ b/source/libvpx/vp8/vp8_dx_iface.c @@ -84,7 +84,6 @@ static void vp8_init_ctx(vpx_codec_ctx_t *ctx) (vpx_codec_priv_t *)vpx_memalign(8, sizeof(vpx_codec_alg_priv_t)); vpx_memset(ctx->priv, 0, sizeof(vpx_codec_alg_priv_t)); ctx->priv->sz = sizeof(*ctx->priv); - ctx->priv->iface = ctx->iface; ctx->priv->alg_priv = (vpx_codec_alg_priv_t *)ctx->priv; ctx->priv->alg_priv->si.sz = sizeof(ctx->priv->alg_priv->si); ctx->priv->alg_priv->decrypt_cb = NULL; diff --git a/source/libvpx/vp8/vp8cx_arm.mk b/source/libvpx/vp8/vp8cx_arm.mk index 5733048..0b3eed0 100644 --- a/source/libvpx/vp8/vp8cx_arm.mk +++ b/source/libvpx/vp8/vp8cx_arm.mk @@ -38,9 +38,9 @@ VP8_CX_SRCS-$(HAVE_MEDIA) += encoder/arm/armv6/walsh_v6$(ASM) VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/fastquantizeb_neon$(ASM) VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/picklpf_arm.c VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/shortfdct_neon$(ASM) -VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/subtract_neon$(ASM) VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/vp8_mse16x16_neon$(ASM) VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/vp8_memcpy_neon$(ASM) -VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/vp8_shortwalsh4x4_neon$(ASM) VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/denoising_neon.c +VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp8_shortwalsh4x4_neon.c +VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/subtract_neon.c diff --git a/source/libvpx/vp9/common/vp9_rtcd_defs.pl b/source/libvpx/vp9/common/vp9_rtcd_defs.pl index 708f41b..92f9318 100644 --- a/source/libvpx/vp9/common/vp9_rtcd_defs.pl +++ b/source/libvpx/vp9/common/vp9_rtcd_defs.pl @@ -305,15 +305,15 @@ specialize qw/vp9_convolve_avg neon_asm dspr2/, "$sse2_x86inc"; $vp9_convolve_avg_neon_asm=vp9_convolve_avg_neon; add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; -specialize qw/vp9_convolve8 sse2 ssse3 neon_asm dspr2/; +specialize qw/vp9_convolve8 sse2 ssse3 neon_asm dspr2 avx2/; $vp9_convolve8_neon_asm=vp9_convolve8_neon; add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; -specialize qw/vp9_convolve8_horiz sse2 ssse3 neon_asm dspr2/; +specialize qw/vp9_convolve8_horiz sse2 ssse3 neon_asm dspr2 avx2/; $vp9_convolve8_horiz_neon_asm=vp9_convolve8_horiz_neon; add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; -specialize qw/vp9_convolve8_vert sse2 ssse3 neon_asm dspr2/; +specialize qw/vp9_convolve8_vert sse2 ssse3 neon_asm dspr2 avx2/; $vp9_convolve8_vert_neon_asm=vp9_convolve8_vert_neon; add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; diff --git a/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c b/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c index d109e13..3bc7d39 100644 --- a/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c +++ b/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c @@ -307,7 +307,7 @@ void vp9_filter_block1d16_v8_avx2(unsigned char *src_ptr, __m256i addFilterReg64; __m256i srcReg32b1, srcReg32b2, srcReg32b3, srcReg32b4, srcReg32b5; __m256i srcReg32b6, srcReg32b7, srcReg32b8, srcReg32b9, srcReg32b10; - __m256i srcReg32b11, srcReg32b12, srcReg32b13, filtersReg32; + __m256i srcReg32b11, srcReg32b12, filtersReg32; __m256i firstFilters, secondFilters, thirdFilters, forthFilters; unsigned int i; unsigned int src_stride, dst_stride; @@ -409,35 +409,35 @@ void vp9_filter_block1d16_v8_avx2(unsigned char *src_ptr, // multiply 2 adjacent elements with the filter and add the result srcReg32b10 = _mm256_maddubs_epi16(srcReg32b10, firstFilters); srcReg32b6 = _mm256_maddubs_epi16(srcReg32b4, forthFilters); - srcReg32b1 = _mm256_maddubs_epi16(srcReg32b1, firstFilters); - srcReg32b8 = _mm256_maddubs_epi16(srcReg32b7, forthFilters); // add and saturate the results together srcReg32b10 = _mm256_adds_epi16(srcReg32b10, srcReg32b6); - srcReg32b1 = _mm256_adds_epi16(srcReg32b1, srcReg32b8); - // multiply 2 adjacent elements with the filter and add the result srcReg32b8 = _mm256_maddubs_epi16(srcReg32b11, secondFilters); - srcReg32b6 = _mm256_maddubs_epi16(srcReg32b3, secondFilters); - - // multiply 2 adjacent elements with the filter and add the result srcReg32b12 = _mm256_maddubs_epi16(srcReg32b2, thirdFilters); - srcReg32b13 = _mm256_maddubs_epi16(srcReg32b5, thirdFilters); - // add and saturate the results together srcReg32b10 = _mm256_adds_epi16(srcReg32b10, _mm256_min_epi16(srcReg32b8, srcReg32b12)); - srcReg32b1 = _mm256_adds_epi16(srcReg32b1, - _mm256_min_epi16(srcReg32b6, srcReg32b13)); - - // add and saturate the results together srcReg32b10 = _mm256_adds_epi16(srcReg32b10, _mm256_max_epi16(srcReg32b8, srcReg32b12)); - srcReg32b1 = _mm256_adds_epi16(srcReg32b1, - _mm256_max_epi16(srcReg32b6, srcReg32b13)); + // multiply 2 adjacent elements with the filter and add the result + srcReg32b1 = _mm256_maddubs_epi16(srcReg32b1, firstFilters); + srcReg32b6 = _mm256_maddubs_epi16(srcReg32b7, forthFilters); + + srcReg32b1 = _mm256_adds_epi16(srcReg32b1, srcReg32b6); + + // multiply 2 adjacent elements with the filter and add the result + srcReg32b8 = _mm256_maddubs_epi16(srcReg32b3, secondFilters); + srcReg32b12 = _mm256_maddubs_epi16(srcReg32b5, thirdFilters); + + // add and saturate the results together + srcReg32b1 = _mm256_adds_epi16(srcReg32b1, + _mm256_min_epi16(srcReg32b8, srcReg32b12)); + srcReg32b1 = _mm256_adds_epi16(srcReg32b1, + _mm256_max_epi16(srcReg32b8, srcReg32b12)); srcReg32b10 = _mm256_adds_epi16(srcReg32b10, addFilterReg64); srcReg32b1 = _mm256_adds_epi16(srcReg32b1, addFilterReg64); diff --git a/source/libvpx/vp9/decoder/vp9_decoder.c b/source/libvpx/vp9/decoder/vp9_decoder.c index 1a41558..e79dcf3 100644 --- a/source/libvpx/vp9/decoder/vp9_decoder.c +++ b/source/libvpx/vp9/decoder/vp9_decoder.c @@ -314,3 +314,67 @@ int vp9_get_raw_frame(VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, vp9_clear_system_state(); return ret; } + +vpx_codec_err_t vp9_parse_superframe_index(const uint8_t *data, + size_t data_sz, + uint32_t sizes[8], int *count, + vpx_decrypt_cb decrypt_cb, + void *decrypt_state) { + // A chunk ending with a byte matching 0xc0 is an invalid chunk unless + // it is a super frame index. If the last byte of real video compression + // data is 0xc0 the encoder must add a 0 byte. If we have the marker but + // not the associated matching marker byte at the front of the index we have + // an invalid bitstream and need to return an error. + + uint8_t marker; + + assert(data_sz); + marker = read_marker(decrypt_cb, decrypt_state, data + data_sz - 1); + *count = 0; + + if ((marker & 0xe0) == 0xc0) { + const uint32_t frames = (marker & 0x7) + 1; + const uint32_t mag = ((marker >> 3) & 0x3) + 1; + const size_t index_sz = 2 + mag * frames; + + // This chunk is marked as having a superframe index but doesn't have + // enough data for it, thus it's an invalid superframe index. + if (data_sz < index_sz) + return VPX_CODEC_CORRUPT_FRAME; + + { + const uint8_t marker2 = read_marker(decrypt_cb, decrypt_state, + data + data_sz - index_sz); + + // This chunk is marked as having a superframe index but doesn't have + // the matching marker byte at the front of the index therefore it's an + // invalid chunk. + if (marker != marker2) + return VPX_CODEC_CORRUPT_FRAME; + } + + { + // Found a valid superframe index. + uint32_t i, j; + const uint8_t *x = &data[data_sz - index_sz + 1]; + + // Frames has a maximum of 8 and mag has a maximum of 4. + uint8_t clear_buffer[32]; + assert(sizeof(clear_buffer) >= frames * mag); + if (decrypt_cb) { + decrypt_cb(decrypt_state, x, clear_buffer, frames * mag); + x = clear_buffer; + } + + for (i = 0; i < frames; ++i) { + uint32_t this_sz = 0; + + for (j = 0; j < mag; ++j) + this_sz |= (*x++) << (j * 8); + sizes[i] = this_sz; + } + *count = frames; + } + } + return VPX_CODEC_OK; +} diff --git a/source/libvpx/vp9/decoder/vp9_decoder.h b/source/libvpx/vp9/decoder/vp9_decoder.h index 223b66f..848d212 100644 --- a/source/libvpx/vp9/decoder/vp9_decoder.h +++ b/source/libvpx/vp9/decoder/vp9_decoder.h @@ -78,6 +78,25 @@ struct VP9Decoder *vp9_decoder_create(); void vp9_decoder_remove(struct VP9Decoder *pbi); +static INLINE uint8_t read_marker(vpx_decrypt_cb decrypt_cb, + void *decrypt_state, + const uint8_t *data) { + if (decrypt_cb) { + uint8_t marker; + decrypt_cb(decrypt_state, data, &marker, 1); + return marker; + } + return *data; +} + +// This function is exposed for use in tests, as well as the inlined function +// "read_marker". +vpx_codec_err_t vp9_parse_superframe_index(const uint8_t *data, + size_t data_sz, + uint32_t sizes[8], int *count, + vpx_decrypt_cb decrypt_cb, + void *decrypt_state); + #ifdef __cplusplus } // extern "C" #endif diff --git a/source/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c b/source/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c index 2d5ec79..8c13d0d 100644 --- a/source/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c +++ b/source/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c @@ -28,7 +28,6 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { - int i; // TODO(jingning) Decide the need of these arguments after the // quantization process is completed. (void)zbin_ptr; @@ -39,7 +38,7 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count, if (!skip_block) { // Quantization pass: All coefficients with index >= zero_flag are // skippable. Note: zero_flag can be zero. - + int i; const int16x8_t v_zero = vdupq_n_s16(0); const int16x8_t v_one = vdupq_n_s16(1); int16x8_t v_eobmax_76543210 = vdupq_n_s16(-1); @@ -50,13 +49,37 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count, v_round = vsetq_lane_s16(round_ptr[0], v_round, 0); v_quant = vsetq_lane_s16(quant_ptr[0], v_quant, 0); v_dequant = vsetq_lane_s16(dequant_ptr[0], v_dequant, 0); - - for (i = 0; i < count; i += 8) { + // process dc and the first seven ac coeffs + { + const int16x8_t v_iscan = vld1q_s16(&iscan[0]); + const int16x8_t v_coeff = vld1q_s16(&coeff_ptr[0]); + const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15); + const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero); + const int32x4_t v_tmp_lo = vmull_s16(vget_low_s16(v_tmp), + vget_low_s16(v_quant)); + const int32x4_t v_tmp_hi = vmull_s16(vget_high_s16(v_tmp), + vget_high_s16(v_quant)); + const int16x8_t v_tmp2 = vcombine_s16(vshrn_n_s32(v_tmp_lo, 16), + vshrn_n_s32(v_tmp_hi, 16)); + const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero); + const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one); + const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1); + const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign); + const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign); + const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant); + v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan); + vst1q_s16(&qcoeff_ptr[0], v_qcoeff); + vst1q_s16(&dqcoeff_ptr[0], v_dqcoeff); + v_round = vmovq_n_s16(round_ptr[1]); + v_quant = vmovq_n_s16(quant_ptr[1]); + v_dequant = vmovq_n_s16(dequant_ptr[1]); + } + // now process the rest of the ac coeffs + for (i = 8; i < count; i += 8) { const int16x8_t v_iscan = vld1q_s16(&iscan[i]); const int16x8_t v_coeff = vld1q_s16(&coeff_ptr[i]); const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15); - const int16x8_t v_abs_coeff = vabsq_s16(v_coeff); - const int16x8_t v_tmp = vqaddq_s16(v_abs_coeff, v_round); + const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero); const int32x4_t v_tmp_lo = vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant)); const int32x4_t v_tmp_hi = vmull_s16(vget_high_s16(v_tmp), @@ -65,19 +88,13 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count, vshrn_n_s32(v_tmp_hi, 16)); const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero); const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one); - const int16x8_t v_nz_iscan = - vandq_s16(vmvnq_s16(vreinterpretq_s16_u16(v_nz_mask)), v_iscan_plus1); + const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1); const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign); const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign); const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant); - v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan); - vst1q_s16(&qcoeff_ptr[i], v_qcoeff); vst1q_s16(&dqcoeff_ptr[i], v_dqcoeff); - v_round = vmovq_n_s16(round_ptr[1]); - v_quant = vmovq_n_s16(quant_ptr[1]); - v_dequant = vmovq_n_s16(dequant_ptr[1]); } { const int16x4_t v_eobmax_3210 = diff --git a/source/libvpx/vp9/encoder/vp9_encodeframe.c b/source/libvpx/vp9/encoder/vp9_encodeframe.c index a304732..711354b 100644 --- a/source/libvpx/vp9/encoder/vp9_encodeframe.c +++ b/source/libvpx/vp9/encoder/vp9_encodeframe.c @@ -1723,7 +1723,8 @@ static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = { // function so repeat calls can accumulate a min and max of more than one sb64. static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO **mi_8x8, BLOCK_SIZE *min_block_size, - BLOCK_SIZE *max_block_size ) { + BLOCK_SIZE *max_block_size, + int bs_hist[BLOCK_SIZES]) { int sb_width_in_blocks = MI_BLOCK_SIZE; int sb_height_in_blocks = MI_BLOCK_SIZE; int i, j; @@ -1734,6 +1735,7 @@ static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO **mi_8x8, for (j = 0; j < sb_width_in_blocks; ++j) { MODE_INFO * mi = mi_8x8[index+j]; BLOCK_SIZE sb_type = mi ? mi->mbmi.sb_type : 0; + bs_hist[sb_type]++; *min_block_size = MIN(*min_block_size, sb_type); *max_block_size = MAX(*max_block_size, sb_type); } @@ -1766,6 +1768,9 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, int bh, bw; BLOCK_SIZE min_size = BLOCK_4X4; BLOCK_SIZE max_size = BLOCK_64X64; + int i = 0; + int bs_hist[BLOCK_SIZES] = {0}; + // Trap case where we do not have a prediction. if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) { // Default "min to max" and "max to min" @@ -1778,22 +1783,51 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, if (cm->frame_type != KEY_FRAME) { MODE_INFO **const prev_mi = &cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col]; - get_sb_partition_size_range(xd, prev_mi, &min_size, &max_size); + get_sb_partition_size_range(xd, prev_mi, &min_size, &max_size, bs_hist); } // Find the min and max partition sizes used in the left SB64 if (left_in_image) { MODE_INFO **left_sb64_mi = &mi[-MI_BLOCK_SIZE]; - get_sb_partition_size_range(xd, left_sb64_mi, &min_size, &max_size); + get_sb_partition_size_range(xd, left_sb64_mi, &min_size, &max_size, + bs_hist); } // Find the min and max partition sizes used in the above SB64. if (above_in_image) { MODE_INFO **above_sb64_mi = &mi[-xd->mi_stride * MI_BLOCK_SIZE]; - get_sb_partition_size_range(xd, above_sb64_mi, &min_size, &max_size); + get_sb_partition_size_range(xd, above_sb64_mi, &min_size, &max_size, + bs_hist); } + // adjust observed min and max if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) { min_size = min_partition_size[min_size]; max_size = max_partition_size[max_size]; + } else if (cpi->sf.auto_min_max_partition_size == + CONSTRAIN_NEIGHBORING_MIN_MAX) { + // adjust the search range based on the histogram of the observed + // partition sizes from left, above the previous co-located blocks + int sum = 0; + int first_moment = 0; + int second_moment = 0; + int var_unnormalized = 0; + + for (i = 0; i < BLOCK_SIZES; i++) { + sum += bs_hist[i]; + first_moment += bs_hist[i] * i; + second_moment += bs_hist[i] * i * i; + } + + // if variance is small enough, + // adjust the range around its mean size, which gives a tighter range + var_unnormalized = second_moment - first_moment * first_moment / sum; + if (var_unnormalized <= 4 * sum) { + int mean = first_moment / sum; + min_size = min_partition_size[mean]; + max_size = max_partition_size[mean]; + } else { + min_size = min_partition_size[min_size]; + max_size = max_partition_size[max_size]; + } } } @@ -1810,6 +1844,7 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, next_square_size[max_size] < min_size) { min_size = next_square_size[max_size]; } + *min_block_size = min_size; *max_block_size = max_size; } diff --git a/source/libvpx/vp9/encoder/vp9_encoder.c b/source/libvpx/vp9/encoder/vp9_encoder.c index 1405bf3..d27620c 100644 --- a/source/libvpx/vp9/encoder/vp9_encoder.c +++ b/source/libvpx/vp9/encoder/vp9_encoder.c @@ -489,14 +489,6 @@ void vp9_new_framerate(VP9_COMP *cpi, double framerate) { vp9_rc_update_framerate(cpi); } -int64_t vp9_rescale(int64_t val, int64_t num, int denom) { - int64_t llnum = num; - int64_t llden = denom; - int64_t llval = val; - - return (llval * llnum / llden); -} - static void set_tile_limits(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; @@ -533,10 +525,8 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) { // Temporal scalability. cpi->svc.number_temporal_layers = oxcf->ts_number_layers; - if ((cpi->svc.number_temporal_layers > 1 && - cpi->oxcf.rc_mode == VPX_CBR) || - (cpi->svc.number_spatial_layers > 1 && - cpi->oxcf.mode == TWO_PASS_SECOND_BEST)) { + if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) || + (cpi->svc.number_spatial_layers > 1 && cpi->oxcf.pass == 2)) { vp9_init_layer_context(cpi); } @@ -551,6 +541,20 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) { set_tile_limits(cpi); } +static void set_rc_buffer_sizes(RATE_CONTROL *rc, + const VP9EncoderConfig *oxcf) { + const int64_t bandwidth = oxcf->target_bandwidth; + const int64_t starting = oxcf->starting_buffer_level_ms; + const int64_t optimal = oxcf->optimal_buffer_level_ms; + const int64_t maximum = oxcf->maximum_buffer_size_ms; + + rc->starting_buffer_level = starting * bandwidth / 1000; + rc->optimal_buffer_level = (optimal == 0) ? bandwidth / 8 + : optimal * bandwidth / 1000; + rc->maximum_buffer_size = (maximum == 0) ? bandwidth / 8 + : maximum * bandwidth / 1000; +} + void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; @@ -584,28 +588,8 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { } cpi->encode_breakout = cpi->oxcf.encode_breakout; - // local file playback mode == really big buffer - if (cpi->oxcf.rc_mode == VPX_VBR) { - cpi->oxcf.starting_buffer_level_ms = 60000; - cpi->oxcf.optimal_buffer_level_ms = 60000; - cpi->oxcf.maximum_buffer_size_ms = 240000; - } - - rc->starting_buffer_level = vp9_rescale(cpi->oxcf.starting_buffer_level_ms, - cpi->oxcf.target_bandwidth, 1000); + set_rc_buffer_sizes(rc, &cpi->oxcf); - // Set or reset optimal and maximum buffer levels. - if (cpi->oxcf.optimal_buffer_level_ms == 0) - rc->optimal_buffer_level = cpi->oxcf.target_bandwidth / 8; - else - rc->optimal_buffer_level = vp9_rescale(cpi->oxcf.optimal_buffer_level_ms, - cpi->oxcf.target_bandwidth, 1000); - - if (cpi->oxcf.maximum_buffer_size_ms == 0) - rc->maximum_buffer_size = cpi->oxcf.target_bandwidth / 8; - else - rc->maximum_buffer_size = vp9_rescale(cpi->oxcf.maximum_buffer_size_ms, - cpi->oxcf.target_bandwidth, 1000); // Under a configuration change, where maximum_buffer_size may change, // keep buffer level clipped to the maximum allowed buffer size. rc->bits_off_target = MIN(rc->bits_off_target, rc->maximum_buffer_size); @@ -731,11 +715,6 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) { vp9_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc); cm->current_video_frame = 0; - - cpi->gold_is_last = 0; - cpi->alt_is_last = 0; - cpi->gold_is_alt = 0; - cpi->skippable_frame = 0; // Create the encoder segmentation map and set all entries to 0 @@ -1916,36 +1895,27 @@ static void encode_with_recode_loop(VP9_COMP *cpi, } while (loop); } -static void get_ref_frame_flags(VP9_COMP *cpi) { - if (cpi->refresh_last_frame & cpi->refresh_golden_frame) - cpi->gold_is_last = 1; - else if (cpi->refresh_last_frame ^ cpi->refresh_golden_frame) - cpi->gold_is_last = 0; - - if (cpi->refresh_last_frame & cpi->refresh_alt_ref_frame) - cpi->alt_is_last = 1; - else if (cpi->refresh_last_frame ^ cpi->refresh_alt_ref_frame) - cpi->alt_is_last = 0; - - if (cpi->refresh_alt_ref_frame & cpi->refresh_golden_frame) - cpi->gold_is_alt = 1; - else if (cpi->refresh_alt_ref_frame ^ cpi->refresh_golden_frame) - cpi->gold_is_alt = 0; +static int get_ref_frame_flags(const VP9_COMP *cpi) { + const int *const map = cpi->common.ref_frame_map; + const int gold_is_last = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idx]; + const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idx]; + const int gold_is_alt = map[cpi->gld_fb_idx] == map[cpi->alt_fb_idx]; + int flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG; - cpi->ref_frame_flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG; - - if (cpi->gold_is_last) - cpi->ref_frame_flags &= ~VP9_GOLD_FLAG; + if (gold_is_last) + flags &= ~VP9_GOLD_FLAG; if (cpi->rc.frames_till_gf_update_due == INT_MAX && !is_spatial_svc(cpi)) - cpi->ref_frame_flags &= ~VP9_GOLD_FLAG; + flags &= ~VP9_GOLD_FLAG; + + if (alt_is_last) + flags &= ~VP9_ALT_FLAG; - if (cpi->alt_is_last) - cpi->ref_frame_flags &= ~VP9_ALT_FLAG; + if (gold_is_alt) + flags &= ~VP9_ALT_FLAG; - if (cpi->gold_is_alt) - cpi->ref_frame_flags &= ~VP9_ALT_FLAG; + return flags; } static void set_ext_overrides(VP9_COMP *cpi) { @@ -2014,19 +1984,41 @@ static void set_arf_sign_bias(VP9_COMP *cpi) { cm->ref_frame_sign_bias[ALTREF_FRAME] = arf_sign_bias; } +static void set_mv_search_params(VP9_COMP *cpi) { + const VP9_COMMON *const cm = &cpi->common; + const unsigned int max_mv_def = MIN(cm->width, cm->height); + + // Default based on max resolution. + cpi->mv_step_param = vp9_init_search_range(max_mv_def); + + if (cpi->sf.mv.auto_mv_step_size) { + if (frame_is_intra_only(cm)) { + // Initialize max_mv_magnitude for use in the first INTER frame + // after a key/intra-only frame. + cpi->max_mv_magnitude = max_mv_def; + } else { + if (cm->show_frame) + // Allow mv_steps to correspond to twice the max mv magnitude found + // in the previous frame, capped by the default max_mv_magnitude based + // on resolution. + cpi->mv_step_param = + vp9_init_search_range(MIN(max_mv_def, 2 * cpi->max_mv_magnitude)); + cpi->max_mv_magnitude = 0; + } + } +} + static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, uint8_t *dest, unsigned int *frame_flags) { VP9_COMMON *const cm = &cpi->common; + struct segmentation *const seg = &cm->seg; TX_SIZE t; int q; int top_index; int bottom_index; - const SPEED_FEATURES *const sf = &cpi->sf; - const unsigned int max_mv_def = MIN(cm->width, cm->height); - struct segmentation *const seg = &cm->seg; set_ext_overrides(cpi); cpi->Source = vp9_scale_if_required(cm, cpi->un_scaled_source, @@ -2052,24 +2044,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // Set default state for segment based loop filter update flags. cm->lf.mode_ref_delta_update = 0; - // Initialize cpi->mv_step_param to default based on max resolution. - cpi->mv_step_param = vp9_init_search_range(max_mv_def); - // Initialize cpi->max_mv_magnitude and cpi->mv_step_param if appropriate. - if (sf->mv.auto_mv_step_size) { - if (frame_is_intra_only(cm)) { - // Initialize max_mv_magnitude for use in the first INTER frame - // after a key/intra-only frame. - cpi->max_mv_magnitude = max_mv_def; - } else { - if (cm->show_frame) - // Allow mv_steps to correspond to twice the max mv magnitude found - // in the previous frame, capped by the default max_mv_magnitude based - // on resolution. - cpi->mv_step_param = vp9_init_search_range(MIN(max_mv_def, 2 * - cpi->max_mv_magnitude)); - cpi->max_mv_magnitude = 0; - } - } + set_mv_search_params(cpi); // Set various flags etc to special state if it is a key frame. if (frame_is_intra_only(cm)) { @@ -2247,7 +2222,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, else cpi->frame_flags &= ~FRAMEFLAGS_ALTREF; - get_ref_frame_flags(cpi); + cpi->ref_frame_flags = get_ref_frame_flags(cpi); cm->last_frame_type = cm->frame_type; vp9_rc_postencode_update(cpi, *size); diff --git a/source/libvpx/vp9/encoder/vp9_encoder.h b/source/libvpx/vp9/encoder/vp9_encoder.h index 33e9adb..82be0f4 100644 --- a/source/libvpx/vp9/encoder/vp9_encoder.h +++ b/source/libvpx/vp9/encoder/vp9_encoder.h @@ -82,36 +82,19 @@ typedef enum { } VPX_SCALING; typedef enum { - // Good Quality Fast Encoding. The encoder balances quality with the - // amount of time it takes to encode the output. (speed setting - // controls how fast) - ONE_PASS_GOOD = 1, - - // One Pass - Best Quality. The encoder places priority on the - // quality of the output over encoding speed. The output is compressed - // at the highest possible quality. This option takes the longest - // amount of time to encode. (speed setting ignored) - ONE_PASS_BEST = 2, - - // Two Pass - First Pass. The encoder generates a file of statistics - // for use in the second encoding pass. (speed setting controls how fast) - TWO_PASS_FIRST = 3, - - // Two Pass - Second Pass. The encoder uses the statistics that were - // generated in the first encoding pass to create the compressed - // output. (speed setting controls how fast) - TWO_PASS_SECOND_GOOD = 4, - - // Two Pass - Second Pass Best. The encoder uses the statistics that - // were generated in the first encoding pass to create the compressed - // output using the highest possible quality, and taking a - // longer amount of time to encode. (speed setting ignored) - TWO_PASS_SECOND_BEST = 5, - - // Realtime/Live Encoding. This mode is optimized for realtime - // encoding (for example, capturing a television signal or feed from - // a live camera). (speed setting controls how fast) - REALTIME = 6, + // Good Quality Fast Encoding. The encoder balances quality with the amount of + // time it takes to encode the output. Speed setting controls how fast. + GOOD, + + // The encoder places priority on the quality of the output over encoding + // speed. The output is compressed at the highest possible quality. This + // option takes the longest amount of time to encode. Speed setting ignored. + BEST, + + // Realtime/Live Encoding. This mode is optimized for realtime encoding (for + // example, capturing a television signal or feed from a live camera). Speed + // setting controls how fast. + REALTIME } MODE; typedef enum { @@ -241,7 +224,7 @@ static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) { } static INLINE int is_best_mode(MODE mode) { - return mode == ONE_PASS_BEST || mode == TWO_PASS_SECOND_BEST; + return mode == BEST; } typedef struct VP9_COMP { @@ -261,10 +244,6 @@ typedef struct VP9_COMP { YV12_BUFFER_CONFIG *unscaled_last_source; YV12_BUFFER_CONFIG scaled_last_source; - int gold_is_last; // gold same as last frame ( short circuit gold searches) - int alt_is_last; // Alt same as last ( short circuit altref search) - int gold_is_alt; // don't do both alt and gold search ( just do gold). - int skippable_frame; int scaled_ref_idx[3]; @@ -495,14 +474,6 @@ static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer( .buf; } -// Intra only frames, golden frames (except alt ref overlays) and -// alt ref frames tend to be coded at a higher than ambient quality -static INLINE int frame_is_boosted(const VP9_COMP *cpi) { - return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame || - (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref) || - vp9_is_upper_layer_key_frame(cpi); -} - static INLINE int get_token_alloc(int mb_rows, int mb_cols) { // TODO(JBB): double check we can't exceed this token count if we have a // 32x32 transform crossing a boundary at a multiple of 16. @@ -520,8 +491,6 @@ void vp9_scale_references(VP9_COMP *cpi); void vp9_update_reference_frames(VP9_COMP *cpi); -int64_t vp9_rescale(int64_t val, int64_t num, int denom); - void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv); YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm, diff --git a/source/libvpx/vp9/encoder/vp9_firstpass.c b/source/libvpx/vp9/encoder/vp9_firstpass.c index 295e437..94bbe9c 100644 --- a/source/libvpx/vp9/encoder/vp9_firstpass.c +++ b/source/libvpx/vp9/encoder/vp9_firstpass.c @@ -432,6 +432,8 @@ void vp9_first_pass(VP9_COMP *cpi) { TWO_PASS *twopass = &cpi->twopass; const MV zero_mv = {0, 0}; const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12; + LAYER_CONTEXT *const lc = is_spatial_svc(cpi) ? + &cpi->svc.layer_context[cpi->svc.spatial_layer_id] : 0; #if CONFIG_FP_MB_STATS if (cpi->use_fp_mb_stats) { @@ -444,15 +446,14 @@ void vp9_first_pass(VP9_COMP *cpi) { set_first_pass_params(cpi); vp9_set_quantizer(cm, find_fp_qindex()); - if (is_spatial_svc(cpi)) { + if (lc != NULL) { MV_REFERENCE_FRAME ref_frame = LAST_FRAME; const YV12_BUFFER_CONFIG *scaled_ref_buf = NULL; - twopass = &cpi->svc.layer_context[cpi->svc.spatial_layer_id].twopass; + twopass = &lc->twopass; if (cpi->common.current_video_frame == 0) { cpi->ref_frame_flags = 0; } else { - LAYER_CONTEXT *lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; if (lc->current_video_frame_in_layer == 0) cpi->ref_frame_flags = VP9_GOLD_FLAG; else @@ -613,7 +614,7 @@ void vp9_first_pass(VP9_COMP *cpi) { &unscaled_last_source_buf_2d); // TODO(pengchong): Replace the hard-coded threshold - if (raw_motion_error > 25 || is_spatial_svc(cpi)) { + if (raw_motion_error > 25 || lc != NULL) { // Test last reference frame using the previous best mv as the // starting point (best reference) for the search. first_pass_motion_search(cpi, x, &best_ref_mv.as_mv, &mv.as_mv, @@ -895,7 +896,7 @@ void vp9_first_pass(VP9_COMP *cpi) { vp9_extend_frame_borders(new_yv12); - if (is_spatial_svc(cpi)) { + if (lc != NULL) { vp9_update_reference_frames(cpi); } else { // Swap frame pointers so last frame refers to the frame we just compressed. @@ -1081,8 +1082,7 @@ static double get_prediction_decay_rate(const VP9_COMMON *cm, // This function gives an estimate of how badly we believe the prediction // quality is decaying from frame to frame. -static double get_zero_motion_factor(const VP9_COMMON *cm, - const FIRSTPASS_STATS *frame) { +static double get_zero_motion_factor(const FIRSTPASS_STATS *frame) { const double sr_ratio = frame->coded_error / DOUBLE_DIVIDE_CHECK(frame->sr_coded_error); const double zero_motion_pct = frame->pcnt_inter - @@ -1095,12 +1095,10 @@ static double get_zero_motion_factor(const VP9_COMMON *cm, // Function to test for a condition where a complex transition is followed // by a static section. For example in slide shows where there is a fade // between slides. This is to help with more optimal kf and gf positioning. -static int detect_transition_to_still(TWO_PASS *twopass, +static int detect_transition_to_still(const TWO_PASS *twopass, int frame_interval, int still_interval, double loop_decay_rate, double last_decay_rate) { - int trans_to_still = 0; - // Break clause to detect very still sections after motion // For example a static image after a fade or other transition // instead of a clean scene cut. @@ -1108,26 +1106,22 @@ static int detect_transition_to_still(TWO_PASS *twopass, loop_decay_rate >= 0.999 && last_decay_rate < 0.9) { int j; - const FIRSTPASS_STATS *position = twopass->stats_in; - FIRSTPASS_STATS tmp_next_frame; // Look ahead a few frames to see if static condition persists... for (j = 0; j < still_interval; ++j) { - if (EOF == input_stats(twopass, &tmp_next_frame)) + const FIRSTPASS_STATS *stats = &twopass->stats_in[j]; + if (stats >= twopass->stats_in_end) break; - if (tmp_next_frame.pcnt_inter - tmp_next_frame.pcnt_motion < 0.999) + if (stats->pcnt_inter - stats->pcnt_motion < 0.999) break; } - reset_fpf_position(twopass, position); - // Only if it does do we signal a transition to still. - if (j == still_interval) - trans_to_still = 1; + return j == still_interval; } - return trans_to_still; + return 0; } // This function detects a flash through the high relative pcnt_second_ref @@ -1373,7 +1367,8 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, double group_error, int gf_arf_bits) { RATE_CONTROL *const rc = &cpi->rc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; - TWO_PASS *twopass = &cpi->twopass; + TWO_PASS *const twopass = &cpi->twopass; + GF_GROUP *const gf_group = &twopass->gf_group; FIRSTPASS_STATS frame_stats; int i; int frame_index = 1; @@ -1396,17 +1391,17 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, // is also the golden frame. if (!key_frame) { if (rc->source_alt_ref_active) { - twopass->gf_group.update_type[0] = OVERLAY_UPDATE; - twopass->gf_group.rf_level[0] = INTER_NORMAL; - twopass->gf_group.bit_allocation[0] = 0; - twopass->gf_group.arf_update_idx[0] = arf_buffer_indices[0]; - twopass->gf_group.arf_ref_idx[0] = arf_buffer_indices[0]; + gf_group->update_type[0] = OVERLAY_UPDATE; + gf_group->rf_level[0] = INTER_NORMAL; + gf_group->bit_allocation[0] = 0; + gf_group->arf_update_idx[0] = arf_buffer_indices[0]; + gf_group->arf_ref_idx[0] = arf_buffer_indices[0]; } else { - twopass->gf_group.update_type[0] = GF_UPDATE; - twopass->gf_group.rf_level[0] = GF_ARF_STD; - twopass->gf_group.bit_allocation[0] = gf_arf_bits; - twopass->gf_group.arf_update_idx[0] = arf_buffer_indices[0]; - twopass->gf_group.arf_ref_idx[0] = arf_buffer_indices[0]; + gf_group->update_type[0] = GF_UPDATE; + gf_group->rf_level[0] = GF_ARF_STD; + gf_group->bit_allocation[0] = gf_arf_bits; + gf_group->arf_update_idx[0] = arf_buffer_indices[0]; + gf_group->arf_ref_idx[0] = arf_buffer_indices[0]; } // Step over the golden frame / overlay frame @@ -1421,25 +1416,25 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, // Store the bits to spend on the ARF if there is one. if (rc->source_alt_ref_pending) { - twopass->gf_group.update_type[frame_index] = ARF_UPDATE; - twopass->gf_group.rf_level[frame_index] = GF_ARF_STD; - twopass->gf_group.bit_allocation[frame_index] = gf_arf_bits; - twopass->gf_group.arf_src_offset[frame_index] = + gf_group->update_type[frame_index] = ARF_UPDATE; + gf_group->rf_level[frame_index] = GF_ARF_STD; + gf_group->bit_allocation[frame_index] = gf_arf_bits; + gf_group->arf_src_offset[frame_index] = (unsigned char)(rc->baseline_gf_interval - 1); - twopass->gf_group.arf_update_idx[frame_index] = arf_buffer_indices[0]; - twopass->gf_group.arf_ref_idx[frame_index] = + gf_group->arf_update_idx[frame_index] = arf_buffer_indices[0]; + gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[cpi->multi_arf_last_grp_enabled && rc->source_alt_ref_active]; ++frame_index; if (cpi->multi_arf_enabled) { // Set aside a slot for a level 1 arf. - twopass->gf_group.update_type[frame_index] = ARF_UPDATE; - twopass->gf_group.rf_level[frame_index] = GF_ARF_LOW; - twopass->gf_group.arf_src_offset[frame_index] = + gf_group->update_type[frame_index] = ARF_UPDATE; + gf_group->rf_level[frame_index] = GF_ARF_LOW; + gf_group->arf_src_offset[frame_index] = (unsigned char)((rc->baseline_gf_interval >> 1) - 1); - twopass->gf_group.arf_update_idx[frame_index] = arf_buffer_indices[1]; - twopass->gf_group.arf_ref_idx[frame_index] = arf_buffer_indices[0]; + gf_group->arf_update_idx[frame_index] = arf_buffer_indices[1]; + gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[0]; ++frame_index; } } @@ -1469,16 +1464,16 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, if (frame_index <= mid_frame_idx) arf_idx = 1; } - twopass->gf_group.arf_update_idx[frame_index] = arf_buffer_indices[arf_idx]; - twopass->gf_group.arf_ref_idx[frame_index] = arf_buffer_indices[arf_idx]; + gf_group->arf_update_idx[frame_index] = arf_buffer_indices[arf_idx]; + gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[arf_idx]; target_frame_size = clamp(target_frame_size, 0, MIN(max_bits, (int)total_group_bits)); - twopass->gf_group.update_type[frame_index] = LF_UPDATE; - twopass->gf_group.rf_level[frame_index] = INTER_NORMAL; + gf_group->update_type[frame_index] = LF_UPDATE; + gf_group->rf_level[frame_index] = INTER_NORMAL; - twopass->gf_group.bit_allocation[frame_index] = target_frame_size; + gf_group->bit_allocation[frame_index] = target_frame_size; ++frame_index; } @@ -1486,23 +1481,23 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, // We need to configure the frame at the end of the sequence + 1 that will be // the start frame for the next group. Otherwise prior to the call to // vp9_rc_get_second_pass_params() the data will be undefined. - twopass->gf_group.arf_update_idx[frame_index] = arf_buffer_indices[0]; - twopass->gf_group.arf_ref_idx[frame_index] = arf_buffer_indices[0]; + gf_group->arf_update_idx[frame_index] = arf_buffer_indices[0]; + gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[0]; if (rc->source_alt_ref_pending) { - twopass->gf_group.update_type[frame_index] = OVERLAY_UPDATE; - twopass->gf_group.rf_level[frame_index] = INTER_NORMAL; + gf_group->update_type[frame_index] = OVERLAY_UPDATE; + gf_group->rf_level[frame_index] = INTER_NORMAL; // Final setup for second arf and its overlay. if (cpi->multi_arf_enabled) { - twopass->gf_group.bit_allocation[2] = - twopass->gf_group.bit_allocation[mid_frame_idx] + mid_boost_bits; - twopass->gf_group.update_type[mid_frame_idx] = OVERLAY_UPDATE; - twopass->gf_group.bit_allocation[mid_frame_idx] = 0; + gf_group->bit_allocation[2] = + gf_group->bit_allocation[mid_frame_idx] + mid_boost_bits; + gf_group->update_type[mid_frame_idx] = OVERLAY_UPDATE; + gf_group->bit_allocation[mid_frame_idx] = 0; } } else { - twopass->gf_group.update_type[frame_index] = GF_UPDATE; - twopass->gf_group.rf_level[frame_index] = GF_ARF_STD; + gf_group->update_type[frame_index] = GF_UPDATE; + gf_group->rf_level[frame_index] = GF_ARF_STD; } // Note whether multi-arf was enabled this group for next time. @@ -1554,8 +1549,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { vp9_clear_system_state(); vp9_zero(next_frame); - gf_group_bits = 0; - // Load stats for the current frame. mod_frame_err = calculate_modified_err(twopass, oxcf, this_frame); @@ -1615,9 +1608,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { decay_accumulator = decay_accumulator * loop_decay_rate; // Monitor for static sections. - zero_motion_accumulator = - MIN(zero_motion_accumulator, - get_zero_motion_factor(&cpi->common, &next_frame)); + zero_motion_accumulator = MIN(zero_motion_accumulator, + get_zero_motion_factor(&next_frame)); // Break clause to detect very still sections after motion. For example, // a static image after a fade or other transition. @@ -1831,6 +1823,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { int i, j; RATE_CONTROL *const rc = &cpi->rc; TWO_PASS *const twopass = &cpi->twopass; + GF_GROUP *const gf_group = &twopass->gf_group; const VP9EncoderConfig *const oxcf = &cpi->oxcf; const FIRSTPASS_STATS first_frame = *this_frame; const FIRSTPASS_STATS *const start_position = twopass->stats_in; @@ -1849,7 +1842,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { cpi->common.frame_type = KEY_FRAME; // Reset the GF group data structures. - vp9_zero(twopass->gf_group); + vp9_zero(*gf_group); // Is this a forced key frame by interval. rc->this_key_frame_forced = rc->next_key_frame_forced; @@ -1987,9 +1980,8 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { break; // Monitor for static sections. - zero_motion_accumulator = - MIN(zero_motion_accumulator, - get_zero_motion_factor(&cpi->common, &next_frame)); + zero_motion_accumulator =MIN(zero_motion_accumulator, + get_zero_motion_factor(&next_frame)); // For the first few frames collect data to decide kf boost. if (i <= (rc->max_gf_interval * 2)) { @@ -2040,9 +2032,9 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { twopass->kf_group_bits -= kf_bits; // Save the bits to spend on the key frame. - twopass->gf_group.bit_allocation[0] = kf_bits; - twopass->gf_group.update_type[0] = KF_UPDATE; - twopass->gf_group.rf_level[0] = KF_STD; + gf_group->bit_allocation[0] = kf_bits; + gf_group->update_type[0] = KF_UPDATE; + gf_group->rf_level[0] = KF_STD; // Note the total error score of the kf group minus the key frame itself. twopass->kf_group_error_left = (int)(kf_group_err - kf_mod_err); @@ -2119,15 +2111,16 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; TWO_PASS *const twopass = &cpi->twopass; + GF_GROUP *const gf_group = &twopass->gf_group; int frames_left; FIRSTPASS_STATS this_frame; FIRSTPASS_STATS this_frame_copy; int target_rate; - LAYER_CONTEXT *lc = NULL; + LAYER_CONTEXT *const lc = is_spatial_svc(cpi) ? + &cpi->svc.layer_context[cpi->svc.spatial_layer_id] : 0; - if (is_spatial_svc(cpi)) { - lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; + if (lc != NULL) { frames_left = (int)(twopass->total_stats.count - lc->current_video_frame_in_layer); } else { @@ -2140,10 +2133,10 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { // If this is an arf frame then we dont want to read the stats file or // advance the input pointer as we already have what we need. - if (twopass->gf_group.update_type[twopass->gf_group.index] == ARF_UPDATE) { + if (gf_group->update_type[gf_group->index] == ARF_UPDATE) { int target_rate; configure_buffer_updates(cpi); - target_rate = twopass->gf_group.bit_allocation[twopass->gf_group.index]; + target_rate = gf_group->bit_allocation[gf_group->index]; target_rate = vp9_rc_clamp_pframe_target_size(cpi, target_rate); rc->base_frame_target = target_rate; @@ -2154,7 +2147,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { vp9_rc_set_frame_target(cpi, target_rate); cm->frame_type = INTER_FRAME; - if (is_spatial_svc(cpi)) { + if (lc != NULL) { if (cpi->svc.spatial_layer_id == 0) { lc->is_key_frame = 0; } else { @@ -2170,7 +2163,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { vp9_clear_system_state(); - if (is_spatial_svc(cpi) && twopass->kf_intra_err_min == 0) { + if (lc != NULL && twopass->kf_intra_err_min == 0) { twopass->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs; twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs; } @@ -2178,8 +2171,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { if (cpi->oxcf.rc_mode == VPX_Q) { twopass->active_worst_quality = cpi->oxcf.cq_level; } else if (cm->current_video_frame == 0 || - (is_spatial_svc(cpi) && - lc->current_video_frame_in_layer == 0)) { + (lc != NULL && lc->current_video_frame_in_layer == 0)) { // Special case code for first frame. const int section_target_bandwidth = (int)(twopass->bits_left / frames_left); @@ -2205,7 +2197,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { cm->frame_type = INTER_FRAME; } - if (is_spatial_svc(cpi)) { + if (lc != NULL) { if (cpi->svc.spatial_layer_id == 0) { lc->is_key_frame = (cm->frame_type == KEY_FRAME); if (lc->is_key_frame) @@ -2236,13 +2228,13 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { } rc->frames_till_gf_update_due = rc->baseline_gf_interval; - if (!is_spatial_svc(cpi)) + if (lc != NULL) cpi->refresh_golden_frame = 1; } configure_buffer_updates(cpi); - target_rate = twopass->gf_group.bit_allocation[twopass->gf_group.index]; + target_rate = gf_group->bit_allocation[gf_group->index]; if (cpi->common.frame_type == KEY_FRAME) target_rate = vp9_rc_clamp_iframe_target_size(cpi, target_rate); else diff --git a/source/libvpx/vp9/encoder/vp9_pickmode.c b/source/libvpx/vp9/encoder/vp9_pickmode.c index e5469c8..5646f5b 100644 --- a/source/libvpx/vp9/encoder/vp9_pickmode.c +++ b/source/libvpx/vp9/encoder/vp9_pickmode.c @@ -254,7 +254,8 @@ static int get_pred_buffer(PRED_BUFFER *p, int len) { } static void free_pred_buffer(PRED_BUFFER *p) { - p->in_use = 0; + if (p != NULL) + p->in_use = 0; } static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x, @@ -671,8 +672,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, skip_txfm = x->skip_txfm[0]; if (cpi->sf.reuse_inter_pred_sby) { - if (best_pred != NULL) - free_pred_buffer(best_pred); + free_pred_buffer(best_pred); best_pred = this_mode_pred; } @@ -692,7 +692,8 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // If best prediction is not in dst buf, then copy the prediction block from // temp buf to dst buf. - if (cpi->sf.reuse_inter_pred_sby && best_pred->data != orig_dst.buf) { + if (best_pred != NULL && cpi->sf.reuse_inter_pred_sby && + best_pred->data != orig_dst.buf) { uint8_t *copy_from, *copy_to; pd->dst = orig_dst; diff --git a/source/libvpx/vp9/encoder/vp9_ratectrl.c b/source/libvpx/vp9/encoder/vp9_ratectrl.c index 9da2ade..b926a58 100644 --- a/source/libvpx/vp9/encoder/vp9_ratectrl.c +++ b/source/libvpx/vp9/encoder/vp9_ratectrl.c @@ -646,7 +646,6 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, int q; if (frame_is_intra_only(cm)) { - active_best_quality = rc->best_quality; // Handle the special case for key frames forced when we have reached // the maximum key frame interval. Here force the Q to a range diff --git a/source/libvpx/vp9/encoder/vp9_rdopt.c b/source/libvpx/vp9/encoder/vp9_rdopt.c index e368037..cfda964 100644 --- a/source/libvpx/vp9/encoder/vp9_rdopt.c +++ b/source/libvpx/vp9/encoder/vp9_rdopt.c @@ -490,24 +490,24 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, {INT64_MAX, INT64_MAX}, {INT64_MAX, INT64_MAX}, {INT64_MAX, INT64_MAX}}; - TX_SIZE n, m; + int n, m; int s0, s1; const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode]; int64_t best_rd = INT64_MAX; - TX_SIZE best_tx = TX_4X4; + TX_SIZE best_tx = max_tx_size; const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs); assert(skip_prob > 0); s0 = vp9_cost_bit(skip_prob, 0); s1 = vp9_cost_bit(skip_prob, 1); - for (n = TX_4X4; n <= max_tx_size; n++) { + for (n = max_tx_size; n >= 0; n--) { txfm_rd_in_plane(x, &r[n][0], &d[n], &s[n], &sse[n], ref_best_rd, 0, bs, n, cpi->sf.use_fast_coef_costing); r[n][1] = r[n][0]; if (r[n][0] < INT_MAX) { - for (m = 0; m <= n - (n == max_tx_size); m++) { + for (m = 0; m <= n - (n == (int) max_tx_size); m++) { if (m == n) r[n][1] += vp9_cost_zero(tx_probs[m]); else @@ -523,6 +523,13 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]); } + // Early termination in transform size search. + if (cpi->sf.tx_size_search_breakout && + (rd[n][1] == INT64_MAX || + (n < (int) max_tx_size && rd[n][1] > rd[n + 1][1]) || + s[n] == 1)) + break; + if (rd[n][1] < best_rd) { best_tx = n; best_rd = rd[n][1]; @@ -2523,10 +2530,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int64_t dist_uv[TX_SIZES]; int skip_uv[TX_SIZES]; PREDICTION_MODE mode_uv[TX_SIZES]; - int64_t mode_distortions[MB_MODE_COUNT] = {-1}; int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q); - const int bws = num_8x8_blocks_wide_lookup[bsize] / 2; - const int bhs = num_8x8_blocks_high_lookup[bsize] / 2; int best_skip2 = 0; int mode_skip_mask = 0; int mode_skip_start = cpi->sf.mode_skip_start + 1; @@ -2613,18 +2617,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } } - // TODO(JBB): This is to make up for the fact that we don't have sad - // functions that work when the block size reads outside the umv. We - // should fix this either by making the motion search just work on - // a representative block in the boundary ( first ) and then implement a - // function that does sads when inside the border.. - if ((mi_row + bhs) > cm->mi_rows || (mi_col + bws) > cm->mi_cols) { - const int new_modes_mask = - (1 << THR_NEWMV) | (1 << THR_NEWG) | (1 << THR_NEWA) | - (1 << THR_COMP_NEWLA) | (1 << THR_COMP_NEWGA); - mode_skip_mask |= new_modes_mask; - } - if (bsize > cpi->sf.max_intra_bsize) { const int all_intra_modes = (1 << THR_DC) | (1 << THR_TM) | (1 << THR_H_PRED) | (1 << THR_V_PRED) | (1 << THR_D135_PRED) | @@ -2647,6 +2639,12 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int64_t total_sse = INT64_MAX; int early_term = 0; + this_mode = vp9_mode_order[mode_index].mode; + ref_frame = vp9_mode_order[mode_index].ref_frame[0]; + if (ref_frame != INTRA_FRAME && !(inter_mode_mask & (1 << this_mode))) + continue; + second_ref_frame = vp9_mode_order[mode_index].ref_frame[1]; + // Look at the reference frame of the best mode so far and set the // skip mask to look at a subset of the remaining modes. if (mode_index == mode_skip_start && best_mode_index >= 0) { @@ -2668,6 +2666,13 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, break; } } + + if (cpi->sf.alt_ref_search_fp && cpi->rc.is_src_frame_alt_ref) { + mode_skip_mask = 0; + if (!(ref_frame == ALTREF_FRAME && second_ref_frame == NONE)) + continue; + } + if (mode_skip_mask & (1 << mode_index)) continue; @@ -2676,12 +2681,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, rd_thresh_freq_fact[mode_index])) continue; - this_mode = vp9_mode_order[mode_index].mode; - ref_frame = vp9_mode_order[mode_index].ref_frame[0]; - if (ref_frame != INTRA_FRAME && !(inter_mode_mask & (1 << this_mode))) - continue; - second_ref_frame = vp9_mode_order[mode_index].ref_frame[1]; - if (cpi->sf.motion_field_mode_search) { const int mi_width = MIN(num_8x8_blocks_wide_lookup[bsize], tile->mi_col_end - mi_col); @@ -2909,12 +2908,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, best_filter_rd[i] = MIN(best_filter_rd[i], this_rd); } - // Store the respective mode distortions for later use. - if (mode_distortions[this_mode] == -1 - || distortion2 < mode_distortions[this_mode]) { - mode_distortions[this_mode] = distortion2; - } - // Did this mode help.. i.e. is it the new best mode if (this_rd < best_rd || x->skip) { int max_plane = MAX_MB_PLANE; diff --git a/source/libvpx/vp9/encoder/vp9_speed_features.c b/source/libvpx/vp9/encoder/vp9_speed_features.c index 6ac511d..57835ec 100644 --- a/source/libvpx/vp9/encoder/vp9_speed_features.c +++ b/source/libvpx/vp9/encoder/vp9_speed_features.c @@ -50,8 +50,20 @@ enum { (1 << THR_GOLD) }; +// Intra only frames, golden frames (except alt ref overlays) and +// alt ref frames tend to be coded at a higher than ambient quality +static int frame_is_boosted(const VP9_COMP *cpi) { + return frame_is_intra_only(&cpi->common) || + cpi->refresh_alt_ref_frame || + (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref) || + vp9_is_upper_layer_key_frame(cpi); +} + + static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, SPEED_FEATURES *sf, int speed) { + const int boosted = frame_is_boosted(cpi); + sf->adaptive_rd_thresh = 1; sf->recode_loop = (speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW; sf->allow_skip_recode = 1; @@ -59,8 +71,6 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, if (speed >= 1) { sf->use_square_partition_only = !frame_is_intra_only(cm); sf->less_rectangular_check = 1; - sf->tx_size_search_method = frame_is_boosted(cpi) ? USE_FULL_RD - : USE_LARGESTALL; if (MIN(cm->width, cm->height) >= 720) sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT @@ -80,9 +90,14 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; + + sf->tx_size_search_breakout = 1; } if (speed >= 2) { + sf->tx_size_search_method = frame_is_boosted(cpi) ? USE_FULL_RD + : USE_LARGESTALL; + if (MIN(cm->width, cm->height) >= 720) { sf->lf_motion_threshold = LOW_MOTION_THRESHOLD; sf->last_partitioning_redo_frequency = 3; @@ -102,7 +117,7 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, FLAG_SKIP_INTRA_LOWVAR; sf->disable_filter_search_var_thresh = 100; sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; + sf->auto_min_max_partition_size = CONSTRAIN_NEIGHBORING_MIN_MAX; sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; sf->adjust_partitioning_from_last_frame = 1; } @@ -117,9 +132,10 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; } sf->adaptive_pred_interp_filter = 0; - sf->cb_partition_search = frame_is_boosted(cpi) ? 0 : 1; + sf->cb_partition_search = !boosted; sf->cb_pred_filter_search = 1; - sf->motion_field_mode_search = frame_is_boosted(cpi) ? 0 : 1; + sf->alt_ref_search_fp = 1; + sf->motion_field_mode_search = !boosted; sf->lf_motion_threshold = LOW_MOTION_THRESHOLD; sf->last_partitioning_redo_frequency = 3; sf->recode_loop = ALLOW_RECODE_KFMAXBW; @@ -347,6 +363,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->cb_pred_filter_search = 0; sf->cb_partition_search = 0; sf->motion_field_mode_search = 0; + sf->alt_ref_search_fp = 0; sf->use_quant_fp = 0; sf->reference_masking = 0; sf->partition_search_type = SEARCH_PARTITION; @@ -389,6 +406,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { // Recode loop tolerence %. sf->recode_tolerance = 25; sf->default_interp_filter = SWITCHABLE; + sf->tx_size_search_breakout = 0; if (oxcf->mode == REALTIME) { set_rt_speed_feature(cpi, sf, oxcf->speed, oxcf->content); diff --git a/source/libvpx/vp9/encoder/vp9_speed_features.h b/source/libvpx/vp9/encoder/vp9_speed_features.h index 7334874..bad956d 100644 --- a/source/libvpx/vp9/encoder/vp9_speed_features.h +++ b/source/libvpx/vp9/encoder/vp9_speed_features.h @@ -63,7 +63,8 @@ typedef enum { typedef enum { NOT_IN_USE = 0, RELAXED_NEIGHBORING_MIN_MAX = 1, - STRICT_NEIGHBORING_MIN_MAX = 2 + CONSTRAIN_NEIGHBORING_MIN_MAX = 2, + STRICT_NEIGHBORING_MIN_MAX = 3 } AUTO_MIN_MAX_MODE; typedef enum { @@ -290,6 +291,8 @@ typedef struct SPEED_FEATURES { int motion_field_mode_search; + int alt_ref_search_fp; + // Fast quantization process path int use_quant_fp; @@ -373,6 +376,10 @@ typedef struct SPEED_FEATURES { // default interp filter choice INTERP_FILTER default_interp_filter; + + // Early termination in transform size search, which only applies while + // tx_size_search_method is USE_FULL_RD. + int tx_size_search_breakout; } SPEED_FEATURES; struct VP9_COMP; diff --git a/source/libvpx/vp9/encoder/vp9_svc_layercontext.c b/source/libvpx/vp9/encoder/vp9_svc_layercontext.c index 52f6cda..fb52d1a 100644 --- a/source/libvpx/vp9/encoder/vp9_svc_layercontext.c +++ b/source/libvpx/vp9/encoder/vp9_svc_layercontext.c @@ -69,8 +69,8 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { lc->gold_ref_idx = -1; } - lrc->buffer_level = vp9_rescale((int)(oxcf->starting_buffer_level_ms), - lc->target_bandwidth, 1000); + lrc->buffer_level = oxcf->starting_buffer_level_ms * + lc->target_bandwidth / 1000; lrc->bits_off_target = lrc->buffer_level; } diff --git a/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c b/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c index 487deef..b6bcdd9 100644 --- a/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c +++ b/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c @@ -2891,11 +2891,11 @@ void vp9_fdct32x32_1_sse2(const int16_t *input, int16_t *output, int stride) { #define FDCT32x32_2D vp9_fdct32x32_rd_sse2 #define FDCT32x32_HIGH_PRECISION 0 #include "vp9/encoder/x86/vp9_dct32x32_sse2.c" -#undef FDCT32x32_2D #undef FDCT32x32_HIGH_PRECISION +#undef FDCT32x32_2D #define FDCT32x32_2D vp9_fdct32x32_sse2 #define FDCT32x32_HIGH_PRECISION 1 #include "vp9/encoder/x86/vp9_dct32x32_sse2.c" // NOLINT -#undef FDCT32x32_2D #undef FDCT32x32_HIGH_PRECISION +#undef FDCT32x32_2D diff --git a/source/libvpx/vp9/vp9_cx_iface.c b/source/libvpx/vp9/vp9_cx_iface.c index cdbb69b..1716053 100644 --- a/source/libvpx/vp9/vp9_cx_iface.c +++ b/source/libvpx/vp9/vp9_cx_iface.c @@ -325,6 +325,7 @@ static vpx_codec_err_t set_encoder_config( VP9EncoderConfig *oxcf, const vpx_codec_enc_cfg_t *cfg, const struct vp9_extracfg *extra_cfg) { + const int is_vbr = cfg->rc_end_usage == VPX_VBR; oxcf->profile = cfg->g_profile; oxcf->width = cfg->g_w; oxcf->height = cfg->g_h; @@ -334,17 +335,16 @@ static vpx_codec_err_t set_encoder_config( if (oxcf->init_framerate > 180) oxcf->init_framerate = 30; + oxcf->mode = BEST; + switch (cfg->g_pass) { case VPX_RC_ONE_PASS: - oxcf->mode = ONE_PASS_GOOD; oxcf->pass = 0; break; case VPX_RC_FIRST_PASS: - oxcf->mode = TWO_PASS_FIRST; oxcf->pass = 1; break; case VPX_RC_LAST_PASS: - oxcf->mode = TWO_PASS_SECOND_BEST; oxcf->pass = 2; break; } @@ -371,9 +371,9 @@ static vpx_codec_err_t set_encoder_config( oxcf->scaled_frame_width = cfg->rc_scaled_width; oxcf->scaled_frame_height = cfg->rc_scaled_height; - oxcf->maximum_buffer_size_ms = cfg->rc_buf_sz; - oxcf->starting_buffer_level_ms = cfg->rc_buf_initial_sz; - oxcf->optimal_buffer_level_ms = cfg->rc_buf_optimal_sz; + oxcf->maximum_buffer_size_ms = is_vbr ? 240000 : cfg->rc_buf_sz; + oxcf->starting_buffer_level_ms = is_vbr ? 60000 : cfg->rc_buf_initial_sz; + oxcf->optimal_buffer_level_ms = is_vbr ? 60000 : cfg->rc_buf_optimal_sz; oxcf->drop_frames_water_mark = cfg->rc_dropframe_thresh; @@ -668,7 +668,6 @@ static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx, ctx->priv = &priv->base; ctx->priv->sz = sizeof(*ctx->priv); - ctx->priv->iface = ctx->iface; ctx->priv->alg_priv = priv; ctx->priv->init_flags = ctx->init_flags; ctx->priv->enc.total_encoders = 1; @@ -718,31 +717,36 @@ static vpx_codec_err_t encoder_destroy(vpx_codec_alg_priv_t *ctx) { return VPX_CODEC_OK; } -static void pick_quickcompress_mode(vpx_codec_alg_priv_t *ctx, +static void pick_quickcompress_mode(vpx_codec_alg_priv_t *ctx, unsigned long duration, unsigned long deadline) { - // Use best quality mode if no deadline is given. - MODE new_qc = ONE_PASS_BEST; - - if (deadline) { - // Convert duration parameter from stream timebase to microseconds - const uint64_t duration_us = (uint64_t)duration * 1000000 * - (uint64_t)ctx->cfg.g_timebase.num / - (uint64_t)ctx->cfg.g_timebase.den; - - // If the deadline is more that the duration this frame is to be shown, - // use good quality mode. Otherwise use realtime mode. - new_qc = (deadline > duration_us) ? ONE_PASS_GOOD : REALTIME; - } + MODE new_mode = BEST; - if (ctx->cfg.g_pass == VPX_RC_FIRST_PASS) - new_qc = TWO_PASS_FIRST; - else if (ctx->cfg.g_pass == VPX_RC_LAST_PASS) - new_qc = (new_qc == ONE_PASS_BEST) ? TWO_PASS_SECOND_BEST - : TWO_PASS_SECOND_GOOD; + switch (ctx->cfg.g_pass) { + case VPX_RC_ONE_PASS: + if (deadline > 0) { + const vpx_codec_enc_cfg_t *const cfg = &ctx->cfg; + + // Convert duration parameter from stream timebase to microseconds. + const uint64_t duration_us = (uint64_t)duration * 1000000 * + (uint64_t)cfg->g_timebase.num /(uint64_t)cfg->g_timebase.den; + + // If the deadline is more that the duration this frame is to be shown, + // use good quality mode. Otherwise use realtime mode. + new_mode = (deadline > duration_us) ? GOOD : REALTIME; + } else { + new_mode = BEST; + } + break; + case VPX_RC_FIRST_PASS: + break; + case VPX_RC_LAST_PASS: + new_mode = deadline > 0 ? GOOD : BEST; + break; + } - if (ctx->oxcf.mode != new_qc) { - ctx->oxcf.mode = new_qc; + if (ctx->oxcf.mode != new_mode) { + ctx->oxcf.mode = new_mode; vp9_change_config(ctx->cpi, &ctx->oxcf); } } diff --git a/source/libvpx/vp9/vp9_dx_iface.c b/source/libvpx/vp9/vp9_dx_iface.c index 4372ac9..bb2bb10 100644 --- a/source/libvpx/vp9/vp9_dx_iface.c +++ b/source/libvpx/vp9/vp9_dx_iface.c @@ -66,7 +66,6 @@ static vpx_codec_err_t decoder_init(vpx_codec_ctx_t *ctx, ctx->priv = (vpx_codec_priv_t *)alg_priv; ctx->priv->sz = sizeof(*ctx->priv); - ctx->priv->iface = ctx->iface; ctx->priv->alg_priv = alg_priv; ctx->priv->alg_priv->si.sz = sizeof(ctx->priv->alg_priv->si); ctx->priv->init_flags = ctx->init_flags; @@ -332,81 +331,6 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_OK; } -static INLINE uint8_t read_marker(vpx_decrypt_cb decrypt_cb, - void *decrypt_state, - const uint8_t *data) { - if (decrypt_cb) { - uint8_t marker; - decrypt_cb(decrypt_state, data, &marker, 1); - return marker; - } - return *data; -} - -static vpx_codec_err_t parse_superframe_index(const uint8_t *data, - size_t data_sz, - uint32_t sizes[8], int *count, - vpx_decrypt_cb decrypt_cb, - void *decrypt_state) { - // A chunk ending with a byte matching 0xc0 is an invalid chunk unless - // it is a super frame index. If the last byte of real video compression - // data is 0xc0 the encoder must add a 0 byte. If we have the marker but - // not the associated matching marker byte at the front of the index we have - // an invalid bitstream and need to return an error. - - uint8_t marker; - - assert(data_sz); - marker = read_marker(decrypt_cb, decrypt_state, data + data_sz - 1); - *count = 0; - - if ((marker & 0xe0) == 0xc0) { - const uint32_t frames = (marker & 0x7) + 1; - const uint32_t mag = ((marker >> 3) & 0x3) + 1; - const size_t index_sz = 2 + mag * frames; - - // This chunk is marked as having a superframe index but doesn't have - // enough data for it, thus it's an invalid superframe index. - if (data_sz < index_sz) - return VPX_CODEC_CORRUPT_FRAME; - - { - const uint8_t marker2 = read_marker(decrypt_cb, decrypt_state, - data + data_sz - index_sz); - - // This chunk is marked as having a superframe index but doesn't have - // the matching marker byte at the front of the index therefore it's an - // invalid chunk. - if (marker != marker2) - return VPX_CODEC_CORRUPT_FRAME; - } - - { - // Found a valid superframe index. - uint32_t i, j; - const uint8_t *x = &data[data_sz - index_sz + 1]; - - // Frames has a maximum of 8 and mag has a maximum of 4. - uint8_t clear_buffer[32]; - assert(sizeof(clear_buffer) >= frames * mag); - if (decrypt_cb) { - decrypt_cb(decrypt_state, x, clear_buffer, frames * mag); - x = clear_buffer; - } - - for (i = 0; i < frames; ++i) { - uint32_t this_sz = 0; - - for (j = 0; j < mag; ++j) - this_sz |= (*x++) << (j * 8); - sizes[i] = this_sz; - } - *count = frames; - } - } - return VPX_CODEC_OK; -} - static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx, const uint8_t *data, unsigned int data_sz, void *user_priv, long deadline) { @@ -424,8 +348,8 @@ static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx, // Reset flushed when receiving a valid frame. ctx->flushed = 0; - res = parse_superframe_index(data, data_sz, frame_sizes, &frame_count, - ctx->decrypt_cb, ctx->decrypt_state); + res = vp9_parse_superframe_index(data, data_sz, frame_sizes, &frame_count, + ctx->decrypt_cb, ctx->decrypt_state); if (res != VPX_CODEC_OK) return res; diff --git a/source/libvpx/vpx/internal/vpx_codec_internal.h b/source/libvpx/vpx/internal/vpx_codec_internal.h index a7716d1..95119df 100644 --- a/source/libvpx/vpx/internal/vpx_codec_internal.h +++ b/source/libvpx/vpx/internal/vpx_codec_internal.h @@ -338,7 +338,6 @@ typedef struct vpx_codec_priv_cb_pair { */ struct vpx_codec_priv { unsigned int sz; - vpx_codec_iface_t *iface; struct vpx_codec_alg_priv *alg_priv; const char *err_detail; vpx_codec_flags_t init_flags; @@ -347,7 +346,6 @@ struct vpx_codec_priv { vpx_codec_priv_cb_pair_t put_slice_cb; } dec; struct { - int tbd; struct vpx_fixed_buf cx_data_dst_buf; unsigned int cx_data_pad_before; unsigned int cx_data_pad_after; diff --git a/source/libvpx/vpx/src/vpx_decoder.c b/source/libvpx/vpx/src/vpx_decoder.c index 4d22a08..b19c440 100644 --- a/source/libvpx/vpx/src/vpx_decoder.c +++ b/source/libvpx/vpx/src/vpx_decoder.c @@ -54,9 +54,6 @@ vpx_codec_err_t vpx_codec_dec_init_ver(vpx_codec_ctx_t *ctx, ctx->err_detail = ctx->priv ? ctx->priv->err_detail : NULL; vpx_codec_destroy(ctx); } - - if (ctx->priv) - ctx->priv->iface = ctx->iface; } return SAVE_STATUS(ctx, res); diff --git a/source/libvpx/vpx/src/vpx_encoder.c b/source/libvpx/vpx/src/vpx_encoder.c index 6e18bd1..5773455 100644 --- a/source/libvpx/vpx/src/vpx_encoder.c +++ b/source/libvpx/vpx/src/vpx_encoder.c @@ -53,9 +53,6 @@ vpx_codec_err_t vpx_codec_enc_init_ver(vpx_codec_ctx_t *ctx, ctx->err_detail = ctx->priv ? ctx->priv->err_detail : NULL; vpx_codec_destroy(ctx); } - - if (ctx->priv) - ctx->priv->iface = ctx->iface; } return SAVE_STATUS(ctx, res); @@ -135,9 +132,6 @@ vpx_codec_err_t vpx_codec_enc_init_multi_ver(vpx_codec_ctx_t *ctx, } } - if (ctx->priv) - ctx->priv->iface = ctx->iface; - if (res) break; diff --git a/source/libvpx/vpx/src/vpx_image.c b/source/libvpx/vpx/src/vpx_image.c index 8c7e3cf..e20703a 100644 --- a/source/libvpx/vpx/src/vpx_image.c +++ b/source/libvpx/vpx/src/vpx_image.c @@ -8,38 +8,12 @@ * be found in the AUTHORS file in the root of the source tree. */ - #include <stdlib.h> #include <string.h> + #include "vpx/vpx_image.h" #include "vpx/vpx_integer.h" - -#define ADDRESS_STORAGE_SIZE sizeof(size_t) -/*returns an addr aligned to the byte boundary specified by align*/ -#define align_addr(addr,align) (void*)(((size_t)(addr) + ((align) - 1)) & (size_t)-(align)) - -/* Memalign code is copied from vpx_mem.c */ -static void *img_buf_memalign(size_t align, size_t size) { - void *addr, - * x = NULL; - - addr = malloc(size + align - 1 + ADDRESS_STORAGE_SIZE); - - if (addr) { - x = align_addr((unsigned char *)addr + ADDRESS_STORAGE_SIZE, (int)align); - /* save the actual malloc address */ - ((size_t *)x)[-1] = (size_t)addr; - } - - return x; -} - -static void img_buf_free(void *memblk) { - if (memblk) { - void *addr = (void *)(((size_t *)memblk)[-1]); - free(addr); - } -} +#include "vpx_mem/vpx_mem.h" static vpx_image_t *img_alloc_helper(vpx_image_t *img, vpx_img_fmt_t fmt, @@ -172,7 +146,7 @@ static vpx_image_t *img_alloc_helper(vpx_image_t *img, if (alloc_size != (size_t)alloc_size) goto fail; - img->img_data = img_buf_memalign(buf_align, (size_t)alloc_size); + img->img_data = (uint8_t *)vpx_memalign(buf_align, (size_t)alloc_size); img->img_data_owner = 1; } @@ -296,7 +270,7 @@ void vpx_img_flip(vpx_image_t *img) { void vpx_img_free(vpx_image_t *img) { if (img) { if (img->img_data && img->img_data_owner) - img_buf_free(img->img_data); + vpx_free(img->img_data); if (img->self_allocd) free(img); diff --git a/source/libvpx/vpx_ports/vpx_timer.h b/source/libvpx/vpx_ports/vpx_timer.h index 870338b..dd98e29 100644 --- a/source/libvpx/vpx_ports/vpx_timer.h +++ b/source/libvpx/vpx_ports/vpx_timer.h @@ -11,6 +11,9 @@ #ifndef VPX_PORTS_VPX_TIMER_H_ #define VPX_PORTS_VPX_TIMER_H_ + +#include "./vpx_config.h" + #include "vpx/vpx_integer.h" #if CONFIG_OS_SUPPORT diff --git a/source/libvpx/vpxdec.c b/source/libvpx/vpxdec.c index faee42a..6c822ab 100644 --- a/source/libvpx/vpxdec.c +++ b/source/libvpx/vpxdec.c @@ -15,13 +15,16 @@ #include <string.h> #include <limits.h> +#include "./vpx_config.h" + +#if CONFIG_LIBYUV #include "third_party/libyuv/include/libyuv/scale.h" +#endif #include "./args.h" #include "./ivfdec.h" #define VPX_CODEC_DISABLE_COMPAT 1 -#include "./vpx_config.h" #include "vpx/vpx_decoder.h" #include "vpx_ports/mem_ops.h" #include "vpx_ports/vpx_timer.h" @@ -123,6 +126,7 @@ static const arg_def_t *vp8_pp_args[] = { }; #endif +#if CONFIG_LIBYUV static INLINE int vpx_image_scale(vpx_image_t *src, vpx_image_t *dst, FilterModeEnum mode) { assert(src->fmt == VPX_IMG_FMT_I420); @@ -137,6 +141,7 @@ static INLINE int vpx_image_scale(vpx_image_t *src, vpx_image_t *dst, dst->d_w, dst->d_h, mode); } +#endif void usage_exit() { int i; @@ -538,7 +543,8 @@ int main_loop(int argc, const char **argv_) { struct VpxDecInputContext input = {NULL, NULL}; struct VpxInputContext vpx_input_ctx; #if CONFIG_WEBM_IO - struct WebmInputContext webm_ctx = {0}; + struct WebmInputContext webm_ctx; + memset(&(webm_ctx), 0, sizeof(webm_ctx)); input.webm_ctx = &webm_ctx; #endif input.vpx_input_ctx = &vpx_input_ctx; diff --git a/source/libvpx/vpxenc.c b/source/libvpx/vpxenc.c index 7e037a6..b99e61a 100644 --- a/source/libvpx/vpxenc.c +++ b/source/libvpx/vpxenc.c @@ -19,12 +19,15 @@ #include <stdlib.h> #include <string.h> +#if CONFIG_LIBYUV +#include "third_party/libyuv/include/libyuv/scale.h" +#endif + #include "vpx/vpx_encoder.h" #if CONFIG_DECODERS #include "vpx/vpx_decoder.h" #endif -#include "third_party/libyuv/include/libyuv/scale.h" #include "./args.h" #include "./ivfenc.h" #include "./tools_common.h" |